Skip to content

Commit 3c82a48

Browse files
committed
Fix incorrect parsing after deleting non-ascii text
1 parent 57fdab0 commit 3c82a48

File tree

2 files changed

+71
-51
lines changed

2 files changed

+71
-51
lines changed

lisp/tree-sitter-tests.el

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,14 @@
3232
(directory-file-name
3333
(file-name-directory (locate-library "tree-sitter")))) relative-path))
3434

35-
(defun ts-test-tree-sexp (sexp)
36-
"Check that the current syntax tree's sexp representation is SEXP."
37-
(should (equal (read (ts-tree-to-sexp tree-sitter-tree)) sexp)))
35+
(defun ts-test-tree-sexp (sexp &optional reset)
36+
"Check that the current syntax tree's sexp representation is SEXP.
37+
If RESET is non-nil, also do another full parse and check again."
38+
(should (equal (read (ts-tree-to-sexp tree-sitter-tree)) sexp))
39+
(when reset
40+
(setq tree-sitter-tree nil)
41+
(tree-sitter--do-parse)
42+
(ts-test-tree-sexp sexp)))
3843

3944
(defun ts-test-use-lang (lang-symbol)
4045
"Turn on `tree-sitter-mode' in the current buffer, using language LANG-SYMBOL."
@@ -151,7 +156,15 @@
151156
(upcase-initials-region beg end)
152157
(ts-test-tree-sexp orig-sexp)
153158
(downcase-region beg end)
154-
(ts-test-tree-sexp orig-sexp))))
159+
(ts-test-tree-sexp orig-sexp :reset))))
160+
161+
(ert-deftest minor-mode::incremental:delete-non-ascii-text ()
162+
(ts-test-lang-with-file 'rust "lisp/test-files/delete-non-ascii-text.rs"
163+
(let* ((orig-sexp (read (ts-tree-to-sexp tree-sitter-tree)))
164+
(end (re-search-forward "ấấấấấấấấ"))
165+
(beg (match-beginning 0)))
166+
(delete-region beg end)
167+
(ts-test-tree-sexp orig-sexp :reset))))
155168

156169
(ert-deftest node::eq ()
157170
(ts-test-with 'rust parser

lisp/tree-sitter.el

Lines changed: 54 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -62,73 +62,80 @@ Use this to enable other minor modes that depends on the syntax tree."
6262
(defvar-local tree-sitter-language nil
6363
"Tree-sitter language.")
6464

65-
(defvar-local tree-sitter--start-byte nil)
66-
(defvar-local tree-sitter--old-end-byte nil)
67-
(defvar-local tree-sitter--new-end-byte nil)
65+
(defvar-local tree-sitter--text-before-change nil)
6866

69-
(defvar-local tree-sitter--start-point nil)
70-
(defvar-local tree-sitter--old-end-point nil)
71-
(defvar-local tree-sitter--new-end-point nil)
67+
(defvar-local tree-sitter--beg-before-change nil)
7268

73-
(defun tree-sitter--before-change (beg end)
69+
(defun tree-sitter--before-change (beg old-end)
7470
"Update relevant editing states. Installed on `before-change-functions'.
75-
BEG and END are the begin and end of the text to be changed."
76-
(setq tree-sitter--start-byte (position-bytes beg)
77-
tree-sitter--old-end-byte (position-bytes end))
78-
(ts--save-context
79-
;; TODO: Keep mutating the same objects instead of creating a new one each time.
80-
(setq tree-sitter--start-point (ts--point-from-position beg)
81-
tree-sitter--old-end-point (ts--point-from-position end))))
71+
BEG and OLD-END are the begin and end positions of the text to be changed."
72+
(setq tree-sitter--beg-before-change beg)
73+
(ts--without-restriction
74+
;; TODO: Fall back to a full parse if this region is too big.
75+
(setq tree-sitter--text-before-change
76+
(buffer-substring-no-properties beg old-end))))
8277

8378
;;; TODO: How do we batch *after* hooks to re-parse only once? Maybe using
8479
;;; `run-with-idle-timer' with 0-second timeout?
8580
;;;
8681
;;; XXX: Figure out how to detect whether it was a text-property-only change.
8782
;;; There's no point in reparsing in these situations.
88-
(defun tree-sitter--after-change (beg end length)
83+
(defun tree-sitter--after-change (beg new-end old-len)
8984
"Update relevant editing states and reparse the buffer (incrementally).
9085
Installed on `after-change-functions'.
9186
92-
END is the end of the changed text."
93-
(ts--save-context
94-
(setq tree-sitter--start-byte (position-bytes beg)
95-
tree-sitter--start-point (ts--point-from-position beg)
96-
tree-sitter--new-end-byte (position-bytes end)
97-
tree-sitter--new-end-point (ts--point-from-position end))
98-
;; The enclosing region passed to `before-change-functions' can be inexact,
99-
;; which can be larger than the actual changes. One example is
100-
;; `upcase-initials-region'. Therefore, we need to compute the exact change
101-
;; here. XXX: This is sometimes incorrect, because `ts--point-from-position'
102-
;; and `position-bytes' here look at other text in the same region, not the
103-
;; changed text. TODO FIX: Either figure out how we can get the exact
104-
;; old-end, or make `ts_tree_edit' accept position ranges.
105-
(let ((old-end (+ beg length)))
106-
;; XXX: Additionally, in case of a deletion at the end of the buffer,
107-
;; trying to compute the old-end position/byte is impossible, because the
108-
;; text is already gone. When that happens, use the old-end previously
109-
;; recorded in `before-change-functions'.
110-
(setq tree-sitter--old-end-point (or (ignore-errors
111-
(ts--point-from-position old-end))
112-
tree-sitter--old-end-point)
113-
tree-sitter--old-end-byte (or (position-bytes old-end)
114-
tree-sitter--old-end-byte))))
87+
BEG is the begin position of the change.
88+
NEW-END is the end position of the changed text.
89+
OLD-LEN is the char length of the old text."
11590
(when tree-sitter-tree
116-
(ts-edit-tree tree-sitter-tree
117-
tree-sitter--start-byte
118-
tree-sitter--old-end-byte
119-
tree-sitter--new-end-byte
120-
tree-sitter--start-point
121-
tree-sitter--old-end-point
122-
tree-sitter--new-end-point)
123-
(tree-sitter--do-parse)))
91+
(let ((beg-byte (position-bytes beg))
92+
(new-end-byte (position-bytes new-end))
93+
old-end-byte
94+
beg-point old-end-point new-end-point)
95+
(ts--save-context
96+
(setq beg-point (ts--point-from-position beg)
97+
new-end-point (ts--point-from-position new-end)))
98+
;; Compute the old text's end byte position, line number, byte column.
99+
;;
100+
;; Tree-sitter works with byte positions, line numbers, byte columns.
101+
;; Emacs primarily works with character positions. Converting the latter
102+
;; to the former, for the end of the old text, requires looking at the
103+
;; actual old text's content. Tree-sitter itself cannot do this, because
104+
;; it is designed to keep track of only the numbers, not a mirror of the
105+
;; buffer's text. Without re-designing Emacs's change tracking mechanism,
106+
;; we store the old text through `tree-sitter--before-change', and inspect
107+
;; it here. TODO XXX FIX: Improve Emacs's change tracking mechanism.
108+
(if (= old-len 0)
109+
(setq old-end-byte beg-byte
110+
old-end-point beg-point)
111+
(let ((old-text tree-sitter--text-before-change)
112+
(rel-beg (- beg tree-sitter--beg-before-change)))
113+
(with-temp-buffer
114+
(insert old-text)
115+
(pcase-let*
116+
((rel-pos (+ 1 rel-beg old-len))
117+
(rel-byte (position-bytes rel-pos))
118+
(`(,beg-line-number . ,beg-byte-column) beg-point)
119+
(`(,rel-line-number . ,rel-byte-column) (ts--point-from-position rel-pos))
120+
(old-end-line-number (+ beg-line-number
121+
rel-line-number -1))
122+
(old-end-byte-column (if (> rel-line-number 1)
123+
rel-byte-column
124+
(+ beg-byte-column rel-byte-column))))
125+
(setq old-end-byte (+ beg-byte rel-byte -1)
126+
old-end-point `(,old-end-line-number . ,old-end-byte-column))))))
127+
(ts-edit-tree tree-sitter-tree
128+
beg-byte old-end-byte new-end-byte
129+
beg-point old-end-point new-end-point)
130+
(tree-sitter--do-parse))))
124131

125132
(defun tree-sitter--do-parse ()
126133
"Parse the current buffer and update the syntax tree."
127134
(let ((old-tree tree-sitter-tree))
128135
(setq tree-sitter-tree
129136
;; https://github.com/ubolonton/emacs-tree-sitter/issues/3
130137
(ts--without-restriction
131-
(ts-parse-chunks tree-sitter-parser #'ts-buffer-input tree-sitter-tree)))
138+
(ts-parse-chunks tree-sitter-parser #'ts-buffer-input old-tree)))
132139
(run-hook-with-args 'tree-sitter-after-change-functions old-tree)))
133140

134141
(defun tree-sitter--setup ()

0 commit comments

Comments
 (0)