diff --git a/.azure-pipelines/main.yml b/.azure-pipelines/main.yml index 609b000a..2c6e2f27 100644 --- a/.azure-pipelines/main.yml +++ b/.azure-pipelines/main.yml @@ -50,7 +50,10 @@ resources: steps: - template: .azure-pipelines/steps/setup-rust.yml@emacs-module-rs - template: .azure-pipelines/steps/setup-llvm.yml@emacs-module-rs -- template: steps/-build.yml - template: .azure-pipelines/steps/setup-emacs.yml@emacs-module-rs +- template: .azure-pipelines/steps/setup-cask.yml@emacs-module-rs +- template: steps/-build.yml + parameters: + target: release - template: steps/setup-tree-sitter-cli.yml - template: steps/-test.yml diff --git a/.azure-pipelines/release.yml b/.azure-pipelines/release.yml index 367d1a5c..3fcf718d 100644 --- a/.azure-pipelines/release.yml +++ b/.azure-pipelines/release.yml @@ -33,10 +33,11 @@ jobs: steps: - template: .azure-pipelines/steps/setup-rust.yml@emacs-module-rs - template: .azure-pipelines/steps/setup-llvm.yml@emacs-module-rs + - template: .azure-pipelines/steps/setup-emacs.yml@emacs-module-rs + - template: .azure-pipelines/steps/setup-cask.yml@emacs-module-rs - template: steps/-build.yml parameters: target: release - - template: .azure-pipelines/steps/setup-emacs.yml@emacs-module-rs - template: steps/setup-tree-sitter-cli.yml - template: steps/-test.yml - template: steps/-save-binaries.yml diff --git a/.azure-pipelines/steps/-build.yml b/.azure-pipelines/steps/-build.yml index 088608b9..3c996750 100644 --- a/.azure-pipelines/steps/-build.yml +++ b/.azure-pipelines/steps/-build.yml @@ -3,11 +3,13 @@ parameters: steps: - powershell: | + cask install .\bin\build.ps1 ${{ parameters.target }} displayName: Build all packages (Windows) condition: and(succeeded(), eq(variables['Agent.OS'], 'Windows_NT')) - bash: | + cask install ./bin/build ${{ parameters.target }} displayName: Build all packages condition: and(succeeded(), ne(variables['Agent.OS'], 'Windows_NT')) diff --git a/.gitignore b/.gitignore index 0652f0c9..c579c39e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ /target **/*.rs.bk -**/*.elc +**/*.elc* **/.cask grammars/ lisp/tree-sitter-dyn.* diff --git a/.travis.yml b/.travis.yml index bd5cc5a7..0b3d19c2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -62,9 +62,12 @@ before_install: - git clone -b master https://github.com/ubolonton/evm.git $HOME/.evm - evm config path /tmp - evm install $EVM_EMACS --use --skip + - curl -fsSL https://raw.githubusercontent.com/cask/cask/master/go | python + - export PATH="$HOME/.cask/bin:$PATH" install: - - make build + - cask install + - ./bin/build before_script: - nvm install 10 diff --git a/Cargo.lock b/Cargo.lock index e9f464e1..d4031a54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -163,7 +163,7 @@ dependencies = [ [[package]] name = "emacs" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "ctor 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", @@ -189,7 +189,7 @@ dependencies = [ name = "emacs-tree-sitter" version = "0.6.0" dependencies = [ - "emacs 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", + "emacs 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)", "libloading 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -567,7 +567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum darling 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" "checksum darling_core 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" "checksum darling_macro 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" -"checksum emacs 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b3d21b0e6ceb3ab0c1e6624a27cfa76423a29a3d011e016561f5183127311f29" +"checksum emacs 0.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bfcd5f11a822b90066349fcd9a589e09449faf47dc9276b34c160b0410c0e2e3" "checksum emacs-macros 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8a891cc603f71c1c7bd58caa99194fbe9074e8012352d3fa4aa16739c6f06414" "checksum emacs_module 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a1a2ddc45eb97ac4ffba3dc6a6f547c38debdf9a9473bf46223b57b69e8a740" "checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" diff --git a/Cargo.toml b/Cargo.toml index 78bf8643..0875ebee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ name = "tree_sitter_dyn" crate-type = ["cdylib"] [dependencies] -emacs = "0.13.0" +emacs = "0.14.0" libloading = "0.5.1" tree-sitter = "0.6.3" diff --git a/Cask b/Cask index 0c96185b..03079318 100644 --- a/Cask +++ b/Cask @@ -8,3 +8,8 @@ "Cargo.toml" "Cargo.lock" "src") + +(source melpa) + +(development + (depends-on "rust-mode")) diff --git a/README.md b/README.md index 3c9dcd8e..532f7c3c 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ If you want to hack on `emacs-tree-sitter` itself, see the section [Setup for De - Show the debug view of a buffer's parse tree ```emacs-lisp (require 'tree-sitter-debug) - (tree-sitter-debug-enable) + (tree-sitter-debug-mode) ``` - Get names of all functions in a Rust file: ```emacs-lisp @@ -46,7 +46,7 @@ If you want to hack on `emacs-tree-sitter` itself, see the section [Setup for De (seq-map (lambda (capture) (pcase-let ((`(_ . ,node) capture)) (ts-node-text node))) - (tree-sitter-query [(function_item (identifier) @name)]))) + (tree-sitter-debug-query [(function_item (identifier) @name)]))) ``` - Write a simple extension to `expand-region`: ```emacs-lisp @@ -125,12 +125,13 @@ For consistency with Emacs's conventions, this binding has some differences comp + `ts-make-query`: create a new query. + `ts-make-query-cursor`: create a new query cursor. + `ts-query-matches`, `ts-query-captures`: execute a query, returning matches/captures. - + `ts-set-byte-range`, `ts-set-point-range`: limit query execution to a range. ## Setup for Development Clone this repo and add its `lisp` and `langs` directories to `load-path`. +Install [cask](https://cask.readthedocs.io) and run `cask install` to install dev dependencies. + If you want to hack on the high-level features (in Lisp) only: - Evaluate this (once) to download the necessary binaries: ```emacs-lisp diff --git a/bin/build b/bin/build index ef01e8d6..b9272d1a 100755 --- a/bin/build +++ b/bin/build @@ -20,4 +20,6 @@ source "$here/env.bash" MODULE_DIR="$PROJECT_ROOT/target/$TARGET" cp -f "$MODULE_DIR/$MODULE_ORIGINAL" "./lisp/$MODULE_RENAMED" + + cask build ) diff --git a/bin/build.ps1 b/bin/build.ps1 index 0d376ae6..82bceec0 100644 --- a/bin/build.ps1 +++ b/bin/build.ps1 @@ -17,6 +17,8 @@ Push-Location $project_root cargo build --all $extra -Pop-Location - Copy-Item $module_dir\$module_name.dll $project_root\lisp\$module_renamed.dll + +cask build + +Pop-Location diff --git a/bin/test b/bin/test index 3851a6e1..718dc897 100755 --- a/bin/test +++ b/bin/test @@ -12,7 +12,7 @@ if [[ $* == "watch" ]]; then cargo watch -s bin/build -s bin/test ) else - $EMACS --batch \ + cask emacs --batch \ --directory "$PROJECT_ROOT/lisp" \ --directory "$PROJECT_ROOT/langs" \ -l ert \ diff --git a/bin/test.ps1 b/bin/test.ps1 index 12bb716d..f7f66d6c 100644 --- a/bin/test.ps1 +++ b/bin/test.ps1 @@ -19,7 +19,7 @@ if ($args[0] -eq "watch") { # https://github.com/PowerShell/PowerShell/issues/4002 # https://stackoverflow.com/questions/2095088/error-when-calling-3rd-party-executable-from-powershell-when-using-an-ide $ErrorActionPreference = 'Continue' - emacs --batch ` + cask emacs --batch ` --directory "$project_root\lisp" ` --directory "$project_root\langs" ` -l ert ` diff --git a/langs/tree-sitter-langs-build.el b/langs/tree-sitter-langs-build.el index dd0b86aa..4cfcb7c6 100644 --- a/langs/tree-sitter-langs-build.el +++ b/langs/tree-sitter-langs-build.el @@ -88,7 +88,7 @@ If VERSION and OS are not spcified, use the defaults of (php "v0.16.1") (python "v0.16.0") (ruby "v0.16.1") - (rust "v0.16.0") + (rust "3e5ec5a") (scala "v0.13.0") (swift "a22fa5e") (typescript "v0.16.1" ("typescript" "tsx"))) @@ -181,9 +181,10 @@ This function requires git and tree-sitter CLI." (if (file-directory-p dir) (let ((default-directory dir)) (tree-sitter-langs--call "git" "remote" "-v" "update")) - (tree-sitter-langs--call "git" "clone" "-v" repo dir)) + (tree-sitter-langs--call "git" "clone" "-q" repo dir)) (let ((default-directory dir)) (tree-sitter-langs--call "git" "reset" "--hard" version) + (tree-sitter-langs--call "npm" "set" "progress=false") ;; TODO: Figure out why we need to skip `npm install' for some repos. (ignore-errors (tree-sitter-langs--call "npm" "install")) diff --git a/langs/tree-sitter-langs.el b/langs/tree-sitter-langs.el index bc265f87..393b6da0 100644 --- a/langs/tree-sitter-langs.el +++ b/langs/tree-sitter-langs.el @@ -11,20 +11,37 @@ ;;; Commentary: -;; This is a convenient bundle of language grammars for `tree-sitter'. It serves -;; as an interim distribution mechanism, until `tree-sitter' is widespread -;; enough for language major modes to include the definitions on their own. +;; This is a convenient bundle of language grammars and queries for +;; `tree-sitter'. It serves as an interim distribution mechanism, until +;; `tree-sitter' is widespread enough for language major modes to include these +;; definitions on their own. +;; +;; Basically it's a multi-step process: +;; +;; 1. `tree-sitter-langs' populates global registries of grammars and queries. +;; These global registries are defined by `tree-sitter-mode' and other +;; `tree-sitter'-based language-agnostic minor modes, to extend existing +;; major modes. +;; +;; 2. New `tree-sitter'-based language-specific minor modes use these global +;; registries to extend existing major modes. +;; +;; 3. Major modes adopt new `tree-sitter'-based features, and distribute the +;; grammars and queries on their own. They can either put these definitions +;; in the global registries, or keep using them only internally. ;;; Code: +(require 'cl-lib) + (require 'tree-sitter) (require 'tree-sitter-load) +(require 'tree-sitter-hl) (require 'tree-sitter-langs-build) (eval-when-compile - (require 'pcase) - (require 'cl-lib)) + (require 'pcase)) (defun tree-sitter-langs-ensure (lang-symbol) "Return the language object identified by LANG-SYMBOL. @@ -71,5 +88,26 @@ See `tree-sitter-langs-repos'." (setf (map-elt tree-sitter-major-mode-language-alist major-mode) lang-symbol)) +(defun tree-sitter-langs--hl-default-patterns (lang-symbol) + "Return default syntax highlighting patterns for LANG-SYMBOL." + (let ((query-path (cl-reduce + (lambda (dir name) (expand-file-name name dir)) + `("repos" ,(format "tree-sitter-%s" lang-symbol) + "queries" "highlights.scm") + :initial-value tree-sitter-langs--dir))) + (with-temp-buffer + (insert-file-contents query-path) + (buffer-string)))) + +(defun tree-sitter-langs--set-hl-default-patterns (&rest _args) + "Use syntax highlighting patterns provided by `tree-sitter-langs'." + (unless tree-sitter-hl-default-patterns + (let ((lang-symbol (alist-get major-mode tree-sitter-major-mode-language-alist))) + (setq tree-sitter-hl-default-patterns + (tree-sitter-langs--hl-default-patterns lang-symbol))))) + +(advice-add 'tree-sitter-hl--setup :before + #'tree-sitter-langs--set-hl-default-patterns) + (provide 'tree-sitter-langs) ;;; tree-sitter-langs.el ends here diff --git a/lisp/test-files/change-case-region.rs b/lisp/test-files/change-case-region.rs new file mode 100644 index 00000000..c568e085 --- /dev/null +++ b/lisp/test-files/change-case-region.rs @@ -0,0 +1,5 @@ +// Change case of "this text" repeatedly. +unsafe { + input_function.call_unprotected((bytepos, point.line_number(), point.byte_column())) + .and_then(|v| v.into_rust()) +} diff --git a/lisp/test-files/delete-non-ascii-text.rs b/lisp/test-files/delete-non-ascii-text.rs new file mode 100644 index 00000000..3124cdfe --- /dev/null +++ b/lisp/test-files/delete-non-ascii-text.rs @@ -0,0 +1,5 @@ +// Delete the 8 chars next line. +// ấấấấấấấấ +unsafe { + input_function.call_unprotected((bytepos, point.line_number(), point.byte_column())) +} diff --git a/lisp/test-files/extend-region.rs b/lisp/test-files/extend-region.rs new file mode 100644 index 00000000..8165056f --- /dev/null +++ b/lisp/test-files/extend-region.rs @@ -0,0 +1,6 @@ +macro_rules! impl_pred {} + +// In evil's normal-mode, with point after `i`, or `;`, or at the end of the file, eval +// (font-lock-flush). A correct implementation would highlight `i` and `!`. +abc +impl_pred!(foo, bar); diff --git a/lisp/test-files/types.rs b/lisp/test-files/types.rs new file mode 100644 index 00000000..e7d4eb88 --- /dev/null +++ b/lisp/test-files/types.rs @@ -0,0 +1,385 @@ +use std::{ + os, + cell::{RefCell, Ref, RefMut}, + ops::{Deref, DerefMut}, + rc::Rc, + mem, + marker::PhantomData, +}; + +use emacs::{defun, Env, Value, Result, IntoLisp, FromLisp, Vector, ErrorKind}; + +use tree_sitter::{Tree, Node, TreeCursor, Parser, Query, QueryCursor}; + +pub fn shared(t: T) -> Shared { + Rc::new(RefCell::new(t)) +} + +unsafe fn erase_lifetime<'t, T>(x: &'t T) -> &'static T { + mem::transmute(x) +} + +macro_rules! impl_newtype_traits { + ($newtype:ty, $inner:ty) => { + impl From<$inner> for $newtype { + #[inline(always)] + fn from(inner: $inner) -> Self { + Self(inner) + } + } + + impl Into<$inner> for $newtype { + #[inline(always)] + fn into(self) -> $inner { + self.0 + } + } + }; + ($name:ident) => { + impl_newtype_traits!($name, tree_sitter::$name); + }; +} + +// ------------------------------------------------------------------------------------------------- +// Point + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Point(tree_sitter::Point); + +impl_newtype_traits!(Point); + +impl IntoLisp<'_> for Point { + fn into_lisp(self, env: &Env) -> Result { + env.cons(self.line_number(), self.byte_column()) + } +} + +impl FromLisp<'_> for Point { + fn from_lisp(value: Value) -> Result { + let row = value.car::()? - 1; + let column = value.cdr()?; + Ok(tree_sitter::Point { row, column }.into()) + } +} + +impl Point { + #[inline(always)] + pub(crate) fn line_number(&self) -> usize { + self.0.row + 1 + } + + #[inline(always)] + pub(crate) fn byte_column(&self) -> usize { + self.0.column + } +} + +// ------------------------------------------------------------------------------------------------- +// Emacs Byte Position (1-based, which is different from byte offset, which is 0-based). + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct BytePos(usize); + +impl From for BytePos { + #[inline(always)] + fn from(byte_offset: usize) -> Self { + Self(byte_offset + 1) + } +} + +impl Into for BytePos { + #[inline(always)] + fn into(self) -> usize { + self.0 - 1 + } +} + +impl FromLisp<'_> for BytePos { + #[inline(always)] + fn from_lisp(value: Value) -> Result { + value.into_rust().map(Self) + } +} + +impl IntoLisp<'_> for BytePos { + #[inline(always)] + fn into_lisp(self, env: &Env) -> Result { + self.0.into_lisp(env) + } +} + +// ------------------------------------------------------------------------------------------------- +// Range + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Range(pub(crate) tree_sitter::Range); + +impl_newtype_traits!(Range); + +impl IntoLisp<'_> for Range { + fn into_lisp(self, env: &Env) -> Result { + let inner = self.0; + let start_byte_pos: BytePos = inner.start_byte.into(); + let end_byte_pos: BytePos = inner.end_byte.into(); + env.vector(( + start_byte_pos, + end_byte_pos, + Point(inner.start_point), + Point(inner.end_point), + )) + } +} + +impl FromLisp<'_> for Range { + fn from_lisp(value: Value) -> Result { + let vector: Vector = value.into_rust()?; + let start_byte = vector.get::(0)?.into(); + let end_byte = vector.get::(1)?.into(); + let start_point = vector.get::(2)?.into(); + let end_point = vector.get::(3)?.into(); + Ok(tree_sitter::Range { start_byte, end_byte, start_point, end_point }.into()) + } +} + +// ------------------------------------------------------------------------------------------------- +// Language + +#[derive(Copy, Clone)] +#[repr(transparent)] +pub struct Language(pub(crate) tree_sitter::Language); + +impl_newtype_traits!(Language); + +unsafe extern "C" fn no_op(_: *mut os::raw::c_void) {} + +impl IntoLisp<'_> for Language { + fn into_lisp(self, env: &Env) -> Result { + // Safety: Language has the same representation as the opaque pointer type. + let ptr: *mut os::raw::c_void = unsafe { mem::transmute(self) }; + // Safety: The finalizer does nothing. + unsafe { env.make_user_ptr(Some(no_op::), ptr) } + } +} + +impl FromLisp<'_> for Language { + fn from_lisp(value: Value) -> Result { + match value.get_user_finalizer()? { + Some(fin) if fin == no_op:: => { + let ptr = value.get_user_ptr()?; + // Safety: Language has the same representation as the opaque pointer type. + Ok(unsafe { mem::transmute(ptr) }) + } + _ => Err(ErrorKind::WrongTypeUserPtr { expected: "TreeSitterLanguage" }.into()) + } + } +} + +// ------------------------------------------------------------------------------------------------- +// Tree + +pub type Shared = Rc>; + +// XXX: If we pass a &, #[defun] will assume it's refcell-wrapped. If we pass a Value, we need +// .into_rust() boilerplate. This is a trick to avoid both. +pub type Borrowed<'e, T> = &'e Shared; + +// ------------------------------------------------------------------------------------------------- +// Node + +/// Wrapper around `tree_sitter::Node` that can have 'static lifetime, by keeping a ref-counted +/// reference to the underlying tree. +#[derive(Clone)] +pub struct RNode { + tree: Shared, + inner: Node<'static>, +} + +pub struct RNodeBorrow<'e> { + #[allow(unused)] + reft: Ref<'e, Tree>, + node: &'e Node<'e>, +} + +impl<'e> Deref for RNodeBorrow<'e> { + type Target = Node<'e>; + + #[inline] + fn deref(&self) -> &Self::Target { + self.node + } +} + +pub struct RNodeBorrowMut<'e> { + #[allow(unused)] + reft: RefMut<'e, Tree>, + node: Node<'e>, +} + +impl<'e> Deref for RNodeBorrowMut<'e> { + type Target = Node<'e>; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.node + } +} + +impl<'e> DerefMut for RNodeBorrowMut<'e> { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.node + } +} + +impl PartialEq for RNode { + fn eq(&self, other: &Self) -> bool { + self.inner == other.inner + } +} + +impl IntoLisp<'_> for RNode { + fn into_lisp(self, env: &Env) -> Result { + RefCell::new(self).into_lisp(env) + } +} + +impl RNode { + pub fn new<'e, F: FnOnce(&'e Tree) -> Node<'e>>(tree: Shared, f: F) -> Self { + let rtree = unsafe { erase_lifetime(&*tree.borrow()) }; + let inner = unsafe { mem::transmute(f(rtree)) }; + Self { tree, inner } + } + + pub fn clone_tree(&self) -> Shared { + self.tree.clone() + } + + pub fn map<'e, F: FnOnce(&Node<'e>) -> Node<'e>>(&self, f: F) -> Self { + Self::new(self.clone_tree(), |_| f(&self.inner)) + } + + #[inline] + pub fn borrow(&self) -> RNodeBorrow { + let reft = self.tree.borrow(); + let node = &self.inner; + RNodeBorrow { reft, node } + } + + #[inline] + pub fn borrow_mut(&mut self) -> RNodeBorrowMut { + let reft = self.tree.borrow_mut(); + let node = self.inner; + RNodeBorrowMut { reft, node } + } +} + +// ------------------------------------------------------------------------------------------------- +// Cursor + +/// Wrapper around `tree_sitter::TreeCursor` that can have 'static lifetime, by keeping a +/// ref-counted reference to the underlying tree. +pub struct RCursor { + tree: Shared, + inner: TreeCursor<'static>, +} + +pub struct RCursorBorrow<'e> { + #[allow(unused)] + reft: Ref<'e, Tree>, + cursor: &'e TreeCursor<'e>, +} + +impl<'e> Deref for RCursorBorrow<'e> { + type Target = TreeCursor<'e>; + + #[inline] + fn deref(&self) -> &Self::Target { + self.cursor + } +} + +pub struct RCursorBorrowMut<'e> { + #[allow(unused)] + reft: Ref<'e, Tree>, + cursor: &'e mut TreeCursor<'e>, +} + +impl<'e> Deref for RCursorBorrowMut<'e> { + type Target = TreeCursor<'e>; + + #[inline] + fn deref(&self) -> &Self::Target { + self.cursor + } +} + +impl<'e> DerefMut for RCursorBorrowMut<'e> { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + self.cursor + } +} + +impl RCursor { + pub fn new<'e, F: FnOnce(&'e Tree) -> TreeCursor<'e>>(tree: Shared, f: F) -> Self { + let rtree = unsafe { erase_lifetime(&*tree.borrow()) }; + let inner = unsafe { mem::transmute(f(rtree)) }; + Self { tree, inner } + } + + pub fn clone_tree(&self) -> Shared { + self.tree.clone() + } + + #[inline] + pub fn borrow(&self) -> RCursorBorrow { + let reft = self.tree.borrow(); + let cursor = &self.inner; + RCursorBorrow { reft, cursor } + } + + #[inline] + pub fn borrow_mut<'e>(&'e mut self) -> RCursorBorrowMut { + let reft: Ref<'e, Tree> = self.tree.borrow(); + // XXX: Explain the safety here. + let cursor: &'e mut _ = unsafe { mem::transmute(&mut self.inner) }; + RCursorBorrowMut { reft, cursor } + } +} + +// ------------------------------------------------------------------------------------------------- + +pub enum Either<'e, L, R> where L: FromLisp<'e>, R: FromLisp<'e> { + Left(L, PhantomData<&'e ()>), + Right(R, PhantomData<&'e ()>), +} + +impl<'e, L, R> FromLisp<'e> for Either<'e, L, R> where L: FromLisp<'e>, R: FromLisp<'e> { + fn from_lisp(value: Value<'e>) -> Result { + if let Ok(value) = value.into_rust::() { + return Ok(Either::Left(value, PhantomData)); + } + let value = value.into_rust::()?; + Ok(Either::Right(value, PhantomData)) + } +} + +macro_rules! impl_pred { + ($name:ident, $type:ty) => { + #[defun] + fn $name(value: Value) -> Result { + Ok(value.into_rust::<$type>().is_ok()) + } + }; +} + +// TODO: Add docstring for these. +impl_pred!(language_p, Language); +impl_pred!(range_p, Range); +impl_pred!(point_p, Point); +impl_pred!(parser_p, &RefCell); +impl_pred!(tree_p, &Shared); +impl_pred!(node_p, &RefCell); +impl_pred!(cursor_p, &RefCell); +impl_pred!(query_p, &RefCell); +impl_pred!(query_cursor_p, &RefCell); diff --git a/lisp/tree-sitter-core.el b/lisp/tree-sitter-core.el index 98ff1762..4404b6f5 100644 --- a/lisp/tree-sitter-core.el +++ b/lisp/tree-sitter-core.el @@ -25,7 +25,9 @@ (require 'tree-sitter-dyn) (eval-when-compile - (require 'pcase)) + (require 'pcase) + (require 'subr-x) + (require 'cl-lib)) (defmacro ts--without-restriction (&rest body) "Execute BODY with narrowing disabled." @@ -79,7 +81,7 @@ for a more detailed explanation." ;;; Extracting buffer's text. -(defun ts-buffer-input (bytepos _line-number _byte-column) +(defun ts--buffer-input (bytepos _line-number _byte-column) "Return a portion of the current buffer's text, starting from BYTEPOS. BYTEPOS is automatically clamped to the range valid for the current buffer. @@ -94,15 +96,22 @@ This function must be called with narrowing disabled, e.g. within a (end-pos (or (byte-to-position end-byte) max-pos))) (buffer-substring-no-properties beg-pos end-pos))) +(defun ts--buffer-substring-no-properties (beg-byte end-byte) + "Return the current buffer's text from BEG-BYTE to END-BYTE. +This function must be called with narrowing disabled, e.g. within a +`ts--without-restriction' block." + (buffer-substring-no-properties + (byte-to-position beg-byte) + (byte-to-position end-byte))) + (defun ts--node-text (node) "Return NODE's text, assuming it's from the current buffer's syntax tree. Prefer `ts-node-text', unless there's a real bottleneck. -This function must be called within a `ts--without-restriction' block." - (pcase-let ((`[,beg ,end] (ts-node-range node))) - (buffer-substring-no-properties - (byte-to-position beg) - (byte-to-position end)))) +This function must be called with narrowing disabled, e.g. within a +`ts--without-restriction' block." + (pcase-let ((`(,beg . ,end) (ts-node-position-range node))) + (buffer-substring-no-properties beg end))) (defun ts-node-text (node) "Return NODE's text, assuming it's from the current buffer's syntax tree." @@ -136,10 +145,10 @@ This function must be called within a `ts--without-restriction' block." (defun ts-node-position-range (node) "Return NODE's (START-POSITION . END-POSITION)." - (pcase-let ((`[,beg ,end] (ts-node-range node))) - (cons - (byte-to-position beg) - (byte-to-position end)))) + (let ((range (ts-node-byte-range node))) + (cl-callf byte-to-position (car range)) + (cl-callf byte-to-position (cdr range)) + range)) (defun ts-goto-first-child-for-position (cursor position) "Move CURSOR to the first child that extends beyond the given POSITION. @@ -149,50 +158,68 @@ Return the index of the child node if one was found, nil otherwise." ;;; Querying. -(defun ts-make-query (language patterns) +(defun ts--stringify-patterns (patterns) + "Convert PATTERNS into a query string that can be passed to `ts--make-query'." + (cond + ((stringp patterns) patterns) + ((sequencep patterns) + ;; XXX: This is hacky. + (thread-last (mapconcat (lambda (p) (format "%S" p)) patterns "\n") + (replace-regexp-in-string (regexp-quote "\\?") "?") + (replace-regexp-in-string (regexp-quote "\\.") "."))) + (t (error "Invalid patterns")))) + +(defun ts-make-query (language patterns &optional tag-assigner) "Create a new query for LANGUAGE from a sequence of S-expression PATTERNS. The query is associated with LANGUAGE, and can only be run on syntax nodes -parsed with LANGUAGE." - (let ((source (cond - ((stringp patterns) patterns) - ;; FIX: This doesn't work with predicates, in which '?' would be escaped. - ((sequencep patterns) (mapconcat (lambda (p) (format "%S" p)) patterns "\n")) - (t (format "%S" patterns))))) - (ts--make-query language source))) - -(defun ts-query-matches (query node &optional cursor index-only text-function) +parsed with LANGUAGE. + +When the query is executed, each captured node is tagged with a symbol, whose +name is the corresponding capture name defined in PATTERNS. For example, nodes +that are captured as \"@function.builtin\" will be tagged with the symbol +`function.builtin'. This behavior can be customized by the optional function +TAG-ASSIGNER, which should return a tag value when given a capture name (without +the prefix \"@\"). If it returns nil, the associated capture name is disabled. + +See also: `ts-query-captures' and `ts-query-matches'." + (ts--make-query language (ts--stringify-patterns patterns) + (or tag-assigner #'intern))) + +(defun ts-query-matches (query node text-function &optional cursor) "Execute QUERY on NODE and return a sequence of matches. Matches are sorted in the order they were found. Each match has the form (PATTERN-INDEX . MATCH-CAPTURES), where PATTERN-INDEX is -the position of the matched pattern within QUERY, and MATCH-CAPTURES is a -sequence of captures associated with the match, similar to that returned by -`ts-query-captures'. If the optional arg INDEX-ONLY is non-nil, positions of the -capture patterns within QUERY are returned instead of their names. +the 0-based position of the matched pattern within QUERY, and MATCH-CAPTURES is +a sequence of captures associated with the match, similar to that returned by +`ts-query-captures'. -If the optional arg CURSOR is non-nil, it is used as the query-cursor to execute -QUERY. Otherwise a new query-cursor is used. +TEXT-FUNCTION is called to get nodes' texts (for text-based predicates). It +should take 2 parameters: (BEG-BYTE END-BYTE), and return the corresponding +chunk of text in the source code. -If the optional arg TEXT-FUNCTION is non-nil, it is used to get nodes' text. -Otherwise `ts-node-text' is used." +If the optional arg CURSOR is non-nil, it is used as the query-cursor to execute +QUERY. Otherwise, a newly created query-cursor is used." (ts--query-cursor-matches - (or cursor (ts-make-query-cursor)) query node index-only (or text-function #'ts-node-text))) + (or cursor (ts-make-query-cursor)) query node text-function)) -(defun ts-query-captures (query node &optional cursor index-only text-function) +(defun ts-query-captures (query node text-function &optional cursor) "Execute QUERY on NODE and return a sequence of captures. -Matches are sorted in the order they appear. +Captures are sorted in the order they appear. -Each capture has the form (CAPTURE-NAME . CAPTURED-NODE). If the optional arg -INDEX-ONLY is non-nil, the position of the capture pattern within QUERY is -returned instead of its name. +Each capture has the form (CAPTURE-TAG . CAPTURED-NODE), where CAPTURE-TAG is a +symbol, whose name is the corresponding capture name defined in QUERY (without +the prefix \"@\"). If QUERY was created with a custom tag assigner, CAPTURE-TAG +is the value returned by that function instead. See also: `ts-make-query'. -If the optional arg CURSOR is non-nil, it is used as the query-cursor to execute -QUERY. Otherwise a new query-cursor is used. +TEXT-FUNCTION is called to get nodes' texts (for text-based predicates). It +should take 2 parameters: (BEG-BYTE END-BYTE), and return the corresponding +chunk of text in the source code. -If the optional arg TEXT-FUNCTION is non-nil, it is used to get nodes' text. -Otherwise `ts-node-text' is used." +If the optional arg CURSOR is non-nil, it is used as the query-cursor to execute +QUERY. Otherwise, a newly created query-cursor is used." (ts--query-cursor-captures - (or cursor (ts-make-query-cursor)) query node index-only (or text-function #'ts-node-text))) + (or cursor (ts-make-query-cursor)) query node text-function)) ;;; Utilities. diff --git a/lisp/tree-sitter-debug.el b/lisp/tree-sitter-debug.el index c085e65b..f507e54d 100644 --- a/lisp/tree-sitter-debug.el +++ b/lisp/tree-sitter-debug.el @@ -8,7 +8,7 @@ ;; This file contains debug utilities for tree-sitter. ;; -;; (tree-sitter-debug-enable) +;; (tree-sitter-debug-mode) ;;; Code: @@ -33,46 +33,51 @@ (erase-buffer) (tree-sitter-debug--display-node (ts-root-node tree) 0)))) -;;;###autoload -(defun tree-sitter-debug-enable () - "Enable debugging for the current buffer. -This displays the syntax tree in another buffer, and keeps it up-to-date." - (interactive) - (unless tree-sitter-mode - (error "`tree-sitter-mode' is not enabled")) - (unless tree-sitter-debug--tree-buffer +(defun tree-sitter-debug--setup () + "Set up syntax tree debugging in the current buffer." + (unless (buffer-live-p tree-sitter-debug--tree-buffer) (setq tree-sitter-debug--tree-buffer - (generate-new-buffer (format "*tree-sitter-tree %s*" (buffer-name))))) + (get-buffer-create (format "tree-sitter-tree: %s" (buffer-name))))) + (add-hook 'tree-sitter-after-change-functions #'tree-sitter-debug--display-tree nil :local) + (add-hook 'kill-buffer-hook #'tree-sitter-debug--teardown nil :local) (display-buffer tree-sitter-debug--tree-buffer) - (add-hook 'tree-sitter-after-change-functions #'tree-sitter-debug--display-tree nil 'local) (tree-sitter-debug--display-tree nil)) +(defun tree-sitter-debug--teardown () + "Tear down syntax tree debugging in the current buffer." + (remove-hook 'tree-sitter-after-change-functions #'tree-sitter-debug--display-tree :local) + (when (buffer-live-p tree-sitter-debug--tree-buffer) + (kill-buffer tree-sitter-debug--tree-buffer) + (setq tree-sitter-debug--tree-buffer nil))) + ;;;###autoload -(defun tree-sitter-debug-disable () - "Disable debugging for the current buffer." - (interactive) - (when tree-sitter-debug--tree-buffer - (kill-buffer tree-sitter-debug--tree-buffer) - (setq tree-sitter-debug--tree-buffer nil)) - (remove-hook 'tree-sitter-after-change-functions #'tree-sitter-debug--display-tree 'local)) +(define-minor-mode tree-sitter-debug-mode + "Toggle syntax tree debugging for the current buffer. +This mode displays the syntax tree in another buffer, and keeps it up-to-date." + :init-value nil + :group 'tree-sitter + (tree-sitter--handle-dependent tree-sitter-debug-mode + #'tree-sitter-debug--setup + #'tree-sitter-debug--teardown)) ;;;###autoload -(defun tree-sitter-query (patterns &optional matches index-only) +(defun tree-sitter-debug-query (patterns &optional matches tag-assigner) "Execute query PATTERNS against the current syntax tree and return captures. If the optional arg MATCHES is non-nil, matches (from `ts-query-matches') are returned instead of captures (from `ts-query-captures'). -If the optional arg INDEX-ONLY is non-nil, return positions of capture patterns -within the constructed query, instead of their names. +If the optional arg TAG-ASSIGNER is non-nil, it is passed to `ts-make-query' to +assign custom tags to capture names. This function is primarily useful for debugging purpose. Other packages should build queries and cursors once, then reuse them." - (let* ((query (ts-make-query tree-sitter-language patterns)) + (let* ((query (ts-make-query tree-sitter-language patterns tag-assigner)) (root-node (ts-root-node tree-sitter-tree))) - (if matches - (ts-query-matches query root-node nil index-only) - (ts-query-captures query root-node nil index-only)))) + (ts--without-restriction + (if matches + (ts-query-matches query root-node #'ts--buffer-substring-no-properties) + (ts-query-captures query root-node #'ts--buffer-substring-no-properties))))) ;;; TODO: Kill tree-buffer when `tree-sitter' minor mode is turned off. diff --git a/lisp/tree-sitter-hl.el b/lisp/tree-sitter-hl.el new file mode 100644 index 00000000..a752cc5f --- /dev/null +++ b/lisp/tree-sitter-hl.el @@ -0,0 +1,389 @@ +;;; tree-sitter-hl.el --- Syntax highlighting based on tree-sitter -*- lexical-binding: t; coding: utf-8 -*- + +;; Copyright (C) 2020 Tuấn-Anh Nguyễn +;; +;; Author: Tuấn-Anh Nguyễn +;; Timo von Hartz + +;;; Commentary: + +;; This file implements a new syntax highlighting based on `tree-sitter'. + +;;; Code: + +(require 'tree-sitter) + +(eval-when-compile + (require 'cl-lib)) + +;;; ---------------------------------------------------------------------------- +;;; Faces for commonly used highlight names. + +(defgroup tree-sitter-hl nil + "Syntax highlighting using tree-sitter." + :group 'tree-sitter) + +(defgroup tree-sitter-hl-faces nil + "All the faces of tree-sitter." + :group 'tree-sitter-hl) + +(defface tree-sitter-hl-face:attribute '((default :inherit font-lock-preprocessor-face)) + "Face used for attribute" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:comment '((default :inherit font-lock-comment-face)) + "Face used for comment" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:constant '((default :inherit font-lock-constant-face)) + "Face used for constant" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:constant.builtin '((default :inherit font-lock-builtin-face)) + "Face used for constant.builtin" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:constructor '((default :inherit font-lock-type-face)) + "Face used for constructor" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:escape '(()) + "Face used for escape" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:function '((default :inherit font-lock-function-name-face)) + "Face used for function" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:function.builtin '((default :inherit font-lock-builtin-face)) + "Face used for function.builtin" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:function.macro '((default :inherit font-lock-preprocessor-face)) + "Face used for function.macro" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:function.special '((default :inherit font-lock-preprocessor-face)) + "Face used for function.special" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:function.method '((default :inherit font-lock-function-name-face)) + "Face used for function.method" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:function.call '((default :inherit link :underline nil)) + "Face used for function.call" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:identifier '((default :inherit font-lock-function-name-face)) + "Face used for identifier" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:keyword '((default :inherit font-lock-keyword-face)) + "Face used for keyword" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:label '((default :inherit font-lock-preprocessor-face)) + "Face used for label" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:operator '((default :inherit font-lock-keyword-face)) + "Face used for operator" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:property '((default :inherit font-lock-variable-name-face)) + "Face used for property" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:punctuation '(()) + "Face used for punctuation" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:punctuation.bracket '(()) + "Face used for punctuation.bracket" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:punctuation.delimiter '(()) + "Face used for punctuation.delimiter" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:string '((default :inherit font-lock-string-face)) + "Face used for string" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:type '((default :inherit font-lock-type-face)) + "Faced used for type" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:type.builtin '((default :inherit font-lock-builtin-face)) + "Face used for type.builtin" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:variable '((default :inherit font-lock-variable-name-face)) + "Face used for variable" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:variable.builtin '((default :inherit font-lock-builtin-face)) + "Face used for variable.builtin" + :group 'tree-sitter-hl-faces) + +(defface tree-sitter-hl-face:variable.parameter '((default :inherit font-lock-variable-name-face)) + "Faced used for variable.parameter" + :group 'tree-sitter-hl-faces) + +;;; ---------------------------------------------------------------------------- +;;; Interfaces for modes and end users. + +(defcustom tree-sitter-hl-face-mapping-function + #'tree-sitter-hl-face-from-common-scope + "Function used to map capture names in query patterns to highlighting faces. +This can also be used to selectively disable certain capture names. For example, +the following code disables keyword highlighting: + + (add-function :before-while 'tree-sitter-hl-face-mapping-function + (lambda (capture-name) + (not (string= capture-name \"keyword\"))))" + :group 'tree-sitter-hl + :type 'function) + +(defvar-local tree-sitter-hl-default-patterns nil + "Default syntax highlighting patterns. +This should be set by major modes that want to integrate with `tree-sitter-hl'. +It plays a similar role to `font-lock-defaults'.") + +(defvar tree-sitter-hl--patterns-alist nil + "Additional language-specific syntax highlighting patterns. +It plays a similar role to `font-lock-keywords-alist', except that its keys are +language symbols, not major mode symbols.") + +(defvar-local tree-sitter-hl--extra-patterns-list nil + "Additional buffer-local syntax highlighting patterns.") + +(defvar-local tree-sitter-hl--query nil + "Tree query used for syntax highlighting, compiled from patterns.") + +(defun tree-sitter-hl--ensure-query () + "Return the tree query to be used for syntax highlighting in this buffer." + (unless tree-sitter-hl--query + (setq tree-sitter-hl--query + (ts-make-query + tree-sitter-language + (mapconcat #'ts--stringify-patterns + (append tree-sitter-hl--extra-patterns-list + (list tree-sitter-hl-default-patterns)) + "\n") + tree-sitter-hl-face-mapping-function))) + tree-sitter-hl--query) + +(defun tree-sitter-hl-face-from-common-scope (capture-name) + "Return the default face used to highlight CAPTURE-NAME." + ;; TODO: If a scope does not have a corresponding face, check its ancestors. + (intern (format "tree-sitter-hl-face:%s" capture-name))) + +;;; TODO: Support adding/removing language-specific patterns. +(defun tree-sitter-hl-add-patterns (patterns) + "Add buffer-local syntax highlighting PATTERNS. +These will take precedence over `tree-sitter-hl-default-patterns', as well as +previously added patterns." + ;; Do nothing if the patterns are already on top. + (unless (equal patterns (cl-first tree-sitter-hl--extra-patterns-list)) + (let ((old-list tree-sitter-hl--extra-patterns-list) + (old-query tree-sitter-hl--query)) + ;; Update the patterns list and request the query to be rebuilt... + (setq tree-sitter-hl--extra-patterns-list + (append (list patterns) (remove patterns old-list))) + (setq tree-sitter-hl--query nil) + ;; ... and build it if possible. During a major mode's hook, we may not + ;; even know the language, in which case we let `tree-sitter-hl--setup' + ;; build the query later on. + (when tree-sitter-language + (tree-sitter--error-protect (tree-sitter-hl--ensure-query) + ;; When the newly added patterns are invalid, restore the old state. + (setq tree-sitter-hl--query old-query + tree-sitter-hl--extra-patterns-list old-list)) + ;; Everything is in place. Request a re-render. + (when (bound-and-true-p tree-sitter-hl-mode) + (tree-sitter-hl--invalidate)))))) + +;;; ---------------------------------------------------------------------------- +;;; Internal workings. + +(defvar-local tree-sitter-hl--query-cursor nil) + +(defconst tree-sitter-hl--extend-region-limit 2048 + "The max size of the extended region, in characters.") + +(defconst tree-sitter-hl--extend-region-levels 4 + "The max number of levels to walk up the syntax tree to extend the region.") + +(defun tree-sitter-hl--extend-region (beg end) + "Return a \"safe\" region that encloses (BEG . END), to run the query on. +Because a match is returned only when all nodes in the pattern intersect the +query cursor's range, relying on `ts-changed-ranges' alone is insufficient. + +Another pathological case is `jit-lock--run-funtions' being called on a very +small region. An example is when `evil-adjust-cursor' triggers a +`vertical-motion' (outside of a redisplay). + +See https://github.com/tree-sitter/tree-sitter/issues/598." + (pcase-let* ((region `(,beg . ,end)) + (root-node (ts-root-node tree-sitter-tree)) + (node (ts-get-descendant-for-position-range root-node beg end)) + (`(,beg . ,end) (ts-node-position-range node)) + (level 0)) + ;; Repeatedly extend the region, within the limit. TODO: What if the region + ;; of the minimal enclosing node is already too large? + (while (and node + (< (- end beg) tree-sitter-hl--extend-region-limit)) + (setcar region beg) + (setcdr region end) + ;; Walk up to the parent node. + (when (setq node (when (<= (cl-incf level) + tree-sitter-hl--extend-region-levels) + (ts-get-parent node))) + (let ((range (ts-node-position-range node))) + (setf `(,beg . ,end) range)))) + ;; TODO: Extend to whole lines? + region)) + +(defun tree-sitter-hl--append-text-property (start end prop value &optional object) + "Append VALUE to PROP of the text from START to END. +This is similar to `font-lock-append-text-property', but deduplicates values. It +also expects VALUE to be a single value, not a list." + (let (next prev) + (while (/= start end) + (setq next (next-single-property-change start prop object end) + prev (get-text-property start prop object)) + ;; Canonicalize old forms of face property. + (and (memq prop '(face font-lock-face)) + (listp prev) + (or (keywordp (car prev)) + (memq (car prev) '(foreground-color background-color))) + (setq prev (list prev))) + (unless (listp prev) + (setq prev (list prev))) + (unless (memq value prev) + (put-text-property start next prop + (append prev (list value)) + object)) + (setq start next)))) + +(defun tree-sitter-hl--highlight-capture (capture) + "Highlight the given CAPTURE." + (pcase-let* ((`(,face . (,beg-byte . ,end-byte)) capture) + (beg (byte-to-position beg-byte)) + (end (byte-to-position end-byte))) + ;; TODO: Add an option to disable unknown faces earlier, when compiling the + ;; query from patterns. + (when (facep face) + (tree-sitter-hl--append-text-property beg end 'face face)))) + +;;; TODO: Handle embedded DSLs (injections). +(defun tree-sitter-hl--highlight-region (beg end &optional _loudly) + "Highlight the region (BEG . END). +This is intended to be used as a buffer-local override of +`font-lock-fontify-region-function'." + (ts--save-context + ;; Extend the region to be highlighted, based on some heuristics, so that + ;; querying works in certain pathological cases. This is analogous to the + ;; extension done by `font-lock-default-fontify-region'. TODO: Consider + ;; distinguishing region to query from region to fontify. + (let ((region (tree-sitter-hl--extend-region beg end))) + (setf `(,beg . ,end) region)) + (ts--query-cursor-set-byte-range tree-sitter-hl--query-cursor + (position-bytes beg) + (position-bytes end)) + (let* ((root-node (ts-root-node tree-sitter-tree)) + (captures (ts--query-cursor-captures-1 + tree-sitter-hl--query-cursor + tree-sitter-hl--query + root-node + #'ts--buffer-substring-no-properties))) + ;; TODO: Handle quitting. + (let ((inhibit-point-motion-hooks t)) + (with-silent-modifications + (font-lock-unfontify-region beg end) + ;; TODO: Consider giving certain combinations of highlight names their + ;; own faces. For example, it might be desirable for fontification of + ;; a node that matches both "constructor" and "variable" to be + ;; different from the union of "constructor fontification" and + ;; "variable fontification". + (mapc #'tree-sitter-hl--highlight-capture captures))) + ;; TODO: Return the actual region being fontified. + `(jit-lock-bounds ,beg . ,end)))) + +(defun tree-sitter-hl--invalidate (&optional old-tree) + "Mark regions of text to be rehighlighted after a text change. +Installed on `tree-sitter-after-change-functions'. + +OLD-TREE is the tree before the edit." + (if old-tree + ;; Incremental parse. + (seq-doseq (range (ts-changed-ranges old-tree tree-sitter-tree)) + ;; TODO: How about invalidating a single large range? + (pcase-let* ((`[,beg-byte ,end-byte] range) + (beg (byte-to-position beg-byte)) + (end (byte-to-position end-byte))) + ;; TODO: How about calling `jit-lock-refontify' directly? + (font-lock-flush beg end))) + ;; First parse. + (font-lock-flush))) + +;;; ---------------------------------------------------------------------------- +;;; Setup and teardown. + +;;; TODO: We want to work even without `font-lock-mode', right? +(defun tree-sitter-hl--setup () + "Set up `tree-sitter-hl' in the current buffer. +This assumes both `tree-sitter-mode' and `font-lock-mode' were already enabled." + ;; TODO: If there's an error, disable `tree-sitter-hl--extra-patterns-list' + ;; and retry. + (tree-sitter-hl--ensure-query) + (unless tree-sitter-hl--query-cursor + (setq tree-sitter-hl--query-cursor (ts-make-query-cursor)) + ;; Invalidate the buffer, only if we were actually disabled previously. + ;; TODO: Find a way to disable `font-lock-defaults', while keeping + ;; modifications added locally through `font-lock-add-keywords'. The problem + ;; is, `font-lock-mode' itself doesn't seem to be able to do that. (See + ;; `font-lock-refresh-defaults'.) + (tree-sitter-hl--invalidate)) + ;; TODO: Override `font-lock-extend-after-change-region-function', or hook + ;; into `jit-lock-after-change-extend-region-functions' directly. For that to + ;; work, we need to make sure `tree-sitter--after-change' runs before + ;; `jit-lock-after-change'. + (add-hook 'tree-sitter-after-change-functions + #'tree-sitter-hl--invalidate + nil :local) + ;; XXX + (add-function :override (local 'font-lock-fontify-region-function) + #'tree-sitter-hl--highlight-region)) + +(defun tree-sitter-hl--teardown () + "Tear down `tree-sitter-hl' in the current buffer." + (remove-function (local 'font-lock-fontify-region-function) + #'tree-sitter-hl--highlight-region) + (remove-hook 'tree-sitter-after-change-functions + #'tree-sitter-hl--invalidate + :local) + (setq tree-sitter-hl--query nil) + (when tree-sitter-hl--query-cursor + (setq tree-sitter-hl--query-cursor nil) + ;; Invalidate the buffer only if we were actually enabled previously. + (font-lock-flush))) + +;;;###autoload +(define-minor-mode tree-sitter-hl-mode + "Toggle syntax highlighting based on Tree-sitter's syntax tree. +Enabling this automatically enables `tree-sitter-mode' in the buffer. + +To enable this automatically whenever `tree-sitter-mode' is enabled: + + (add-hook 'tree-sitter-after-on-hook #'tree-sitter-hl-mode)" + :init-value nil + :group 'tree-sitter + (tree-sitter--handle-dependent tree-sitter-hl-mode + #'tree-sitter-hl--setup + #'tree-sitter-hl--teardown)) + +(provide 'tree-sitter-hl) +;;; tree-sitter-hl.el ends here diff --git a/lisp/tree-sitter-query.el b/lisp/tree-sitter-query.el index d4c7cb7e..bd25bfde 100644 --- a/lisp/tree-sitter-query.el +++ b/lisp/tree-sitter-query.el @@ -15,7 +15,7 @@ (require 'tree-sitter) (defgroup tree-sitter-query nil - "Tree-Sitter playground." + "Tree-sitter playground." :group 'tree-sitter) (define-derived-mode tree-sitter-query-mode prog-mode "ts-query-builder" @@ -51,13 +51,14 @@ (defun tree-sitter-query--eval-query (patterns) "Evaluate query PATTERNS against the target buffer." (with-current-buffer tree-sitter-query--target-buffer - (remove-overlays) - (let* ((query (ts-make-query tree-sitter-language patterns)) - (root-node (ts-root-node tree-sitter-tree)) - (captures (ts-query-captures query root-node))) - (if (= (length captures) 0) - (message "No matches found") - (mapc #'tree-sitter-query--highlight-capture captures))))) + (ts--without-restriction + (remove-overlays) + (let* ((query (ts-make-query tree-sitter-language patterns #'identity)) + (root-node (ts-root-node tree-sitter-tree)) + (captures (ts-query-captures query root-node #'ts--buffer-substring-no-properties))) + (if (= (length captures) 0) + (message "No matches found") + (mapc #'tree-sitter-query--highlight-capture captures)))))) (defun tree-sitter-query--after-change (&rest _args) "Run query patterns against the target buffer and update highlighted texts." diff --git a/lisp/tree-sitter-tests.el b/lisp/tree-sitter-tests.el index d91d4472..a55ea946 100644 --- a/lisp/tree-sitter-tests.el +++ b/lisp/tree-sitter-tests.el @@ -32,13 +32,23 @@ (directory-file-name (file-name-directory (locate-library "tree-sitter")))) relative-path)) -(defun ts-test-tree-sexp (sexp) - "Check that the current syntax tree's sexp representation is SEXP." - (should (equal (read (ts-tree-to-sexp tree-sitter-tree)) sexp))) +(defun ts-test-tree-sexp (sexp &optional reset) + "Check that the current syntax tree's sexp representation is SEXP. +If RESET is non-nil, also do another full parse and check again." + (should (equal (read (ts-tree-to-sexp tree-sitter-tree)) sexp)) + (when reset + (setq tree-sitter-tree nil) + (tree-sitter--do-parse) + (ts-test-tree-sexp sexp))) (defun ts-test-use-lang (lang-symbol) "Turn on `tree-sitter-mode' in the current buffer, using language LANG-SYMBOL." (setq tree-sitter-language (tree-sitter-require lang-symbol)) + (ignore-errors + (setq tree-sitter-hl-default-patterns + (tree-sitter-langs--hl-default-patterns lang-symbol))) + (add-hook 'tree-sitter-after-first-parse-hook + (lambda () (should (not (null tree-sitter-tree))))) (tree-sitter-mode)) (defmacro ts-test-with (lang-symbol var &rest body) @@ -51,7 +61,8 @@ "Eval BODY in a temp buffer filled with content of the file at RELATIVE-PATH." (declare (indent 1)) `(with-temp-buffer - (insert-file-contents (ts-test-full-path ,relative-path)) + (let ((coding-system-for-read 'utf-8)) + (insert-file-contents (ts-test-full-path ,relative-path))) ,@body)) (defmacro ts-test-lang-with-file (lang-symbol relative-path &rest body) @@ -109,19 +120,18 @@ (ert-deftest parsing::rust-buffer () (ts-test-with 'rust parser (ts-test-with-file "src/types.rs" - (let* ((tree) (old-tree) - (initial (benchmark-run - (setq tree (ts-parse-chunks parser #'ts-buffer-input nil)))) - (reparse (benchmark-run - (progn - (setq old-tree tree) - (setq tree (ts-parse-chunks parser #'ts-buffer-input old-tree)))))) - ;; (message "initial %s" initial) - ;; (message "reparse %s" reparse) - (ert-info ("Same code should result in empty change ranges") - (should (equal [] (ts-changed-ranges old-tree tree)))) - (ert-info ("Incremental parsing shoud be faster than initial") - (should (> (car initial) (car reparse)))))))) + (ts--without-restriction + (let* ((tree) (old-tree) + (initial (benchmark-run + (setq tree (ts-parse-chunks parser #'ts--buffer-input nil)))) + (reparse (benchmark-run + (progn + (setq old-tree tree) + (setq tree (ts-parse-chunks parser #'ts--buffer-input old-tree)))))) + (ert-info ("Same code should result in empty change ranges") + (should (equal [] (ts-changed-ranges old-tree tree)))) + (ert-info ("Incremental parsing shoud be faster than initial") + (should (> (car initial) (car reparse))))))))) (ert-deftest minor-mode::basic-editing () (with-temp-buffer @@ -138,6 +148,24 @@ (kill-region (point-min) (point-max)) (ts-test-tree-sexp '(source_file)))) +(ert-deftest minor-mode::incremental:change-case-region () + (ts-test-lang-with-file 'rust "lisp/test-files/change-case-region.rs" + (let* ((orig-sexp (read (ts-tree-to-sexp tree-sitter-tree))) + (end (re-search-forward "this text")) + (beg (match-beginning 0))) + (upcase-initials-region beg end) + (ts-test-tree-sexp orig-sexp) + (downcase-region beg end) + (ts-test-tree-sexp orig-sexp :reset)))) + +(ert-deftest minor-mode::incremental:delete-non-ascii-text () + (ts-test-lang-with-file 'rust "lisp/test-files/delete-non-ascii-text.rs" + (let* ((orig-sexp (read (ts-tree-to-sexp tree-sitter-tree))) + (end (re-search-forward "ấấấấấấấấ")) + (beg (match-beginning 0))) + (delete-region beg end) + (ts-test-tree-sexp orig-sexp :reset)))) + (ert-deftest node::eq () (ts-test-with 'rust parser (let* ((tree (ts-parse-string parser "fn foo() {}")) @@ -224,7 +252,7 @@ tree is held (since nodes internally reference the tree)." (ts-test-lang-with-file 'rust "src/query.rs" ;; This is to make sure it works correctly with narrowing. (narrow-to-region 1 2) - (let* ((captures (tree-sitter-query + (let* ((captures (tree-sitter-debug-query "((function_item (identifier) @function) (match? @function \"make_query\")) (macro_definition (identifier) @macro)")) @@ -232,20 +260,78 @@ tree is held (since nodes internally reference the tree)." (pcase-let ((`(_ . ,node) capture)) (ts-node-text node))) captures)) - (capture-names (mapcar (lambda (capture) - (pcase-let ((`(,name . _) capture)) name)) + (capture-tags (mapcar (lambda (capture) + (pcase-let ((`(,tag . _) capture)) tag)) captures))) (ert-info ("Should match specified functions and not more") (should (member "_make_query" node-texts)) (should (member "make_query_cursor" node-texts)) (should (not (member "capture_names" node-texts)))) (ert-info ("Should capture some macros") - (should (member "macro" capture-names)))))) + (should (member 'macro capture-tags)))))) (ert-deftest load () (should-error (tree-sitter-require 'abc-xyz)) (tree-sitter-require 'rust)) +(ert-deftest hl::extend-region () + (ts-test-lang-with-file 'rust "lisp/test-files/extend-region.rs" + (tree-sitter-hl-mode) + (let* ((beg (save-excursion + (search-forward "abc") + (backward-char) + (point))) + (end (1+ beg))) + (tree-sitter-hl--highlight-region beg end) + (ert-info ("Highlighting a tiny region") + (should (memq 'tree-sitter-hl-face:function.macro + (get-text-property beg 'face))))))) + +(ert-deftest hl::face-mapping () + (ts-test-lang-with-file 'rust "lisp/test-files/types.rs" + (ert-info ("Keywords should be highlighted by default") + (tree-sitter-hl-mode) + (font-lock-ensure) + (should (memq 'tree-sitter-hl-face:keyword (get-text-property 1 'face)))) + (tree-sitter-hl-mode -1) + (ert-info ("Keywords should not be highlighted if their capture name is disabled") + ;; Disable keyword highlighting. + (add-function :before-while (local 'tree-sitter-hl-face-mapping-function) + (lambda (capture-name) + (not (string= capture-name "keyword")))) + (tree-sitter-hl-mode) + (font-lock-ensure) + (should (null (get-text-property 1 'face))) + (ert-info ("Other elements should still be highlighted") + (should-not (null (next-single-property-change 1 'face))))) + (tree-sitter-hl-mode -1) + (ert-info ("Nothing should be highlighted if all capture names are disabled") + (add-function :override (local 'tree-sitter-hl-face-mapping-function) + (lambda (capture-name) nil)) + (tree-sitter-hl-mode) + (font-lock-ensure) + (ert-info ("`face' should be nil for the whole buffer") + (should (null (get-text-property 1 'face))) + (should (null (next-single-property-change 1 'face))))))) + +(ert-deftest hl::bench () + (ts-test-lang-with-file 'rust "lisp/test-files/types.rs" + (setq tree-sitter-hl-default-patterns (tree-sitter-langs--hl-default-patterns 'rust)) + (require 'rust-mode) + (rust-mode) + (font-lock-mode) + (tree-sitter-hl-mode) + (garbage-collect) + (message "tree-sitter-hl 1 %s" (benchmark-run (font-lock-ensure))) + (garbage-collect) + (message "tree-sitter-hl 10 %s" (benchmark-run 10 (font-lock-ensure))) + (tree-sitter-hl-mode -1) + (font-lock-ensure) + (garbage-collect) + (message " font-lock 1 %s" (benchmark-run (font-lock-ensure))) + (garbage-collect) + (message " font-lock 10 %s" (benchmark-run 10 (font-lock-ensure))))) + ;; Local Variables: ;; no-byte-compile: t ;; End: diff --git a/lisp/tree-sitter.el b/lisp/tree-sitter.el index 4f031eea..3be78c55 100644 --- a/lisp/tree-sitter.el +++ b/lisp/tree-sitter.el @@ -26,7 +26,24 @@ (defcustom tree-sitter-after-change-functions nil "Functions to call each time `tree-sitter-tree' is updated. -Each function will be called with a single argument: the old tree." +Each function will be called with a single argument: the OLD-TREE. This argument +will be nil when the buffer is parsed for the first time. + +For initialization logic that should be run only once, use +`tree-sitter-after-first-parse-hook' instead." + :type 'hook + :group 'tree-sitter) + +(defcustom tree-sitter-after-first-parse-hook nil + "Functions to call after the buffer is parsed for the first time. +This hook should be used for initialization logic that requires inspecting the +syntax tree. It is run after `tree-sitter-mode-hook'." + :type 'hook + :group 'tree-sitter) + +(defcustom tree-sitter-after-on-hook nil + "Functions to call after enabling `tree-sitter-mode'. +Use this to enable other minor modes that depends on the syntax tree." :type 'hook :group 'tree-sitter) @@ -45,55 +62,73 @@ Each function will be called with a single argument: the old tree." (defvar-local tree-sitter-language nil "Tree-sitter language.") -(defvar-local tree-sitter--start-byte nil) -(defvar-local tree-sitter--old-end-byte nil) -(defvar-local tree-sitter--new-end-byte nil) +(defvar-local tree-sitter--text-before-change nil) -(defvar-local tree-sitter--start-point nil) -(defvar-local tree-sitter--old-end-point nil) -(defvar-local tree-sitter--new-end-point nil) +(defvar-local tree-sitter--beg-before-change nil) -(defun tree-sitter--before-change (beg end) +(defun tree-sitter--before-change (beg old-end) "Update relevant editing states. Installed on `before-change-functions'. -BEG and END are the begin and end of the text to be changed." - (setq tree-sitter--start-byte (position-bytes beg) - tree-sitter--old-end-byte (position-bytes end)) - (ts--save-context - ;; TODO: Keep mutating the same objects instead of creating a new one each time. - (setq tree-sitter--start-point (ts--point-from-position beg) - tree-sitter--old-end-point (ts--point-from-position end)))) - -;;; TODO XXX: The doc says that `after-change-functions' can be called multiple times, with -;;; different regions enclosed in the region passed to `before-change-functions'. Therefore what we -;;; are doing may be a bit too naive. Several questions to investigate: -;;; -;;; 1. Are the *after* regions recorded all at once, or one after another? Are they disjointed -;;; (would imply the former)?. -;;; -;;; 2. Are the *after* regions recorded at the same time as the *before* region? If not, how can the -;;; latter possibly enclose the former, e.g. in case of inserting a bunch of text? -;;; -;;; 3. How do we batch *after* hooks to re-parse only once? Maybe using `run-with-idle-timer' with -;;; 0-second timeout? +BEG and OLD-END are the begin and end positions of the text to be changed." + (setq tree-sitter--beg-before-change beg) + (ts--without-restriction + ;; TODO: Fallback to a full parse if this region is too big. + (setq tree-sitter--text-before-change + (when (> old-end beg) + (buffer-substring-no-properties beg old-end))))) + +;;; TODO: How do we batch *after* hooks to re-parse only once? Maybe using +;;; `run-with-idle-timer' with 0-second timeout? ;;; -;;; 4. What's the deal with several primitives calling `after-change-functions' *zero* or more -;;; times? Does that mean we can't rely on this hook at all? -(defun tree-sitter--after-change (_beg end _length) +;;; XXX: Figure out how to detect whether it was a text-property-only change. +;;; There's no point in reparsing in these situations. +(defun tree-sitter--after-change (beg new-end old-len) "Update relevant editing states and reparse the buffer (incrementally). Installed on `after-change-functions'. -END is the end of the changed text." - (setq tree-sitter--new-end-byte (position-bytes end) - tree-sitter--new-end-point (ts-point-from-position end)) +BEG is the begin position of the change. +NEW-END is the end position of the changed text. +OLD-LEN is the char length of the old text." (when tree-sitter-tree - (ts-edit-tree tree-sitter-tree - tree-sitter--start-byte - tree-sitter--old-end-byte - tree-sitter--new-end-byte - tree-sitter--start-point - tree-sitter--old-end-point - tree-sitter--new-end-point) - (tree-sitter--do-parse))) + (let ((beg-byte (position-bytes beg)) + (new-end-byte (position-bytes new-end)) + old-end-byte + beg-point old-end-point new-end-point) + (ts--save-context + (setq beg-point (ts--point-from-position beg) + new-end-point (ts--point-from-position new-end))) + ;; Compute the old text's end byte position, line number, byte column. + ;; + ;; Tree-sitter works with byte positions, line numbers, byte columns. + ;; Emacs primarily works with character positions. Converting the latter + ;; to the former, for the end of the old text, requires looking at the + ;; actual old text's content. Tree-sitter itself cannot do that, because + ;; it is designed to keep track of only the numbers, not a mirror of the + ;; buffer's text. Without re-designing Emacs's change tracking mechanism, + ;; we store the old text through `tree-sitter--before-change', and inspect + ;; it here. TODO XXX FIX: Improve change tracking in Emacs. + (if (= old-len 0) + (setq old-end-byte beg-byte + old-end-point beg-point) + (let ((old-text tree-sitter--text-before-change) + (rel-beg (- beg tree-sitter--beg-before-change))) + (with-temp-buffer + (insert old-text) + (pcase-let* + ((rel-pos (+ 1 rel-beg old-len)) + (rel-byte (position-bytes rel-pos)) + (`(,beg-line-number . ,beg-byte-column) beg-point) + (`(,rel-line-number . ,rel-byte-column) (ts--point-from-position rel-pos)) + (old-end-line-number (+ beg-line-number + rel-line-number -1)) + (old-end-byte-column (if (> rel-line-number 1) + rel-byte-column + (+ beg-byte-column rel-byte-column)))) + (setq old-end-byte (+ beg-byte rel-byte -1) + old-end-point `(,old-end-line-number . ,old-end-byte-column)))))) + (ts-edit-tree tree-sitter-tree + beg-byte old-end-byte new-end-byte + beg-point old-end-point new-end-point) + (tree-sitter--do-parse)))) (defun tree-sitter--do-parse () "Parse the current buffer and update the syntax tree." @@ -101,28 +136,24 @@ END is the end of the changed text." (setq tree-sitter-tree ;; https://github.com/ubolonton/emacs-tree-sitter/issues/3 (ts--without-restriction - (ts-parse-chunks tree-sitter-parser #'ts-buffer-input tree-sitter-tree))) + (ts-parse-chunks tree-sitter-parser #'ts--buffer-input old-tree))) (run-hook-with-args 'tree-sitter-after-change-functions old-tree))) -(defun tree-sitter--enable () +(defun tree-sitter--setup () "Enable `tree-sitter' in the current buffer." (unless tree-sitter-language ;; Determine the language symbol based on `major-mode' . (let ((lang-symbol (alist-get major-mode tree-sitter-major-mode-language-alist))) (unless lang-symbol - ;; TODO: Consider doing nothing if the language is not supported, so - ;; that we can make this a global mode. (error "No language registered for major mode `%s'" major-mode)) (setq tree-sitter-language (tree-sitter-require lang-symbol)))) (unless tree-sitter-parser (setq tree-sitter-parser (ts-make-parser)) (ts-set-language tree-sitter-parser tree-sitter-language)) - (unless tree-sitter-tree - (tree-sitter--do-parse)) (add-hook 'before-change-functions #'tree-sitter--before-change :append :local) (add-hook 'after-change-functions #'tree-sitter--after-change :append :local)) -(defun tree-sitter--disable () +(defun tree-sitter--teardown () "Disable `tree-sitter' in the current buffer." (remove-hook 'after-change-functions #'tree-sitter--after-change :local) (remove-hook 'before-change-functions #'tree-sitter--before-change :local) @@ -130,20 +161,103 @@ END is the end of the changed text." tree-sitter-parser nil tree-sitter-language nil)) +(defmacro tree-sitter--error-protect (body-form &rest error-forms) + "Execute BODY-FORM with ERROR-FORMS as cleanup code that is executed on error. +Unlike `unwind-protect', ERROR-FORMS is not executed if BODY-FORM does not +signal an error." + (declare (indent 1)) + `(let ((err t)) + (unwind-protect + (prog1 ,body-form + (setq err nil)) + (when err + ,@error-forms)))) + ;;;###autoload (define-minor-mode tree-sitter-mode "Minor mode that keeps an up-to-date syntax tree using incremental parsing." :init-value nil :lighter "tree-sitter" + :after-hook (when tree-sitter-mode + (unless tree-sitter-tree + (tree-sitter--do-parse) + (run-hooks 'tree-sitter-after-first-parse-hook))) (if tree-sitter-mode - (let ((err t)) - (unwind-protect - (prog1 (tree-sitter--enable) - (setq err nil)) - (when err - (setq tree-sitter-mode nil)))) - (tree-sitter--disable))) + (tree-sitter--error-protect + (progn (tree-sitter--setup) + (run-hooks 'tree-sitter-after-on-hook)) + (setq tree-sitter-mode nil) + (tree-sitter--teardown)) + (run-hooks 'tree-sitter--before-off-hook) + (tree-sitter--teardown))) + +;;;###autoload +(defun turn-on-tree-sitter-mode () + "Turn on `tree-sitter-mode' in a buffer, if possible." + ;; FIX: Ignore only known errors. Log the rest, at least. + (ignore-errors + (tree-sitter-mode 1))) + +;;;###autoload +(define-globalized-minor-mode global-tree-sitter-mode + tree-sitter-mode turn-on-tree-sitter-mode + :init-value nil + :group 'tree-sitter) + +(defun tree-sitter--funcall-form (func) + "Return an equivalent to (funcall FUNC) that can be used in a macro. +If FUNC is a quoted symbol, skip the `funcall' indirection." + (if (and (consp func) + (memq (car func) '(quote function)) + (symbolp (cadr func))) + `(,(cadr func)) + `(funcall ,func))) + +(defmacro tree-sitter--handle-dependent (mode setup-function teardown-function) + "Build the block of code that handles the enabling/disabling of a dependent mode. +Use this as the body of the `define-minor-mode' block that defines MODE. +When MODE is enabled, it automatically enables `tree-sitter-mode'. When MODE is +disabled, it does not disable `tree-sitter-mode', since the latter may have been +requested by end user, or other dependent modes. + +When `tree-sitter-mode' is disabled, it automatically disables MODE, which will +not function correctly otherwise. This happens before `tree-sitter-mode' cleans +up its own state. + +SETUP-FUNCTION is called when MODE is enabled, after MODE variable has been set +to t, and after `tree-sitter-mode' has already been enabled. However, it must +not assume that `tree-sitter-tree' is non-nil, since the first parse may not +happen yet. It should instead set up hooks to handle parse events. + +TEARDOWN-FUNCTION is called when MODE is disabled, after MODE variable has been +set to nil. It should clean up any state set up by MODE, and should not signal +any error. It is also called when SETUP-FUNCTION signals an error, to undo any +partial setup. + +Both SETUP-FUNCTION and TEARDOWN-FUNCTION should be idempotent." + (declare (indent 1)) + (let ((setup (tree-sitter--funcall-form setup-function)) + (teardown (tree-sitter--funcall-form teardown-function))) + `(if ,mode + (progn + (tree-sitter--error-protect + ;; Make sure `tree-sitter-mode' is enabled before MODE. + (progn + (unless tree-sitter-mode + (tree-sitter-mode)) + ,setup) + ;; Setup failed. Clean things up, leave no trace. + (setq ,mode nil) + ,teardown) + ;; Disable MODE when `tree-sitter-mode' is disabled. Quoting is + ;; important, because we don't want a variable-capturing closure. + (add-hook 'tree-sitter--before-off-hook + '(lambda () (,mode -1)) + nil :local)) + ,teardown))) + +;;;###autoload (defun tree-sitter-node-at-point () "Return the syntax node at point." (let ((root (ts-root-node tree-sitter-tree)) diff --git a/src/node.rs b/src/node.rs index c56f4575..a91dea20 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,4 +1,4 @@ -use emacs::{defun, Value, Result}; +use emacs::{defun, Value, Result, Env}; use tree_sitter::InputEdit; @@ -93,6 +93,15 @@ defun_node_props! { "count-named-children" fn named_child_count -> usize } +/// Return NODE's (START-BYTEPOS . END-BYTEPOS). +#[defun] +fn node_byte_range<'e>(env: &'e Env, node: &RNode) -> Result> { + let node = node.borrow(); + let beg: BytePos = node.start_byte().into(); + let end: BytePos = node.end_byte().into(); + env.cons(beg, end) +} + /// Return t if two nodes are identical. #[defun] fn node_eq(node1: &RNode, node2: &RNode) -> Result { diff --git a/src/parser.rs b/src/parser.rs index 65b0cfbc..70a97cf3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -36,7 +36,8 @@ fn language(parser: &Parser) -> Result> { /// /// INPUT-FUNCTION should take 3 parameters: (BYTEPOS LINE-NUMBER BYTE-COLUMN), and /// return a fragment of the source code, starting from the position identified by -/// either BYTEPOS or (LINE-NUMBER . BYTE-COLUMN). +/// either BYTEPOS or (LINE-NUMBER . BYTE-COLUMN). It should return an empty string +/// to signal the end of the source code. /// /// BYTEPOS is Emacs's 1-based byte position. /// @@ -125,9 +126,8 @@ fn _set_timeout_micros(parser: &mut Parser, max_duration: u64) -> Result<()> { /// /// This is useful for parsing multi-language documents. #[defun] -fn set_included_ranges(parser: &mut Parser, ranges: Value) -> Result<()> { - let ranges = Vector(ranges); - let len = ranges.size()?; +fn set_included_ranges(parser: &mut Parser, ranges: Vector) -> Result<()> { + let len = ranges.len(); let included = &mut Vec::with_capacity(len); for i in 0..len { let range: Range = ranges.get(i)?; diff --git a/src/query.rs b/src/query.rs index 1cceee74..f22ca23a 100644 --- a/src/query.rs +++ b/src/query.rs @@ -2,7 +2,7 @@ use std::cell::RefCell; use emacs::{defun, Result, Value, Vector, Error, Env, IntoLisp}; -use tree_sitter::{Query, QueryCursor, Node}; +use tree_sitter::{QueryCursor, Node}; use crate::types::*; @@ -21,10 +21,25 @@ fn vec_to_vector<'e, T: IntoLisp<'e>>(env: &'e Env, vec: Vec) -> Result Result { +fn _make_query(language: Language, source: String, tag_assigner: Value) -> Result { // TODO: Better error message - Ok(Query::new(language.into(), &source).unwrap()) + let mut raw = tree_sitter::Query::new(language.into(), &source).unwrap(); + let capture_names = raw.capture_names().to_vec(); + let mut capture_tags = vec![]; + for name in &capture_names { + let value = tag_assigner.call((name, ))?; + if !value.is_not_nil() { + raw.disable_capture(name); + } + capture_tags.push(value.make_global_ref()) + } + Ok(Query { raw, capture_tags }) } macro_rules! defun_query_methods { @@ -33,7 +48,7 @@ macro_rules! defun_query_methods { #[defun$((name = $lisp_name))?] $(#[$meta])* fn $name(query: &Query, $( $( $param : $type ),* )? ) -> Result<$rtype> { - Ok(query.$name( $( $( $param ),* )? )$(.$into())?) + Ok(query.raw.$name( $( $( $param ),* )? )$(.$into())?) } )* }; @@ -41,18 +56,18 @@ macro_rules! defun_query_methods { defun_query_methods! { /// Return the byte position where the NTH pattern starts in QUERY's source. - "query-start-byte-for-pattern" fn start_byte_for_pattern(nth: usize) -> BytePos; into + "-query-start-byte-for-pattern" fn start_byte_for_pattern(nth: usize) -> BytePos; into /// Return the number of patterns in QUERY. "query-count-patterns" fn pattern_count -> usize } /// Return the names of the captures used in QUERY. -#[defun(mod_in_name = true)] -fn capture_names(query: Value) -> Result { +#[defun] +fn _query_capture_names(query: Value) -> Result { let env = query.env; let query = query.into_ref::()?; - let names = query.capture_names(); + let names = query.raw.capture_names(); let vec = env.make_vector(names.len(), ())?; for (i, name) in names.iter().enumerate() { vec.set(i, name)?; @@ -60,13 +75,24 @@ fn capture_names(query: Value) -> Result { Ok(vec) } +/// Return all of QUERY's available capture tags. +/// See `ts-make-query' for an explanation of capture tagging. +#[defun(mod_in_name = true)] +fn capture_tags<'e>(env: &'e Env, query: &Query) -> Result> { + let symbols = env.make_vector(query.capture_tags.len(), ())?; + for (i, symbol) in query.capture_tags.iter().enumerate() { + symbols.set(i, symbol)?; + } + Ok(symbols) +} + /// Disable a certain capture within QUERY, by specifying its NAME. /// /// This prevents the capture from being returned in matches, and also avoids any /// resource usage associated with recording the capture. #[defun] fn _disable_capture(query: &mut Query, name: String) -> Result<()> { - query.disable_capture(&name); + query.raw.disable_capture(&name); Ok(()) } @@ -82,13 +108,13 @@ fn make_query_cursor() -> Result { } fn text_callback<'e>( - node: &'e RNode, - text_callback: Value<'e>, + text_function: Value<'e>, error: &'e RefCell>, ) -> impl FnMut(Node<'e>) -> String + 'e { move |child| { - let child = node.map(|_| child); - text_callback.call((child,)).and_then(|v| v.into_rust()).unwrap_or_else(|e| { + let beg: BytePos = child.start_byte().into(); + let end: BytePos = child.end_byte().into(); + text_function.call((beg, end)).and_then(|v| v.into_rust()).unwrap_or_else(|e| { error.borrow_mut().replace(e); "".to_owned() }) @@ -100,18 +126,17 @@ fn _query_cursor_matches<'e>( cursor: &mut QueryCursor, query: &Query, node: &RNode, - index_only: Option>, text_function: Value<'e>, ) -> Result> { + let raw = &query.raw; let error = RefCell::new(None); let matches = cursor.matches( - query, + raw, node.borrow().clone(), - text_callback(node, text_function, &error), + text_callback(text_function, &error), ); let mut vec = vec![]; let env = text_function.env; - let capture_names = query.capture_names(); for m in matches { if let Some(error) = error.borrow_mut().take() { return Err(error); @@ -119,11 +144,10 @@ fn _query_cursor_matches<'e>( let captures = env.make_vector(m.captures.len(), ())?; for (ci, c) in m.captures.iter().enumerate() { let captured_node = node.map(|_| c.node); - let capture = if index_only.is_some() { - env.cons(c.index, captured_node)? - } else { - env.cons(&capture_names[c.index as usize], captured_node)? - }; + let capture = env.cons( + &query.capture_tags[c.index as usize], + captured_node + )?; captures.set(ci, capture)?; } let _match = env.cons(m.pattern_index, captures)?; @@ -132,42 +156,87 @@ fn _query_cursor_matches<'e>( vec_to_vector(env, vec) } +// TODO: Make _query_cursor_captures accept a `capture_type` instead, e.g. node type, byte range. +#[defun] +fn _query_cursor_captures_1<'e>( + cursor: &mut QueryCursor, + query: Value<'e>, + node: &RNode, + text_function: Value<'e>, +) -> Result> { + let query = query.into_rust::<&RefCell>()?.borrow(); + let raw = &query.raw; + let error = RefCell::new(None); + let captures = cursor.captures( + raw, + node.borrow().clone(), + text_callback(text_function, &error), + ); + let mut vec = vec![]; + let env = text_function.env; + for (m, capture_index) in captures { + if let Some(error) = error.borrow_mut().take() { + return Err(error); + } + let c = m.captures[capture_index]; + let beg: BytePos = c.node.start_byte().into(); + let end: BytePos = c.node.end_byte().into(); + let capture = env.cons( + &query.capture_tags[c.index as usize], + env.cons(beg, end)?, + )?; + vec.push((m.pattern_index, capture)); + } + // Prioritize captures from earlier patterns. + vec.sort_unstable_by_key(|(i, _)| *i); + let vector = env.make_vector(vec.len(), ())?; + for (i, (_, v)) in vec.into_iter().enumerate() { + vector.set(i, v)?; + } + Ok(vector) +} + #[defun] fn _query_cursor_captures<'e>( cursor: &mut QueryCursor, - query: &Query, + query: Value<'e>, node: &RNode, - index_only: Option>, text_function: Value<'e>, ) -> Result> { + let query = query.into_rust::<&RefCell>()?.borrow(); + let raw = &query.raw; let error = RefCell::new(None); let captures = cursor.captures( - query, + raw, node.borrow().clone(), - text_callback(node, text_function, &error), + text_callback(text_function, &error), ); let mut vec = vec![]; let env = text_function.env; - let capture_names = query.capture_names(); for (m, capture_index) in captures { if let Some(error) = error.borrow_mut().take() { return Err(error); } let c = m.captures[capture_index]; let captured_node = node.map(|_| c.node); - let capture = if index_only.is_some() { - env.cons(c.index, captured_node)? - } else { - env.cons(&capture_names[c.index as usize], captured_node)? - }; + let capture = env.cons( + &query.capture_tags[c.index as usize], + captured_node + )?; vec.push(capture); } - vec_to_vector(env, vec) + + // XXX + let vector = env.make_vector(vec.len(), ())?; + for (i, v) in vec.into_iter().enumerate() { + vector.set(i, v)?; + } + Ok(vector) } /// Limit CURSOR's query executions to the range of byte positions, from BEG to END. #[defun] -fn set_byte_range(cursor: &mut QueryCursor, beg: BytePos, end: BytePos) -> Result<()> { +fn _query_cursor_set_byte_range(cursor: &mut QueryCursor, beg: BytePos, end: BytePos) -> Result<()> { cursor.set_byte_range(beg.into(), end.into()); Ok(()) } @@ -177,7 +246,7 @@ fn set_byte_range(cursor: &mut QueryCursor, beg: BytePos, end: BytePos) -> Resul /// A "point" in this context is a (LINE-NUMBER . BYTE-COLUMN) pair. See /// `ts-parse-chunks' for a more detailed explanation. #[defun] -fn set_point_range(cursor: &mut QueryCursor, beg: Point, end: Point) -> Result<()> { +fn _query_cursor_set_point_range(cursor: &mut QueryCursor, beg: Point, end: Point) -> Result<()> { cursor.set_point_range(beg.into(), end.into()); Ok(()) } diff --git a/src/types.rs b/src/types.rs index c600e45f..4a786297 100644 --- a/src/types.rs +++ b/src/types.rs @@ -7,9 +7,9 @@ use std::{ marker::PhantomData, }; -use emacs::{defun, Env, Value, Result, IntoLisp, FromLisp, Vector, ErrorKind}; +use emacs::{defun, Env, Value, Result, IntoLisp, FromLisp, Vector, ErrorKind, GlobalRef}; -use tree_sitter::{Tree, Node, TreeCursor, Parser, Query, QueryCursor}; +use tree_sitter::{Tree, Node, TreeCursor, Parser, QueryCursor}; pub fn shared(t: T) -> Shared { Rc::new(RefCell::new(t)) @@ -121,7 +121,7 @@ impl IntoLisp<'_> for Range { let inner = self.0; let start_byte_pos: BytePos = inner.start_byte.into(); let end_byte_pos: BytePos = inner.end_byte.into(); - env.call("vector", ( + env.vector(( start_byte_pos, end_byte_pos, Point(inner.start_point), @@ -132,7 +132,7 @@ impl IntoLisp<'_> for Range { impl FromLisp<'_> for Range { fn from_lisp(value: Value) -> Result { - let vector = Vector(value); + let vector: Vector = value.into_rust()?; let start_byte = vector.get::(0)?.into(); let end_byte = vector.get::(1)?.into(); let start_point = vector.get::(2)?.into(); @@ -347,6 +347,12 @@ impl RCursor { } } +// ------------------------------------------------------------------------------------------------- +pub struct Query { + pub(crate) raw: tree_sitter::Query, + pub(crate) capture_tags: Vec, +} + // ------------------------------------------------------------------------------------------------- pub enum Either<'e, L, R> where L: FromLisp<'e>, R: FromLisp<'e> {