Add snippet crate

Created by Max Brunsfeld, Nathan Sobo, and Antonio Scandurra

Co-Authored-By: Nathan Sobo <nathan@zed.dev>
Co-Authored-By: Antonio Scandurra <me@as-cii.com>

Change summary

Cargo.lock                                      |  18 
crates/snippet/Cargo.toml                       |  13 
crates/snippet/grammar/Cargo.toml               |  26 
crates/snippet/grammar/binding.gyp              |  19 
crates/snippet/grammar/bindings/node/binding.cc |  28 
crates/snippet/grammar/bindings/node/index.js   |  19 
crates/snippet/grammar/bindings/rust/build.rs   |  40 +
crates/snippet/grammar/bindings/rust/lib.rs     |  52 +
crates/snippet/grammar/grammar.js               |  26 
crates/snippet/grammar/package.json             |  19 
crates/snippet/grammar/src/grammar.json         | 133 ++++
crates/snippet/grammar/src/node-types.json      |  84 ++
crates/snippet/grammar/src/parser.c             | 545 +++++++++++++++++++
crates/snippet/grammar/src/tree_sitter/parser.h | 224 +++++++
crates/snippet/src/snippet.rs                   | 139 ++++
15 files changed, 1,385 insertions(+)

Detailed changes

Cargo.lock 🔗

@@ -4418,6 +4418,16 @@ dependencies = [
  "pin-project-lite 0.1.12",
 ]
 
+[[package]]
+name = "snippet"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "smallvec",
+ "tree-sitter",
+ "tree-sitter-snippet",
+]
+
 [[package]]
 name = "socket2"
 version = "0.3.19"
@@ -5203,6 +5213,14 @@ dependencies = [
  "tree-sitter",
 ]
 
+[[package]]
+name = "tree-sitter-snippet"
+version = "0.0.1"
+dependencies = [
+ "cc",
+ "tree-sitter",
+]
+
 [[package]]
 name = "ttf-parser"
 version = "0.9.0"

crates/snippet/Cargo.toml 🔗

@@ -0,0 +1,13 @@
+[package]
+name = "snippet"
+version = "0.1.0"
+edition = "2021"
+
+[lib]
+path = "src/snippet.rs"
+
+[dependencies]
+anyhow = "1.0"
+smallvec = { version = "1.6", features = ["union"] }
+tree-sitter = "0.20"
+tree-sitter-snippet = { path = "./grammar" }

crates/snippet/grammar/Cargo.toml 🔗

@@ -0,0 +1,26 @@
+[package]
+name = "tree-sitter-snippet"
+description = "snippet grammar for the tree-sitter parsing library"
+version = "0.0.1"
+keywords = ["incremental", "parsing", "snippet"]
+categories = ["parsing", "text-editors"]
+repository = "https://github.com/tree-sitter/tree-sitter-snippet"
+edition = "2018"
+license = "MIT"
+
+build = "bindings/rust/build.rs"
+include = [
+  "bindings/rust/*",
+  "grammar.js",
+  "queries/*",
+  "src/*",
+]
+
+[lib]
+path = "bindings/rust/lib.rs"
+
+[dependencies]
+tree-sitter = "~0.20"
+
+[build-dependencies]
+cc = "1.0"

crates/snippet/grammar/binding.gyp 🔗

@@ -0,0 +1,19 @@
+{
+  "targets": [
+    {
+      "target_name": "tree_sitter_snippet_binding",
+      "include_dirs": [
+        "<!(node -e \"require('nan')\")",
+        "src"
+      ],
+      "sources": [
+        "bindings/node/binding.cc",
+        "src/parser.c",
+        # If your language uses an external scanner, add it here.
+      ],
+      "cflags_c": [
+        "-std=c99",
+      ]
+    }
+  ]
+}

crates/snippet/grammar/bindings/node/binding.cc 🔗

@@ -0,0 +1,28 @@
+#include "tree_sitter/parser.h"
+#include <node.h>
+#include "nan.h"
+
+using namespace v8;
+
+extern "C" TSLanguage * tree_sitter_snippet();
+
+namespace {
+
+// Constructor callback passed to the `Language` function template below.
+// Its body is empty.
+NAN_METHOD(New) {}
+
+// Module initializer (registered through NODE_MODULE below).
+// Builds a single `Language` object that wraps the TSLanguage pointer returned
+// by tree_sitter_snippet() and installs it as `module.exports`.
+// The `exports` parameter is not used.
+void Init(Local<Object> exports, Local<Object> module) {
+  Local<FunctionTemplate> tpl = Nan::New<FunctionTemplate>(New);
+  tpl->SetClassName(Nan::New("Language").ToLocalChecked());
+  // Reserve one internal field; it stores the language pointer (set below at index 0).
+  tpl->InstanceTemplate()->SetInternalFieldCount(1);
+
+  Local<Function> constructor = Nan::GetFunction(tpl).ToLocalChecked();
+  Local<Object> instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked();
+  Nan::SetInternalFieldPointer(instance, 0, tree_sitter_snippet());
+
+  // Expose the grammar name as `instance.name`, then replace `module.exports` with the instance.
+  Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("snippet").ToLocalChecked());
+  Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance);
+}
+
+NODE_MODULE(tree_sitter_snippet_binding, Init)
+
+}  // namespace

crates/snippet/grammar/bindings/node/index.js 🔗

@@ -0,0 +1,19 @@
+// Load the native binding. The Release build is tried first; the Debug build is
+// used only when the Release module cannot be found. Any error other than
+// MODULE_NOT_FOUND is rethrown unchanged.
+try {
+  module.exports = require("../../build/Release/tree_sitter_snippet_binding");
+} catch (error1) {
+  if (error1.code !== 'MODULE_NOT_FOUND') {
+    throw error1;
+  }
+  try {
+    module.exports = require("../../build/Debug/tree_sitter_snippet_binding");
+  } catch (error2) {
+    if (error2.code !== 'MODULE_NOT_FOUND') {
+      throw error2;
+    }
+    // Neither build exists: report the original (Release) lookup failure.
+    throw error1
+  }
+}
+
+// Attach the static node-type description when it can be loaded; a failure
+// here is ignored and leaves `nodeTypeInfo` unset.
+try {
+  module.exports.nodeTypeInfo = require("../../src/node-types.json");
+} catch (_) {}

crates/snippet/grammar/bindings/rust/build.rs 🔗

@@ -0,0 +1,40 @@
+/// Build script for the `tree-sitter-snippet` crate: compiles the parser source
+/// `src/parser.c` with the `cc` crate under the name `parser`.
+fn main() {
+    let src_dir = std::path::Path::new("src");
+
+    let mut c_config = cc::Build::new();
+    // Put `src/` on the include path so that `#include <tree_sitter/parser.h>`
+    // in parser.c resolves to `src/tree_sitter/parser.h`.
+    c_config.include(&src_dir);
+    // Each warning suppression is added only if the compiler accepts the flag.
+    c_config
+        .flag_if_supported("-Wno-unused-parameter")
+        .flag_if_supported("-Wno-unused-but-set-variable")
+        .flag_if_supported("-Wno-trigraphs");
+    let parser_path = src_dir.join("parser.c");
+    c_config.file(&parser_path);
+
+    // If your language uses an external scanner written in C,
+    // then include this block of code:
+
+    /*
+    let scanner_path = src_dir.join("scanner.c");
+    c_config.file(&scanner_path);
+    println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
+    */
+
+    c_config.compile("parser");
+    // Ask Cargo to re-run this script when parser.c changes. `to_str()` returns
+    // `None` only for non-UTF-8 paths; this path is built from string literals.
+    println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
+
+    // If your language uses an external scanner written in C++,
+    // then include this block of code:
+
+    /*
+    let mut cpp_config = cc::Build::new();
+    cpp_config.cpp(true);
+    cpp_config.include(&src_dir);
+    cpp_config
+        .flag_if_supported("-Wno-unused-parameter")
+        .flag_if_supported("-Wno-unused-but-set-variable");
+    let scanner_path = src_dir.join("scanner.cc");
+    cpp_config.file(&scanner_path);
+    cpp_config.compile("scanner");
+    println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
+    */
+}

crates/snippet/grammar/bindings/rust/lib.rs 🔗

@@ -0,0 +1,52 @@
+//! This crate provides snippet language support for the [tree-sitter][] parsing library.
+//!
+//! Typically, you will use the [language][language func] function to add this language to a
+//! tree-sitter [Parser][], and then use the parser to parse some code:
+//!
+//! ```
+//! let code = "";
+//! let mut parser = tree_sitter::Parser::new();
+//! parser.set_language(tree_sitter_snippet::language()).expect("Error loading snippet grammar");
+//! let tree = parser.parse(code, None).unwrap();
+//! ```
+//!
+//! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
+//! [language func]: fn.language.html
+//! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html
+//! [tree-sitter]: https://tree-sitter.github.io/
+
+use tree_sitter::Language;
+
+// Entry point of the snippet parser. The symbol is defined in `src/parser.c`,
+// which `bindings/rust/build.rs` compiles for this crate.
+extern "C" {
+    fn tree_sitter_snippet() -> Language;
+}
+
+/// Get the tree-sitter [Language][] for this grammar.
+///
+/// [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html
+pub fn language() -> Language {
+    // SAFETY: `tree_sitter_snippet` takes no arguments and, in `src/parser.c`,
+    // returns the address of a `static const TSLanguage`, so the call has no
+    // preconditions and the pointee is never freed.
+    // NOTE(review): this assumes `tree_sitter::Language` is ABI-compatible with
+    // the `const TSLanguage *` the C function returns — confirm for the
+    // `tree-sitter` version in use.
+    unsafe { tree_sitter_snippet() }
+}
+
+/// The content of the [`node-types.json`][] file for this grammar.
+///
+/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
+pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json");
+
+// Uncomment these to include any queries that this grammar contains
+
+// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm");
+// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm");
+// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm");
+// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm");
+
+#[cfg(test)]
+mod tests {
+    /// Smoke test: the compiled grammar can be installed on a fresh
+    /// `tree_sitter::Parser`. Panics if `set_language` returns an error.
+    #[test]
+    fn test_can_load_grammar() {
+        let mut parser = tree_sitter::Parser::new();
+        parser
+            .set_language(super::language())
+            .expect("Error loading snippet language");
+    }
+}

crates/snippet/grammar/grammar.js 🔗

@@ -0,0 +1,26 @@
+// Tree-sitter grammar for snippet strings: text interleaved with numbered
+// tabstops (`$1`, `${1}`) and placeholders (`${1:...}`).
+module.exports = grammar({
+    name: 'snippet',
+
+    rules: {
+        // A snippet is one or more tabstops, placeholders, or text runs.
+        snippet: $ => repeat1($._any),
+
+        _any: $ => choice(
+            $.tabstop,
+            $.placeholder,
+            $.text
+        ),
+
+        // `$<int>` or `${<int>}`.
+        tabstop: $ => choice(
+            seq('$', $.int),
+            seq('${', $.int, '}'),
+        ),
+
+        // `${<int>:<snippet>}` — the body is itself a snippet, so placeholders can nest.
+        placeholder: $ => seq('${', $.int, ':', $.snippet, '}'),
+
+        // One or more decimal digits.
+        int: $ => /[0-9]+/,
+
+        text: $ => choice($._raw_curly, $._plain_text),
+        // A run of `}` characters treated as text. Declared with token precedence -1,
+        // presumably so the `}` that closes a tabstop/placeholder wins when both
+        // could match — confirm against the generated lexer.
+        _raw_curly: $ => token(prec(-1, /}+/)),
+        // One or more of: any character other than `$` or `}`, or a backslash
+        // followed by `$`, `\`, or `}`.
+        _plain_text: $ => /([^$}]|\\[$\\}])+/,
+    }
+})

crates/snippet/grammar/package.json 🔗

@@ -0,0 +1,19 @@
+{
+  "name": "tree-sitter-snippet",
+  "version": "0.0.1",
+  "description": "snippet grammar for tree-sitter",
+  "main": "bindings/node",
+  "keywords": [
+    "parsing",
+    "incremental"
+  ],
+  "dependencies": {
+    "nan": "^2.12.1"
+  },
+  "devDependencies": {
+    "tree-sitter-cli": "^0.20.4"
+  },
+  "scripts": {
+    "test": "tree-sitter test"
+  }
+}

crates/snippet/grammar/src/grammar.json 🔗

@@ -0,0 +1,133 @@
+{
+  "name": "snippet",
+  "rules": {
+    "snippet": {
+      "type": "REPEAT1",
+      "content": {
+        "type": "SYMBOL",
+        "name": "_any"
+      }
+    },
+    "_any": {
+      "type": "CHOICE",
+      "members": [
+        {
+          "type": "SYMBOL",
+          "name": "tabstop"
+        },
+        {
+          "type": "SYMBOL",
+          "name": "placeholder"
+        },
+        {
+          "type": "SYMBOL",
+          "name": "text"
+        }
+      ]
+    },
+    "tabstop": {
+      "type": "CHOICE",
+      "members": [
+        {
+          "type": "SEQ",
+          "members": [
+            {
+              "type": "STRING",
+              "value": "$"
+            },
+            {
+              "type": "SYMBOL",
+              "name": "int"
+            }
+          ]
+        },
+        {
+          "type": "SEQ",
+          "members": [
+            {
+              "type": "STRING",
+              "value": "${"
+            },
+            {
+              "type": "SYMBOL",
+              "name": "int"
+            },
+            {
+              "type": "STRING",
+              "value": "}"
+            }
+          ]
+        }
+      ]
+    },
+    "placeholder": {
+      "type": "SEQ",
+      "members": [
+        {
+          "type": "STRING",
+          "value": "${"
+        },
+        {
+          "type": "SYMBOL",
+          "name": "int"
+        },
+        {
+          "type": "STRING",
+          "value": ":"
+        },
+        {
+          "type": "SYMBOL",
+          "name": "snippet"
+        },
+        {
+          "type": "STRING",
+          "value": "}"
+        }
+      ]
+    },
+    "int": {
+      "type": "PATTERN",
+      "value": "[0-9]+"
+    },
+    "text": {
+      "type": "CHOICE",
+      "members": [
+        {
+          "type": "SYMBOL",
+          "name": "_raw_curly"
+        },
+        {
+          "type": "SYMBOL",
+          "name": "_plain_text"
+        }
+      ]
+    },
+    "_raw_curly": {
+      "type": "TOKEN",
+      "content": {
+        "type": "PREC",
+        "value": -1,
+        "content": {
+          "type": "PATTERN",
+          "value": "}+"
+        }
+      }
+    },
+    "_plain_text": {
+      "type": "PATTERN",
+      "value": "([^$}]|\\\\[$\\\\}])+"
+    }
+  },
+  "extras": [
+    {
+      "type": "PATTERN",
+      "value": "\\s"
+    }
+  ],
+  "conflicts": [],
+  "precedences": [],
+  "externals": [],
+  "inline": [],
+  "supertypes": []
+}
+

crates/snippet/grammar/src/node-types.json 🔗

@@ -0,0 +1,84 @@
+[
+  {
+    "type": "placeholder",
+    "named": true,
+    "fields": {},
+    "children": {
+      "multiple": true,
+      "required": true,
+      "types": [
+        {
+          "type": "int",
+          "named": true
+        },
+        {
+          "type": "snippet",
+          "named": true
+        }
+      ]
+    }
+  },
+  {
+    "type": "snippet",
+    "named": true,
+    "fields": {},
+    "children": {
+      "multiple": true,
+      "required": true,
+      "types": [
+        {
+          "type": "placeholder",
+          "named": true
+        },
+        {
+          "type": "tabstop",
+          "named": true
+        },
+        {
+          "type": "text",
+          "named": true
+        }
+      ]
+    }
+  },
+  {
+    "type": "tabstop",
+    "named": true,
+    "fields": {},
+    "children": {
+      "multiple": false,
+      "required": true,
+      "types": [
+        {
+          "type": "int",
+          "named": true
+        }
+      ]
+    }
+  },
+  {
+    "type": "text",
+    "named": true,
+    "fields": {}
+  },
+  {
+    "type": "$",
+    "named": false
+  },
+  {
+    "type": "${",
+    "named": false
+  },
+  {
+    "type": ":",
+    "named": false
+  },
+  {
+    "type": "int",
+    "named": true
+  },
+  {
+    "type": "}",
+    "named": false
+  }
+]

crates/snippet/grammar/src/parser.c 🔗

@@ -0,0 +1,545 @@
+#include <tree_sitter/parser.h>
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
+
+#define LANGUAGE_VERSION 13
+#define STATE_COUNT 25
+#define LARGE_STATE_COUNT 8
+#define SYMBOL_COUNT 14
+#define ALIAS_COUNT 0
+#define TOKEN_COUNT 8
+#define EXTERNAL_TOKEN_COUNT 0
+#define FIELD_COUNT 0
+#define MAX_ALIAS_SEQUENCE_LENGTH 5
+#define PRODUCTION_ID_COUNT 1
+
+enum {
+  anon_sym_DOLLAR = 1,
+  anon_sym_DOLLAR_LBRACE = 2,
+  anon_sym_RBRACE = 3,
+  anon_sym_COLON = 4,
+  sym_int = 5,
+  sym__raw_curly = 6,
+  sym__plain_text = 7,
+  sym_snippet = 8,
+  sym__any = 9,
+  sym_tabstop = 10,
+  sym_placeholder = 11,
+  sym_text = 12,
+  aux_sym_snippet_repeat1 = 13,
+};
+
+static const char * const ts_symbol_names[] = {
+  [ts_builtin_sym_end] = "end",
+  [anon_sym_DOLLAR] = "$",
+  [anon_sym_DOLLAR_LBRACE] = "${",
+  [anon_sym_RBRACE] = "}",
+  [anon_sym_COLON] = ":",
+  [sym_int] = "int",
+  [sym__raw_curly] = "_raw_curly",
+  [sym__plain_text] = "_plain_text",
+  [sym_snippet] = "snippet",
+  [sym__any] = "_any",
+  [sym_tabstop] = "tabstop",
+  [sym_placeholder] = "placeholder",
+  [sym_text] = "text",
+  [aux_sym_snippet_repeat1] = "snippet_repeat1",
+};
+
+static const TSSymbol ts_symbol_map[] = {
+  [ts_builtin_sym_end] = ts_builtin_sym_end,
+  [anon_sym_DOLLAR] = anon_sym_DOLLAR,
+  [anon_sym_DOLLAR_LBRACE] = anon_sym_DOLLAR_LBRACE,
+  [anon_sym_RBRACE] = anon_sym_RBRACE,
+  [anon_sym_COLON] = anon_sym_COLON,
+  [sym_int] = sym_int,
+  [sym__raw_curly] = sym__raw_curly,
+  [sym__plain_text] = sym__plain_text,
+  [sym_snippet] = sym_snippet,
+  [sym__any] = sym__any,
+  [sym_tabstop] = sym_tabstop,
+  [sym_placeholder] = sym_placeholder,
+  [sym_text] = sym_text,
+  [aux_sym_snippet_repeat1] = aux_sym_snippet_repeat1,
+};
+
+static const TSSymbolMetadata ts_symbol_metadata[] = {
+  [ts_builtin_sym_end] = {
+    .visible = false,
+    .named = true,
+  },
+  [anon_sym_DOLLAR] = {
+    .visible = true,
+    .named = false,
+  },
+  [anon_sym_DOLLAR_LBRACE] = {
+    .visible = true,
+    .named = false,
+  },
+  [anon_sym_RBRACE] = {
+    .visible = true,
+    .named = false,
+  },
+  [anon_sym_COLON] = {
+    .visible = true,
+    .named = false,
+  },
+  [sym_int] = {
+    .visible = true,
+    .named = true,
+  },
+  [sym__raw_curly] = {
+    .visible = false,
+    .named = true,
+  },
+  [sym__plain_text] = {
+    .visible = false,
+    .named = true,
+  },
+  [sym_snippet] = {
+    .visible = true,
+    .named = true,
+  },
+  [sym__any] = {
+    .visible = false,
+    .named = true,
+  },
+  [sym_tabstop] = {
+    .visible = true,
+    .named = true,
+  },
+  [sym_placeholder] = {
+    .visible = true,
+    .named = true,
+  },
+  [sym_text] = {
+    .visible = true,
+    .named = true,
+  },
+  [aux_sym_snippet_repeat1] = {
+    .visible = false,
+    .named = false,
+  },
+};
+
+static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {
+  [0] = {0},
+};
+
+static const uint16_t ts_non_terminal_alias_map[] = {
+  0,
+};
+
+/*
+ * Lexer for the snippet grammar, written as a state machine keyed on `state`.
+ * NOTE(review): this file appears to be tree-sitter generator output; prefer
+ * regenerating it from grammar.js over editing it by hand.
+ *
+ * Start states:
+ *   0 - skips tab/newline/CR/space; recognizes `$`, `${` (via state 4), `:`,
+ *       `}`, integers, and end of input.
+ *   1 - text-capable: whitespace and other characters begin sym__plain_text;
+ *       a `}` is lexed as anon_sym_RBRACE (state 6); end of input is not accepted.
+ *   2 - like 1, except a `}` begins sym__raw_curly (state 9) and end of input
+ *       is accepted (state 3).
+ * States 3-12 are the accepting states for the individual tokens.
+ *
+ * Returns false for a state number that has no case below.
+ *
+ * NOTE(review): START_LEXER, ADVANCE, SKIP, ACCEPT_TOKEN and END_STATE are
+ * macros, presumably provided by tree_sitter/parser.h; their definitions are
+ * not visible here.
+ */
+static bool ts_lex(TSLexer *lexer, TSStateId state) {
+  START_LEXER();
+  eof = lexer->eof(lexer);
+  switch (state) {
+    case 0:
+      if (eof) ADVANCE(3);
+      if (lookahead == '$') ADVANCE(4);
+      if (lookahead == ':') ADVANCE(7);
+      if (lookahead == '}') ADVANCE(6);
+      if (lookahead == '\t' ||
+          lookahead == '\n' ||
+          lookahead == '\r' ||
+          lookahead == ' ') SKIP(0)
+      if (('0' <= lookahead && lookahead <= '9')) ADVANCE(8);
+      END_STATE();
+    case 1:
+      if (lookahead == '$') ADVANCE(4);
+      if (lookahead == '\\') ADVANCE(12);
+      if (lookahead == '}') ADVANCE(6);
+      if (lookahead == '\t' ||
+          lookahead == '\n' ||
+          lookahead == '\r' ||
+          lookahead == ' ') ADVANCE(10);
+      if (lookahead != 0) ADVANCE(11);
+      END_STATE();
+    case 2:
+      if (eof) ADVANCE(3);
+      if (lookahead == '$') ADVANCE(4);
+      if (lookahead == '\\') ADVANCE(12);
+      if (lookahead == '}') ADVANCE(9);
+      if (lookahead == '\t' ||
+          lookahead == '\n' ||
+          lookahead == '\r' ||
+          lookahead == ' ') ADVANCE(10);
+      if (lookahead != 0) ADVANCE(11);
+      END_STATE();
+    case 3:
+      ACCEPT_TOKEN(ts_builtin_sym_end);
+      END_STATE();
+    case 4:
+      ACCEPT_TOKEN(anon_sym_DOLLAR);
+      if (lookahead == '{') ADVANCE(5);
+      END_STATE();
+    case 5:
+      ACCEPT_TOKEN(anon_sym_DOLLAR_LBRACE);
+      END_STATE();
+    case 6:
+      ACCEPT_TOKEN(anon_sym_RBRACE);
+      END_STATE();
+    case 7:
+      ACCEPT_TOKEN(anon_sym_COLON);
+      END_STATE();
+    case 8:
+      ACCEPT_TOKEN(sym_int);
+      if (('0' <= lookahead && lookahead <= '9')) ADVANCE(8);
+      END_STATE();
+    case 9:
+      ACCEPT_TOKEN(sym__raw_curly);
+      if (lookahead == '}') ADVANCE(9);
+      END_STATE();
+    case 10:
+      ACCEPT_TOKEN(sym__plain_text);
+      if (lookahead == '\\') ADVANCE(12);
+      if (lookahead == '\t' ||
+          lookahead == '\n' ||
+          lookahead == '\r' ||
+          lookahead == ' ') ADVANCE(10);
+      if (lookahead != 0 &&
+          lookahead != '$' &&
+          lookahead != '}') ADVANCE(11);
+      END_STATE();
+    case 11:
+      ACCEPT_TOKEN(sym__plain_text);
+      if (lookahead == '\\') ADVANCE(12);
+      if (lookahead != 0 &&
+          lookahead != '$' &&
+          lookahead != '}') ADVANCE(11);
+      END_STATE();
+    case 12:
+      ACCEPT_TOKEN(sym__plain_text);
+      if (lookahead == '\\') ADVANCE(12);
+      if (lookahead != 0) ADVANCE(11);
+      END_STATE();
+    default:
+      return false;
+  }
+}
+
+static const TSLexMode ts_lex_modes[STATE_COUNT] = {
+  [0] = {.lex_state = 0},
+  [1] = {.lex_state = 2},
+  [2] = {.lex_state = 2},
+  [3] = {.lex_state = 2},
+  [4] = {.lex_state = 1},
+  [5] = {.lex_state = 1},
+  [6] = {.lex_state = 2},
+  [7] = {.lex_state = 2},
+  [8] = {.lex_state = 1},
+  [9] = {.lex_state = 2},
+  [10] = {.lex_state = 2},
+  [11] = {.lex_state = 2},
+  [12] = {.lex_state = 1},
+  [13] = {.lex_state = 1},
+  [14] = {.lex_state = 2},
+  [15] = {.lex_state = 1},
+  [16] = {.lex_state = 0},
+  [17] = {.lex_state = 0},
+  [18] = {.lex_state = 0},
+  [19] = {.lex_state = 0},
+  [20] = {.lex_state = 0},
+  [21] = {.lex_state = 0},
+  [22] = {.lex_state = 0},
+  [23] = {.lex_state = 0},
+  [24] = {.lex_state = 0},
+};
+
+static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
+  [0] = {
+    [ts_builtin_sym_end] = ACTIONS(1),
+    [anon_sym_DOLLAR] = ACTIONS(1),
+    [anon_sym_DOLLAR_LBRACE] = ACTIONS(1),
+    [anon_sym_RBRACE] = ACTIONS(1),
+    [anon_sym_COLON] = ACTIONS(1),
+    [sym_int] = ACTIONS(1),
+    [sym__raw_curly] = ACTIONS(1),
+  },
+  [1] = {
+    [sym_snippet] = STATE(20),
+    [sym__any] = STATE(6),
+    [sym_tabstop] = STATE(6),
+    [sym_placeholder] = STATE(6),
+    [sym_text] = STATE(6),
+    [aux_sym_snippet_repeat1] = STATE(6),
+    [anon_sym_DOLLAR] = ACTIONS(3),
+    [anon_sym_DOLLAR_LBRACE] = ACTIONS(5),
+    [sym__raw_curly] = ACTIONS(7),
+    [sym__plain_text] = ACTIONS(9),
+  },
+  [2] = {
+    [sym_snippet] = STATE(18),
+    [sym__any] = STATE(5),
+    [sym_tabstop] = STATE(5),
+    [sym_placeholder] = STATE(5),
+    [sym_text] = STATE(5),
+    [aux_sym_snippet_repeat1] = STATE(5),
+    [anon_sym_DOLLAR] = ACTIONS(11),
+    [anon_sym_DOLLAR_LBRACE] = ACTIONS(13),
+    [sym__raw_curly] = ACTIONS(15),
+    [sym__plain_text] = ACTIONS(17),
+  },
+  [3] = {
+    [sym_snippet] = STATE(22),
+    [sym__any] = STATE(5),
+    [sym_tabstop] = STATE(5),
+    [sym_placeholder] = STATE(5),
+    [sym_text] = STATE(5),
+    [aux_sym_snippet_repeat1] = STATE(5),
+    [anon_sym_DOLLAR] = ACTIONS(11),
+    [anon_sym_DOLLAR_LBRACE] = ACTIONS(13),
+    [sym__raw_curly] = ACTIONS(15),
+    [sym__plain_text] = ACTIONS(17),
+  },
+  [4] = {
+    [sym__any] = STATE(4),
+    [sym_tabstop] = STATE(4),
+    [sym_placeholder] = STATE(4),
+    [sym_text] = STATE(4),
+    [aux_sym_snippet_repeat1] = STATE(4),
+    [anon_sym_DOLLAR] = ACTIONS(19),
+    [anon_sym_DOLLAR_LBRACE] = ACTIONS(22),
+    [anon_sym_RBRACE] = ACTIONS(25),
+    [sym__raw_curly] = ACTIONS(27),
+    [sym__plain_text] = ACTIONS(30),
+  },
+  [5] = {
+    [sym__any] = STATE(4),
+    [sym_tabstop] = STATE(4),
+    [sym_placeholder] = STATE(4),
+    [sym_text] = STATE(4),
+    [aux_sym_snippet_repeat1] = STATE(4),
+    [anon_sym_DOLLAR] = ACTIONS(11),
+    [anon_sym_DOLLAR_LBRACE] = ACTIONS(13),
+    [anon_sym_RBRACE] = ACTIONS(33),
+    [sym__raw_curly] = ACTIONS(15),
+    [sym__plain_text] = ACTIONS(17),
+  },
+  [6] = {
+    [sym__any] = STATE(7),
+    [sym_tabstop] = STATE(7),
+    [sym_placeholder] = STATE(7),
+    [sym_text] = STATE(7),
+    [aux_sym_snippet_repeat1] = STATE(7),
+    [ts_builtin_sym_end] = ACTIONS(35),
+    [anon_sym_DOLLAR] = ACTIONS(3),
+    [anon_sym_DOLLAR_LBRACE] = ACTIONS(5),
+    [sym__raw_curly] = ACTIONS(7),
+    [sym__plain_text] = ACTIONS(9),
+  },
+  [7] = {
+    [sym__any] = STATE(7),
+    [sym_tabstop] = STATE(7),
+    [sym_placeholder] = STATE(7),
+    [sym_text] = STATE(7),
+    [aux_sym_snippet_repeat1] = STATE(7),
+    [ts_builtin_sym_end] = ACTIONS(37),
+    [anon_sym_DOLLAR] = ACTIONS(39),
+    [anon_sym_DOLLAR_LBRACE] = ACTIONS(42),
+    [sym__raw_curly] = ACTIONS(45),
+    [sym__plain_text] = ACTIONS(48),
+  },
+};
+
+static const uint16_t ts_small_parse_table[] = {
+  [0] = 2,
+    ACTIONS(53), 1,
+      sym__plain_text,
+    ACTIONS(51), 4,
+      anon_sym_DOLLAR,
+      anon_sym_DOLLAR_LBRACE,
+      anon_sym_RBRACE,
+      sym__raw_curly,
+  [10] = 2,
+    ACTIONS(55), 2,
+      ts_builtin_sym_end,
+      sym__plain_text,
+    ACTIONS(57), 3,
+      anon_sym_DOLLAR,
+      anon_sym_DOLLAR_LBRACE,
+      sym__raw_curly,
+  [20] = 2,
+    ACTIONS(53), 2,
+      ts_builtin_sym_end,
+      sym__plain_text,
+    ACTIONS(51), 3,
+      anon_sym_DOLLAR,
+      anon_sym_DOLLAR_LBRACE,
+      sym__raw_curly,
+  [30] = 2,
+    ACTIONS(59), 2,
+      ts_builtin_sym_end,
+      sym__plain_text,
+    ACTIONS(61), 3,
+      anon_sym_DOLLAR,
+      anon_sym_DOLLAR_LBRACE,
+      sym__raw_curly,
+  [40] = 2,
+    ACTIONS(65), 1,
+      sym__plain_text,
+    ACTIONS(63), 4,
+      anon_sym_DOLLAR,
+      anon_sym_DOLLAR_LBRACE,
+      anon_sym_RBRACE,
+      sym__raw_curly,
+  [50] = 2,
+    ACTIONS(55), 1,
+      sym__plain_text,
+    ACTIONS(57), 4,
+      anon_sym_DOLLAR,
+      anon_sym_DOLLAR_LBRACE,
+      anon_sym_RBRACE,
+      sym__raw_curly,
+  [60] = 2,
+    ACTIONS(65), 2,
+      ts_builtin_sym_end,
+      sym__plain_text,
+    ACTIONS(63), 3,
+      anon_sym_DOLLAR,
+      anon_sym_DOLLAR_LBRACE,
+      sym__raw_curly,
+  [70] = 2,
+    ACTIONS(59), 1,
+      sym__plain_text,
+    ACTIONS(61), 4,
+      anon_sym_DOLLAR,
+      anon_sym_DOLLAR_LBRACE,
+      anon_sym_RBRACE,
+      sym__raw_curly,
+  [80] = 2,
+    ACTIONS(67), 1,
+      anon_sym_RBRACE,
+    ACTIONS(69), 1,
+      anon_sym_COLON,
+  [87] = 2,
+    ACTIONS(71), 1,
+      anon_sym_RBRACE,
+    ACTIONS(73), 1,
+      anon_sym_COLON,
+  [94] = 1,
+    ACTIONS(75), 1,
+      anon_sym_RBRACE,
+  [98] = 1,
+    ACTIONS(77), 1,
+      sym_int,
+  [102] = 1,
+    ACTIONS(79), 1,
+      ts_builtin_sym_end,
+  [106] = 1,
+    ACTIONS(81), 1,
+      sym_int,
+  [110] = 1,
+    ACTIONS(83), 1,
+      anon_sym_RBRACE,
+  [114] = 1,
+    ACTIONS(85), 1,
+      sym_int,
+  [118] = 1,
+    ACTIONS(87), 1,
+      sym_int,
+};
+
+static const uint32_t ts_small_parse_table_map[] = {
+  [SMALL_STATE(8)] = 0,
+  [SMALL_STATE(9)] = 10,
+  [SMALL_STATE(10)] = 20,
+  [SMALL_STATE(11)] = 30,
+  [SMALL_STATE(12)] = 40,
+  [SMALL_STATE(13)] = 50,
+  [SMALL_STATE(14)] = 60,
+  [SMALL_STATE(15)] = 70,
+  [SMALL_STATE(16)] = 80,
+  [SMALL_STATE(17)] = 87,
+  [SMALL_STATE(18)] = 94,
+  [SMALL_STATE(19)] = 98,
+  [SMALL_STATE(20)] = 102,
+  [SMALL_STATE(21)] = 106,
+  [SMALL_STATE(22)] = 110,
+  [SMALL_STATE(23)] = 114,
+  [SMALL_STATE(24)] = 118,
+};
+
+static const TSParseActionEntry ts_parse_actions[] = {
+  [0] = {.entry = {.count = 0, .reusable = false}},
+  [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(),
+  [3] = {.entry = {.count = 1, .reusable = false}}, SHIFT(19),
+  [5] = {.entry = {.count = 1, .reusable = false}}, SHIFT(24),
+  [7] = {.entry = {.count = 1, .reusable = false}}, SHIFT(14),
+  [9] = {.entry = {.count = 1, .reusable = true}}, SHIFT(14),
+  [11] = {.entry = {.count = 1, .reusable = false}}, SHIFT(21),
+  [13] = {.entry = {.count = 1, .reusable = false}}, SHIFT(23),
+  [15] = {.entry = {.count = 1, .reusable = false}}, SHIFT(12),
+  [17] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12),
+  [19] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(21),
+  [22] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(23),
+  [25] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2),
+  [27] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(12),
+  [30] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(12),
+  [33] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_snippet, 1),
+  [35] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_snippet, 1),
+  [37] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_snippet_repeat1, 2),
+  [39] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(19),
+  [42] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(24),
+  [45] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(14),
+  [48] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_snippet_repeat1, 2), SHIFT_REPEAT(14),
+  [51] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_tabstop, 3),
+  [53] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_tabstop, 3),
+  [55] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_tabstop, 2),
+  [57] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_tabstop, 2),
+  [59] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_placeholder, 5),
+  [61] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_placeholder, 5),
+  [63] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_text, 1),
+  [65] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_text, 1),
+  [67] = {.entry = {.count = 1, .reusable = true}}, SHIFT(10),
+  [69] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2),
+  [71] = {.entry = {.count = 1, .reusable = true}}, SHIFT(8),
+  [73] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3),
+  [75] = {.entry = {.count = 1, .reusable = true}}, SHIFT(11),
+  [77] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9),
+  [79] = {.entry = {.count = 1, .reusable = true}},  ACCEPT_INPUT(),
+  [81] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13),
+  [83] = {.entry = {.count = 1, .reusable = true}}, SHIFT(15),
+  [85] = {.entry = {.count = 1, .reusable = true}}, SHIFT(17),
+  [87] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16),
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef _WIN32
+#define extern __declspec(dllexport)
+#endif
+
+/*
+ * Public entry point of the parser: returns the address of a function-local
+ * `static const TSLanguage` that ties together the counts, parse tables,
+ * symbol tables and lexer defined earlier in this file.
+ *
+ * When _WIN32 is defined, `extern` has been redefined just above to
+ * `__declspec(dllexport)`, which is what the leading `extern` expands to here.
+ */
+extern const TSLanguage *tree_sitter_snippet(void) {
+  static const TSLanguage language = {
+    .version = LANGUAGE_VERSION,
+    .symbol_count = SYMBOL_COUNT,
+    .alias_count = ALIAS_COUNT,
+    .token_count = TOKEN_COUNT,
+    .external_token_count = EXTERNAL_TOKEN_COUNT,
+    .state_count = STATE_COUNT,
+    .large_state_count = LARGE_STATE_COUNT,
+    .production_id_count = PRODUCTION_ID_COUNT,
+    .field_count = FIELD_COUNT,
+    .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,
+    .parse_table = &ts_parse_table[0][0],
+    .small_parse_table = ts_small_parse_table,
+    .small_parse_table_map = ts_small_parse_table_map,
+    .parse_actions = ts_parse_actions,
+    .symbol_names = ts_symbol_names,
+    .symbol_metadata = ts_symbol_metadata,
+    .public_symbol_map = ts_symbol_map,
+    .alias_map = ts_non_terminal_alias_map,
+    .alias_sequences = &ts_alias_sequences[0][0],
+    .lex_modes = ts_lex_modes,
+    .lex_fn = ts_lex,
+  };
+  return &language;
+}
+#ifdef __cplusplus
+}
+#endif

crates/snippet/grammar/src/tree_sitter/parser.h 🔗

@@ -0,0 +1,224 @@
+#ifndef TREE_SITTER_PARSER_H_
+#define TREE_SITTER_PARSER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// Reserved symbol values: the error symbol is (TSSymbol)-1 and the end symbol
+// is 0.
+#define ts_builtin_sym_error ((TSSymbol)-1)
+#define ts_builtin_sym_end 0
+// NOTE(review): presumably the buffer size used with the external scanner's
+// serialize/deserialize callbacks; not referenced in this header, so confirm.
+#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
+
+// Identifier of a parse state.
+typedef uint16_t TSStateId;
+
+// These declarations are skipped when TREE_SITTER_API_H_ is already defined,
+// which avoids redefining the same typedefs.
+#ifndef TREE_SITTER_API_H_
+typedef uint16_t TSSymbol;
+typedef uint16_t TSFieldId;
+typedef struct TSLanguage TSLanguage;
+#endif
+
+// One entry of a language's field map (see TSLanguage.field_map_entries).
+// NOTE(review): presumably ties a field id to a child position within a
+// production; the consuming code is not in this header, so confirm.
+typedef struct {
+  TSFieldId field_id;
+  uint8_t child_index;
+  bool inherited;
+} TSFieldMapEntry;
+
+// An (index, length) pair (see TSLanguage.field_map_slices).
+// NOTE(review): presumably selects a run of TSFieldMapEntry values; confirm.
+typedef struct {
+  uint16_t index;
+  uint16_t length;
+} TSFieldMapSlice;
+
+// Per-symbol boolean flags (see TSLanguage.symbol_metadata).
+typedef struct {
+  bool visible;
+  bool named;
+  bool supertype;
+} TSSymbolMetadata;
+
+typedef struct TSLexer TSLexer;
+
+// Lexer interface handed to lex functions. The lexer macros in this header
+// read `lookahead`, write `result_symbol`, and call `advance` and `mark_end`.
+struct TSLexer {
+  int32_t lookahead;        // current lookahead value, read by START_LEXER
+  TSSymbol result_symbol;   // set by ACCEPT_TOKEN
+  void (*advance)(TSLexer *, bool);  // called as advance(lexer, skip)
+  void (*mark_end)(TSLexer *);       // called by ACCEPT_TOKEN
+  uint32_t (*get_column)(TSLexer *);
+  bool (*is_at_included_range_start)(const TSLexer *);
+  bool (*eof)(const TSLexer *);
+};
+
+// Discriminant stored in the `type` member of a TSParseAction.
+typedef enum {
+  TSParseActionTypeShift,
+  TSParseActionTypeReduce,
+  TSParseActionTypeAccept,
+  TSParseActionTypeRecover,
+} TSParseActionType;
+
+// A single parse action. Both struct variants begin with a `uint8_t type`,
+// mirroring the standalone `type` member, which holds a TSParseActionType
+// value (see the SHIFT/REDUCE/RECOVER/ACCEPT_INPUT macros).
+typedef union {
+  // Used by SHIFT, SHIFT_REPEAT and SHIFT_EXTRA.
+  struct {
+    uint8_t type;
+    TSStateId state;
+    bool extra;
+    bool repetition;
+  } shift;
+  // Used by REDUCE.
+  struct {
+    uint8_t type;
+    uint8_t child_count;
+    TSSymbol symbol;
+    int16_t dynamic_precedence;
+    uint16_t production_id;
+  } reduce;
+  // Used directly by RECOVER and ACCEPT_INPUT, which carry no payload.
+  uint8_t type;
+} TSParseAction;
+
+// Pair of lex state ids (see TSLanguage.lex_modes): one for the generated
+// lex function and one for the external lexer.
+typedef struct {
+  uint16_t lex_state;
+  uint16_t external_lex_state;
+} TSLexMode;
+
+// Element type of the parse action table (TSLanguage.parse_actions). An
+// element is either an action or an `entry` header carrying a count and a
+// reusable flag; in generated tables a header element is written immediately
+// before its action element(s).
+typedef union {
+  TSParseAction action;
+  struct {
+    uint8_t count;
+    bool reusable;
+  } entry;
+} TSParseActionEntry;
+
+// Description of a generated language: counts, pointers to the generated
+// tables, the lex functions, and optional external scanner callbacks.
+// A generated parser fills in one static instance of this struct; the member
+// order is part of the layout and must not be changed.
+struct TSLanguage {
+  uint32_t version;
+  uint32_t symbol_count;
+  uint32_t alias_count;
+  uint32_t token_count;
+  uint32_t external_token_count;
+  uint32_t state_count;
+  uint32_t large_state_count;
+  uint32_t production_id_count;
+  uint32_t field_count;
+  uint16_t max_alias_sequence_length;
+  const uint16_t *parse_table;
+  const uint16_t *small_parse_table;
+  const uint32_t *small_parse_table_map;
+  const TSParseActionEntry *parse_actions;
+  const char * const *symbol_names;
+  const char * const *field_names;
+  const TSFieldMapSlice *field_map_slices;
+  const TSFieldMapEntry *field_map_entries;
+  const TSSymbolMetadata *symbol_metadata;
+  const TSSymbol *public_symbol_map;
+  const uint16_t *alias_map;
+  const TSSymbol *alias_sequences;
+  const TSLexMode *lex_modes;
+  bool (*lex_fn)(TSLexer *, TSStateId);
+  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
+  TSSymbol keyword_capture_token;
+  // Callbacks and tables for an external (hand-written) scanner.
+  struct {
+    const bool *states;
+    const TSSymbol *symbol_map;
+    void *(*create)(void);
+    void (*destroy)(void *);
+    bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
+    unsigned (*serialize)(void *, char *);
+    void (*deserialize)(void *, const char *, unsigned);
+  } external_scanner;
+  const TSStateId *primary_state_ids;
+};
+
+/*
+ *  Lexer Macros
+ */
+
+// Prologue of a lex function; expects a `TSLexer *lexer` in scope.
+// Declares the locals `result`, `skip`, `eof` and `lookahead`, then jumps to
+// `start`. The `next_state` label (the target of ADVANCE and SKIP) calls
+// lexer->advance(lexer, skip) and falls through to `start`, which clears
+// `skip` and reloads `lookahead` from the lexer.
+#define START_LEXER()           \
+  bool result = false;          \
+  bool skip = false;            \
+  bool eof = false;             \
+  int32_t lookahead;            \
+  goto start;                   \
+  next_state:                   \
+  lexer->advance(lexer, skip);  \
+  start:                        \
+  skip = false;                 \
+  lookahead = lexer->lookahead;
+
+// Sets `state` to state_value and jumps to `next_state`, which advances the
+// lexer (see START_LEXER). Expects a `state` variable in scope.
+#define ADVANCE(state_value) \
+  {                          \
+    state = state_value;     \
+    goto next_state;         \
+  }
+
+// Like ADVANCE, but sets `skip` first so that the advance call performed at
+// `next_state` receives skip == true.
+#define SKIP(state_value) \
+  {                       \
+    skip = true;          \
+    state = state_value;  \
+    goto next_state;      \
+  }
+
+// Records symbol_value as the lexer's result, sets `result` to true and calls
+// lexer->mark_end(lexer).
+// Expands to several statements without enclosing braces, so it must not be
+// used as the sole body of an unbraced `if`/`else`/loop.
+#define ACCEPT_TOKEN(symbol_value)     \
+  result = true;                       \
+  lexer->result_symbol = symbol_value; \
+  lexer->mark_end(lexer);
+
+// Returns `result` from the lex function (true once ACCEPT_TOKEN has run).
+#define END_STATE() return result;
+
+/*
+ *  Parse Table Macros
+ */
+
+// Converts a state id into an index relative to the first "small" state by
+// subtracting LARGE_STATE_COUNT (defined by the generated parser).
+// The argument and the whole expansion are parenthesized so the result stays
+// a single operand no matter what expression surrounds the macro call.
+#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
+
+// Identity macros: they expand to their argument unchanged and only label the
+// meaning of a number in the generated tables.
+#define STATE(id) id
+
+#define ACTIONS(id) id
+
+// Initializer for a TSParseActionEntry holding a shift action to state_value.
+// The outer braces initialize the entry union, the inner braces its `action`
+// member.
+#define SHIFT(state_value)            \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = state_value            \
+    }                                 \
+  }}
+
+// Same as SHIFT, but also sets the shift action's `repetition` flag.
+#define SHIFT_REPEAT(state_value)     \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .state = state_value,           \
+      .repetition = true              \
+    }                                 \
+  }}
+
+// Shift action with the `extra` flag set; no target state is given, so
+// `state` is left zero-initialized.
+#define SHIFT_EXTRA()                 \
+  {{                                  \
+    .shift = {                        \
+      .type = TSParseActionTypeShift, \
+      .extra = true                   \
+    }                                 \
+  }}
+
+// Initializer for a reduce action producing symbol_val from child_count_val
+// children. Any further arguments are pasted into the `.reduce` initializer,
+// which lets the caller add designated initializers for the remaining
+// members (e.g. .dynamic_precedence or .production_id).
+#define REDUCE(symbol_val, child_count_val, ...) \
+  {{                                             \
+    .reduce = {                                  \
+      .type = TSParseActionTypeReduce,           \
+      .symbol = symbol_val,                      \
+      .child_count = child_count_val,            \
+      __VA_ARGS__                                \
+    },                                           \
+  }}
+
+// Initializer for a payload-free action of type TSParseActionTypeRecover.
+#define RECOVER()                    \
+  {{                                 \
+    .type = TSParseActionTypeRecover \
+  }}
+
+// Initializer for a payload-free action of type TSParseActionTypeAccept.
+#define ACCEPT_INPUT()              \
+  {{                                \
+    .type = TSParseActionTypeAccept \
+  }}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TREE_SITTER_PARSER_H_

crates/snippet/src/snippet.rs 🔗

@@ -0,0 +1,139 @@
+use anyhow::{anyhow, Result};
+use smallvec::SmallVec;
+use std::{collections::BTreeMap, ops::Range};
+use tree_sitter::{Parser, TreeCursor};
+
+/// A parsed snippet: its plain text plus the locations of its tabstops.
+///
+/// `Debug`, `Clone`, `PartialEq` and `Eq` are derived so that parse results
+/// can be printed, compared in tests, and used with `Result::unwrap_err`.
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
+pub struct Snippet {
+    /// The snippet source with all tabstop/placeholder syntax removed;
+    /// placeholder contents are kept.
+    pub text: String,
+    /// One entry per distinct tabstop index, ordered by ascending index.
+    /// Each entry holds every byte range of `text` that belongs to that
+    /// tabstop, in the order encountered; a bare tabstop yields an empty range.
+    pub tabstops: Vec<SmallVec<[Range<usize>; 2]>>,
+}
+
+impl Snippet {
+    /// Parses `source` with the tree-sitter snippet grammar and returns its
+    /// text and tabstop ranges.
+    ///
+    /// Tabstops are returned in ascending order of their numeric index, so an
+    /// index of `0` sorts first.
+    /// NOTE(review): snippet formats commonly treat `$0` as the *final*
+    /// cursor position; confirm whether callers expect it last.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the grammar cannot be loaded into the parser,
+    /// when the parser produces no tree, when the tree contains a syntax
+    /// error (`"invalid snippet"`), or when a tabstop index does not fit in a
+    /// `usize`.
+    pub fn parse(source: &str) -> Result<Self> {
+        let mut parser = Parser::new();
+        // Report failures as errors instead of panicking: this function
+        // already returns `Result`, so callers can handle them uniformly.
+        parser
+            .set_language(tree_sitter_snippet::language())
+            .map_err(|error| anyhow!("failed to load snippet grammar: {:?}", error))?;
+
+        let tree = parser
+            .parse(source, None)
+            .ok_or_else(|| anyhow!("failed to parse snippet"))?;
+        let root = tree.root_node();
+        if root.has_error() {
+            return Err(anyhow!("invalid snippet"));
+        }
+
+        let mut text = String::new();
+        // Keyed by tabstop index so that `into_values` yields them in order.
+        let mut tabstops = BTreeMap::new();
+        let mut cursor = root.walk();
+        parse_snippet_node(&mut cursor, &mut text, &mut tabstops, source)?;
+
+        Ok(Snippet {
+            text,
+            tabstops: tabstops.into_values().collect(),
+        })
+    }
+}
+
+/// Walks the children of the node under `cursor`, appending literal text to
+/// `text` and recording tabstop ranges (byte offsets into `text`) in
+/// `tabstops`, keyed by tabstop index.
+///
+/// On return the cursor is positioned on the same node it started on.
+///
+/// # Errors
+///
+/// Returns an error if a tabstop or placeholder index cannot be parsed as a
+/// `usize`.
+fn parse_snippet_node(
+    cursor: &mut TreeCursor,
+    text: &mut String,
+    tabstops: &mut BTreeMap<usize, SmallVec<[Range<usize>; 2]>>,
+    source: &str,
+) -> Result<()> {
+    // A node without children contributes nothing. Returning here keeps the
+    // cursor where the caller left it; otherwise the loop below would visit
+    // this node's siblings and the final `goto_parent` would climb one level
+    // too far.
+    if !cursor.goto_first_child() {
+        return Ok(());
+    }
+
+    loop {
+        let node = cursor.node();
+        match node.kind() {
+            "text" => text.push_str(&source[node.byte_range()]),
+            "tabstop" => {
+                if let Some(int_node) = node.named_child(0) {
+                    let index = source[int_node.byte_range()].parse::<usize>()?;
+                    // A bare tabstop occupies no text: record an empty range
+                    // at the current end of the output.
+                    tabstops
+                        .entry(index)
+                        .or_default()
+                        .push(text.len()..text.len());
+                }
+            }
+            "placeholder" => {
+                // Step from the placeholder's first child to its second,
+                // which holds the index.
+                cursor.goto_first_child();
+                cursor.goto_next_sibling();
+                let int_node = cursor.node();
+                let index = source[int_node.byte_range()].parse::<usize>()?;
+
+                // Two siblings further on is the node whose children form the
+                // placeholder's content.
+                cursor.goto_next_sibling();
+                cursor.goto_next_sibling();
+                let range_start = text.len();
+
+                parse_snippet_node(cursor, text, tabstops, source)?;
+                // The placeholder spans everything its content appended.
+                tabstops
+                    .entry(index)
+                    .or_default()
+                    .push(range_start..text.len());
+
+                // Back up from the content node to the placeholder itself.
+                cursor.goto_parent();
+            }
+            _ => {}
+        }
+
+        if !cursor.goto_next_sibling() {
+            break;
+        }
+    }
+    cursor.goto_parent();
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Borrows every tabstop's ranges as a slice so the result can be
+    /// compared directly against `vec![..]` literals.
+    fn tabstop_ranges(snippet: &Snippet) -> Vec<&[Range<usize>]> {
+        snippet
+            .tabstops
+            .iter()
+            .map(|ranges| ranges.as_slice())
+            .collect()
+    }
+
+    #[test]
+    fn test_parse_snippet_with_tabstops() {
+        let parsed = Snippet::parse("one$1two").unwrap();
+        assert_eq!(parsed.text, "onetwo");
+        // The tabstop sits between "one" and "two" and covers no text.
+        assert_eq!(tabstop_ranges(&parsed), &[vec![3..3]]);
+    }
+
+    #[test]
+    fn test_parse_snippet_with_placeholders() {
+        let parsed = Snippet::parse("one${1:two}three").unwrap();
+        assert_eq!(parsed.text, "onetwothree");
+        // The placeholder covers its default text "two".
+        assert_eq!(tabstop_ranges(&parsed), &[vec![3..6]]);
+    }
+
+    #[test]
+    fn test_parse_snippet_with_nested_placeholders() {
+        let source = "for (${1:var ${2:i} = 0; ${2:i} < ${3:${4:array}.length}; ${2:i}++}) {$5}";
+        let parsed = Snippet::parse(source).unwrap();
+        assert_eq!(parsed.text, "for (var i = 0; i < array.length; i++) {}");
+
+        // One entry per tabstop index 1..=5, in index order.
+        let expected = [
+            vec![5..37],
+            vec![9..10, 16..17, 34..35],
+            vec![20..32],
+            vec![20..25],
+            vec![40..40],
+        ];
+        assert_eq!(tabstop_ranges(&parsed), &expected);
+    }
+}