add php support for semantic search

KCaverly created

Change summary

Cargo.lock                                        |  65 +----
crates/semantic_index/Cargo.toml                  |  17 
crates/semantic_index/src/parsing.rs              |   7 
crates/semantic_index/src/semantic_index_tests.rs | 205 +++++++++++++++++
crates/zed/src/languages/php/config.toml          |   1 
crates/zed/src/languages/php/embedding.scm        |  36 ++
crates/zed/src/languages/php/outline.scm          |   7 
7 files changed, 275 insertions(+), 63 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -2341,7 +2341,7 @@ dependencies = [
  "tree-sitter",
  "tree-sitter-html",
  "tree-sitter-rust",
- "tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)",
+ "tree-sitter-typescript",
  "unindent",
  "util",
  "workspace",
@@ -3851,7 +3851,7 @@ dependencies = [
  "text",
  "theme",
  "tree-sitter",
- "tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)",
+ "tree-sitter-elixir",
  "tree-sitter-embedded-template",
  "tree-sitter-heex",
  "tree-sitter-html",
@@ -3860,7 +3860,7 @@ dependencies = [
  "tree-sitter-python",
  "tree-sitter-ruby",
  "tree-sitter-rust",
- "tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)",
+ "tree-sitter-typescript",
  "unicase",
  "unindent",
  "util",
@@ -6685,14 +6685,15 @@ dependencies = [
  "theme",
  "tiktoken-rs 0.5.0",
  "tree-sitter",
- "tree-sitter-cpp 0.20.2",
- "tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "tree-sitter-json 0.19.0",
+ "tree-sitter-cpp",
+ "tree-sitter-elixir",
+ "tree-sitter-json 0.20.0",
  "tree-sitter-lua",
+ "tree-sitter-php",
  "tree-sitter-ruby",
  "tree-sitter-rust",
- "tree-sitter-toml 0.20.0",
- "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tree-sitter-toml",
+ "tree-sitter-typescript",
  "unindent",
  "util",
  "workspace",
@@ -8257,16 +8258,6 @@ dependencies = [
  "tree-sitter",
 ]
 
-[[package]]
-name = "tree-sitter-cpp"
-version = "0.20.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1c88fd925d0333e63ac64e521f5bd79c53019e569ffbbccfeef346a326f459e9"
-dependencies = [
- "cc",
- "tree-sitter",
-]
-
 [[package]]
 name = "tree-sitter-css"
 version = "0.19.0"
@@ -8276,16 +8267,6 @@ dependencies = [
  "tree-sitter",
 ]
 
-[[package]]
-name = "tree-sitter-elixir"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a9916f3e1c80b3c8aab8582604e97e8720cb9b893489b347cf999f80f9d469e"
-dependencies = [
- "cc",
- "tree-sitter",
-]
-
 [[package]]
 name = "tree-sitter-elixir"
 version = "0.1.0"
@@ -8464,26 +8445,6 @@ dependencies = [
  "tree-sitter",
 ]
 
-[[package]]
-name = "tree-sitter-toml"
-version = "0.20.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca517f578a98b23d20780247cc2688407fa81effad5b627a5a364ec3339b53e8"
-dependencies = [
- "cc",
- "tree-sitter",
-]
-
-[[package]]
-name = "tree-sitter-typescript"
-version = "0.20.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "079c695c32d39ad089101c66393aeaca30e967fba3486a91f573d2f0e12d290a"
-dependencies = [
- "cc",
- "tree-sitter",
-]
-
 [[package]]
 name = "tree-sitter-typescript"
 version = "0.20.2"
@@ -9923,9 +9884,9 @@ dependencies = [
  "tree-sitter",
  "tree-sitter-bash",
  "tree-sitter-c",
- "tree-sitter-cpp 0.20.0",
+ "tree-sitter-cpp",
  "tree-sitter-css",
- "tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)",
+ "tree-sitter-elixir",
  "tree-sitter-elm",
  "tree-sitter-embedded-template",
  "tree-sitter-glsl",
@@ -9942,8 +9903,8 @@ dependencies = [
  "tree-sitter-rust",
  "tree-sitter-scheme",
  "tree-sitter-svelte",
- "tree-sitter-toml 0.5.1",
- "tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)",
+ "tree-sitter-toml",
+ "tree-sitter-typescript",
  "tree-sitter-yaml",
  "unindent",
  "url",

crates/semantic_index/Cargo.toml 🔗

@@ -54,11 +54,12 @@ tempdir.workspace = true
 ctor.workspace = true
 env_logger.workspace = true
 
-tree-sitter-typescript = "*"
-tree-sitter-json = "*"
-tree-sitter-rust = "*"
-tree-sitter-toml = "*"
-tree-sitter-cpp = "*"
-tree-sitter-elixir = "*"
-tree-sitter-lua = "*"
-tree-sitter-ruby = "*"
+tree-sitter-typescript.workspace = true
+tree-sitter-json.workspace = true
+tree-sitter-rust.workspace = true
+tree-sitter-toml.workspace = true
+tree-sitter-cpp.workspace = true
+tree-sitter-elixir.workspace = true
+tree-sitter-lua.workspace = true
+tree-sitter-ruby.workspace = true
+tree-sitter-php.workspace = true

crates/semantic_index/src/parsing.rs 🔗

@@ -200,7 +200,12 @@ impl CodeContextRetriever {
 
             let mut document_content = String::new();
             for context_range in &context_match.context_ranges {
-                document_content.push_str(&content[context_range.clone()]);
+                add_content_from_range(
+                    &mut document_content,
+                    content,
+                    context_range.clone(),
+                    context_match.start_col,
+                );
                 document_content.push_str("\n");
             }
 

crates/semantic_index/src/semantic_index_tests.rs 🔗

@@ -1017,6 +1017,156 @@ async fn test_code_context_retrieval_ruby() {
     );
 }
 
+#[gpui::test]
+async fn test_code_context_retrieval_php() {
+    let language = php_lang();
+    let mut retriever = CodeContextRetriever::new();
+
+    let text = r#"
+        <?php
+
+        namespace LevelUp\Experience\Concerns;
+
+        /*
+        This is a multiple-lines comment block
+        that spans over multiple
+        lines
+        */
+        function functionName() {
+            echo "Hello world!";
+        }
+
+        trait HasAchievements
+        {
+            /**
+            * @throws \Exception
+            */
+            public function grantAchievement(Achievement $achievement, $progress = null): void
+            {
+                if ($progress > 100) {
+                    throw new Exception(message: 'Progress cannot be greater than 100');
+                }
+
+                if ($this->achievements()->find($achievement->id)) {
+                    throw new Exception(message: 'User already has this Achievement');
+                }
+
+                $this->achievements()->attach($achievement, [
+                    'progress' => $progress ?? null,
+                ]);
+
+                $this->when(value: ($progress === null) || ($progress === 100), callback: fn (): ?array => event(new AchievementAwarded(achievement: $achievement, user: $this)));
+            }
+
+            public function achievements(): BelongsToMany
+            {
+                return $this->belongsToMany(related: Achievement::class)
+                ->withPivot(columns: 'progress')
+                ->where('is_secret', false)
+                ->using(AchievementUser::class);
+            }
+        }
+
+        interface Multiplier
+        {
+            public function qualifies(array $data): bool;
+
+            public function setMultiplier(): int;
+        }
+
+        enum AuditType: string
+        {
+            case Add = 'add';
+            case Remove = 'remove';
+            case Reset = 'reset';
+            case LevelUp = 'level_up';
+        }
+
+        ?>"#
+    .unindent();
+
+    let documents = retriever.parse_file(&text, language.clone()).unwrap();
+
+    assert_documents_eq(
+        &documents,
+        &[
+            (
+                r#"
+        /*
+        This is a multiple-lines comment block
+        that spans over multiple
+        lines
+        */
+        function functionName() {
+            echo "Hello world!";
+        }"#
+                .unindent(),
+                123,
+            ),
+            (
+                r#"
+        trait HasAchievements
+        {
+            /**
+            * @throws \Exception
+            */
+            public function grantAchievement(Achievement $achievement, $progress = null): void
+            {/* ... */}
+
+            public function achievements(): BelongsToMany
+            {/* ... */}
+        }"#
+                .unindent(),
+                177,
+            ),
+            (r#"
+            /**
+            * @throws \Exception
+            */
+            public function grantAchievement(Achievement $achievement, $progress = null): void
+            {
+                if ($progress > 100) {
+                    throw new Exception(message: 'Progress cannot be greater than 100');
+                }
+
+                if ($this->achievements()->find($achievement->id)) {
+                    throw new Exception(message: 'User already has this Achievement');
+                }
+
+                $this->achievements()->attach($achievement, [
+                    'progress' => $progress ?? null,
+                ]);
+
+                $this->when(value: ($progress === null) || ($progress === 100), callback: fn (): ?array => event(new AchievementAwarded(achievement: $achievement, user: $this)));
+            }"#.unindent(), 245),
+            (r#"
+                public function achievements(): BelongsToMany
+                {
+                    return $this->belongsToMany(related: Achievement::class)
+                    ->withPivot(columns: 'progress')
+                    ->where('is_secret', false)
+                    ->using(AchievementUser::class);
+                }"#.unindent(), 902),
+            (r#"
+                interface Multiplier
+                {
+                    public function qualifies(array $data): bool;
+
+                    public function setMultiplier(): int;
+                }"#.unindent(),
+                1146),
+            (r#"
+                enum AuditType: string
+                {
+                    case Add = 'add';
+                    case Remove = 'remove';
+                    case Reset = 'reset';
+                    case LevelUp = 'level_up';
+                }"#.unindent(), 1265)
+        ],
+    );
+}
+
 #[gpui::test]
 fn test_dot_product(mut rng: StdRng) {
     assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
@@ -1376,6 +1526,61 @@ fn lua_lang() -> Arc<Language> {
     )
 }
 
+fn php_lang() -> Arc<Language> {
+    Arc::new(
+        Language::new(
+            LanguageConfig {
+                name: "PHP".into(),
+                path_suffixes: vec!["php".into()],
+                collapsed_placeholder: "/* ... */".into(),
+                ..Default::default()
+            },
+            Some(tree_sitter_php::language()),
+        )
+        .with_embedding_query(
+            r#"
+            (
+                (comment)* @context
+                .
+                [
+                    (function_definition
+                        "function" @name
+                        name: (_) @name
+                        body: (_
+                            "{" @keep
+                            "}" @keep) @collapse
+                        )
+
+                    (trait_declaration
+                        "trait" @name
+                        name: (_) @name)
+
+                    (method_declaration
+                        "function" @name
+                        name: (_) @name
+                        body: (_
+                            "{" @keep
+                            "}" @keep) @collapse
+                        )
+
+                    (interface_declaration
+                        "interface" @name
+                        name: (_) @name
+                        )
+
+                    (enum_declaration
+                        "enum" @name
+                        name: (_) @name
+                        )
+
+                ] @item
+            )
+            "#,
+        )
+        .unwrap(),
+    )
+}
+
 fn ruby_lang() -> Arc<Language> {
     Arc::new(
         Language::new(

crates/zed/src/languages/php/config.toml 🔗

@@ -9,3 +9,4 @@ brackets = [
     { start = "(", end = ")", close = true, newline = true },
     { start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] },
 ]
+collapsed_placeholder = "/* ... */"

crates/zed/src/languages/php/embedding.scm 🔗

@@ -0,0 +1,36 @@
+(
+    (comment)* @context
+    .
+    [
+        (function_definition
+            "function" @name
+            name: (_) @name
+            body: (_
+                "{" @keep
+                "}" @keep) @collapse
+            )
+
+        (trait_declaration
+            "trait" @name
+            name: (_) @name)
+
+        (method_declaration
+            "function" @name
+            name: (_) @name
+            body: (_
+                "{" @keep
+                "}" @keep) @collapse
+            )
+
+        (interface_declaration
+            "interface" @name
+            name: (_) @name
+            )
+
+        (enum_declaration
+            "enum" @name
+            name: (_) @name
+            )
+
+        ] @item
+    )

crates/zed/src/languages/php/outline.scm 🔗

@@ -8,8 +8,6 @@
     name: (_) @name
     ) @item
 
-
-
 (method_declaration
     "function" @context
     name: (_) @name
@@ -24,3 +22,8 @@
     "enum" @context
     name: (_) @name
     ) @item
+
+(trait_declaration
+    "trait" @context
+    name: (_) @name
+    ) @item