extension_cli: Add tests for semantic token rules and language tasks (#50750)

Finn Evers created

This adds checks to the extension CLI that parse each language's
`tasks.json` and `semantic_token_rules.json`, ensuring that tasks and
semantic token rules are actually valid for the compiled extensions.

Release Notes:

- N/A

Change summary

Cargo.lock                                  |  2 
crates/extension/src/extension_builder.rs   |  3 
crates/extension_cli/Cargo.toml             |  2 
crates/extension_cli/src/main.rs            | 61 +++++++++++++++++-----
crates/extension_host/src/extension_host.rs | 29 ++++------
crates/extension_host/src/headless_host.rs  |  4 +
crates/language/Cargo.toml                  |  1 
crates/language/src/language.rs             |  9 +++
crates/settings_content/src/project.rs      | 26 +++++++++
crates/task/src/task_template.rs            |  1 
10 files changed, 102 insertions(+), 36 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -6082,7 +6082,9 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_json_lenient",
+ "settings_content",
  "snippet_provider",
+ "task",
  "theme",
  "tokio",
  "toml 0.8.23",

crates/extension/src/extension_builder.rs 🔗

@@ -7,6 +7,7 @@ use anyhow::{Context as _, Result, bail};
 use futures::{StreamExt, io};
 use heck::ToSnakeCase;
 use http_client::{self, AsyncBody, HttpClient};
+use language::LanguageConfig;
 use serde::Deserialize;
 use std::{
     env, fs, mem,
@@ -583,7 +584,7 @@ async fn populate_defaults(
 
         while let Some(language_dir) = language_dir_entries.next().await {
             let language_dir = language_dir?;
-            let config_path = language_dir.join("config.toml");
+            let config_path = language_dir.join(LanguageConfig::FILE_NAME);
             if fs.is_file(config_path.as_path()).await {
                 let relative_language_dir =
                     language_dir.strip_prefix(extension_path)?.to_path_buf();

crates/extension_cli/Cargo.toml 🔗

@@ -26,7 +26,9 @@ reqwest_client.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 serde_json_lenient.workspace = true
+settings_content.workspace = true
 snippet_provider.workspace = true
+task.workspace = true
 theme.workspace = true
 tokio = { workspace = true, features = ["full"] }
 toml.workspace = true

crates/extension_cli/src/main.rs 🔗

@@ -11,8 +11,10 @@ use extension::extension_builder::{CompileExtensionOptions, ExtensionBuilder};
 use extension::{ExtensionManifest, ExtensionSnippets};
 use language::LanguageConfig;
 use reqwest_client::ReqwestClient;
+use settings_content::SemanticTokenRules;
 use snippet_provider::file_to_snippets;
 use snippet_provider::format::VsSnippetsFile;
+use task::TaskTemplates;
 use tokio::process::Command;
 use tree_sitter::{Language, Query, WasmStore};
 
@@ -323,9 +325,8 @@ fn test_languages(
 ) -> Result<()> {
     for relative_language_dir in &manifest.languages {
         let language_dir = extension_path.join(relative_language_dir);
-        let config_path = language_dir.join("config.toml");
-        let config_content = fs::read_to_string(&config_path)?;
-        let config: LanguageConfig = toml::from_str(&config_content)?;
+        let config_path = language_dir.join(LanguageConfig::FILE_NAME);
+        let config = LanguageConfig::load(&config_path)?;
         let grammar = if let Some(name) = &config.grammar {
             Some(
                 grammars
@@ -339,18 +340,48 @@ fn test_languages(
         let query_entries = fs::read_dir(&language_dir)?;
         for entry in query_entries {
             let entry = entry?;
-            let query_path = entry.path();
-            if query_path.extension() == Some("scm".as_ref()) {
-                let grammar = grammar.with_context(|| {
-                    format! {
-                        "language {} provides query {} but no grammar",
-                        config.name,
-                        query_path.display()
-                    }
-                })?;
-
-                let query_source = fs::read_to_string(&query_path)?;
-                let _query = Query::new(grammar, &query_source)?;
+            let file_path = entry.path();
+
+            let Some(file_name) = file_path.file_name().and_then(|name| name.to_str()) else {
+                continue;
+            };
+
+            match file_name {
+                LanguageConfig::FILE_NAME => {
+                    // Loaded above
+                }
+                SemanticTokenRules::FILE_NAME => {
+                    let _token_rules = SemanticTokenRules::load(&file_path)?;
+                }
+                TaskTemplates::FILE_NAME => {
+                    let task_file_content = std::fs::read(&file_path).with_context(|| {
+                        anyhow!(
+                            "Failed to read tasks file at {path}",
+                            path = file_path.display()
+                        )
+                    })?;
+                    let _task_templates =
+                        serde_json_lenient::from_slice::<TaskTemplates>(&task_file_content)
+                            .with_context(|| {
+                                anyhow!(
+                                    "Failed to parse tasks file at {path}",
+                                    path = file_path.display()
+                                )
+                            })?;
+                }
+                _ if file_name.ends_with(".scm") => {
+                    let grammar = grammar.with_context(|| {
+                        format! {
+                            "language {} provides query {} but no grammar",
+                            config.name,
+                            file_path.display()
+                        }
+                    })?;
+
+                    let query_source = fs::read_to_string(&file_path)?;
+                    let _query = Query::new(grammar, &query_source)?;
+                }
+                _ => {}
             }
         }
 

crates/extension_host/src/extension_host.rs 🔗

@@ -55,6 +55,7 @@ use std::{
     sync::Arc,
     time::{Duration, Instant},
 };
+use task::TaskTemplates;
 use url::Url;
 use util::{ResultExt, paths::RemotePathBuf};
 use wasm_host::{
@@ -1285,19 +1286,11 @@ impl ExtensionStore {
             ]);
 
             // Load semantic token rules if present in the language directory.
-            let rules_path = language_path.join("semantic_token_rules.json");
-            if let Ok(rules_json) = std::fs::read_to_string(&rules_path) {
-                match serde_json_lenient::from_str::<SemanticTokenRules>(&rules_json) {
-                    Ok(rules) => {
-                        semantic_token_rules_to_add.push((language_name.clone(), rules));
-                    }
-                    Err(err) => {
-                        log::error!(
-                            "Failed to parse semantic token rules from {}: {err:#}",
-                            rules_path.display()
-                        );
-                    }
-                }
+            let rules_path = language_path.join(SemanticTokenRules::FILE_NAME);
+            if std::fs::exists(&rules_path).is_ok_and(|exists| exists)
+                && let Some(rules) = SemanticTokenRules::load(&rules_path).log_err()
+            {
+                semantic_token_rules_to_add.push((language_name.clone(), rules));
             }
 
             self.proxy.register_language(
@@ -1306,11 +1299,11 @@ impl ExtensionStore {
                 language.matcher.clone(),
                 language.hidden,
                 Arc::new(move || {
-                    let config = std::fs::read_to_string(language_path.join("config.toml"))?;
-                    let config: LanguageConfig = ::toml::from_str(&config)?;
+                    let config =
+                        LanguageConfig::load(language_path.join(LanguageConfig::FILE_NAME))?;
                     let queries = load_plugin_queries(&language_path);
                     let context_provider =
-                        std::fs::read_to_string(language_path.join("tasks.json"))
+                        std::fs::read_to_string(language_path.join(TaskTemplates::FILE_NAME))
                             .ok()
                             .and_then(|contents| {
                                 let definitions =
@@ -1580,7 +1573,7 @@ impl ExtensionStore {
                 if !fs_metadata.is_dir {
                     continue;
                 }
-                let language_config_path = language_path.join("config.toml");
+                let language_config_path = language_path.join(LanguageConfig::FILE_NAME);
                 let config = fs.load(&language_config_path).await.with_context(|| {
                     format!("loading language config from {language_config_path:?}")
                 })?;
@@ -1703,7 +1696,7 @@ impl ExtensionStore {
         cx.background_spawn(async move {
             const EXTENSION_TOML: &str = "extension.toml";
             const EXTENSION_WASM: &str = "extension.wasm";
-            const CONFIG_TOML: &str = "config.toml";
+            const CONFIG_TOML: &str = LanguageConfig::FILE_NAME;
 
             if is_dev {
                 let manifest_toml = toml::to_string(&loaded_extension.manifest)?;

crates/extension_host/src/headless_host.rs 🔗

@@ -138,7 +138,9 @@ impl HeadlessExtensionStore {
 
         for language_path in &manifest.languages {
             let language_path = extension_dir.join(language_path);
-            let config = fs.load(&language_path.join("config.toml")).await?;
+            let config = fs
+                .load(&language_path.join(LanguageConfig::FILE_NAME))
+                .await?;
             let mut config = ::toml::from_str::<LanguageConfig>(&config)?;
 
             this.update(cx, |this, _cx| {

crates/language/Cargo.toml 🔗

@@ -62,6 +62,7 @@ sum_tree.workspace = true
 task.workspace = true
 text.workspace = true
 theme.workspace = true
+toml.workspace = true
 tracing.workspace = true
 tree-sitter-md = { workspace = true, optional = true }
 tree-sitter-python = { workspace = true, optional = true }

crates/language/src/language.rs 🔗

@@ -961,6 +961,15 @@ pub struct LanguageConfig {
     pub import_path_strip_regex: Option<Regex>,
 }
 
+impl LanguageConfig {
+    pub const FILE_NAME: &str = "config.toml";
+
+    pub fn load(config_path: impl AsRef<Path>) -> Result<Self> {
+        let config = std::fs::read_to_string(config_path.as_ref())?;
+        toml::from_str(&config).map_err(Into::into)
+    }
+}
+
 #[derive(Clone, Debug, Deserialize, Default, JsonSchema)]
 pub struct DecreaseIndentConfig {
     #[serde(default, deserialize_with = "deserialize_regex")]

crates/settings_content/src/project.rs 🔗

@@ -1,5 +1,9 @@
-use std::{path::PathBuf, sync::Arc};
+use std::{
+    path::{Path, PathBuf},
+    sync::Arc,
+};
 
+use anyhow::Context;
 use collections::{BTreeMap, HashMap};
 use gpui::Rgba;
 use schemars::JsonSchema;
@@ -233,6 +237,26 @@ pub struct SemanticTokenRules {
     pub rules: Vec<SemanticTokenRule>,
 }
 
+impl SemanticTokenRules {
+    pub const FILE_NAME: &'static str = "semantic_token_rules.json";
+
+    pub fn load(file_path: &Path) -> anyhow::Result<Self> {
+        let rules_content = std::fs::read(file_path).with_context(|| {
+            anyhow::anyhow!(
+                "Could not read semantic token rules from {}",
+                file_path.display()
+            )
+        })?;
+
+        serde_json_lenient::from_slice::<SemanticTokenRules>(&rules_content).with_context(|| {
+            anyhow::anyhow!(
+                "Failed to parse semantic token rules from {}",
+                file_path.display()
+            )
+        })
+    }
+}
+
 impl crate::merge_from::MergeFrom for SemanticTokenRules {
     fn merge_from(&mut self, other: &Self) {
         self.rules.splice(0..0, other.rules.iter().cloned());

crates/task/src/task_template.rs 🔗

@@ -114,6 +114,7 @@ pub enum HideStrategy {
 pub struct TaskTemplates(pub Vec<TaskTemplate>);
 
 impl TaskTemplates {
+    pub const FILE_NAME: &str = "tasks.json";
     /// Generates JSON schema of Tasks JSON template format.
     pub fn generate_json_schema() -> serde_json::Value {
         let schema = schemars::generate::SchemaSettings::draft2019_09()