zeta: Update data collection eligibility when license file contents change + add Apache 2.0 (#35900)

Michael Sloan created

Closes #35070

Release Notes:

- Edit Prediction: Made license detection update eligibility for data
collection when license files change.
- Edit Prediction: Added the Apache 2.0 license to the open-source licenses
eligible for data collection.
- Edit Prediction: Made license detection less sensitive to whitespace
differences and made it check more files.

Change summary

crates/zeta/src/license_detection.rs           | 674 ++++++++++++++-----
crates/zeta/src/license_detection/apache-text  | 174 +++++
crates/zeta/src/license_detection/apache.regex | 201 +++++
crates/zeta/src/license_detection/isc.regex    |  15 
crates/zeta/src/license_detection/mit-text     |  21 
crates/zeta/src/license_detection/mit.regex    |  21 
crates/zeta/src/license_detection/upl.regex    |  35 +
crates/zeta/src/zeta.rs                        |  66 -
8 files changed, 965 insertions(+), 242 deletions(-)

Detailed changes

crates/zeta/src/license_detection.rs 🔗

@@ -1,204 +1,213 @@
+use std::{
+    collections::BTreeSet,
+    path::{Path, PathBuf},
+    sync::{Arc, LazyLock},
+};
+
+use fs::Fs;
+use futures::StreamExt as _;
+use gpui::{App, AppContext as _, Entity, Subscription, Task};
+use postage::watch;
+use project::Worktree;
 use regex::Regex;
-
-/// The most common license locations, with US and UK English spelling.
-pub const LICENSE_FILES_TO_CHECK: &[&str] = &[
-    "LICENSE",
-    "LICENCE",
-    "LICENSE.txt",
-    "LICENCE.txt",
-    "LICENSE.md",
-    "LICENCE.md",
-];
-
-pub fn is_license_eligible_for_data_collection(license: &str) -> bool {
-    // TODO: Include more licenses later (namely, Apache)
-    for pattern in [MIT_LICENSE_REGEX, ISC_LICENSE_REGEX, UPL_LICENSE_REGEX] {
-        let regex = Regex::new(pattern.trim()).unwrap();
-        if regex.is_match(license.trim()) {
-            return true;
-        }
-    }
-    false
+use util::ResultExt as _;
+use worktree::ChildEntriesOptions;
+
+/// Matches the most common license locations, with US and UK English spelling.
+const LICENSE_FILE_NAME_REGEX: LazyLock<regex::bytes::Regex> = LazyLock::new(|| {
+    regex::bytes::RegexBuilder::new(
+        "^ \
+        (?: license | licence) \
+        (?: [\\-._] (?: apache | isc | mit | upl))? \
+        (?: \\.txt | \\.md)? \
+        $",
+    )
+    .ignore_whitespace(true)
+    .case_insensitive(true)
+    .build()
+    .unwrap()
+});
+
+fn is_license_eligible_for_data_collection(license: &str) -> bool {
+    const LICENSE_REGEXES: LazyLock<Vec<Regex>> = LazyLock::new(|| {
+        [
+            include_str!("license_detection/apache.regex"),
+            include_str!("license_detection/isc.regex"),
+            include_str!("license_detection/mit.regex"),
+            include_str!("license_detection/upl.regex"),
+        ]
+        .into_iter()
+        .map(|pattern| Regex::new(&canonicalize_license_text(pattern)).unwrap())
+        .collect()
+    });
+
+    let license = canonicalize_license_text(license);
+    LICENSE_REGEXES.iter().any(|regex| regex.is_match(&license))
 }
 
-const MIT_LICENSE_REGEX: &str = r#"
-^.*MIT License.*
-
-Copyright.*?
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files \(the "Software"\), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software\.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT\. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE\.$
-"#;
-
-const ISC_LICENSE_REGEX: &str = r#"
-^ISC License
-
-Copyright.*?
-
-Permission to use, copy, modify, and/or distribute this software for any
-purpose with or without fee is hereby granted, provided that the above
-copyright notice and this permission notice appear in all copies\.
-
-THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS\. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE\.$
-"#;
-
-const UPL_LICENSE_REGEX: &str = r#"
-Copyright.*?
-
-The Universal Permissive License.*?
-
-Subject to the condition set forth below, permission is hereby granted to any person
-obtaining a copy of this software, associated documentation and/or data \(collectively
-the "Software"\), free of charge and under any and all copyright rights in the
-Software, and any and all patent rights owned or freely licensable by each licensor
-hereunder covering either \(i\) the unmodified Software as contributed to or provided
-by such licensor, or \(ii\) the Larger Works \(as defined below\), to deal in both
-
-\(a\) the Software, and
-
-\(b\) any piece of software and/or hardware listed in the lrgrwrks\.txt file if one is
-    included with the Software \(each a "Larger Work" to which the Software is
-    contributed by such licensors\),
-
-without restriction, including without limitation the rights to copy, create
-derivative works of, display, perform, and distribute the Software and make, use,
-sell, offer for sale, import, export, have made, and have sold the Software and the
-Larger Work\(s\), and to sublicense the foregoing rights on either these or other
-terms\.
-
-This license is subject to the following condition:
-
-The above copyright notice and either this complete permission notice or at a minimum
-a reference to the UPL must be included in all copies or substantial portions of the
-Software\.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
-INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
-PARTICULAR PURPOSE AND NONINFRINGEMENT\. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
-OR THE USE OR OTHER DEALINGS IN THE SOFTWARE\.$
-"#;
-
-#[cfg(test)]
-mod tests {
-    use unindent::unindent;
-
-    use crate::is_license_eligible_for_data_collection;
-
-    #[test]
-    fn test_mit_positive_detection() {
-        let example_license = unindent(
-            r#"
-                MIT License
+/// Canonicalizes the whitespace of license text and license regexes.
+fn canonicalize_license_text(license: &str) -> String {
+    const PARAGRAPH_SEPARATOR_REGEX: LazyLock<Regex> =
+        LazyLock::new(|| Regex::new(r"\s*\n\s*\n\s*").unwrap());
+
+    PARAGRAPH_SEPARATOR_REGEX
+        .split(license)
+        .filter(|paragraph| !paragraph.trim().is_empty())
+        .map(|paragraph| {
+            paragraph
+                .trim()
+                .split_whitespace()
+                .collect::<Vec<_>>()
+                .join(" ")
+        })
+        .collect::<Vec<_>>()
+        .join("\n\n")
+}
 
-                Copyright (c) 2024 John Doe
+pub enum LicenseDetectionWatcher {
+    Local {
+        is_open_source_rx: watch::Receiver<bool>,
+        _is_open_source_task: Task<()>,
+        _worktree_subscription: Subscription,
+    },
+    SingleFile,
+    Remote,
+}
 
-                Permission is hereby granted, free of charge, to any person obtaining a copy
-                of this software and associated documentation files (the "Software"), to deal
-                in the Software without restriction, including without limitation the rights
-                to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-                copies of the Software, and to permit persons to whom the Software is
-                furnished to do so, subject to the following conditions:
+impl LicenseDetectionWatcher {
+    pub fn new(worktree: &Entity<Worktree>, cx: &mut App) -> Self {
+        let worktree_ref = worktree.read(cx);
+        if worktree_ref.is_single_file() {
+            return Self::SingleFile;
+        }
 
-                The above copyright notice and this permission notice shall be included in all
-                copies or substantial portions of the Software.
+        let (files_to_check_tx, mut files_to_check_rx) = futures::channel::mpsc::unbounded();
+
+        let Worktree::Local(local_worktree) = worktree_ref else {
+            return Self::Remote;
+        };
+        let fs = local_worktree.fs().clone();
+        let worktree_abs_path = local_worktree.abs_path().clone();
+
+        let options = ChildEntriesOptions {
+            include_files: true,
+            include_dirs: false,
+            include_ignored: true,
+        };
+        for top_file in local_worktree.child_entries_with_options(Path::new(""), options) {
+            let path_bytes = top_file.path.as_os_str().as_encoded_bytes();
+            if top_file.is_created() && LICENSE_FILE_NAME_REGEX.is_match(path_bytes) {
+                let rel_path = top_file.path.clone();
+                files_to_check_tx.unbounded_send(rel_path).ok();
+            }
+        }
 
-                THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-                IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-                FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-                AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-                LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-                OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-                SOFTWARE.
-            "#
-            .trim(),
-        );
+        let _worktree_subscription =
+            cx.subscribe(worktree, move |_worktree, event, _cx| match event {
+                worktree::Event::UpdatedEntries(updated_entries) => {
+                    for updated_entry in updated_entries.iter() {
+                        let rel_path = &updated_entry.0;
+                        let path_bytes = rel_path.as_os_str().as_encoded_bytes();
+                        if LICENSE_FILE_NAME_REGEX.is_match(path_bytes) {
+                            files_to_check_tx.unbounded_send(rel_path.clone()).ok();
+                        }
+                    }
+                }
+                worktree::Event::DeletedEntry(_) | worktree::Event::UpdatedGitRepositories(_) => {}
+            });
+
+        let (mut is_open_source_tx, is_open_source_rx) = watch::channel_with::<bool>(false);
+
+        let _is_open_source_task = cx.background_spawn(async move {
+            let mut eligible_licenses = BTreeSet::new();
+            while let Some(rel_path) = files_to_check_rx.next().await {
+                let abs_path = worktree_abs_path.join(&rel_path);
+                let was_open_source = !eligible_licenses.is_empty();
+                if Self::is_path_eligible(&fs, abs_path).await.unwrap_or(false) {
+                    eligible_licenses.insert(rel_path);
+                } else {
+                    eligible_licenses.remove(&rel_path);
+                }
+                let is_open_source = !eligible_licenses.is_empty();
+                if is_open_source != was_open_source {
+                    *is_open_source_tx.borrow_mut() = is_open_source;
+                }
+            }
+        });
+
+        Self::Local {
+            is_open_source_rx,
+            _is_open_source_task,
+            _worktree_subscription,
+        }
+    }
 
-        assert!(is_license_eligible_for_data_collection(&example_license));
+    async fn is_path_eligible(fs: &Arc<dyn Fs>, abs_path: PathBuf) -> Option<bool> {
+        log::info!("checking if `{abs_path:?}` is an open source license");
+        // Resolve symlinks so that the file size from metadata is correct.
+        let Some(abs_path) = fs.canonicalize(&abs_path).await.ok() else {
+            log::info!(
+                "`{abs_path:?}` license file probably deleted (error canonicalizing the path)"
+            );
+            return None;
+        };
+        let metadata = fs.metadata(&abs_path).await.log_err()??;
+        // If the license file is >32kb it's unlikely to legitimately match any eligible license.
+        if metadata.len > 32768 {
+            return None;
+        }
+        let text = fs.load(&abs_path).await.log_err()?;
+        let is_eligible = is_license_eligible_for_data_collection(&text);
+        if is_eligible {
+            log::info!(
+                "`{abs_path:?}` matches a license that is eligible for data collection (if enabled)"
+            );
+        } else {
+            log::info!(
+                "`{abs_path:?}` does not match a license that is eligible for data collection"
+            );
+        }
+        Some(is_eligible)
+    }
 
-        let example_license = unindent(
-            r#"
-                The MIT License (MIT)
+    /// Answers false until we find out it's open source
+    pub fn is_project_open_source(&self) -> bool {
+        match self {
+            Self::Local {
+                is_open_source_rx, ..
+            } => *is_open_source_rx.borrow(),
+            Self::SingleFile | Self::Remote => false,
+        }
+    }
+}
 
-                Copyright (c) 2019 John Doe
+#[cfg(test)]
+mod tests {
 
-                Permission is hereby granted, free of charge, to any person obtaining a copy
-                of this software and associated documentation files (the "Software"), to deal
-                in the Software without restriction, including without limitation the rights
-                to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-                copies of the Software, and to permit persons to whom the Software is
-                furnished to do so, subject to the following conditions:
+    use fs::FakeFs;
+    use gpui::TestAppContext;
+    use serde_json::json;
+    use settings::{Settings as _, SettingsStore};
+    use unindent::unindent;
+    use worktree::WorktreeSettings;
 
-                The above copyright notice and this permission notice shall be included in all
-                copies or substantial portions of the Software.
+    use super::*;
 
-                THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-                IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-                FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-                AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-                LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-                OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-                SOFTWARE.
-            "#
-            .trim(),
-        );
+    const MIT_LICENSE: &str = include_str!("license_detection/mit-text");
+    const APACHE_LICENSE: &str = include_str!("license_detection/apache-text");
 
-        assert!(is_license_eligible_for_data_collection(&example_license));
+    #[test]
+    fn test_mit_positive_detection() {
+        assert!(is_license_eligible_for_data_collection(&MIT_LICENSE));
     }
 
     #[test]
     fn test_mit_negative_detection() {
-        let example_license = unindent(
-            r#"
-                MIT License
-
-                Copyright (c) 2024 John Doe
+        let example_license = format!(
+            r#"{MIT_LICENSE}
 
-                Permission is hereby granted, free of charge, to any person obtaining a copy
-                of this software and associated documentation files (the "Software"), to deal
-                in the Software without restriction, including without limitation the rights
-                to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-                copies of the Software, and to permit persons to whom the Software is
-                furnished to do so, subject to the following conditions:
-
-                The above copyright notice and this permission notice shall be included in all
-                copies or substantial portions of the Software.
-
-                THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-                IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-                FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-                AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-                LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-                OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-                SOFTWARE.
-
-                This project is dual licensed under the MIT License and the Apache License, Version 2.0.
-            "#
-            .trim(),
+            This project is dual licensed under the MIT License and the Apache License, Version 2.0."#
         );
-
         assert!(!is_license_eligible_for_data_collection(&example_license));
     }
 
@@ -351,4 +360,307 @@ mod tests {
 
         assert!(!is_license_eligible_for_data_collection(&example_license));
     }
+
+    #[test]
+    fn test_apache_positive_detection() {
+        assert!(is_license_eligible_for_data_collection(APACHE_LICENSE));
+
+        let license_with_appendix = format!(
+            r#"{APACHE_LICENSE}
+
+            END OF TERMS AND CONDITIONS
+
+            APPENDIX: How to apply the Apache License to your work.
+
+               To apply the Apache License to your work, attach the following
+               boilerplate notice, with the fields enclosed by brackets "[]"
+               replaced with your own identifying information. (Don't include
+               the brackets!)  The text should be enclosed in the appropriate
+               comment syntax for the file format. We also recommend that a
+               file or class name and description of purpose be included on the
+               same "printed page" as the copyright notice for easier
+               identification within third-party archives.
+
+            Copyright [yyyy] [name of copyright owner]
+
+            Licensed under the Apache License, Version 2.0 (the "License");
+            you may not use this file except in compliance with the License.
+            You may obtain a copy of the License at
+
+                http://www.apache.org/licenses/LICENSE-2.0
+
+            Unless required by applicable law or agreed to in writing, software
+            distributed under the License is distributed on an "AS IS" BASIS,
+            WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+            See the License for the specific language governing permissions and
+            limitations under the License."#
+        );
+        assert!(is_license_eligible_for_data_collection(
+            &license_with_appendix
+        ));
+
+        // Sometimes people fill in the appendix with copyright info.
+        let license_with_copyright = license_with_appendix.replace(
+            "Copyright [yyyy] [name of copyright owner]",
+            "Copyright 2025 John Doe",
+        );
+        assert!(license_with_copyright != license_with_appendix);
+        assert!(is_license_eligible_for_data_collection(
+            &license_with_copyright
+        ));
+    }
+
+    #[test]
+    fn test_apache_negative_detection() {
+        assert!(!is_license_eligible_for_data_collection(&format!(
+            "{APACHE_LICENSE}\n\nThe terms in this license are void if P=NP."
+        )));
+    }
+
+    #[test]
+    fn test_license_file_name_regex() {
+        // Test basic license file names
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENCE"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"license"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"licence"));
+
+        // Test with extensions
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE.txt"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE.md"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENCE.txt"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENCE.md"));
+
+        // Test with specific license types
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE-APACHE"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE-MIT"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE.MIT"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE_MIT"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE-ISC"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE-UPL"));
+
+        // Test combinations
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE-MIT.txt"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENCE.ISC.md"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"license_upl"));
+
+        // Test case insensitive
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"License"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"license-mit.TXT"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"LICENCE_isc.MD"));
+
+        // Test edge cases that should match
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"license.mit"));
+        assert!(LICENSE_FILE_NAME_REGEX.is_match(b"licence-upl.txt"));
+
+        // Test non-matching patterns
+        assert!(!LICENSE_FILE_NAME_REGEX.is_match(b"COPYING"));
+        assert!(!LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE.html"));
+        assert!(!LICENSE_FILE_NAME_REGEX.is_match(b"MYLICENSE"));
+        assert!(!LICENSE_FILE_NAME_REGEX.is_match(b"src/LICENSE"));
+        assert!(!LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE.old"));
+        assert!(!LICENSE_FILE_NAME_REGEX.is_match(b"LICENSE-GPL"));
+        assert!(!LICENSE_FILE_NAME_REGEX.is_match(b"LICENSEABC"));
+        assert!(!LICENSE_FILE_NAME_REGEX.is_match(b""));
+    }
+
+    #[test]
+    fn test_canonicalize_license_text() {
+        // Test basic whitespace normalization
+        let input = "Line 1\n   Line 2   \n\n\n  Line 3  ";
+        let expected = "Line 1 Line 2\n\nLine 3";
+        assert_eq!(canonicalize_license_text(input), expected);
+
+        // Test paragraph separation
+        let input = "Paragraph 1\nwith multiple lines\n\n\n\nParagraph 2\nwith more lines";
+        let expected = "Paragraph 1 with multiple lines\n\nParagraph 2 with more lines";
+        assert_eq!(canonicalize_license_text(input), expected);
+
+        // Test empty paragraphs are filtered out
+        let input = "\n\n\nParagraph 1\n\n\n   \n\n\nParagraph 2\n\n\n";
+        let expected = "Paragraph 1\n\nParagraph 2";
+        assert_eq!(canonicalize_license_text(input), expected);
+
+        // Test single line
+        let input = "   Single line with spaces   ";
+        let expected = "Single line with spaces";
+        assert_eq!(canonicalize_license_text(input), expected);
+
+        // Test multiple consecutive spaces within lines
+        let input = "Word1    Word2\n\nWord3     Word4";
+        let expected = "Word1 Word2\n\nWord3 Word4";
+        assert_eq!(canonicalize_license_text(input), expected);
+
+        // Test tabs and mixed whitespace
+        let input = "Word1\t\tWord2\n\n   Word3\r\n\r\n\r\nWord4   ";
+        let expected = "Word1 Word2\n\nWord3\n\nWord4";
+        assert_eq!(canonicalize_license_text(input), expected);
+    }
+
+    #[test]
+    fn test_license_detection_canonicalizes_whitespace() {
+        let mit_with_weird_spacing = unindent(
+            r#"
+                MIT License
+
+
+                Copyright (c) 2024 John Doe
+
+
+                Permission is hereby granted, free of charge, to any person obtaining a copy
+                of this software   and   associated   documentation files (the "Software"), to deal
+                in the Software without restriction, including without limitation the rights
+                to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+                copies of the Software, and to permit persons to whom the Software is
+                furnished to do so, subject to the following conditions:
+
+
+
+                The above copyright notice and this permission notice shall be included in all
+                copies or substantial portions of the Software.
+
+
+
+                THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+                IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+                FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+                AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+                LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+                OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+                SOFTWARE.
+            "#
+            .trim(),
+        );
+
+        assert!(is_license_eligible_for_data_collection(
+            &mit_with_weird_spacing
+        ));
+    }
+
+    fn init_test(cx: &mut TestAppContext) {
+        cx.update(|cx| {
+            let settings_store = SettingsStore::test(cx);
+            cx.set_global(settings_store);
+            WorktreeSettings::register(cx);
+        });
+    }
+
+    #[gpui::test]
+    async fn test_watcher_single_file(cx: &mut TestAppContext) {
+        init_test(cx);
+
+        let fs = FakeFs::new(cx.background_executor.clone());
+        fs.insert_tree("/root", json!({ "main.rs": "fn main() {}" }))
+            .await;
+
+        let worktree = Worktree::local(
+            Path::new("/root/main.rs"),
+            true,
+            fs.clone(),
+            Default::default(),
+            &mut cx.to_async(),
+        )
+        .await
+        .unwrap();
+
+        let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
+        assert!(matches!(watcher, LicenseDetectionWatcher::SingleFile));
+        assert!(!watcher.is_project_open_source());
+    }
+
+    #[gpui::test]
+    async fn test_watcher_updates_on_changes(cx: &mut TestAppContext) {
+        init_test(cx);
+
+        let fs = FakeFs::new(cx.background_executor.clone());
+        fs.insert_tree("/root", json!({ "main.rs": "fn main() {}" }))
+            .await;
+
+        let worktree = Worktree::local(
+            Path::new("/root"),
+            true,
+            fs.clone(),
+            Default::default(),
+            &mut cx.to_async(),
+        )
+        .await
+        .unwrap();
+
+        let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
+        assert!(matches!(watcher, LicenseDetectionWatcher::Local { .. }));
+        assert!(!watcher.is_project_open_source());
+
+        fs.write(Path::new("/root/LICENSE-MIT"), MIT_LICENSE.as_bytes())
+            .await
+            .unwrap();
+
+        cx.background_executor.run_until_parked();
+        assert!(watcher.is_project_open_source());
+
+        fs.write(Path::new("/root/LICENSE-APACHE"), APACHE_LICENSE.as_bytes())
+            .await
+            .unwrap();
+
+        cx.background_executor.run_until_parked();
+        assert!(watcher.is_project_open_source());
+
+        fs.write(Path::new("/root/LICENSE-MIT"), "Nevermind".as_bytes())
+            .await
+            .unwrap();
+
+        // Still considered open source as LICENSE-APACHE is present
+        cx.background_executor.run_until_parked();
+        assert!(watcher.is_project_open_source());
+
+        fs.write(
+            Path::new("/root/LICENSE-APACHE"),
+            "Also nevermind".as_bytes(),
+        )
+        .await
+        .unwrap();
+
+        cx.background_executor.run_until_parked();
+        assert!(!watcher.is_project_open_source());
+    }
+
+    #[gpui::test]
+    async fn test_watcher_initially_opensource_and_then_deleted(cx: &mut TestAppContext) {
+        init_test(cx);
+
+        let fs = FakeFs::new(cx.background_executor.clone());
+        fs.insert_tree(
+            "/root",
+            json!({ "main.rs": "fn main() {}", "LICENSE-MIT": MIT_LICENSE }),
+        )
+        .await;
+
+        let worktree = Worktree::local(
+            Path::new("/root"),
+            true,
+            fs.clone(),
+            Default::default(),
+            &mut cx.to_async(),
+        )
+        .await
+        .unwrap();
+
+        let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
+        assert!(matches!(watcher, LicenseDetectionWatcher::Local { .. }));
+
+        cx.background_executor.run_until_parked();
+        assert!(watcher.is_project_open_source());
+
+        fs.remove_file(
+            Path::new("/root/LICENSE-MIT"),
+            fs::RemoveOptions {
+                recursive: false,
+                ignore_if_not_exists: false,
+            },
+        )
+        .await
+        .unwrap();
+
+        cx.background_executor.run_until_parked();
+        assert!(!watcher.is_project_open_source());
+    }
 }

crates/zeta/src/license_detection/apache-text 🔗

@@ -0,0 +1,174 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.

crates/zeta/src/license_detection/apache.regex 🔗

@@ -0,0 +1,201 @@
+                                 ^Apache License
+                           Version 2\.0, January 2004
+                        http://www\.apache\.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1\. Definitions\.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document\.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License\.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity\. For the purposes of this definition,
+      "control" means \(i\) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or \(ii\) ownership of fifty percent \(50%\) or more of the
+      outstanding shares, or \(iii\) beneficial ownership of such entity\.
+
+      "You" \(or "Your"\) shall mean an individual or Legal Entity
+      exercising permissions granted by this License\.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files\.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types\.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      \(an example is provided in the Appendix below\)\.
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on \(or derived from\) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship\. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link \(or bind by name\) to the interfaces of,
+      the Work and Derivative Works thereof\.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner\. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution\."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work\.
+
+   2\. Grant of Copyright License\. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non\-exclusive, no\-charge, royalty\-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form\.
+
+   3\. Grant of Patent License\. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non\-exclusive, no\-charge, royalty\-free, irrevocable
+      \(except as stated in this section\) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution\(s\) alone or by combination of their Contribution\(s\)
+      with the Work to which such Contribution\(s\) was submitted\. If You
+      institute patent litigation against any entity \(including a
+      cross\-claim or counterclaim in a lawsuit\) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed\.
+
+   4\. Redistribution\. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      \(a\) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      \(b\) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      \(c\) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      \(d\) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third\-party notices normally appear\. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License\. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License\.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License\.
+
+   5\. Submission of Contributions\. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions\.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions\.
+
+   6\. Trademarks\. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file\.
+
+   7\. Disclaimer of Warranty\. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work \(and each
+      Contributor provides its Contributions\) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON\-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE\. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License\.
+
+   8\. Limitation of Liability\. In no event and under no legal theory,
+      whether in tort \(including negligence\), contract, or otherwise,
+      unless required by applicable law \(such as deliberate and grossly
+      negligent acts\) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work \(including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses\), even if such Contributor
+      has been advised of the possibility of such damages\.
+
+   9\. Accepting Warranty or Additional Liability\. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License\. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability\.(?:
+
+   END OF TERMS AND CONDITIONS)?(?:
+
+   APPENDIX: How to apply the Apache License to your work\.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "\[\]"
+      replaced with your own identifying information\. \(Don't include
+      the brackets!\)  The text should be enclosed in the appropriate
+      comment syntax for the file format\. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third\-party archives\.)?(?:
+
+   Copyright .*)?(?:
+
+   Licensed under the Apache License, Version 2\.0 \(the "License"\);
+   you may not use this file except in compliance with the License\.
+   You may obtain a copy of the License at
+
+       http://www\.apache\.org/licenses/LICENSE\-2\.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied\.
+   See the License for the specific language governing permissions and
+   limitations under the License\.)?$

crates/zeta/src/license_detection/isc.regex 🔗

@@ -0,0 +1,15 @@
+^.*ISC License.*
+
+Copyright.*
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies\.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS\. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE\.$

crates/zeta/src/license_detection/mit-text 🔗

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 John Doe
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

crates/zeta/src/license_detection/mit.regex 🔗

@@ -0,0 +1,21 @@
+^.*MIT License.*
+
+Copyright.*
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files \(the "Software"\), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software\.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT\. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE\.$

crates/zeta/src/license_detection/upl.regex 🔗

@@ -0,0 +1,35 @@
+^Copyright.*
+
+The Universal Permissive License.*
+
+Subject to the condition set forth below, permission is hereby granted to any person
+obtaining a copy of this software, associated documentation and/or data \(collectively
+the "Software"\), free of charge and under any and all copyright rights in the
+Software, and any and all patent rights owned or freely licensable by each licensor
+hereunder covering either \(i\) the unmodified Software as contributed to or provided
+by such licensor, or \(ii\) the Larger Works \(as defined below\), to deal in both
+
+\(a\) the Software, and
+
+\(b\) any piece of software and/or hardware listed in the lrgrwrks\.txt file if one is
+    included with the Software \(each a "Larger Work" to which the Software is
+    contributed by such licensors\),
+
+without restriction, including without limitation the rights to copy, create
+derivative works of, display, perform, and distribute the Software and make, use,
+sell, offer for sale, import, export, have made, and have sold the Software and the
+Larger Work\(s\), and to sublicense the foregoing rights on either these or other
+terms\.
+
+This license is subject to the following condition:
+
+The above copyright notice and either this complete permission notice or at a minimum
+a reference to the UPL must be included in all copies or substantial portions of the
+Software\.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT\. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+OR THE USE OR OTHER DEALINGS IN THE SOFTWARE\.$

crates/zeta/src/zeta.rs 🔗

@@ -10,8 +10,7 @@ pub(crate) use completion_diff_element::*;
 use db::kvp::{Dismissable, KEY_VALUE_STORE};
 use edit_prediction::DataCollectionState;
 pub use init::*;
-use license_detection::LICENSE_FILES_TO_CHECK;
-pub use license_detection::is_license_eligible_for_data_collection;
+use license_detection::LicenseDetectionWatcher;
 pub use rate_completion_modal::*;
 
 use anyhow::{Context as _, Result, anyhow};
@@ -33,7 +32,6 @@ use language::{
     Anchor, Buffer, BufferSnapshot, EditPreview, OffsetRangeExt, ToOffset, ToPoint, text_diff,
 };
 use language_model::{LlmApiToken, RefreshLlmTokenListener};
-use postage::watch;
 use project::{Project, ProjectPath};
 use release_channel::AppVersion;
 use settings::WorktreeId;
@@ -253,11 +251,10 @@ impl Zeta {
 
         this.update(cx, move |this, cx| {
             if let Some(worktree) = worktree {
-                worktree.update(cx, |worktree, cx| {
-                    this.license_detection_watchers
-                        .entry(worktree.id())
-                        .or_insert_with(|| Rc::new(LicenseDetectionWatcher::new(worktree, cx)));
-                });
+                let worktree_id = worktree.read(cx).id();
+                this.license_detection_watchers
+                    .entry(worktree_id)
+                    .or_insert_with(|| Rc::new(LicenseDetectionWatcher::new(&worktree, cx)));
             }
         });
 
@@ -1104,59 +1101,6 @@ pub struct ZedUpdateRequiredError {
     minimum_version: SemanticVersion,
 }
 
-struct LicenseDetectionWatcher {
-    is_open_source_rx: watch::Receiver<bool>,
-    _is_open_source_task: Task<()>,
-}
-
-impl LicenseDetectionWatcher {
-    pub fn new(worktree: &Worktree, cx: &mut Context<Worktree>) -> Self {
-        let (mut is_open_source_tx, is_open_source_rx) = watch::channel_with::<bool>(false);
-
-        // Check if worktree is a single file, if so we do not need to check for a LICENSE file
-        let task = if worktree.abs_path().is_file() {
-            Task::ready(())
-        } else {
-            let loaded_files = LICENSE_FILES_TO_CHECK
-                .iter()
-                .map(Path::new)
-                .map(|file| worktree.load_file(file, cx))
-                .collect::<ArrayVec<_, { LICENSE_FILES_TO_CHECK.len() }>>();
-
-            cx.background_spawn(async move {
-                for loaded_file in loaded_files.into_iter() {
-                    let Ok(loaded_file) = loaded_file.await else {
-                        continue;
-                    };
-
-                    let path = &loaded_file.file.path;
-                    if is_license_eligible_for_data_collection(&loaded_file.text) {
-                        log::info!("detected '{path:?}' as open source license");
-                        *is_open_source_tx.borrow_mut() = true;
-                    } else {
-                        log::info!("didn't detect '{path:?}' as open source license");
-                    }
-
-                    // stop on the first license that successfully read
-                    return;
-                }
-
-                log::debug!("didn't find a license file to check, assuming closed source");
-            })
-        };
-
-        Self {
-            is_open_source_rx,
-            _is_open_source_task: task,
-        }
-    }
-
-    /// Answers false until we find out it's open source
-    pub fn is_project_open_source(&self) -> bool {
-        *self.is_open_source_rx.borrow()
-    }
-}
-
 fn common_prefix<T1: Iterator<Item = char>, T2: Iterator<Item = char>>(a: T1, b: T2) -> usize {
     a.zip(b)
         .take_while(|(a, b)| a == b)