license_detection.rs

  1use std::{
  2    collections::BTreeSet,
  3    fmt::{Display, Formatter},
  4    ops::Range,
  5    path::{Path, PathBuf},
  6    sync::{Arc, LazyLock},
  7};
  8
  9use anyhow::{Result, anyhow};
 10use fs::Fs;
 11use futures::StreamExt as _;
 12use gpui::{App, AppContext as _, Entity, Subscription, Task};
 13use itertools::Itertools;
 14use postage::watch;
 15use project::Worktree;
 16use strum::VariantArray;
 17use util::{ResultExt as _, maybe};
 18use worktree::ChildEntriesOptions;
 19
 20/// Matches the most common license locations, with US and UK English spelling.
 21static LICENSE_FILE_NAME_REGEX: LazyLock<regex::bytes::Regex> = LazyLock::new(|| {
 22    regex::bytes::RegexBuilder::new(
 23        "^ \
 24        (?: license | licence)? \
 25        (?: [\\-._]? \
 26            (?: apache (?: [\\-._] (?: 2.0 | 2 ))? | \
 27                0? bsd (?: [\\-._] [0123])? (?: [\\-._] clause)? | \
 28                isc | \
 29                mit | \
 30                upl | \
 31                zlib))? \
 32        (?: [\\-._]? (?: license | licence))? \
 33        (?: \\.txt | \\.md)? \
 34        $",
 35    )
 36    .ignore_whitespace(true)
 37    .case_insensitive(true)
 38    .build()
 39    .unwrap()
 40});
 41
 42#[derive(Debug, Clone, Copy, Eq, Ord, PartialOrd, PartialEq, VariantArray)]
 43pub enum OpenSourceLicense {
 44    Apache2_0,
 45    BSDZero,
 46    BSD,
 47    ISC,
 48    MIT,
 49    UPL1_0,
 50    Zlib,
 51}
 52
 53impl Display for OpenSourceLicense {
 54    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
 55        write!(f, "{}", self.spdx_identifier())
 56    }
 57}
 58
 59impl OpenSourceLicense {
 60    /// These are SPDX identifiers for the licenses, except for BSD, where the variants are not
 61    /// distinguished.
 62    pub fn spdx_identifier(&self) -> &'static str {
 63        match self {
 64            OpenSourceLicense::Apache2_0 => "apache-2.0",
 65            OpenSourceLicense::BSDZero => "0bsd",
 66            OpenSourceLicense::BSD => "bsd",
 67            OpenSourceLicense::ISC => "isc",
 68            OpenSourceLicense::MIT => "mit",
 69            OpenSourceLicense::UPL1_0 => "upl-1.0",
 70            OpenSourceLicense::Zlib => "zlib",
 71        }
 72    }
 73
 74    pub fn patterns(&self) -> &'static [&'static str] {
 75        match self {
 76            OpenSourceLicense::Apache2_0 => &[
 77                include_str!("../license_patterns/apache-2.0-pattern"),
 78                include_str!("../license_patterns/apache-2.0-reference-pattern"),
 79            ],
 80            OpenSourceLicense::BSDZero => &[include_str!("../license_patterns/0bsd-pattern")],
 81            OpenSourceLicense::BSD => &[include_str!("../license_patterns/bsd-pattern")],
 82            OpenSourceLicense::ISC => &[include_str!("../license_patterns/isc-pattern")],
 83            OpenSourceLicense::MIT => &[include_str!("../license_patterns/mit-pattern")],
 84            OpenSourceLicense::UPL1_0 => &[include_str!("../license_patterns/upl-1.0-pattern")],
 85            OpenSourceLicense::Zlib => &[include_str!("../license_patterns/zlib-pattern")],
 86        }
 87    }
 88}
 89
 90// TODO: Consider using databake or similar to not parse at runtime.
 91static LICENSE_PATTERNS: LazyLock<LicensePatterns> = LazyLock::new(|| {
 92    let mut approximate_max_length = 0;
 93    let mut patterns = Vec::new();
 94    for license in OpenSourceLicense::VARIANTS {
 95        for pattern in license.patterns() {
 96            let (pattern, length) = parse_pattern(pattern).unwrap();
 97            patterns.push((*license, pattern));
 98            approximate_max_length = approximate_max_length.max(length);
 99        }
100    }
101    LicensePatterns {
102        patterns,
103        approximate_max_length,
104    }
105});
106
107fn detect_license(text: &str) -> Option<OpenSourceLicense> {
108    let text = canonicalize_license_text(text);
109    for (license, pattern) in LICENSE_PATTERNS.patterns.iter() {
110        log::trace!("Checking if license is {}", license);
111        if check_pattern(&pattern, &text) {
112            return Some(*license);
113        }
114    }
115
116    None
117}
118
119struct LicensePatterns {
120    patterns: Vec<(OpenSourceLicense, Vec<PatternPart>)>,
121    approximate_max_length: usize,
122}
123
/// One segment of a license pattern: a run of arbitrary text of bounded length, followed by
/// literal text.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct PatternPart {
    /// Whether the pattern may still match when `text` is absent. The skip described by
    /// `match_any_chars` is only applied when `text` is matched.
    optional: bool,
    /// How many characters may be skipped before `text` is matched.
    match_any_chars: Range<usize>,
    /// The canonicalized text to match. May be empty.
    text: String,
}
134
135/// Lines that start with "-- " begin a `PatternPart`. `-- 1..10` specifies `match_any_chars:
136/// 1..10`. `-- 1..10 optional:` additionally specifies `optional: true`. It's a parse error for a
137/// line to start with `--` without matching this format.
138///
139/// Text that does not have `--` prefixes participate in the `text` field and are canonicalized by
140/// lowercasing, replacing all runs of whitespace with a single space, and otherwise only keeping
141/// ascii alphanumeric characters.
142fn parse_pattern(pattern_source: &str) -> Result<(Vec<PatternPart>, usize)> {
143    let mut pattern = Vec::new();
144    let mut part = PatternPart::default();
145    let mut approximate_max_length = 0;
146    for line in pattern_source.lines() {
147        if let Some(directive) = line.trim().strip_prefix("--") {
148            if part != PatternPart::default() {
149                pattern.push(part);
150                part = PatternPart::default();
151            }
152            let valid = maybe!({
153                let directive_chunks = directive.split_whitespace().collect::<Vec<_>>();
154                if !(1..=2).contains(&directive_chunks.len()) {
155                    return None;
156                }
157                if directive_chunks.len() == 2 {
158                    part.optional = true;
159                }
160                let range_chunks = directive_chunks[0].split("..").collect::<Vec<_>>();
161                if range_chunks.len() != 2 {
162                    return None;
163                }
164                part.match_any_chars.start = range_chunks[0].parse::<usize>().ok()?;
165                part.match_any_chars.end = range_chunks[1].parse::<usize>().ok()?;
166                if part.match_any_chars.start > part.match_any_chars.end {
167                    return None;
168                }
169                approximate_max_length += part.match_any_chars.end;
170                Some(())
171            });
172            if valid.is_none() {
173                return Err(anyhow!("Invalid pattern directive: {}", line));
174            }
175            continue;
176        }
177        approximate_max_length += line.len() + 1;
178        let line = canonicalize_license_text(line);
179        if line.is_empty() {
180            continue;
181        }
182        if !part.text.is_empty() {
183            part.text.push(' ');
184        }
185        part.text.push_str(&line);
186    }
187    if part != PatternPart::default() {
188        pattern.push(part);
189    }
190    Ok((pattern, approximate_max_length))
191}
192
193/// Checks a pattern against text by iterating over the pattern parts in reverse order, and checking
194/// matches with the end of a prefix of the input. Assumes that `canonicalize_license_text` has
195/// already been applied to the input.
196fn check_pattern(pattern: &[PatternPart], input: &str) -> bool {
197    let mut input_ix = input.len();
198    let mut match_any_chars = 0..0;
199    for part in pattern.iter().rev() {
200        if part.text.is_empty() {
201            match_any_chars.start += part.match_any_chars.start;
202            match_any_chars.end += part.match_any_chars.end;
203            continue;
204        }
205        let mut matched = false;
206        for skip_count in match_any_chars.start..=match_any_chars.end {
207            let end_ix = input_ix.saturating_sub(skip_count);
208            if end_ix < part.text.len() {
209                break;
210            }
211            if input[..end_ix].ends_with(&part.text) {
212                matched = true;
213                input_ix = end_ix - part.text.len();
214                match_any_chars = part.match_any_chars.clone();
215                break;
216            }
217        }
218        if !matched && !part.optional {
219            log::trace!(
220                "Failed to match pattern `...{}` against input `...{}`",
221                &part.text[part.text.len().saturating_sub(128)..],
222                &input[input_ix.saturating_sub(128)..]
223            );
224            return false;
225        }
226    }
227    match_any_chars.contains(&input_ix)
228}
229
230/// Canonicalizes license text by removing all non-alphanumeric characters, lowercasing, and turning
231/// runs of whitespace into a single space. Unicode alphanumeric characters are intentionally
232/// preserved since these should cause license mismatch when not within a portion of the license
233/// where arbitrary text is allowed.
234fn canonicalize_license_text(license: &str) -> String {
235    license
236        .chars()
237        .filter(|c| c.is_ascii_whitespace() || c.is_alphanumeric())
238        .map(|c| c.to_ascii_lowercase())
239        .collect::<String>()
240        .split_ascii_whitespace()
241        .join(" ")
242}
243
244pub enum LicenseDetectionWatcher {
245    Local {
246        is_open_source_rx: watch::Receiver<bool>,
247        _is_open_source_task: Task<()>,
248        _worktree_subscription: Subscription,
249    },
250    SingleFile,
251    Remote,
252}
253
254impl LicenseDetectionWatcher {
255    pub fn new(worktree: &Entity<Worktree>, cx: &mut App) -> Self {
256        let worktree_ref = worktree.read(cx);
257        if worktree_ref.is_single_file() {
258            return Self::SingleFile;
259        }
260
261        let (files_to_check_tx, mut files_to_check_rx) = futures::channel::mpsc::unbounded();
262
263        let Worktree::Local(local_worktree) = worktree_ref else {
264            return Self::Remote;
265        };
266        let fs = local_worktree.fs().clone();
267        let worktree_abs_path = local_worktree.abs_path().clone();
268
269        let options = ChildEntriesOptions {
270            include_files: true,
271            include_dirs: false,
272            include_ignored: true,
273        };
274        for top_file in local_worktree.child_entries_with_options(Path::new(""), options) {
275            let path_bytes = top_file.path.as_os_str().as_encoded_bytes();
276            if top_file.is_created() && LICENSE_FILE_NAME_REGEX.is_match(path_bytes) {
277                let rel_path = top_file.path.clone();
278                files_to_check_tx.unbounded_send(rel_path).ok();
279            }
280        }
281
282        let _worktree_subscription =
283            cx.subscribe(worktree, move |_worktree, event, _cx| match event {
284                worktree::Event::UpdatedEntries(updated_entries) => {
285                    for updated_entry in updated_entries.iter() {
286                        let rel_path = &updated_entry.0;
287                        let path_bytes = rel_path.as_os_str().as_encoded_bytes();
288                        if LICENSE_FILE_NAME_REGEX.is_match(path_bytes) {
289                            files_to_check_tx.unbounded_send(rel_path.clone()).ok();
290                        }
291                    }
292                }
293                worktree::Event::DeletedEntry(_) | worktree::Event::UpdatedGitRepositories(_) => {}
294            });
295
296        let (mut is_open_source_tx, is_open_source_rx) = watch::channel_with::<bool>(false);
297
298        let _is_open_source_task = cx.background_spawn(async move {
299            let mut eligible_licenses = BTreeSet::new();
300            while let Some(rel_path) = files_to_check_rx.next().await {
301                let abs_path = worktree_abs_path.join(&rel_path);
302                let was_open_source = !eligible_licenses.is_empty();
303                if Self::is_path_eligible(&fs, abs_path).await.unwrap_or(false) {
304                    eligible_licenses.insert(rel_path);
305                } else {
306                    eligible_licenses.remove(&rel_path);
307                }
308                let is_open_source = !eligible_licenses.is_empty();
309                if is_open_source != was_open_source {
310                    *is_open_source_tx.borrow_mut() = is_open_source;
311                }
312            }
313        });
314
315        Self::Local {
316            is_open_source_rx,
317            _is_open_source_task,
318            _worktree_subscription,
319        }
320    }
321
322    async fn is_path_eligible(fs: &Arc<dyn Fs>, abs_path: PathBuf) -> Option<bool> {
323        log::debug!("checking if `{abs_path:?}` is an open source license");
324        // resolve symlinks so that the file size from metadata is correct
325        let Some(abs_path) = fs.canonicalize(&abs_path).await.ok() else {
326            log::debug!(
327                "`{abs_path:?}` license file probably deleted (error canonicalizing the path)"
328            );
329            return None;
330        };
331        let metadata = fs.metadata(&abs_path).await.log_err()??;
332        if metadata.len > LICENSE_PATTERNS.approximate_max_length as u64 {
333            log::debug!(
334                "`{abs_path:?}` license file was skipped \
335                because its size of {} bytes was larger than the max size of {} bytes",
336                metadata.len,
337                LICENSE_PATTERNS.approximate_max_length
338            );
339            return None;
340        }
341        let text = fs.load(&abs_path).await.log_err()?;
342        let is_eligible = detect_license(&text).is_some();
343        if is_eligible {
344            log::debug!(
345                "`{abs_path:?}` matches a license that is eligible for data collection (if enabled)"
346            );
347        } else {
348            log::debug!(
349                "`{abs_path:?}` does not match a license that is eligible for data collection"
350            );
351        }
352        Some(is_eligible)
353    }
354
355    /// Answers false until we find out it's open source
356    pub fn is_project_open_source(&self) -> bool {
357        match self {
358            Self::Local {
359                is_open_source_rx, ..
360            } => *is_open_source_rx.borrow(),
361            Self::SingleFile | Self::Remote => false,
362        }
363    }
364}
365
#[cfg(test)]
mod tests {
    use fs::FakeFs;
    use gpui::TestAppContext;
    use serde_json::json;
    use settings::{Settings as _, SettingsStore};
    use worktree::WorktreeSettings;

    use super::*;

    const APACHE_2_0_TXT: &str = include_str!("../license_examples/apache-2.0-ex0.txt");
    const BSD_0_TXT: &str = include_str!("../license_examples/0bsd.txt");
    const ISC_TXT: &str = include_str!("../license_examples/isc.txt");
    const MIT_TXT: &str = include_str!("../license_examples/mit-ex0.txt");
    const UPL_1_0_TXT: &str = include_str!("../license_examples/upl-1.0.txt");

    /// Asserts that `text` is detected as `license`, and that it is short enough to pass the size
    /// check applied to license files.
    #[track_caller]
    fn assert_matches_license(text: &str, license: OpenSourceLicense) {
        assert_eq!(detect_license(text), Some(license));
        assert!(text.len() < LICENSE_PATTERNS.approximate_max_length);
    }

    /// Asserts that every text in `texts` is detected as `license`.
    #[track_caller]
    fn assert_all_match_license(texts: &[&str], license: OpenSourceLicense) {
        for text in texts {
            assert_matches_license(text, license);
        }
    }

    /*
    // Uncomment this and run with `cargo test -p zeta -- --no-capture &> licenses-output` to
    // traverse your entire home directory and run license detection on every file that has a
    // license-like name.
    #[test]
    fn test_check_all_licenses_in_home_dir() {
        let mut detected = Vec::new();
        let mut unrecognized = Vec::new();
        let mut walked_entries = 0;
        let homedir = std::env::home_dir().unwrap();
        for entry in walkdir::WalkDir::new(&homedir) {
            walked_entries += 1;
            if walked_entries % 10000 == 0 {
                println!(
                    "So far visited {} files in {}",
                    walked_entries,
                    homedir.display()
                );
            }
            let Ok(entry) = entry else {
                continue;
            };
            if !LICENSE_FILE_NAME_REGEX.is_match(entry.file_name().as_encoded_bytes()) {
                continue;
            }
            let Ok(contents) = std::fs::read_to_string(entry.path()) else {
                continue;
            };
            let path_string = entry.path().to_string_lossy().to_string();
            let license = detect_license(&contents);
            match license {
                Some(license) => detected.push((license, path_string)),
                None => unrecognized.push(path_string),
            }
        }
        println!("\nDetected licenses:\n");
        detected.sort();
        for (license, path) in &detected {
            println!("{}: {}", license.spdx_identifier(), path);
        }
        println!("\nUnrecognized licenses:\n");
        for path in &unrecognized {
            println!("{}", path);
        }
        panic!(
            "{} licenses detected, {} unrecognized",
            detected.len(),
            unrecognized.len()
        );
        println!("This line has a warning to make sure this test is always commented out");
    }
    */

    #[test]
    fn test_apache_positive_detection() {
        assert_all_match_license(
            &[
                APACHE_2_0_TXT,
                include_str!("../license_examples/apache-2.0-ex1.txt"),
                include_str!("../license_examples/apache-2.0-ex2.txt"),
                include_str!("../license_examples/apache-2.0-ex3.txt"),
                include_str!("../license_examples/apache-2.0-ex4.txt"),
                include_str!("../../../LICENSE-APACHE"),
            ],
            OpenSourceLicense::Apache2_0,
        );
    }

    #[test]
    fn test_apache_negative_detection() {
        let with_extra_terms =
            format!("{APACHE_2_0_TXT}\n\nThe terms in this license are void if P=NP.");
        assert_eq!(detect_license(&with_extra_terms), None);
    }

    #[test]
    fn test_bsd_1_clause_positive_detection() {
        assert_all_match_license(
            &[include_str!("../license_examples/bsd-1-clause.txt")],
            OpenSourceLicense::BSD,
        );
    }

    #[test]
    fn test_bsd_2_clause_positive_detection() {
        assert_all_match_license(
            &[include_str!("../license_examples/bsd-2-clause-ex0.txt")],
            OpenSourceLicense::BSD,
        );
    }

    #[test]
    fn test_bsd_3_clause_positive_detection() {
        assert_all_match_license(
            &[
                include_str!("../license_examples/bsd-3-clause-ex0.txt"),
                include_str!("../license_examples/bsd-3-clause-ex1.txt"),
                include_str!("../license_examples/bsd-3-clause-ex2.txt"),
                include_str!("../license_examples/bsd-3-clause-ex3.txt"),
                include_str!("../license_examples/bsd-3-clause-ex4.txt"),
            ],
            OpenSourceLicense::BSD,
        );
    }

    #[test]
    fn test_bsd_0_positive_detection() {
        assert_all_match_license(&[BSD_0_TXT], OpenSourceLicense::BSDZero);
    }

    #[test]
    fn test_isc_positive_detection() {
        assert_all_match_license(&[ISC_TXT], OpenSourceLicense::ISC);
    }

    #[test]
    fn test_isc_negative_detection() {
        let dual_licensed = format!(
            r#"{ISC_TXT}

            This project is dual licensed under the ISC License and the MIT License."#
        );

        assert_eq!(detect_license(&dual_licensed), None);
    }

    #[test]
    fn test_mit_positive_detection() {
        assert_all_match_license(
            &[
                MIT_TXT,
                include_str!("../license_examples/mit-ex1.txt"),
                include_str!("../license_examples/mit-ex2.txt"),
                include_str!("../license_examples/mit-ex3.txt"),
            ],
            OpenSourceLicense::MIT,
        );
    }

    #[test]
    fn test_mit_negative_detection() {
        let dual_licensed = format!(
            r#"{MIT_TXT}

            This project is dual licensed under the MIT License and the Apache License, Version 2.0."#
        );
        assert_eq!(detect_license(&dual_licensed), None);
    }

    #[test]
    fn test_upl_positive_detection() {
        assert_all_match_license(&[UPL_1_0_TXT], OpenSourceLicense::UPL1_0);
    }

    #[test]
    fn test_upl_negative_detection() {
        let dual_licensed = format!(
            r#"{UPL_1_0_TXT}

            This project is dual licensed under the UPL License and the MIT License."#
        );

        assert_eq!(detect_license(&dual_licensed), None);
    }

    #[test]
    fn test_zlib_positive_detection() {
        assert_all_match_license(
            &[include_str!("../license_examples/zlib-ex0.txt")],
            OpenSourceLicense::Zlib,
        );
    }

    #[test]
    fn test_license_file_name_regex() {
        let matching: &[&[u8]] = &[
            // Basic license file names
            b"LICENSE",
            b"LICENCE",
            b"license",
            b"licence",
            // With extensions
            b"LICENSE.txt",
            b"LICENSE.md",
            b"LICENCE.txt",
            b"LICENCE.md",
            // With specific license types
            b"LICENSE-APACHE",
            b"LICENSE-MIT",
            b"LICENSE.MIT",
            b"LICENSE_MIT",
            b"LICENSE-ISC",
            b"LICENSE-UPL",
            // With "license" coming after
            b"APACHE-LICENSE",
            // Version numbers
            b"APACHE-2",
            b"APACHE-2.0",
            b"BSD-1",
            b"BSD-2",
            b"BSD-3",
            b"BSD-3-CLAUSE",
            // Combinations
            b"LICENSE-MIT.txt",
            b"LICENCE.ISC.md",
            b"license_upl",
            b"LICENSE.APACHE.2.0",
            // Case insensitive
            b"License",
            b"license-mit.TXT",
            b"LICENCE_isc.MD",
            // Edge cases that should match
            b"license.mit",
            b"licence-upl.txt",
        ];
        for &name in matching {
            assert!(
                LICENSE_FILE_NAME_REGEX.is_match(name),
                "expected `{}` to match",
                String::from_utf8_lossy(name)
            );
        }

        let non_matching: &[&[u8]] = &[
            b"COPYING",
            b"LICENSE.html",
            b"MYLICENSE",
            b"src/LICENSE",
            b"LICENSE.old",
            b"LICENSE-GPL",
            b"LICENSEABC",
        ];
        for &name in non_matching {
            assert!(
                !LICENSE_FILE_NAME_REGEX.is_match(name),
                "expected `{}` not to match",
                String::from_utf8_lossy(name)
            );
        }
    }

    #[test]
    fn test_canonicalize_license_text() {
        let cases = [
            (
                "  Paragraph 1\nwith multiple lines\n\n\n\nParagraph 2\nwith more lines\n  ",
                "paragraph 1 with multiple lines paragraph 2 with more lines",
            ),
            // Tabs and mixed whitespace
            (
                "Word1\t\tWord2\n\n   Word3\r\n\r\n\r\nWord4   ",
                "word1 word2 word3 word4",
            ),
        ];
        for (input, expected) in cases {
            assert_eq!(canonicalize_license_text(input), expected);
        }
    }

    /// Installs the settings that worktrees need in tests.
    fn init_test(cx: &mut TestAppContext) {
        cx.update(|cx| {
            let settings_store = SettingsStore::test(cx);
            cx.set_global(settings_store);
            WorktreeSettings::register(cx);
        });
    }

    /// Opens a local worktree at `path` on the fake file system.
    async fn open_local_worktree(
        path: &'static str,
        fs: Arc<FakeFs>,
        cx: &mut TestAppContext,
    ) -> Entity<Worktree> {
        Worktree::local(
            Path::new(path),
            true,
            fs,
            Default::default(),
            &mut cx.to_async(),
        )
        .await
        .unwrap()
    }

    #[gpui::test]
    async fn test_watcher_single_file(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree("/root", json!({ "main.rs": "fn main() {}" }))
            .await;

        let worktree = open_local_worktree("/root/main.rs", fs.clone(), cx).await;

        let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
        assert!(matches!(watcher, LicenseDetectionWatcher::SingleFile));
        assert!(!watcher.is_project_open_source());
    }

    #[gpui::test]
    async fn test_watcher_updates_on_changes(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree("/root", json!({ "main.rs": "fn main() {}" }))
            .await;

        let worktree = open_local_worktree("/root", fs.clone(), cx).await;

        let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
        assert!(matches!(watcher, LicenseDetectionWatcher::Local { .. }));
        assert!(!watcher.is_project_open_source());

        // Each step writes one file and states whether the project is open source afterwards.
        let steps = [
            ("/root/LICENSE-MIT", MIT_TXT, true),
            ("/root/LICENSE-APACHE", APACHE_2_0_TXT, true),
            // Still considered open source as LICENSE-APACHE is present
            ("/root/LICENSE-MIT", "Nevermind", true),
            ("/root/LICENSE-APACHE", "Also nevermind", false),
        ];
        for (path, contents, expected_open_source) in steps {
            fs.write(Path::new(path), contents.as_bytes())
                .await
                .unwrap();

            cx.background_executor.run_until_parked();
            assert_eq!(
                watcher.is_project_open_source(),
                expected_open_source,
                "after writing `{path}`"
            );
        }
    }

    #[gpui::test]
    async fn test_watcher_initially_opensource_and_then_deleted(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree(
            "/root",
            json!({ "main.rs": "fn main() {}", "LICENSE-MIT": MIT_TXT }),
        )
        .await;

        let worktree = open_local_worktree("/root", fs.clone(), cx).await;

        let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
        assert!(matches!(watcher, LicenseDetectionWatcher::Local { .. }));

        cx.background_executor.run_until_parked();
        assert!(watcher.is_project_open_source());

        let remove_options = fs::RemoveOptions {
            recursive: false,
            ignore_if_not_exists: false,
        };
        fs.remove_file(Path::new("/root/LICENSE-MIT"), remove_options)
            .await
            .unwrap();

        cx.background_executor.run_until_parked();
        assert!(!watcher.is_project_open_source());
    }
}