1use std::{
2 collections::BTreeSet,
3 fmt::{Display, Formatter},
4 ops::Range,
5 path::PathBuf,
6 sync::{Arc, LazyLock},
7};
8
9use anyhow::{Result, anyhow};
10use fs::Fs;
11use futures::StreamExt as _;
12use gpui::{App, AppContext as _, Entity, Subscription, Task};
13use itertools::Itertools;
14use postage::watch;
15use project::Worktree;
16use strum::VariantArray;
17use util::{ResultExt as _, maybe, rel_path::RelPath};
18use worktree::ChildEntriesOptions;
19
20/// Matches the most common license locations, with US and UK English spelling.
21static LICENSE_FILE_NAME_REGEX: LazyLock<regex::bytes::Regex> = LazyLock::new(|| {
22 regex::bytes::RegexBuilder::new(
23 "^ \
24 (?: license | licence)? \
25 (?: [\\-._]? \
26 (?: apache (?: [\\-._] (?: 2.0 | 2 ))? | \
27 0? bsd (?: [\\-._] [0123])? (?: [\\-._] clause)? | \
28 isc | \
29 mit | \
30 upl | \
31 zlib))? \
32 (?: [\\-._]? (?: license | licence))? \
33 (?: \\.txt | \\.md)? \
34 $",
35 )
36 .ignore_whitespace(true)
37 .case_insensitive(true)
38 .build()
39 .unwrap()
40});
41
42#[derive(Debug, Clone, Copy, Eq, Ord, PartialOrd, PartialEq, VariantArray)]
43pub enum OpenSourceLicense {
44 Apache2_0,
45 BSDZero,
46 BSD,
47 ISC,
48 MIT,
49 UPL1_0,
50 Zlib,
51}
52
53impl Display for OpenSourceLicense {
54 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
55 write!(f, "{}", self.spdx_identifier())
56 }
57}
58
59impl OpenSourceLicense {
60 /// These are SPDX identifiers for the licenses, except for BSD, where the variants are not
61 /// distinguished.
62 pub fn spdx_identifier(&self) -> &'static str {
63 match self {
64 OpenSourceLicense::Apache2_0 => "apache-2.0",
65 OpenSourceLicense::BSDZero => "0bsd",
66 OpenSourceLicense::BSD => "bsd",
67 OpenSourceLicense::ISC => "isc",
68 OpenSourceLicense::MIT => "mit",
69 OpenSourceLicense::UPL1_0 => "upl-1.0",
70 OpenSourceLicense::Zlib => "zlib",
71 }
72 }
73
74 pub fn patterns(&self) -> &'static [&'static str] {
75 match self {
76 OpenSourceLicense::Apache2_0 => &[
77 include_str!("../license_patterns/apache-2.0-pattern"),
78 include_str!("../license_patterns/apache-2.0-reference-pattern"),
79 ],
80 OpenSourceLicense::BSDZero => &[include_str!("../license_patterns/0bsd-pattern")],
81 OpenSourceLicense::BSD => &[include_str!("../license_patterns/bsd-pattern")],
82 OpenSourceLicense::ISC => &[include_str!("../license_patterns/isc-pattern")],
83 OpenSourceLicense::MIT => &[include_str!("../license_patterns/mit-pattern")],
84 OpenSourceLicense::UPL1_0 => &[include_str!("../license_patterns/upl-1.0-pattern")],
85 OpenSourceLicense::Zlib => &[include_str!("../license_patterns/zlib-pattern")],
86 }
87 }
88}
89
90// TODO: Consider using databake or similar to not parse at runtime.
91static LICENSE_PATTERNS: LazyLock<LicensePatterns> = LazyLock::new(|| {
92 let mut approximate_max_length = 0;
93 let mut patterns = Vec::new();
94 for license in OpenSourceLicense::VARIANTS {
95 for pattern in license.patterns() {
96 let (pattern, length) = parse_pattern(pattern).unwrap();
97 patterns.push((*license, pattern));
98 approximate_max_length = approximate_max_length.max(length);
99 }
100 }
101 LicensePatterns {
102 patterns,
103 approximate_max_length,
104 }
105});
106
107fn detect_license(text: &str) -> Option<OpenSourceLicense> {
108 let text = canonicalize_license_text(text);
109 for (license, pattern) in LICENSE_PATTERNS.patterns.iter() {
110 log::trace!("Checking if license is {}", license);
111 if check_pattern(&pattern, &text) {
112 return Some(*license);
113 }
114 }
115
116 None
117}
118
119struct LicensePatterns {
120 patterns: Vec<(OpenSourceLicense, Vec<PatternPart>)>,
121 approximate_max_length: usize,
122}
123
/// One segment of a license pattern: a bounded run of arbitrary characters followed by literal
/// (canonicalized) text.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct PatternPart {
    /// When true, the input is allowed to not contain `text`. The characters covered by
    /// `match_any_chars` are only skipped when `text` itself was matched.
    optional: bool,
    /// How many characters may be skipped before `text` is matched.
    match_any_chars: Range<usize>,
    /// The literal text to match. May be empty.
    text: String,
}
134
135/// Lines that start with "-- " begin a `PatternPart`. `-- 1..10` specifies `match_any_chars:
136/// 1..10`. `-- 1..10 optional:` additionally specifies `optional: true`. It's a parse error for a
137/// line to start with `--` without matching this format.
138///
139/// Text that does not have `--` prefixes participate in the `text` field and are canonicalized by
140/// lowercasing, replacing all runs of whitespace with a single space, and otherwise only keeping
141/// ascii alphanumeric characters.
142fn parse_pattern(pattern_source: &str) -> Result<(Vec<PatternPart>, usize)> {
143 let mut pattern = Vec::new();
144 let mut part = PatternPart::default();
145 let mut approximate_max_length = 0;
146 for line in pattern_source.lines() {
147 if let Some(directive) = line.trim().strip_prefix("--") {
148 if part != PatternPart::default() {
149 pattern.push(part);
150 part = PatternPart::default();
151 }
152 let valid = maybe!({
153 let directive_chunks = directive.split_whitespace().collect::<Vec<_>>();
154 if !(1..=2).contains(&directive_chunks.len()) {
155 return None;
156 }
157 if directive_chunks.len() == 2 {
158 part.optional = true;
159 }
160 let range_chunks = directive_chunks[0].split("..").collect::<Vec<_>>();
161 if range_chunks.len() != 2 {
162 return None;
163 }
164 part.match_any_chars.start = range_chunks[0].parse::<usize>().ok()?;
165 part.match_any_chars.end = range_chunks[1].parse::<usize>().ok()?;
166 if part.match_any_chars.start > part.match_any_chars.end {
167 return None;
168 }
169 approximate_max_length += part.match_any_chars.end;
170 Some(())
171 });
172 if valid.is_none() {
173 return Err(anyhow!("Invalid pattern directive: {}", line));
174 }
175 continue;
176 }
177 approximate_max_length += line.len() + 1;
178 let line = canonicalize_license_text(line);
179 if line.is_empty() {
180 continue;
181 }
182 if !part.text.is_empty() {
183 part.text.push(' ');
184 }
185 part.text.push_str(&line);
186 }
187 if part != PatternPart::default() {
188 pattern.push(part);
189 }
190 Ok((pattern, approximate_max_length))
191}
192
193/// Checks a pattern against text by iterating over the pattern parts in reverse order, and checking
194/// matches with the end of a prefix of the input. Assumes that `canonicalize_license_text` has
195/// already been applied to the input.
196fn check_pattern(pattern: &[PatternPart], input: &str) -> bool {
197 let mut input_ix = input.len();
198 let mut match_any_chars = 0..0;
199 for part in pattern.iter().rev() {
200 if part.text.is_empty() {
201 match_any_chars.start += part.match_any_chars.start;
202 match_any_chars.end += part.match_any_chars.end;
203 continue;
204 }
205
206 let search_range_end = n_chars_before_offset(match_any_chars.start, input_ix, input);
207 let search_range_start = n_chars_before_offset(
208 match_any_chars.len() + part.text.len(),
209 search_range_end,
210 input,
211 );
212 let found_ix = input[search_range_start..search_range_end].rfind(&part.text);
213
214 if let Some(found_ix) = found_ix {
215 input_ix = search_range_start + found_ix;
216 match_any_chars = part.match_any_chars.clone();
217 } else if !part.optional {
218 log::trace!(
219 "Failed to match pattern\n`...{}`\nagainst input\n`...{}`",
220 &part.text[n_chars_before_offset(128, part.text.len(), &part.text)..],
221 &input[n_chars_before_offset(128, search_range_end, input)..search_range_end],
222 );
223 return false;
224 }
225 }
226 is_char_count_within_range(&input[..input_ix], match_any_chars)
227}
228
/// Returns the byte index reached by stepping `char_count` characters backwards from the byte
/// `offset` in `string`, or 0 when fewer than `char_count` characters precede `offset`.
///
/// When `char_count` is zero, `offset` is returned unchanged without inspecting `string`.
/// Otherwise `offset` must lie on a character boundary within `string`, as the string is sliced.
fn n_chars_before_offset(char_count: usize, offset: usize, string: &str) -> usize {
    match char_count.checked_sub(1) {
        None => offset,
        Some(chars_to_skip) => string[..offset]
            .char_indices()
            .rev()
            .nth(chars_to_skip)
            .map(|(byte_ix, _)| byte_ix)
            .unwrap_or(0),
    }
}
238
/// Returns whether the number of characters in `string` lies within `char_count_range`.
///
/// Since a character occupies between one and four bytes, the byte length alone often settles
/// the answer; the characters are only counted when it does not.
fn is_char_count_within_range(string: &str, char_count_range: Range<usize>) -> bool {
    let byte_len = string.len();
    let Range { start, end } = char_count_range;

    if byte_len >= start * 4 && byte_len < end {
        // At least `start` characters even if all are four bytes wide, and fewer than `end`
        // even if all are a single byte.
        true
    } else if byte_len < start || byte_len >= end * 4 {
        // Too few bytes to hold `start` characters, or too many to hold fewer than `end`.
        false
    } else {
        (start..end).contains(&string.chars().count())
    }
}
248
249/// Canonicalizes license text by removing all non-alphanumeric characters, lowercasing, and turning
250/// runs of whitespace into a single space. Unicode alphanumeric characters are intentionally
251/// preserved since these should cause license mismatch when not within a portion of the license
252/// where arbitrary text is allowed.
253fn canonicalize_license_text(license: &str) -> String {
254 license
255 .chars()
256 .filter(|c| c.is_ascii_whitespace() || c.is_alphanumeric())
257 .map(|c| c.to_ascii_lowercase())
258 .collect::<String>()
259 .split_ascii_whitespace()
260 .join(" ")
261}
262
263pub enum LicenseDetectionWatcher {
264 Local {
265 is_open_source_rx: watch::Receiver<bool>,
266 _is_open_source_task: Task<()>,
267 _worktree_subscription: Subscription,
268 },
269 SingleFile,
270 Remote,
271}
272
273impl LicenseDetectionWatcher {
274 pub fn new(worktree: &Entity<Worktree>, cx: &mut App) -> Self {
275 let worktree_ref = worktree.read(cx);
276 if worktree_ref.is_single_file() {
277 return Self::SingleFile;
278 }
279
280 let (files_to_check_tx, mut files_to_check_rx) = futures::channel::mpsc::unbounded();
281
282 let Worktree::Local(local_worktree) = worktree_ref else {
283 return Self::Remote;
284 };
285 let fs = local_worktree.fs().clone();
286
287 let options = ChildEntriesOptions {
288 include_files: true,
289 include_dirs: false,
290 include_ignored: true,
291 };
292 for top_file in local_worktree.child_entries_with_options(RelPath::empty(), options) {
293 let path_bytes = top_file.path.as_unix_str().as_bytes();
294 if top_file.is_created() && LICENSE_FILE_NAME_REGEX.is_match(path_bytes) {
295 let rel_path = top_file.path.clone();
296 files_to_check_tx.unbounded_send(rel_path).ok();
297 }
298 }
299
300 let _worktree_subscription =
301 cx.subscribe(worktree, move |_worktree, event, _cx| match event {
302 worktree::Event::UpdatedEntries(updated_entries) => {
303 for updated_entry in updated_entries.iter() {
304 let rel_path = &updated_entry.0;
305 let path_bytes = rel_path.as_unix_str().as_bytes();
306 if LICENSE_FILE_NAME_REGEX.is_match(path_bytes) {
307 files_to_check_tx.unbounded_send(rel_path.clone()).ok();
308 }
309 }
310 }
311 worktree::Event::DeletedEntry(_)
312 | worktree::Event::UpdatedGitRepositories(_)
313 | worktree::Event::Deleted => {}
314 });
315
316 let worktree_snapshot = worktree.read(cx).snapshot();
317 let (mut is_open_source_tx, is_open_source_rx) = watch::channel_with::<bool>(false);
318
319 let _is_open_source_task = cx.background_spawn(async move {
320 let mut eligible_licenses = BTreeSet::new();
321 while let Some(rel_path) = files_to_check_rx.next().await {
322 let abs_path = worktree_snapshot.absolutize(&rel_path);
323 let was_open_source = !eligible_licenses.is_empty();
324 if Self::is_path_eligible(&fs, abs_path).await.unwrap_or(false) {
325 eligible_licenses.insert(rel_path);
326 } else {
327 eligible_licenses.remove(&rel_path);
328 }
329 let is_open_source = !eligible_licenses.is_empty();
330 if is_open_source != was_open_source {
331 *is_open_source_tx.borrow_mut() = is_open_source;
332 }
333 }
334 });
335
336 Self::Local {
337 is_open_source_rx,
338 _is_open_source_task,
339 _worktree_subscription,
340 }
341 }
342
343 async fn is_path_eligible(fs: &Arc<dyn Fs>, abs_path: PathBuf) -> Option<bool> {
344 log::debug!("checking if `{abs_path:?}` is an open source license");
345 // resolve symlinks so that the file size from metadata is correct
346 let Some(abs_path) = fs.canonicalize(&abs_path).await.ok() else {
347 log::debug!(
348 "`{abs_path:?}` license file probably deleted (error canonicalizing the path)"
349 );
350 return None;
351 };
352 let metadata = fs.metadata(&abs_path).await.log_err()??;
353 if metadata.len > LICENSE_PATTERNS.approximate_max_length as u64 {
354 log::debug!(
355 "`{abs_path:?}` license file was skipped \
356 because its size of {} bytes was larger than the max size of {} bytes",
357 metadata.len,
358 LICENSE_PATTERNS.approximate_max_length
359 );
360 return None;
361 }
362 let text = fs.load(&abs_path).await.log_err()?;
363 let is_eligible = detect_license(&text).is_some();
364 if is_eligible {
365 log::debug!(
366 "`{abs_path:?}` matches a license that is eligible for data collection (if enabled)"
367 );
368 } else {
369 log::debug!(
370 "`{abs_path:?}` does not match a license that is eligible for data collection"
371 );
372 }
373 Some(is_eligible)
374 }
375
376 /// Answers false until we find out it's open source
377 pub fn is_project_open_source(&self) -> bool {
378 match self {
379 Self::Local {
380 is_open_source_rx, ..
381 } => *is_open_source_rx.borrow(),
382 Self::SingleFile | Self::Remote => false,
383 }
384 }
385}
386
387#[cfg(test)]
388mod tests {
389 use std::path::Path;
390
391 use fs::FakeFs;
392 use gpui::TestAppContext;
393 use project::WorktreeId;
394 use rand::Rng as _;
395 use serde_json::json;
396 use settings::SettingsStore;
397
398 use super::*;
399
400 const APACHE_2_0_TXT: &str = include_str!("../license_examples/apache-2.0-ex0.txt");
401 const ISC_TXT: &str = include_str!("../license_examples/isc.txt");
402 const MIT_TXT: &str = include_str!("../license_examples/mit-ex0.txt");
403 const UPL_1_0_TXT: &str = include_str!("../license_examples/upl-1.0.txt");
404 const BSD_0_TXT: &str = include_str!("../license_examples/0bsd.txt");
405
406 #[track_caller]
407 fn assert_matches_license(text: &str, license: OpenSourceLicense) {
408 assert_eq!(detect_license(text), Some(license));
409 assert!(text.len() < LICENSE_PATTERNS.approximate_max_length);
410 }
411
412 /*
413 // Uncomment this and run with `cargo test -p zeta -- --no-capture &> licenses-output` to
414 // traverse your entire home directory and run license detection on every file that has a
415 // license-like name.
416 #[test]
417 fn test_check_all_licenses_in_home_dir() {
418 let mut detected = Vec::new();
419 let mut unrecognized = Vec::new();
420 let mut walked_entries = 0;
421 let homedir = std::env::home_dir().unwrap();
422 for entry in walkdir::WalkDir::new(&homedir) {
423 walked_entries += 1;
424 if walked_entries % 10000 == 0 {
425 println!(
426 "So far visited {} files in {}",
427 walked_entries,
428 homedir.display()
429 );
430 }
431 let Ok(entry) = entry else {
432 continue;
433 };
434 if !LICENSE_FILE_NAME_REGEX.is_match(entry.file_name().as_encoded_bytes()) {
435 continue;
436 }
437 let Ok(contents) = std::fs::read_to_string(entry.path()) else {
438 continue;
439 };
440 let path_string = entry.path().to_string_lossy().into_owned();
441 let license = detect_license(&contents);
442 match license {
443 Some(license) => detected.push((license, path_string)),
444 None => unrecognized.push(path_string),
445 }
446 }
447 println!("\nDetected licenses:\n");
448 detected.sort();
449 for (license, path) in &detected {
450 println!("{}: {}", license.spdx_identifier(), path);
451 }
452 println!("\nUnrecognized licenses:\n");
453 for path in &unrecognized {
454 println!("{}", path);
455 }
456 panic!(
457 "{} licenses detected, {} unrecognized",
458 detected.len(),
459 unrecognized.len()
460 );
461 println!("This line has a warning to make sure this test is always commented out");
462 }
463 */
464
/// Every Apache 2.0 example, including this repository's own license file, is detected.
#[test]
fn test_apache_positive_detection() {
    let apache = OpenSourceLicense::Apache2_0;

    assert_matches_license(APACHE_2_0_TXT, apache);
    assert_matches_license(include_str!("../license_examples/apache-2.0-ex1.txt"), apache);
    assert_matches_license(include_str!("../license_examples/apache-2.0-ex2.txt"), apache);
    assert_matches_license(include_str!("../license_examples/apache-2.0-ex3.txt"), apache);
    assert_matches_license(include_str!("../license_examples/apache-2.0-ex4.txt"), apache);
    assert_matches_license(include_str!("../../../LICENSE-APACHE"), apache);
}
489
/// Extra terms appended to an otherwise valid Apache 2.0 text prevent detection.
#[test]
fn test_apache_negative_detection() {
    let tampered_text =
        format!("{APACHE_2_0_TXT}\n\nThe terms in this license are void if P=NP.");
    assert_eq!(detect_license(&tampered_text), None);
}
499
/// The 1-clause BSD example is detected as BSD.
#[test]
fn test_bsd_1_clause_positive_detection() {
    let example = include_str!("../license_examples/bsd-1-clause.txt");
    assert_matches_license(example, OpenSourceLicense::BSD);
}
507
/// The 2-clause BSD example is detected as BSD.
#[test]
fn test_bsd_2_clause_positive_detection() {
    let example = include_str!("../license_examples/bsd-2-clause-ex0.txt");
    assert_matches_license(example, OpenSourceLicense::BSD);
}
515
/// Every 3-clause BSD example is detected as BSD.
#[test]
fn test_bsd_3_clause_positive_detection() {
    let bsd = OpenSourceLicense::BSD;

    assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex0.txt"), bsd);
    assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex1.txt"), bsd);
    assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex2.txt"), bsd);
    assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex3.txt"), bsd);
    assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex4.txt"), bsd);
}
539
/// The 0BSD example is detected as `BSDZero` rather than plain BSD.
#[test]
fn test_bsd_0_positive_detection() {
    let expected = OpenSourceLicense::BSDZero;
    assert_matches_license(BSD_0_TXT, expected);
}
544
/// The ISC example is detected as ISC.
#[test]
fn test_isc_positive_detection() {
    let expected = OpenSourceLicense::ISC;
    assert_matches_license(ISC_TXT, expected);
}
549
/// A dual-licensing note appended to the ISC text prevents detection.
#[test]
fn test_isc_negative_detection() {
    let dual_licensed_text = format!(
        r#"{ISC_TXT}

        This project is dual licensed under the ISC License and the MIT License."#
    );
    let detected = detect_license(&dual_licensed_text);
    assert_eq!(detected, None);
}
560
/// Every MIT example is detected as MIT.
#[test]
fn test_mit_positive_detection() {
    let mit = OpenSourceLicense::MIT;

    assert_matches_license(MIT_TXT, mit);
    assert_matches_license(include_str!("../license_examples/mit-ex1.txt"), mit);
    assert_matches_license(include_str!("../license_examples/mit-ex2.txt"), mit);
    assert_matches_license(include_str!("../license_examples/mit-ex3.txt"), mit);
}
577
/// A dual-licensing note appended to the MIT text prevents detection.
#[test]
fn test_mit_negative_detection() {
    let dual_licensed_text = format!(
        r#"{MIT_TXT}

        This project is dual licensed under the MIT License and the Apache License, Version 2.0."#
    );
    let detected = detect_license(&dual_licensed_text);
    assert_eq!(detected, None);
}
587
/// The UPL 1.0 example is detected as UPL 1.0.
#[test]
fn test_upl_positive_detection() {
    let expected = OpenSourceLicense::UPL1_0;
    assert_matches_license(UPL_1_0_TXT, expected);
}
592
/// A dual-licensing note appended to the UPL text prevents detection.
#[test]
fn test_upl_negative_detection() {
    let dual_licensed_text = format!(
        r#"{UPL_1_0_TXT}

        This project is dual licensed under the UPL License and the MIT License."#
    );
    let detected = detect_license(&dual_licensed_text);
    assert_eq!(detected, None);
}
603
/// The zlib example is detected as zlib.
#[test]
fn test_zlib_positive_detection() {
    let example = include_str!("../license_examples/zlib-ex0.txt");
    assert_matches_license(example, OpenSourceLicense::Zlib);
}
611
/// Strings of 512 random characters are never detected as a license.
#[test]
fn random_strings_negative_detection() {
    for _ in 0..20 {
        let random_string: String = rand::rng()
            .sample_iter::<char, _>(rand::distr::StandardUniform)
            .take(512)
            .collect();
        assert_eq!(detect_license(&random_string), None);
    }
}
622
/// Offsets are moved back by characters rather than bytes, for ASCII and multi-byte text alike.
#[test]
fn test_n_chars_before_offset() {
    let ascii = "hello";
    assert_eq!(n_chars_before_offset(2, 4, ascii), 2);

    let multi_byte = "ㄒ乇丂ㄒ";
    let expected_offset = "ㄒ乇".len();
    assert_eq!(
        n_chars_before_offset(2, multi_byte.len(), multi_byte),
        expected_offset
    );
}
630
/// For random strings and ranges, the byte-length shortcuts agree with counting the characters.
#[test]
fn test_is_char_count_within_range() {
    // TODO: make this into a proper property test.
    let mut rng = rand::rng();
    for _ in 0..20 {
        let char_count = rng.random_range(0..64);
        let text: String = rand::rng()
            .sample_iter::<char, _>(rand::distr::StandardUniform)
            .take(char_count)
            .collect();
        let min_chars = rng.random_range(0..10);
        let max_chars = rng.random_range(min_chars..32);
        let char_count_range = min_chars..max_chars;

        let expected = char_count_range.contains(&char_count);
        assert_eq!(
            is_char_count_within_range(&text, char_count_range),
            expected,
        );
    }
}
650
/// The file name regex accepts the common license file names and rejects everything else.
#[test]
fn test_license_file_name_regex() {
    const MATCHING: &[&str] = &[
        // Basic license file names
        "LICENSE",
        "LICENCE",
        "license",
        "licence",
        // With extensions
        "LICENSE.txt",
        "LICENSE.md",
        "LICENCE.txt",
        "LICENCE.md",
        // With specific license types
        "LICENSE-APACHE",
        "LICENSE-MIT",
        "LICENSE.MIT",
        "LICENSE_MIT",
        "LICENSE-ISC",
        "LICENSE-UPL",
        // With "license" coming after
        "APACHE-LICENSE",
        // Version numbers
        "APACHE-2",
        "APACHE-2.0",
        "BSD-1",
        "BSD-2",
        "BSD-3",
        "BSD-3-CLAUSE",
        // Combinations
        "LICENSE-MIT.txt",
        "LICENCE.ISC.md",
        "license_upl",
        "LICENSE.APACHE.2.0",
        // Case insensitivity
        "License",
        "license-mit.TXT",
        "LICENCE_isc.MD",
        // Edge cases that should match
        "license.mit",
        "licence-upl.txt",
    ];
    const NON_MATCHING: &[&str] = &[
        "COPYING",
        "LICENSE.html",
        "MYLICENSE",
        "src/LICENSE",
        "LICENSE.old",
        "LICENSE-GPL",
        "LICENSEABC",
    ];

    for name in MATCHING {
        assert!(
            LICENSE_FILE_NAME_REGEX.is_match(name.as_bytes()),
            "expected `{name}` to be treated as a license file name"
        );
    }
    for name in NON_MATCHING {
        assert!(
            !LICENSE_FILE_NAME_REGEX.is_match(name.as_bytes()),
            "expected `{name}` to not be treated as a license file name"
        );
    }
}
708
/// Canonicalization lowercases the text and collapses every run of whitespace to one space.
#[test]
fn test_canonicalize_license_text() {
    let cases = [
        // Paragraphs separated by blank lines, with leading and trailing whitespace
        (
            "  Paragraph 1\nwith multiple lines\n\n\n\nParagraph 2\nwith more lines\n  ",
            "paragraph 1 with multiple lines paragraph 2 with more lines",
        ),
        // Tabs and mixed whitespace
        (
            "Word1\t\tWord2\n\n   Word3\r\n\r\n\r\nWord4   ",
            "word1 word2 word3 word4",
        ),
    ];
    for (input, expected) in cases {
        assert_eq!(canonicalize_license_text(input), expected);
    }
}
720
721 fn init_test(cx: &mut TestAppContext) {
722 cx.update(|cx| {
723 let settings_store = SettingsStore::test(cx);
724 cx.set_global(settings_store);
725 });
726 }
727
728 #[gpui::test]
729 async fn test_watcher_single_file(cx: &mut TestAppContext) {
730 init_test(cx);
731
732 let fs = FakeFs::new(cx.background_executor.clone());
733 fs.insert_tree("/root", json!({ "main.rs": "fn main() {}" }))
734 .await;
735
736 let worktree = Worktree::local(
737 Path::new("/root/main.rs"),
738 true,
739 fs.clone(),
740 Default::default(),
741 true,
742 WorktreeId::from_proto(0),
743 &mut cx.to_async(),
744 )
745 .await
746 .unwrap();
747
748 let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
749 assert!(matches!(watcher, LicenseDetectionWatcher::SingleFile));
750 assert!(!watcher.is_project_open_source());
751 }
752
753 #[gpui::test]
754 async fn test_watcher_updates_on_changes(cx: &mut TestAppContext) {
755 init_test(cx);
756
757 let fs = FakeFs::new(cx.background_executor.clone());
758 fs.insert_tree("/root", json!({ "main.rs": "fn main() {}" }))
759 .await;
760
761 let worktree = Worktree::local(
762 Path::new("/root"),
763 true,
764 fs.clone(),
765 Default::default(),
766 true,
767 WorktreeId::from_proto(0),
768 &mut cx.to_async(),
769 )
770 .await
771 .unwrap();
772
773 let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
774 assert!(matches!(watcher, LicenseDetectionWatcher::Local { .. }));
775 assert!(!watcher.is_project_open_source());
776
777 fs.write(Path::new("/root/LICENSE-MIT"), MIT_TXT.as_bytes())
778 .await
779 .unwrap();
780
781 cx.background_executor.run_until_parked();
782 assert!(watcher.is_project_open_source());
783
784 fs.write(Path::new("/root/LICENSE-APACHE"), APACHE_2_0_TXT.as_bytes())
785 .await
786 .unwrap();
787
788 cx.background_executor.run_until_parked();
789 assert!(watcher.is_project_open_source());
790
791 fs.write(Path::new("/root/LICENSE-MIT"), "Nevermind".as_bytes())
792 .await
793 .unwrap();
794
795 // Still considered open source as LICENSE-APACHE is present
796 cx.background_executor.run_until_parked();
797 assert!(watcher.is_project_open_source());
798
799 fs.write(
800 Path::new("/root/LICENSE-APACHE"),
801 "Also nevermind".as_bytes(),
802 )
803 .await
804 .unwrap();
805
806 cx.background_executor.run_until_parked();
807 assert!(!watcher.is_project_open_source());
808 }
809
810 #[gpui::test]
811 async fn test_watcher_initially_opensource_and_then_deleted(cx: &mut TestAppContext) {
812 init_test(cx);
813
814 let fs = FakeFs::new(cx.background_executor.clone());
815 fs.insert_tree(
816 "/root",
817 json!({ "main.rs": "fn main() {}", "LICENSE-MIT": MIT_TXT }),
818 )
819 .await;
820
821 let worktree = Worktree::local(
822 Path::new("/root"),
823 true,
824 fs.clone(),
825 Default::default(),
826 true,
827 WorktreeId::from_proto(0),
828 &mut cx.to_async(),
829 )
830 .await
831 .unwrap();
832
833 let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
834 assert!(matches!(watcher, LicenseDetectionWatcher::Local { .. }));
835
836 cx.background_executor.run_until_parked();
837 assert!(watcher.is_project_open_source());
838
839 fs.remove_file(
840 Path::new("/root/LICENSE-MIT"),
841 fs::RemoveOptions {
842 recursive: false,
843 ignore_if_not_exists: false,
844 },
845 )
846 .await
847 .unwrap();
848
849 cx.background_executor.run_until_parked();
850 assert!(!watcher.is_project_open_source());
851 }
852}