1use std::{
2 collections::BTreeSet,
3 fmt::{Display, Formatter},
4 ops::Range,
5 path::PathBuf,
6 sync::{Arc, LazyLock},
7};
8
9use anyhow::{Result, anyhow};
10use fs::Fs;
11use futures::StreamExt as _;
12use gpui::{App, AppContext as _, Entity, Subscription, Task};
13use itertools::Itertools;
14use postage::watch;
15use project::Worktree;
16use strum::VariantArray;
17use util::{ResultExt as _, maybe, rel_path::RelPath};
18use worktree::ChildEntriesOptions;
19
20/// Matches the most common license locations, with US and UK English spelling.
21static LICENSE_FILE_NAME_REGEX: LazyLock<regex::bytes::Regex> = LazyLock::new(|| {
22 regex::bytes::RegexBuilder::new(
23 "^ \
24 (?: \
25 (?: license | licence) \
26 (?: [\\-._]? \
27 (?: apache (?: [\\-._] (?: 2.0 | 2 ))? | \
28 0? bsd (?: [\\-._] [0123])? (?: [\\-._] clause)? | \
29 isc | \
30 mit | \
31 upl | \
32 zlib))? \
33 | \
34 (?: apache (?: [\\-._] (?: 2.0 | 2 ))? | \
35 0? bsd (?: [\\-._] [0123])? (?: [\\-._] clause)? | \
36 isc | \
37 mit | \
38 upl | \
39 zlib) \
40 ) \
41 (?: [\\-._]? (?: license | licence))? \
42 (?: \\.txt | \\.md)? \
43 $",
44 )
45 .ignore_whitespace(true)
46 .case_insensitive(true)
47 .build()
48 .unwrap()
49});
50
51#[derive(Debug, Clone, Copy, Eq, Ord, PartialOrd, PartialEq, VariantArray)]
52pub enum OpenSourceLicense {
53 Apache2_0,
54 BSDZero,
55 BSD,
56 ISC,
57 MIT,
58 UPL1_0,
59 Zlib,
60}
61
62impl Display for OpenSourceLicense {
63 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
64 write!(f, "{}", self.spdx_identifier())
65 }
66}
67
68impl OpenSourceLicense {
69 /// These are SPDX identifiers for the licenses, except for BSD, where the variants are not
70 /// distinguished.
71 pub fn spdx_identifier(&self) -> &'static str {
72 match self {
73 OpenSourceLicense::Apache2_0 => "apache-2.0",
74 OpenSourceLicense::BSDZero => "0bsd",
75 OpenSourceLicense::BSD => "bsd",
76 OpenSourceLicense::ISC => "isc",
77 OpenSourceLicense::MIT => "mit",
78 OpenSourceLicense::UPL1_0 => "upl-1.0",
79 OpenSourceLicense::Zlib => "zlib",
80 }
81 }
82
83 pub fn patterns(&self) -> &'static [&'static str] {
84 match self {
85 OpenSourceLicense::Apache2_0 => &[
86 include_str!("../license_patterns/apache-2.0-pattern"),
87 include_str!("../license_patterns/apache-2.0-reference-pattern"),
88 ],
89 OpenSourceLicense::BSDZero => &[include_str!("../license_patterns/0bsd-pattern")],
90 OpenSourceLicense::BSD => &[include_str!("../license_patterns/bsd-pattern")],
91 OpenSourceLicense::ISC => &[include_str!("../license_patterns/isc-pattern")],
92 OpenSourceLicense::MIT => &[include_str!("../license_patterns/mit-pattern")],
93 OpenSourceLicense::UPL1_0 => &[include_str!("../license_patterns/upl-1.0-pattern")],
94 OpenSourceLicense::Zlib => &[include_str!("../license_patterns/zlib-pattern")],
95 }
96 }
97}
98
99// TODO: Consider using databake or similar to not parse at runtime.
100static LICENSE_PATTERNS: LazyLock<LicensePatterns> = LazyLock::new(|| {
101 let mut approximate_max_length = 0;
102 let mut patterns = Vec::new();
103 for license in OpenSourceLicense::VARIANTS {
104 for pattern in license.patterns() {
105 let (pattern, length) = parse_pattern(pattern).unwrap();
106 patterns.push((*license, pattern));
107 approximate_max_length = approximate_max_length.max(length);
108 }
109 }
110 LicensePatterns {
111 patterns,
112 approximate_max_length,
113 }
114});
115
116fn detect_license(text: &str) -> Option<OpenSourceLicense> {
117 let text = canonicalize_license_text(text);
118 for (license, pattern) in LICENSE_PATTERNS.patterns.iter() {
119 log::trace!("Checking if license is {}", license);
120 if check_pattern(&pattern, &text) {
121 return Some(*license);
122 }
123 }
124
125 None
126}
127
128struct LicensePatterns {
129 patterns: Vec<(OpenSourceLicense, Vec<PatternPart>)>,
130 approximate_max_length: usize,
131}
132
/// One segment of a license pattern: a bounded run of arbitrary characters followed by a piece
/// of literal text.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct PatternPart {
    /// Whether `text` may be absent from the input. The `match_any_chars` allowance is only
    /// consumed when `text` is actually matched.
    optional: bool,
    /// How many characters may be skipped before `text` is matched.
    match_any_chars: Range<usize>,
    /// The canonicalized text to match. May be empty.
    text: String,
}
143
144/// Lines that start with "-- " begin a `PatternPart`. `-- 1..10` specifies `match_any_chars:
145/// 1..10`. `-- 1..10 optional:` additionally specifies `optional: true`. It's a parse error for a
146/// line to start with `--` without matching this format.
147///
148/// Text that does not have `--` prefixes participate in the `text` field and are canonicalized by
149/// lowercasing, replacing all runs of whitespace with a single space, and otherwise only keeping
150/// ascii alphanumeric characters.
151fn parse_pattern(pattern_source: &str) -> Result<(Vec<PatternPart>, usize)> {
152 let mut pattern = Vec::new();
153 let mut part = PatternPart::default();
154 let mut approximate_max_length = 0;
155 for line in pattern_source.lines() {
156 if let Some(directive) = line.trim().strip_prefix("--") {
157 if part != PatternPart::default() {
158 pattern.push(part);
159 part = PatternPart::default();
160 }
161 let valid = maybe!({
162 let directive_chunks = directive.split_whitespace().collect::<Vec<_>>();
163 if !(1..=2).contains(&directive_chunks.len()) {
164 return None;
165 }
166 if directive_chunks.len() == 2 {
167 part.optional = true;
168 }
169 let range_chunks = directive_chunks[0].split("..").collect::<Vec<_>>();
170 if range_chunks.len() != 2 {
171 return None;
172 }
173 part.match_any_chars.start = range_chunks[0].parse::<usize>().ok()?;
174 part.match_any_chars.end = range_chunks[1].parse::<usize>().ok()?;
175 if part.match_any_chars.start > part.match_any_chars.end {
176 return None;
177 }
178 approximate_max_length += part.match_any_chars.end;
179 Some(())
180 });
181 if valid.is_none() {
182 return Err(anyhow!("Invalid pattern directive: {}", line));
183 }
184 continue;
185 }
186 approximate_max_length += line.len() + 1;
187 let line = canonicalize_license_text(line);
188 if line.is_empty() {
189 continue;
190 }
191 if !part.text.is_empty() {
192 part.text.push(' ');
193 }
194 part.text.push_str(&line);
195 }
196 if part != PatternPart::default() {
197 pattern.push(part);
198 }
199 Ok((pattern, approximate_max_length))
200}
201
202/// Checks a pattern against text by iterating over the pattern parts in reverse order, and checking
203/// matches with the end of a prefix of the input. Assumes that `canonicalize_license_text` has
204/// already been applied to the input.
205fn check_pattern(pattern: &[PatternPart], input: &str) -> bool {
206 let mut input_ix = input.len();
207 let mut match_any_chars = 0..0;
208 for part in pattern.iter().rev() {
209 if part.text.is_empty() {
210 match_any_chars.start += part.match_any_chars.start;
211 match_any_chars.end += part.match_any_chars.end;
212 continue;
213 }
214
215 let search_range_end = n_chars_before_offset(match_any_chars.start, input_ix, input);
216 let search_range_start = n_chars_before_offset(
217 match_any_chars.len() + part.text.len(),
218 search_range_end,
219 input,
220 );
221 let found_ix = input[search_range_start..search_range_end].rfind(&part.text);
222
223 if let Some(found_ix) = found_ix {
224 input_ix = search_range_start + found_ix;
225 match_any_chars = part.match_any_chars.clone();
226 } else if !part.optional {
227 log::trace!(
228 "Failed to match pattern\n`...{}`\nagainst input\n`...{}`",
229 &part.text[n_chars_before_offset(128, part.text.len(), &part.text)..],
230 &input[n_chars_before_offset(128, search_range_end, input)..search_range_end],
231 );
232 return false;
233 }
234 }
235 is_char_count_within_range(&input[..input_ix], match_any_chars)
236}
237
/// Returns the byte offset reached by stepping `char_count` characters backwards from the
/// byte offset `offset` in `string`.
///
/// Returns `offset` itself when `char_count` is zero, and `0` when fewer than `char_count`
/// characters precede `offset`. `offset` must lie on a character boundary of `string`.
fn n_chars_before_offset(char_count: usize, offset: usize, string: &str) -> usize {
    match char_count {
        0 => offset,
        count => string[..offset]
            .char_indices()
            .rev()
            .nth(count - 1)
            .map(|(byte_ix, _)| byte_ix)
            .unwrap_or(0),
    }
}
247
/// Returns whether the number of characters in `string` lies within `char_count_range`.
///
/// Equivalent to `char_count_range.contains(&string.chars().count())`, but it avoids counting
/// characters when the byte length alone decides the answer. A UTF-8 string of `n` characters
/// occupies between `n` and `4 * n` bytes, so the byte length bounds the character count from
/// both sides.
///
/// The multiplications saturate, so very large bounds (such as an effectively unbounded upper
/// limit) cannot overflow. On saturation the shortcut simply does not apply and the exact
/// count is used.
fn is_char_count_within_range(string: &str, char_count_range: Range<usize>) -> bool {
    let byte_len = string.len();
    let Range { start, end } = char_count_range;

    // At least `start` characters (byte_len / 4 >= start) and fewer than `end` characters
    // (char count <= byte_len < end).
    if byte_len >= start.saturating_mul(4) && byte_len < end {
        return true;
    }
    // Fewer than `start` characters (char count <= byte_len < start), or at least `end`
    // characters (char count >= byte_len / 4 >= end).
    if byte_len < start || byte_len >= end.saturating_mul(4) {
        return false;
    }
    // The byte length is inconclusive; count the characters.
    (start..end).contains(&string.chars().count())
}
257
/// Canonicalizes license text by removing every character that is neither ASCII whitespace nor
/// alphanumeric, lowercasing ASCII letters, and turning each run of ASCII whitespace into a
/// single space. The result has no leading or trailing space.
///
/// Removed characters do not separate words: `"foo-bar"` becomes `"foobar"`. Unicode
/// alphanumeric characters are intentionally preserved (and not lowercased), since these
/// should cause a license mismatch when they are not within a portion of the license where
/// arbitrary text is allowed.
///
/// The text is processed in a single pass into one pre-sized buffer.
fn canonicalize_license_text(license: &str) -> String {
    let mut canonical = String::with_capacity(license.len());
    // Set when whitespace has been seen since the last kept character. The space is only
    // emitted once another kept character follows, so trailing whitespace is never written.
    let mut pending_space = false;
    for c in license.chars() {
        if c.is_ascii_whitespace() {
            // Leading whitespace is dropped: nothing has been written before it.
            pending_space = !canonical.is_empty();
        } else if c.is_alphanumeric() {
            if pending_space {
                canonical.push(' ');
                pending_space = false;
            }
            canonical.push(c.to_ascii_lowercase());
        }
    }
    canonical
}
271
272pub enum LicenseDetectionWatcher {
273 Local {
274 is_open_source_rx: watch::Receiver<bool>,
275 _is_open_source_task: Task<()>,
276 _worktree_subscription: Subscription,
277 },
278 SingleFile,
279 Remote,
280}
281
282impl LicenseDetectionWatcher {
283 pub fn new(worktree: &Entity<Worktree>, cx: &mut App) -> Self {
284 let worktree_ref = worktree.read(cx);
285 if worktree_ref.is_single_file() {
286 return Self::SingleFile;
287 }
288
289 let (files_to_check_tx, mut files_to_check_rx) = futures::channel::mpsc::unbounded();
290
291 let Worktree::Local(local_worktree) = worktree_ref else {
292 return Self::Remote;
293 };
294 let fs = local_worktree.fs().clone();
295
296 let options = ChildEntriesOptions {
297 include_files: true,
298 include_dirs: false,
299 include_ignored: true,
300 };
301 for top_file in local_worktree.child_entries_with_options(RelPath::empty(), options) {
302 let path_bytes = top_file.path.as_unix_str().as_bytes();
303 if top_file.is_created() && LICENSE_FILE_NAME_REGEX.is_match(path_bytes) {
304 let rel_path = top_file.path.clone();
305 files_to_check_tx.unbounded_send(rel_path).ok();
306 }
307 }
308
309 let _worktree_subscription =
310 cx.subscribe(worktree, move |_worktree, event, _cx| match event {
311 worktree::Event::UpdatedEntries(updated_entries) => {
312 for updated_entry in updated_entries.iter() {
313 let rel_path = &updated_entry.0;
314 let path_bytes = rel_path.as_unix_str().as_bytes();
315 if LICENSE_FILE_NAME_REGEX.is_match(path_bytes) {
316 files_to_check_tx.unbounded_send(rel_path.clone()).ok();
317 }
318 }
319 }
320 worktree::Event::DeletedEntry(_)
321 | worktree::Event::UpdatedGitRepositories(_)
322 | worktree::Event::UpdatedRootRepoCommonDir
323 | worktree::Event::Deleted => {}
324 });
325
326 let worktree_snapshot = worktree.read(cx).snapshot();
327 let (mut is_open_source_tx, is_open_source_rx) = watch::channel_with::<bool>(false);
328
329 let _is_open_source_task = cx.background_spawn(async move {
330 let mut eligible_licenses = BTreeSet::new();
331 while let Some(rel_path) = files_to_check_rx.next().await {
332 let abs_path = worktree_snapshot.absolutize(&rel_path);
333 let was_open_source = !eligible_licenses.is_empty();
334 if Self::is_path_eligible(&fs, abs_path).await.unwrap_or(false) {
335 eligible_licenses.insert(rel_path);
336 } else {
337 eligible_licenses.remove(&rel_path);
338 }
339 let is_open_source = !eligible_licenses.is_empty();
340 if is_open_source != was_open_source {
341 *is_open_source_tx.borrow_mut() = is_open_source;
342 }
343 }
344 });
345
346 Self::Local {
347 is_open_source_rx,
348 _is_open_source_task,
349 _worktree_subscription,
350 }
351 }
352
353 async fn is_path_eligible(fs: &Arc<dyn Fs>, abs_path: PathBuf) -> Option<bool> {
354 log::debug!("checking if `{abs_path:?}` is an open source license");
355 // resolve symlinks so that the file size from metadata is correct
356 let Some(abs_path) = fs.canonicalize(&abs_path).await.ok() else {
357 log::debug!(
358 "`{abs_path:?}` license file probably deleted (error canonicalizing the path)"
359 );
360 return None;
361 };
362 let metadata = fs.metadata(&abs_path).await.log_err()??;
363 if metadata.is_dir {
364 return None;
365 }
366 if metadata.len > LICENSE_PATTERNS.approximate_max_length as u64 {
367 log::debug!(
368 "`{abs_path:?}` license file was skipped \
369 because its size of {} bytes was larger than the max size of {} bytes",
370 metadata.len,
371 LICENSE_PATTERNS.approximate_max_length
372 );
373 return None;
374 }
375 let text = fs.load(&abs_path).await.log_err()?;
376 let is_eligible = detect_license(&text).is_some();
377 if is_eligible {
378 log::debug!(
379 "`{abs_path:?}` matches a license that is eligible for data collection (if enabled)"
380 );
381 } else {
382 log::debug!(
383 "`{abs_path:?}` does not match a license that is eligible for data collection"
384 );
385 }
386 Some(is_eligible)
387 }
388
389 /// Answers false until we find out it's open source
390 pub fn is_project_open_source(&self) -> bool {
391 match self {
392 Self::Local {
393 is_open_source_rx, ..
394 } => *is_open_source_rx.borrow(),
395 Self::SingleFile | Self::Remote => false,
396 }
397 }
398}
399
#[cfg(test)]
mod tests {
    use std::path::Path;

    use fs::FakeFs;
    use gpui::TestAppContext;
    use project::WorktreeId;
    use rand::Rng as _;
    use serde_json::json;
    use settings::SettingsStore;

    use super::*;

    const APACHE_2_0_TXT: &str = include_str!("../license_examples/apache-2.0-ex0.txt");
    const ISC_TXT: &str = include_str!("../license_examples/isc.txt");
    const MIT_TXT: &str = include_str!("../license_examples/mit-ex0.txt");
    const UPL_1_0_TXT: &str = include_str!("../license_examples/upl-1.0.txt");
    const BSD_0_TXT: &str = include_str!("../license_examples/0bsd.txt");

    /// Asserts that `text` is detected as `license`, and that it is short enough to pass the
    /// file size limit applied by the watcher.
    #[track_caller]
    fn assert_matches_license(text: &str, license: OpenSourceLicense) {
        assert_eq!(detect_license(text), Some(license));
        assert!(text.len() < LICENSE_PATTERNS.approximate_max_length);
    }

    /*
    // Uncomment this and run with `cargo test -p zeta -- --no-capture &> licenses-output` to
    // traverse your entire home directory and run license detection on every file that has a
    // license-like name.
    #[test]
    fn test_check_all_licenses_in_home_dir() {
        let mut detected = Vec::new();
        let mut unrecognized = Vec::new();
        let mut walked_entries = 0;
        let homedir = std::env::home_dir().unwrap();
        for entry in walkdir::WalkDir::new(&homedir) {
            walked_entries += 1;
            if walked_entries % 10000 == 0 {
                println!(
                    "So far visited {} files in {}",
                    walked_entries,
                    homedir.display()
                );
            }
            let Ok(entry) = entry else {
                continue;
            };
            if !LICENSE_FILE_NAME_REGEX.is_match(entry.file_name().as_encoded_bytes()) {
                continue;
            }
            let Ok(contents) = std::fs::read_to_string(entry.path()) else {
                continue;
            };
            let path_string = entry.path().to_string_lossy().into_owned();
            let license = detect_license(&contents);
            match license {
                Some(license) => detected.push((license, path_string)),
                None => unrecognized.push(path_string),
            }
        }
        println!("\nDetected licenses:\n");
        detected.sort();
        for (license, path) in &detected {
            println!("{}: {}", license.spdx_identifier(), path);
        }
        println!("\nUnrecognized licenses:\n");
        for path in &unrecognized {
            println!("{}", path);
        }
        panic!(
            "{} licenses detected, {} unrecognized",
            detected.len(),
            unrecognized.len()
        );
        println!("This line has a warning to make sure this test is always commented out");
    }
    */

    #[test]
    fn test_apache_positive_detection() {
        let apache = OpenSourceLicense::Apache2_0;
        assert_matches_license(APACHE_2_0_TXT, apache);
        assert_matches_license(include_str!("../license_examples/apache-2.0-ex1.txt"), apache);
        assert_matches_license(include_str!("../license_examples/apache-2.0-ex2.txt"), apache);
        assert_matches_license(include_str!("../license_examples/apache-2.0-ex3.txt"), apache);
        assert_matches_license(include_str!("../license_examples/apache-2.0-ex4.txt"), apache);
        assert_matches_license(include_str!("../../../LICENSE-APACHE"), apache);
    }

    #[test]
    fn test_apache_negative_detection() {
        // Extra terms after the license text must prevent detection.
        let license_text =
            format!("{APACHE_2_0_TXT}\n\nThe terms in this license are void if P=NP.");
        assert_eq!(detect_license(&license_text), None);
    }

    #[test]
    fn test_bsd_1_clause_positive_detection() {
        assert_matches_license(
            include_str!("../license_examples/bsd-1-clause.txt"),
            OpenSourceLicense::BSD,
        );
    }

    #[test]
    fn test_bsd_2_clause_positive_detection() {
        assert_matches_license(
            include_str!("../license_examples/bsd-2-clause-ex0.txt"),
            OpenSourceLicense::BSD,
        );
    }

    #[test]
    fn test_bsd_3_clause_positive_detection() {
        let bsd = OpenSourceLicense::BSD;
        assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex0.txt"), bsd);
        assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex1.txt"), bsd);
        assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex2.txt"), bsd);
        assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex3.txt"), bsd);
        assert_matches_license(include_str!("../license_examples/bsd-3-clause-ex4.txt"), bsd);
    }

    #[test]
    fn test_bsd_0_positive_detection() {
        assert_matches_license(BSD_0_TXT, OpenSourceLicense::BSDZero);
    }

    #[test]
    fn test_isc_positive_detection() {
        assert_matches_license(ISC_TXT, OpenSourceLicense::ISC);
    }

    #[test]
    fn test_isc_negative_detection() {
        let license_text = format!(
            "{ISC_TXT}\n\n This project is dual licensed under the ISC License and the MIT License."
        );
        assert_eq!(detect_license(&license_text), None);
    }

    #[test]
    fn test_mit_positive_detection() {
        let mit = OpenSourceLicense::MIT;
        assert_matches_license(MIT_TXT, mit);
        assert_matches_license(include_str!("../license_examples/mit-ex1.txt"), mit);
        assert_matches_license(include_str!("../license_examples/mit-ex2.txt"), mit);
        assert_matches_license(include_str!("../license_examples/mit-ex3.txt"), mit);
    }

    #[test]
    fn test_mit_negative_detection() {
        let license_text = format!(
            "{MIT_TXT}\n\n This project is dual licensed under the MIT License and the Apache License, Version 2.0."
        );
        assert_eq!(detect_license(&license_text), None);
    }

    #[test]
    fn test_upl_positive_detection() {
        assert_matches_license(UPL_1_0_TXT, OpenSourceLicense::UPL1_0);
    }

    #[test]
    fn test_upl_negative_detection() {
        let license_text = format!(
            "{UPL_1_0_TXT}\n\n This project is dual licensed under the UPL License and the MIT License."
        );
        assert_eq!(detect_license(&license_text), None);
    }

    #[test]
    fn test_zlib_positive_detection() {
        assert_matches_license(
            include_str!("../license_examples/zlib-ex0.txt"),
            OpenSourceLicense::Zlib,
        );
    }

    #[test]
    fn random_strings_negative_detection() {
        for _i in 0..20 {
            let random_string = rand::rng()
                .sample_iter::<char, _>(rand::distr::StandardUniform)
                .take(512)
                .collect::<String>();
            assert_eq!(detect_license(&random_string), None);
        }
    }

    #[test]
    fn test_n_chars_before_offset() {
        assert_eq!(n_chars_before_offset(2, 4, "hello"), 2);

        // Multi-byte characters: the result is a byte offset, not a character index.
        let input = "ㄒ乇丂ㄒ";
        assert_eq!(n_chars_before_offset(2, input.len(), input), "ㄒ乇".len());
    }

    #[test]
    fn test_is_char_count_within_range() {
        // TODO: make this into a proper property test.
        for _i in 0..20 {
            let mut rng = rand::rng();
            let random_char_count = rng.random_range(0..64);
            let random_string = rand::rng()
                .sample_iter::<char, _>(rand::distr::StandardUniform)
                .take(random_char_count)
                .collect::<String>();
            let min_chars = rng.random_range(0..10);
            let max_chars = rng.random_range(min_chars..32);
            let char_count_range = min_chars..max_chars;
            let expected = char_count_range.contains(&random_char_count);
            assert_eq!(
                is_char_count_within_range(&random_string, char_count_range),
                expected,
            );
        }
    }

    #[test]
    fn test_license_file_name_regex() {
        let matching = [
            // Basic license file names
            "LICENSE",
            "LICENCE",
            "license",
            "licence",
            // With extensions
            "LICENSE.txt",
            "LICENSE.md",
            "LICENCE.txt",
            "LICENCE.md",
            // With specific license types
            "LICENSE-APACHE",
            "LICENSE-MIT",
            "LICENSE.MIT",
            "LICENSE_MIT",
            "LICENSE-ISC",
            "LICENSE-UPL",
            // With "license" coming after
            "APACHE-LICENSE",
            // Version numbers
            "APACHE-2",
            "APACHE-2.0",
            "BSD-1",
            "BSD-2",
            "BSD-3",
            "BSD-3-CLAUSE",
            // Combinations
            "LICENSE-MIT.txt",
            "LICENCE.ISC.md",
            "license_upl",
            "LICENSE.APACHE.2.0",
            // Case insensitive
            "License",
            "license-mit.TXT",
            "LICENCE_isc.MD",
            // Edge cases that should match
            "license.mit",
            "licence-upl.txt",
        ];
        for name in matching {
            assert!(
                LICENSE_FILE_NAME_REGEX.is_match(name.as_bytes()),
                "`{name}` should be recognized as a license file name"
            );
        }

        let non_matching = [
            "",
            "COPYING",
            "LICENSE.html",
            "MYLICENSE",
            "src/LICENSE",
            "LICENSE.old",
            "LICENSE-GPL",
            "LICENSEABC",
        ];
        for name in non_matching {
            assert!(
                !LICENSE_FILE_NAME_REGEX.is_match(name.as_bytes()),
                "`{name}` should not be recognized as a license file name"
            );
        }
    }

    #[test]
    fn test_canonicalize_license_text() {
        let input = " Paragraph 1\nwith multiple lines\n\n\n\nParagraph 2\nwith more lines\n ";
        let expected = "paragraph 1 with multiple lines paragraph 2 with more lines";
        assert_eq!(canonicalize_license_text(input), expected);

        // Test tabs and mixed whitespace
        let input = "Word1\t\tWord2\n\n Word3\r\n\r\n\r\nWord4 ";
        let expected = "word1 word2 word3 word4";
        assert_eq!(canonicalize_license_text(input), expected);
    }

    fn init_test(cx: &mut TestAppContext) {
        cx.update(|cx| {
            let settings_store = SettingsStore::test(cx);
            cx.set_global(settings_store);
        });
    }

    /// Creates a local worktree rooted at `root` on `fs`, with the arguments shared by every
    /// watcher test.
    async fn local_worktree(
        root: &str,
        fs: Arc<FakeFs>,
        cx: &mut TestAppContext,
    ) -> Entity<Worktree> {
        Worktree::local(
            Path::new(root),
            true,
            fs,
            Default::default(),
            true,
            WorktreeId::from_proto(0),
            &mut cx.to_async(),
        )
        .await
        .unwrap()
    }

    /// Writes `contents` to `path` and then runs the executor until all resulting work has
    /// settled.
    async fn write_and_settle(fs: &FakeFs, path: &str, contents: &str, cx: &TestAppContext) {
        fs.write(Path::new(path), contents.as_bytes())
            .await
            .unwrap();
        cx.background_executor.run_until_parked();
    }

    #[gpui::test]
    async fn test_watcher_single_file(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree("/root", json!({ "main.rs": "fn main() {}" }))
            .await;

        let worktree = local_worktree("/root/main.rs", fs.clone(), cx).await;

        let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
        assert!(matches!(watcher, LicenseDetectionWatcher::SingleFile));
        assert!(!watcher.is_project_open_source());
    }

    #[gpui::test]
    async fn test_watcher_updates_on_changes(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree("/root", json!({ "main.rs": "fn main() {}" }))
            .await;

        let worktree = local_worktree("/root", fs.clone(), cx).await;

        let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
        assert!(matches!(watcher, LicenseDetectionWatcher::Local { .. }));
        assert!(!watcher.is_project_open_source());

        write_and_settle(&fs, "/root/LICENSE-MIT", MIT_TXT, cx).await;
        assert!(watcher.is_project_open_source());

        write_and_settle(&fs, "/root/LICENSE-APACHE", APACHE_2_0_TXT, cx).await;
        assert!(watcher.is_project_open_source());

        // Still considered open source as LICENSE-APACHE is present
        write_and_settle(&fs, "/root/LICENSE-MIT", "Nevermind", cx).await;
        assert!(watcher.is_project_open_source());

        write_and_settle(&fs, "/root/LICENSE-APACHE", "Also nevermind", cx).await;
        assert!(!watcher.is_project_open_source());
    }

    #[gpui::test]
    async fn test_watcher_initially_opensource_and_then_deleted(cx: &mut TestAppContext) {
        init_test(cx);

        let fs = FakeFs::new(cx.background_executor.clone());
        fs.insert_tree(
            "/root",
            json!({ "main.rs": "fn main() {}", "LICENSE-MIT": MIT_TXT }),
        )
        .await;

        let worktree = local_worktree("/root", fs.clone(), cx).await;

        let watcher = cx.update(|cx| LicenseDetectionWatcher::new(&worktree, cx));
        assert!(matches!(watcher, LicenseDetectionWatcher::Local { .. }));

        cx.background_executor.run_until_parked();
        assert!(watcher.is_project_open_source());

        fs.remove_file(
            Path::new("/root/LICENSE-MIT"),
            fs::RemoveOptions {
                recursive: false,
                ignore_if_not_exists: false,
            },
        )
        .await
        .unwrap();

        cx.background_executor.run_until_parked();
        assert!(!watcher.is_project_open_source());
    }
}