Cargo.lock
@@ -20840,7 +20840,6 @@ dependencies = [
"tree-sitter-go",
"tree-sitter-rust",
"ui",
- "unindent",
"util",
"uuid",
"workspace",
Michael Sloan created
See discussion on #36564
Adds a simple ad-hoc substring matching pattern language which allows
skipping a bounded number of chars between matched substrings. Before
this change, compiling the regex took ~120 ms on a fast machine and used
~8 MB of memory. This new version is much faster and uses minimal memory.
Checked the behavior of this vs the regex implementation by running it
against 10k licenses that happened to be in my home dir. There were only 4
differences in behavior, and these were false negatives for the regex
implementation that are true positives with the new one.
Of the ~10k licenses in my home dir, ~1k do not match any of these
licenses, usually because they are GPL/MPL/etc.
Release Notes:
- N/A
Cargo.lock | 1
crates/zeta/Cargo.toml | 1
crates/zeta/license_examples/apache-2.0-ex4.txt | 187 ++++
crates/zeta/license_patterns/0bsd-pattern | 11
crates/zeta/license_patterns/apache-2.0-pattern | 65 +
crates/zeta/license_patterns/apache-2.0-reference-pattern | 14
crates/zeta/license_patterns/bsd-pattern | 32
crates/zeta/license_patterns/isc-pattern | 12
crates/zeta/license_patterns/mit-pattern | 18
crates/zeta/license_patterns/upl-1.0-pattern | 32
crates/zeta/license_patterns/zlib-pattern | 21
crates/zeta/license_regexes/0bsd.regex | 10
crates/zeta/license_regexes/apache-2.0.regex | 223 -----
crates/zeta/license_regexes/bsd.regex | 23
crates/zeta/license_regexes/isc.regex | 12
crates/zeta/license_regexes/mit.regex | 17
crates/zeta/license_regexes/upl-1.0.regex | 32
crates/zeta/license_regexes/zlib.regex | 18
crates/zeta/src/license_detection.rs | 352 ++++----
19 files changed, 570 insertions(+), 511 deletions(-)
@@ -20840,7 +20840,6 @@ dependencies = [
"tree-sitter-go",
"tree-sitter-rust",
"ui",
- "unindent",
"util",
"uuid",
"workspace",
@@ -78,7 +78,6 @@ settings = { workspace = true, features = ["test-support"] }
theme = { workspace = true, features = ["test-support"] }
tree-sitter-go.workspace = true
tree-sitter-rust.workspace = true
-unindent.workspace = true
workspace = { workspace = true, features = ["test-support"] }
worktree = { workspace = true, features = ["test-support"] }
zlog.workspace = true
@@ -0,0 +1,187 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ Copyright (c) 2017, The Android Open Source Project
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
@@ -0,0 +1,11 @@
+-- 0..512
+Permission to use, copy, modify, and/or distribute this software for
+any purpose with or without fee is hereby granted.
+
+THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE
+FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
+DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
@@ -0,0 +1,109 @@
+-- 0..512
+-- 0..0 optional:
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http
+-- 0..1 optional:
+://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+-- 0..5
+Apache License
+
+Version 2.0, January 2004
+
+http
+-- 0..1
+://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
+
+-- 1..5
+You must give any other recipients of the Work or Derivative Works a copy of this License; and
+
+-- 1..5
+You must cause any modified files to carry prominent notices stating that You changed the files; and
+
+-- 1..5
+You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
+
+-- 1..5
@@ -0,0 +1,14 @@
+-- 0..512
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http
+-- 0..1
+://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
@@ -0,0 +1,32 @@
+-- 0..512
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+-- 1..5
+Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+-- 1..5 optional:
+Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution.
+
+-- 1..128 optional:
+may be used to endorse or promote products derived from this software without
+specific prior written permission.
+
+-- 1..5
+THIS SOFTWARE IS PROVIDED
+-- 1..128
+“AS IS” AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL
+-- 1..128
+BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,12 @@
+-- 0..512
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
@@ -0,0 +1,18 @@
+-- 0..512
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,32 @@
+-- 0..512
+Subject to the condition set forth below, permission is hereby granted to any person
+obtaining a copy of this software, associated documentation and/or data (collectively
+the "Software"), free of charge and under any and all copyright rights in the
+Software, and any and all patent rights owned or freely licensable by each licensor
+hereunder covering either (i) the unmodified Software as contributed to or provided
+by such licensor, or (ii) the Larger Works (as defined below), to deal in both
+
+(a) the Software, and
+
+(b) any piece of software and/or hardware listed in the lrgrwrks.txt file if one is
+ included with the Software (each a "Larger Work" to which the Software is
+ contributed by such licensors),
+
+without restriction, including without limitation the rights to copy, create
+derivative works of, display, perform, and distribute the Software and make, use,
+sell, offer for sale, import, export, have made, and have sold the Software and the
+Larger Work(s), and to sublicense the foregoing rights on either these or other
+terms.
+
+This license is subject to the following condition:
+
+The above copyright notice and either this complete permission notice or at a minimum
+a reference to the UPL must be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,21 @@
+-- 0..512
+This software is provided 'as-is', without any express or implied warranty. In
+no event will the authors be held liable for any damages arising from the use of
+this software.
+
+Permission is granted to anyone to use this software for any purpose, including
+commercial applications, and to alter it and redistribute it freely, subject to
+the following restrictions:
+
+-- 1..5
+The origin of this software must not be misrepresented; you must not claim
+that you wrote the original software. If you use this software in a product,
+an acknowledgment in the product documentation would be appreciated but is
+not required.
+
+-- 1..5
+Altered source versions must be plainly marked as such, and must not be
+misrepresented as being the original software.
+
+-- 1..5
+This notice may not be removed or altered from any source distribution.
@@ -1,10 +0,0 @@
-.{0,512}Permission to use copy modify andor distribute this software for any
-purpose with or without fee is hereby granted
-
-THE SOFTWARE IS PROVIDED AS IS AND THE AUTHOR DISCLAIMS ALL
-WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS IN NO EVENT SHALL THE AUTHOR BE LIABLE
-FOR ANY SPECIAL DIRECT INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
-DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE DATA OR PROFITS WHETHER IN
-AN ACTION OF CONTRACT NEGLIGENCE OR OTHER TORTIOUS ACTION ARISING OUT
-OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE
@@ -1,223 +0,0 @@
-.{0,512}Licensed under the Apache License Version 20 the License
-you may not use this file except in compliance with the License
-You may obtain a copy of the License at
-
- https?wwwapacheorglicensesLICENSE20
-
-Unless required by applicable law or agreed to in writing software
-distributed under the License is distributed on an AS IS BASIS
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND either express or implied
-See the License for the specific language governing permissions and
-limitations under the License|.{0,512}(?:Licensed under the Apache License Version 20 the License
-you may not use this file except in compliance with the License
-You may obtain a copy of the License at
-
- https?wwwapacheorglicensesLICENSE20
-
-Unless required by applicable law or agreed to in writing software
-distributed under the License is distributed on an AS IS BASIS
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND either express or implied
-See the License for the specific language governing permissions and
-limitations under the License)?
-
- ?Apache License
- Version 20 January 2004
- https?wwwapacheorglicenses
-
- TERMS AND CONDITIONS FOR USE REPRODUCTION AND DISTRIBUTION
-
- 1 Definitions
-
- License shall mean the terms and conditions for use reproduction
- and distribution as defined by Sections 1 through 9 of this document
-
- Licensor shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License
-
- Legal Entity shall mean the union of the acting entity and all
- other entities that control are controlled by or are under common
- control with that entity For the purposes of this definition
- control means i the power direct or indirect to cause the
- direction or management of such entity whether by contract or
- otherwise or ii ownership of fifty percent 50 or more of the
- outstanding shares or iii beneficial ownership of such entity
-
- You or Your shall mean an individual or Legal Entity
- exercising permissions granted by this License
-
- Source form shall mean the preferred form for making modifications
- including but not limited to software source code documentation
- source and configuration files
-
- Object form shall mean any form resulting from mechanical
- transformation or translation of a Source form including but
- not limited to compiled object code generated documentation
- and conversions to other media types
-
- Work shall mean the work of authorship whether in Source or
- Object form made available under the License as indicated by a
- copyright notice that is included in or attached to the work
- an example is provided in the Appendix below
-
- Derivative Works shall mean any work whether in Source or Object
- form that is based on or derived from the Work and for which the
- editorial revisions annotations elaborations or other modifications
- represent as a whole an original work of authorship For the purposes
- of this License Derivative Works shall not include works that remain
- separable from or merely link or bind by name to the interfaces of
- the Work and Derivative Works thereof
-
- Contribution shall mean any work of authorship including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner For the purposes of this definition submitted
- means any form of electronic verbal or written communication sent
- to the Licensor or its representatives including but not limited to
- communication on electronic mailing lists source code control systems
- and issue tracking systems that are managed by or on behalf of the
- Licensor for the purpose of discussing and improving the Work but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as Not a Contribution
-
- Contributor shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work
-
- 2 Grant of Copyright License Subject to the terms and conditions of
- this License each Contributor hereby grants to You a perpetual
- worldwide nonexclusive nocharge royaltyfree irrevocable
- copyright license to reproduce prepare Derivative Works of
- publicly display publicly perform sublicense and distribute the
- Work and such Derivative Works in Source or Object form
-
- 3 Grant of Patent License Subject to the terms and conditions of
- this License each Contributor hereby grants to You a perpetual
- worldwide nonexclusive nocharge royaltyfree irrevocable
- except as stated in this section patent license to make have made
- use offer to sell sell import and otherwise transfer the Work
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contributions alone or by combination of their Contributions
- with the Work to which such Contributions was submitted If You
- institute patent litigation against any entity including a
- crossclaim or counterclaim in a lawsuit alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed
-
- 4 Redistribution You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium with or without
- modifications and in Source or Object form provided that You
- meet the following conditions
-
- (?:a )?You must give any other recipients of the Work or
- Derivative Works a copy of this License and
-
- (?:b )?You must cause any modified files to carry prominent notices
- stating that You changed the files and
-
- (?:c )?You must retain in the Source form of any Derivative Works
- that You distribute all copyright patent trademark and
- attribution notices from the Source form of the Work
- excluding those notices that do not pertain to any part of
- the Derivative Works and
-
- (?:d )?If the Work includes a NOTICE text file as part of its
- distribution then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file excluding those notices that do not
- pertain to any part of the Derivative Works in at least one
- of the following places within a NOTICE text file distributed
- as part of the Derivative Works within the Source form or
- documentation if provided along with the Derivative Works or
- within a display generated by the Derivative Works if and
- wherever such thirdparty notices normally appear The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License You may add Your own attribution
- notices within Derivative Works that You distribute alongside
- or as an addendum to the NOTICE text from the Work provided
- that such additional attribution notices cannot be construed
- as modifying the License
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use reproduction or distribution of Your modifications or
- for any such Derivative Works as a whole provided Your use
- reproduction and distribution of the Work otherwise complies with
- the conditions stated in this License
-
- 5 Submission of Contributions Unless You explicitly state otherwise
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License without any additional terms or conditions
- Notwithstanding the above nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions
-
- 6 Trademarks This License does not grant permission to use the trade
- names trademarks service marks or product names of the Licensor
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file
-
- 7 Disclaimer of Warranty Unless required by applicable law or
- agreed to in writing Licensor provides the Work and each
- Contributor provides its Contributions on an AS IS BASIS
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND either express or
- implied including without limitation any warranties or conditions
- of TITLE NONINFRINGEMENT MERCHANTABILITY or FITNESS FOR A
- PARTICULAR PURPOSE You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License
-
- 8 Limitation of Liability In no event and under no legal theory
- whether in tort including negligence contract or otherwise
- unless required by applicable law such as deliberate and grossly
- negligent acts or agreed to in writing shall any Contributor be
- liable to You for damages including any direct indirect special
- incidental or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work including but not limited to damages for loss of goodwill
- work stoppage computer failure or malfunction or any and all
- other commercial damages or losses even if such Contributor
- has been advised of the possibility of such damages
-
- 9 Accepting Warranty or Additional Liability While redistributing
- the Work or Derivative Works thereof You may choose to offer
- and charge a fee for acceptance of support warranty indemnity
- or other liability obligations andor rights consistent with this
- License However in accepting such obligations You may act only
- on Your own behalf and on Your sole responsibility not on behalf
- of any other Contributor and only if You agree to indemnify
- defend and hold each Contributor harmless for any liability
- incurred by or claims asserted against such Contributor by reason
- of your accepting any such warranty or additional liability(?:
-
- END OF TERMS AND CONDITIONS)?(?:
-
- APPENDIX How to apply the Apache License to your work
-
- To apply the Apache License to your work attach the following
- boilerplate notice with the fields enclosed by brackets
- replaced with your own identifying information Dont include
- the brackets The text should be enclosed in the appropriate
- comment syntax for the file format We also recommend that a
- file or class name and description of purpose be included on the
- same printed page as the copyright notice for easier
- identification within thirdparty archives)?(?:
-
- Copyright.{0,512})?(?:
-
- Licensed under the Apache License Version 20 the License
- you may not use this file except in compliance with the License
- You may obtain a copy of the License at
-
- https?wwwapacheorglicensesLICENSE20
-
- Unless required by applicable law or agreed to in writing software
- distributed under the License is distributed on an AS IS BASIS
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND either express or implied
- See the License for the specific language governing permissions and
- limitations under the License)?
@@ -1,23 +0,0 @@
-.{0,512}Redistribution and use in source and binary forms with or without
-modification are permitted provided that the following conditions are met
-
-(?:1 )?Redistributions of source code must retain the above copyright
-notice this list of conditions and the following disclaimer(?:
-
-(?:2 )?Redistributions in binary form must reproduce the above copyright
-notice this list of conditions and the following disclaimer in the
-documentation andor other materials provided with the distribution(?:
-
-(?:3 )?.{0,128} may be used to endorse or
-promote products derived from this software without specific prior written
-permission)?)?
-
-THIS SOFTWARE IS PROVIDED BY .{0,128}AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES
-INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED IN NO EVENT SHALL .{0,128}BE LIABLE
-FOR ANY DIRECT INDIRECT INCIDENTAL SPECIAL EXEMPLARY OR CONSEQUENTIAL
-DAMAGES INCLUDING BUT NOT LIMITED TO PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES LOSS OF USE DATA OR PROFITS OR BUSINESS INTERRUPTION HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY WHETHER IN CONTRACT STRICT LIABILITY OR
-TORT INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF
-THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
@@ -1,12 +0,0 @@
-.{0,512}Permission to use copy modify andor distribute
-this software for any purpose with or without fee is hereby granted provided
-that the above copyright notice and this permission notice appear in all
-copies
-
-THE SOFTWARE IS PROVIDED AS IS AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-ANY SPECIAL DIRECT INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-WHATSOEVER RESULTING FROM LOSS OF USE DATA OR PROFITS WHETHER IN AN
-ACTION OF CONTRACT NEGLIGENCE OR OTHER TORTIOUS ACTION ARISING OUT OF
-OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE
@@ -1,17 +0,0 @@
-.{0,512}Permission is hereby granted free of charge to any
-person obtaining a copy of this software and associated documentation files
-the Software to deal in the Software without restriction including
-without limitation the rights to use copy modify merge publish distribute
-sublicense andor sell copies of the Software and to permit persons to whom
-the Software is furnished to do so subject to the following conditions
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software
-
-THE SOFTWARE IS PROVIDED AS IS WITHOUT WARRANTY OF ANY KIND EXPRESS OR
-IMPLIED INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER
-LIABILITY WHETHER IN AN ACTION OF CONTRACT TORT OR OTHERWISE ARISING FROM
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE
@@ -1,32 +0,0 @@
-.{0,512}Subject to the condition set forth below permission is hereby granted to any
-person obtaining a copy of this software associated documentation andor data
-collectively the Software free of charge and under any and all copyright
-rights in the Software and any and all patent rights owned or freely licensable
-by each licensor hereunder covering either i the unmodified Software as
-contributed to or provided by such licensor or ii the Larger Works as
-defined below to deal in both
-
-a the Software and
-
-b any piece of software andor hardware listed in the lrgrwrkstxt file if one is
- included with the Software each a Larger Work to which the Software is
- contributed by such licensors
-
-without restriction including without limitation the rights to copy create
-derivative works of display perform and distribute the Software and make use
-sell offer for sale import export have made and have sold the Software and the
-Larger Works and to sublicense the foregoing rights on either these or other
-terms
-
-This license is subject to the following condition
-
-The above copyright notice and either this complete permission notice or at a minimum
-a reference to the UPL must be included in all copies or substantial portions of the
-Software
-
-THE SOFTWARE IS PROVIDED AS IS WITHOUT WARRANTY OF ANY KIND EXPRESS OR IMPLIED
-INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY FITNESS FOR A
-PARTICULAR PURPOSE AND NONINFRINGEMENT IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-HOLDERS BE LIABLE FOR ANY CLAIM DAMAGES OR OTHER LIABILITY WHETHER IN AN ACTION OF
-CONTRACT TORT OR OTHERWISE ARISING FROM OUT OF OR IN CONNECTION WITH THE SOFTWARE
-OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
@@ -1,18 +0,0 @@
-.{0,512}This software is provided asis without any express or implied
-warranty In no event will the authors be held liable for any damages
-arising from the use of this software
-
-Permission is granted to anyone to use this software for any purpose
-including commercial applications and to alter it and redistribute it
-freely subject to the following restrictions
-
-1? The origin of this software must not be misrepresented you must not
-claim that you wrote the original software If you use this software
-in a product an acknowledgment in the product documentation would be
-appreciated but is not required
-
-2? Altered source versions must be plainly marked as such and must not be
-misrepresented as being the original software
-
-3? This notice may not be removed or altered from any source
-distribution
@@ -1,19 +1,20 @@
use std::{
collections::BTreeSet,
fmt::{Display, Formatter},
+ ops::Range,
path::{Path, PathBuf},
sync::{Arc, LazyLock},
};
+use anyhow::{Result, anyhow};
use fs::Fs;
use futures::StreamExt as _;
use gpui::{App, AppContext as _, Entity, Subscription, Task};
use itertools::Itertools;
use postage::watch;
use project::Worktree;
-use regex::Regex;
use strum::VariantArray;
-use util::ResultExt as _;
+use util::{ResultExt as _, maybe};
use worktree::ChildEntriesOptions;
/// Matches the most common license locations, with US and UK English spelling.
@@ -70,68 +71,170 @@ impl OpenSourceLicense {
}
}
- /// Regexes to match the license text. These regexes are expected to match the entire file. Also
- /// note that `canonicalize_license_text` removes everything but alphanumeric ascii characters.
- pub fn regex(&self) -> &'static str {
+ pub fn patterns(&self) -> &'static [&'static str] {
match self {
- OpenSourceLicense::Apache2_0 => include_str!("../license_regexes/apache-2.0.regex"),
- OpenSourceLicense::BSDZero => include_str!("../license_regexes/0bsd.regex"),
- OpenSourceLicense::BSD => include_str!("../license_regexes/bsd.regex"),
- OpenSourceLicense::ISC => include_str!("../license_regexes/isc.regex"),
- OpenSourceLicense::MIT => include_str!("../license_regexes/mit.regex"),
- OpenSourceLicense::UPL1_0 => include_str!("../license_regexes/upl-1.0.regex"),
- OpenSourceLicense::Zlib => include_str!("../license_regexes/zlib.regex"),
+ OpenSourceLicense::Apache2_0 => &[
+ include_str!("../license_patterns/apache-2.0-pattern"),
+ include_str!("../license_patterns/apache-2.0-reference-pattern"),
+ ],
+ OpenSourceLicense::BSDZero => &[include_str!("../license_patterns/0bsd-pattern")],
+ OpenSourceLicense::BSD => &[include_str!("../license_patterns/bsd-pattern")],
+ OpenSourceLicense::ISC => &[include_str!("../license_patterns/isc-pattern")],
+ OpenSourceLicense::MIT => &[include_str!("../license_patterns/mit-pattern")],
+ OpenSourceLicense::UPL1_0 => &[include_str!("../license_patterns/upl-1.0-pattern")],
+ OpenSourceLicense::Zlib => &[include_str!("../license_patterns/zlib-pattern")],
}
}
}
-fn detect_license(license: &str) -> Option<OpenSourceLicense> {
- static LICENSE_REGEX: LazyLock<Regex> = LazyLock::new(|| {
- let mut regex_string = String::new();
- let mut is_first = true;
- for license in OpenSourceLicense::VARIANTS {
- if is_first {
- regex_string.push_str("^(?:(");
- is_first = false;
- } else {
- regex_string.push_str(")|(");
- }
- regex_string.push_str(&canonicalize_license_regex(license.regex()));
+// TODO: Consider using databake or similar to not parse at runtime.
+/// Every license pattern, parsed on first access, together with the largest approximate length
+/// that `parse_pattern` reported for any of them.
+static LICENSE_PATTERNS: LazyLock<LicensePatterns> = LazyLock::new(|| {
+    // Flatten "each license, each of its pattern sources" into a single sequence of pairs.
+    let sources = OpenSourceLicense::VARIANTS.iter().flat_map(|license| {
+        license
+            .patterns()
+            .iter()
+            .map(move |source| (*license, *source))
+    });
+
+    let mut parsed_patterns = Vec::new();
+    let mut longest = 0;
+    for (license, source) in sources {
+        let (parts, length) = parse_pattern(source).unwrap();
+        longest = longest.max(length);
+        parsed_patterns.push((license, parts));
+    }
+
+    LicensePatterns {
+        patterns: parsed_patterns,
+        approximate_max_length: longest,
+    }
+});
+
+fn detect_license(text: &str) -> Option<OpenSourceLicense> {
+ let text = canonicalize_license_text(text);
+ for (license, pattern) in LICENSE_PATTERNS.patterns.iter() {
+ log::trace!("Checking if license is {}", license);
+ if check_pattern(&pattern, &text) {
+ return Some(*license);
}
- regex_string.push_str("))$");
- let regex = Regex::new(®ex_string).unwrap();
- assert_eq!(regex.captures_len(), OpenSourceLicense::VARIANTS.len() + 1);
- regex
- });
-
- LICENSE_REGEX
- .captures(&canonicalize_license_text(license))
- .and_then(|captures| {
- let license = OpenSourceLicense::VARIANTS
- .iter()
- .enumerate()
- .find(|(index, _)| captures.get(index + 1).is_some())
- .map(|(_, license)| *license);
- if license.is_none() {
- log::error!("bug: open source license regex matched without any capture groups");
+ }
+
+ None
+}
+
+/// The parsed form of every license pattern, as built by the `LICENSE_PATTERNS` initializer.
+struct LicensePatterns {
+/// Parsed patterns paired with the license they identify. `detect_license` tries them in this
+/// order and returns the license of the first pattern that matches.
+patterns: Vec<(OpenSourceLicense, Vec<PatternPart>)>,
+/// The largest approximate length returned by `parse_pattern` across all patterns.
+/// `is_path_eligible` skips files whose metadata length exceeds this value.
+approximate_max_length: usize,
+}
+
+/// One segment of a license pattern: a bounded gap of arbitrary input followed by literal text.
+/// The `Default` value (not optional, `0..0`, empty text) is what `parse_pattern` treats as "no
+/// part started yet".
+#[derive(Debug, Clone, Default, PartialEq, Eq)]
+struct PatternPart {
+/// Indicates that matching `text` is optional. Skipping `match_any_chars` is conditional on
+/// matching `text`.
+optional: bool,
+/// Bounds on how much input can be skipped before matching `text`. `check_pattern` applies
+/// these to byte offsets of the canonicalized input, so the unit is bytes rather than
+/// characters.
+match_any_chars: Range<usize>,
+/// The text to match, may be empty.
+text: String,
+}
+
+/// Parses the source of a license pattern file into its `PatternPart`s, and also returns an
+/// approximate upper bound on the length of text the pattern is meant to match.
+///
+/// A line that, once trimmed, starts with `--` is a directive that begins a new `PatternPart`.
+/// `-- 1..10` specifies `match_any_chars: 1..10`. `-- 1..10 optional:` additionally specifies
+/// `optional: true`. It's a parse error for a directive not to have this shape, or for the
+/// range start to exceed the range end.
+/// NOTE(review): only the presence of a second token is checked, not that it is spelled
+/// `optional:`.
+///
+/// All other lines contribute to the `text` field of the current part. Each line is passed
+/// through `canonicalize_license_text`, lines that canonicalize to nothing are dropped, and the
+/// remaining lines are joined with single spaces.
+///
+/// The returned length is the sum of the end of every directive range plus, for every
+/// non-directive line, its byte length plus one.
+fn parse_pattern(pattern_source: &str) -> Result<(Vec<PatternPart>, usize)> {
+let mut pattern = Vec::new();
+let mut part = PatternPart::default();
+let mut approximate_max_length = 0;
+for line in pattern_source.lines() {
+if let Some(directive) = line.trim().strip_prefix("--") {
+// A directive ends the part being built, unless nothing has been put into it yet.
+if part != PatternPart::default() {
+pattern.push(part);
+part = PatternPart::default();
+}
+let valid = maybe!({
+let directive_chunks = directive.split_whitespace().collect::<Vec<_>>();
+if !(1..=2).contains(&directive_chunks.len()) {
+return None;
+}
+if directive_chunks.len() == 2 {
+part.optional = true;
+}
+let range_chunks = directive_chunks[0].split("..").collect::<Vec<_>>();
+if range_chunks.len() != 2 {
+return None;
+}
+part.match_any_chars.start = range_chunks[0].parse::<usize>().ok()?;
+part.match_any_chars.end = range_chunks[1].parse::<usize>().ok()?;
+if part.match_any_chars.start > part.match_any_chars.end {
+return None;
+}
+approximate_max_length += part.match_any_chars.end;
+Some(())
+});
+if valid.is_none() {
+return Err(anyhow!("Invalid pattern directive: {}", line));
+}
- license
- })
+continue;
+}
+// The length estimate uses the raw line (plus one for the line break), not the
+// canonicalized text.
+approximate_max_length += line.len() + 1;
+let line = canonicalize_license_text(line);
+if line.is_empty() {
+continue;
+}
+if !part.text.is_empty() {
+part.text.push(' ');
+}
+part.text.push_str(&line);
+}
+// Flush the last part, again skipping it if it is still the default value.
+if part != PatternPart::default() {
+pattern.push(part);
+}
+Ok((pattern, approximate_max_length))
}
-/// Canonicalizes the whitespace of license text.
-fn canonicalize_license_regex(license: &str) -> String {
- license
- .split_ascii_whitespace()
- .join(" ")
- .to_ascii_lowercase()
+/// Checks whether `input` matches `pattern`.
+///
+/// The pattern parts are visited in reverse order, and each part's text must be found at the end
+/// of a prefix of the input, after skipping a number of bytes allowed by the part that follows
+/// it. Parts with empty text only widen the allowed skip. An optional part that is not found is
+/// ignored. Finally, the input remaining before the first matched part must have a length
+/// allowed by that part's `match_any_chars`.
+///
+/// Both ends of a `match_any_chars` range are treated as inclusive, for the gaps between parts
+/// as well as for the leading gap.
+///
+/// Assumes that `canonicalize_license_text` has already been applied to the input. The input may
+/// contain multi-byte characters; skip offsets that fall inside one are never a match.
+fn check_pattern(pattern: &[PatternPart], input: &str) -> bool {
+    // Byte offset where the most recently matched part starts.
+    let mut input_ix = input.len();
+    // How many bytes may be skipped immediately before `input_ix`.
+    let mut match_any_chars = 0..0;
+    for part in pattern.iter().rev() {
+        if part.text.is_empty() {
+            match_any_chars.start += part.match_any_chars.start;
+            match_any_chars.end += part.match_any_chars.end;
+            continue;
+        }
+        let mut matched = false;
+        for skip_count in match_any_chars.start..=match_any_chars.end {
+            let end_ix = input_ix.saturating_sub(skip_count);
+            if end_ix < part.text.len() {
+                break;
+            }
+            // `get` returns `None` when `end_ix` is inside a multi-byte character, where
+            // slicing would panic. Valid UTF-8 text cannot end there, so skip the offset.
+            let Some(prefix) = input.get(..end_ix) else {
+                continue;
+            };
+            if prefix.ends_with(part.text.as_str()) {
+                matched = true;
+                input_ix = end_ix - part.text.len();
+                match_any_chars = part.match_any_chars.clone();
+                break;
+            }
+        }
+        if !matched && !part.optional {
+            log::trace!(
+                "Failed to match pattern `...{}` against input `...{}`",
+                suffix_at_char_boundary(&part.text, part.text.len().saturating_sub(128)),
+                suffix_at_char_boundary(input, input_ix.saturating_sub(128))
+            );
+            return false;
+        }
+    }
+    // Inclusive like the loop above, so a pattern that starts with text (`0..0`) can match at
+    // the very beginning of the input.
+    (match_any_chars.start..=match_any_chars.end).contains(&input_ix)
+}
+
+/// Returns `text` from byte offset `start` onwards, moving `start` forward to the next character
+/// boundary so that the slice never panics. Used only for log output.
+fn suffix_at_char_boundary(text: &str, start: usize) -> &str {
+    let mut start = start.min(text.len());
+    while !text.is_char_boundary(start) {
+        start += 1;
+    }
+    &text[start..]
+}
-/// Canonicalizes the whitespace of license text.
+/// Canonicalizes license text by removing all non-alphanumeric characters, lowercasing, and turning
+/// runs of whitespace into a single space. Unicode alphanumeric characters are intentionally
+/// preserved since these should cause license mismatch when not within a portion of the license
+/// where arbitrary text is allowed.
fn canonicalize_license_text(license: &str) -> String {
license
.chars()
- .filter(|c| c.is_ascii_alphanumeric() || c.is_ascii_whitespace())
+ .filter(|c| c.is_ascii_whitespace() || c.is_alphanumeric())
.map(|c| c.to_ascii_lowercase())
.collect::<String>()
.split_ascii_whitespace()
@@ -218,7 +321,7 @@ impl LicenseDetectionWatcher {
async fn is_path_eligible(fs: &Arc<dyn Fs>, abs_path: PathBuf) -> Option<bool> {
log::debug!("checking if `{abs_path:?}` is an open source license");
- // Resolve symlinks so that the file size from metadata is correct.
+ // resolve symlinks so that the file size from metadata is correct
let Some(abs_path) = fs.canonicalize(&abs_path).await.ok() else {
log::debug!(
"`{abs_path:?}` license file probably deleted (error canonicalizing the path)"
@@ -226,8 +329,13 @@ impl LicenseDetectionWatcher {
return None;
};
let metadata = fs.metadata(&abs_path).await.log_err()??;
- // If the license file is >32kb it's unlikely to legitimately match any eligible license.
- if metadata.len > 32768 {
+ if metadata.len > LICENSE_PATTERNS.approximate_max_length as u64 {
+ log::debug!(
+ "`{abs_path:?}` license file was skipped \
+ because its size of {} bytes was larger than the max size of {} bytes",
+ metadata.len,
+ LICENSE_PATTERNS.approximate_max_length
+ );
return None;
}
let text = fs.load(&abs_path).await.log_err()?;
@@ -262,7 +370,6 @@ mod tests {
use gpui::TestAppContext;
use serde_json::json;
use settings::{Settings as _, SettingsStore};
- use unindent::unindent;
use worktree::WorktreeSettings;
use super::*;
@@ -275,25 +382,8 @@ mod tests {
#[track_caller]
fn assert_matches_license(text: &str, license: OpenSourceLicense) {
- if detect_license(text) != Some(license) {
- let license_regex_text = canonicalize_license_regex(license.regex());
- let license_regex = Regex::new(&format!("^{}$", license_regex_text)).unwrap();
- let text = canonicalize_license_text(text);
- let matched_regex = license_regex.is_match(&text);
- if matched_regex {
- panic!(
- "The following text matches the individual regex for {}, \
- but not the combined one:\n```license-text\n{}\n```\n",
- license, text
- );
- } else {
- panic!(
- "The following text doesn't match the regex for {}:\n\
- ```license-text\n{}\n```\n\n```regex\n{}\n```\n",
- license, text, license_regex_text
- );
- }
- }
+ assert_eq!(detect_license(text), Some(license));
+ assert!(text.len() < LICENSE_PATTERNS.approximate_max_length);
}
/*
@@ -325,7 +415,8 @@ mod tests {
continue;
};
let path_string = entry.path().to_string_lossy().to_string();
- match detect_license(&contents) {
+ let license = detect_license(&contents);
+ match license {
Some(license) => detected.push((license, path_string)),
None => unrecognized.push(path_string),
}
@@ -348,87 +439,38 @@ mod tests {
}
*/
- #[test]
- fn test_no_unicode_in_regexes() {
- for license in OpenSourceLicense::VARIANTS {
- assert!(
- !license.regex().contains(|c: char| !c.is_ascii()),
- "{}.regex contains unicode",
- license.spdx_identifier()
- );
- }
- }
-
#[test]
fn test_apache_positive_detection() {
assert_matches_license(APACHE_2_0_TXT, OpenSourceLicense::Apache2_0);
-
- let license_with_appendix = format!(
- r#"{APACHE_2_0_TXT}
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License."#
- );
- assert_matches_license(&license_with_appendix, OpenSourceLicense::Apache2_0);
-
- // Sometimes people fill in the appendix with copyright info.
- let license_with_copyright = license_with_appendix.replace(
- "Copyright [yyyy] [name of copyright owner]",
- "Copyright 2025 John Doe",
+ assert_matches_license(
+ include_str!("../license_examples/apache-2.0-ex1.txt"),
+ OpenSourceLicense::Apache2_0,
);
- assert!(license_with_copyright != license_with_appendix);
- assert_matches_license(&license_with_copyright, OpenSourceLicense::Apache2_0);
-
assert_matches_license(
- include_str!("../../../LICENSE-APACHE"),
+ include_str!("../license_examples/apache-2.0-ex2.txt"),
OpenSourceLicense::Apache2_0,
);
-
assert_matches_license(
- include_str!("../license_examples/apache-2.0-ex1.txt"),
+ include_str!("../license_examples/apache-2.0-ex3.txt"),
OpenSourceLicense::Apache2_0,
);
assert_matches_license(
- include_str!("../license_examples/apache-2.0-ex2.txt"),
+ include_str!("../license_examples/apache-2.0-ex4.txt"),
OpenSourceLicense::Apache2_0,
);
assert_matches_license(
- include_str!("../license_examples/apache-2.0-ex3.txt"),
+ include_str!("../../../LICENSE-APACHE"),
OpenSourceLicense::Apache2_0,
);
}
#[test]
fn test_apache_negative_detection() {
- assert!(
+ assert_eq!(
detect_license(&format!(
"{APACHE_2_0_TXT}\n\nThe terms in this license are void if P=NP."
- ))
- .is_none()
+ )),
+ None
);
}
@@ -490,7 +532,7 @@ mod tests {
This project is dual licensed under the ISC License and the MIT License."#
);
- assert!(detect_license(&license_text).is_none());
+ assert_eq!(detect_license(&license_text), None);
}
#[test]
@@ -517,7 +559,7 @@ mod tests {
This project is dual licensed under the MIT License and the Apache License, Version 2.0."#
);
- assert!(detect_license(&license_text).is_none());
+ assert_eq!(detect_license(&license_text), None);
}
#[test]
@@ -533,7 +575,7 @@ mod tests {
This project is dual licensed under the UPL License and the MIT License."#
);
- assert!(detect_license(&license_text).is_none());
+ assert_eq!(detect_license(&license_text), None);
}
#[test]
@@ -614,44 +656,6 @@ mod tests {
assert_eq!(canonicalize_license_text(input), expected);
}
- #[test]
- fn test_license_detection_canonicalizes_whitespace() {
- let mit_with_weird_spacing = unindent(
- r#"
- MIT License
-
-
- Copyright (c) 2024 John Doe
-
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
-
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
-
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
- "#
- .trim(),
- );
-
- assert_matches_license(&mit_with_weird_spacing, OpenSourceLicense::MIT);
- }
-
fn init_test(cx: &mut TestAppContext) {
cx.update(|cx| {
let settings_store = SettingsStore::test(cx);