Skip to content

Commit e054254

Browse files
authored
Merge pull request #3924 from aboutcode-org/update-rules-with-required-phrases-auto
Update rules with required phrases automatically
2 parents 4b57a7f + f837a38 commit e054254

File tree

9,622 files changed

+24723
-30248
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

9,622 files changed

+24723
-30248
lines changed

docs/source/how-to-guides/add_new_license_detection_rule.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ More (advanced) rules options:
7373
be present in the result license detections. These just have the license text and a
7474
`is_false_positive` flag set to True.
7575

76-
- you can specify key phrases by surrounding one or more words between the `{{`
76+
- you can specify required phrases by surrounding one or more words between the `{{`
7777
and `}}` tags. Required phrases are words that **must** be matched/present in order
7878
for a RULE to be considered a match.
7979

etc/scripts/licenses/buildrules.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from licensedcode import models
1717
from licensedcode import match_hash
1818
from licensedcode import frontmatter
19+
from licensedcode.models import get_rule_id_for_text
1920
from license_expression import Licensing
2021

2122
"""
@@ -130,23 +131,6 @@ def load_data(location="00-new-licenses.txt"):
130131
return rules
131132

132133

133-
def rule_exists(text):
134-
"""
135-
Return the matched rule identifier if the text is an existing rule matched
136-
exactly, False otherwise.
137-
"""
138-
idx = cache.get_index()
139-
140-
matches = idx.match(query_string=text)
141-
if not matches:
142-
return False
143-
if len(matches) > 1:
144-
return False
145-
match = matches[0]
146-
if match.matcher == match_hash.MATCH_HASH and match.score() == 100:
147-
return match.rule.identifier
148-
149-
150134
def all_rule_by_tokens():
151135
"""
152136
Return a mapping of {tuples of tokens: rule id}, with one item for each
@@ -347,7 +331,7 @@ def cli(licenses_file, dump_to_file_on_errors=False):
347331

348332
text = rule.text
349333

350-
existing_rule = rule_exists(text)
334+
existing_rule = get_rule_id_for_text(text)
351335
skinny_text = " ".join(text[:80].split()).replace("{", " ").replace("}", " ")
352336

353337
existing_msg = (

etc/scripts/licenses/report_license_rules.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@
6262
"is_license_reference",
6363
"is_license_intro",
6464
"is_license_clue",
65+
"is_required_phrase",
66+
"skip_for_required_phrase_generation",
6567
"is_deprecated",
6668
"has_unknown",
6769
"only_known_words",

respective.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
This software is (C) by the respective authors, and licensed under the GPL
2+
License.
3+

setup-mini.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ console_scripts =
158158
scancode-reindex-licenses = licensedcode.reindex:reindex_licenses
159159
scancode-license-data = licensedcode.license_db:dump_scancode_license_data
160160
regen-package-docs = packagedcode.regen_package_docs:regen_package_docs
161+
add-required-phrases = licensedcode.required_phrases:add_required_phrases
161162

162163
# These are configurations for ScanCode plugins as setuptools entry points.
163164
# Each plugin entry has this form:

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ console_scripts =
158158
scancode-reindex-licenses = licensedcode.reindex:reindex_licenses
159159
scancode-license-data = licensedcode.license_db:dump_scancode_license_data
160160
regen-package-docs = packagedcode.regen_package_docs:regen_package_docs
161+
add-required-phrases = licensedcode.required_phrases:add_required_phrases
162+
gen-new-required-phrases-rules = licensedcode.required_phrases:gen_required_phrases_rules
161163

162164
# These are configurations for ScanCode plugins as setuptools entry points.
163165
# Each plugin entry hast this form:

src/licensedcode/cache.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import os
1111
import pickle
12+
1213
from shutil import rmtree
1314

1415
from commoncode.fileutils import create_dir
@@ -185,10 +186,7 @@ def load_or_build(
185186
additional_license_plugins=plugin_directories,
186187
)
187188

188-
# save the cache as pickle new tree checksum
189-
with open(cache_file, 'wb') as fn:
190-
pickle.dump(license_cache, fn, protocol=PICKLE_PROTOCOL)
191-
189+
license_cache.dump(cache_file)
192190
return license_cache
193191

194192
except lockfile.LockTimeout:
@@ -201,6 +199,13 @@ def has_additional_licenses(self):
201199
if cache.additional_license_directory or cache.additional_license_plugins:
202200
return True
203201

202+
def dump(self, cache_file):
203+
"""
204+
Dump this license cache on disk at ``cache_file``.
205+
"""
206+
with open(cache_file, 'wb') as fn:
207+
pickle.dump(self, fn, protocol=PICKLE_PROTOCOL)
208+
204209

205210
def build_index(
206211
licenses_db=None,
@@ -235,9 +240,12 @@ def build_index(
235240
if not licenses_db:
236241
# combine the licenses in these additional directories with the licenses in the original DB
237242
additional_license_dirs = get_license_dirs(additional_dirs=additional_directories)
238-
combined_license_directories = [licenses_data_dir] + additional_license_dirs
239243
# generate a single combined license db with all licenses
240-
licenses_db = load_licenses_from_multiple_dirs(license_dirs=combined_license_directories)
244+
licenses_db = load_licenses_from_multiple_dirs(
245+
builtin_license_data_dir=licenses_data_dir,
246+
additional_license_data_dirs=additional_license_dirs,
247+
with_deprecated=False,
248+
)
241249

242250
# if we have additional directories, extract the rules from them
243251
additional_rule_dirs = get_rule_dirs(additional_dirs=additional_directories)
@@ -393,7 +401,7 @@ def get_cache(
393401
Return a LicenseCache either rebuilt, cached or loaded from disk.
394402
395403
If ``index_all_languages`` is True, include texts in all languages when
396-
building the license index. Otherwise, only include the English license \
404+
building the license index. Otherwise, only include the English license
397405
texts and rules (the default)
398406
"""
399407
return populate_cache(
@@ -531,7 +539,7 @@ def validate_spdx_license_keys(license_expression, licensing):
531539
if not type(key) == str:
532540
msg = f"Invalid license key: {key} of type {type(key)}, license key should be a string"
533541
messages.append(msg)
534-
542+
535543
lic = license_db.get(key, None)
536544
if not lic:
537545
licenses = load_licenses(with_deprecated=True)

src/licensedcode/data/licenses/any-osi-perl-modules.LICENSE

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ other_urls:
1111
- https://metacpan.org/pod/Net::MQTT::Simple#LICENSE
1212
ignorable_urls:
1313
- http://www.opensource.org/licenses/alphabetical
14+
minimum_coverage: 90
1415
---
1516

1617
This software may be redistributed under the terms of the GPL, LGPL,
@@ -23,4 +24,4 @@ When using a packaged version, please refer to the package metadata to see
2324
under which license terms it was distributed. Alternatively, a distributor
2425
may choose to replace the LICENSE section of the documentation and/or
2526
include a LICENSE file to reflect the license(s) they chose to redistribute
26-
under.
27+
under.
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
---
2+
key: dco-1.0
3+
short_name: DCO 1.0
4+
name: Developer Certificate of Origin 1.0
5+
category: CLA
6+
owner: Linux Foundation
7+
homepage_url: https://developercertificate.org/
8+
spdx_license_key: LicenseRef-scancode-dco-1.0
9+
text_urls:
10+
- https://developercertificate.org/
11+
other_urls:
12+
- https://github.com/nexB/scancode-toolkit/issues/3038#issuecomment-1317511139
13+
minimum_coverage: 90
14+
---
15+
16+
Developer's Certificate of Origin 1.0
17+
18+
By making a contribution to this project, I certify that:
19+
20+
1. The contribution was created in whole or in part by me and I
21+
have the right to submit it under the open source license
22+
indicated in the file LICENSE; or
23+
24+
2. The contribution is based upon previous work that, to the best
25+
of my knowledge, is covered under an appropriate open source
26+
license and I have the right under that license to submit that
27+
work with modifications, whether created in whole or in part
28+
by me, under the same open source license (unless I am
29+
permitted to submit under a different license), as indicated
30+
in the file LICENSE; or
31+
32+
3. The contribution was provided directly to me by some other
33+
person who certified (1), (2) or (3) and I have not modified
34+
it.
35+
36+
4. I understand and agree that this project and the contribution
37+
are public and that a record of the contribution (including all
38+
personal information I submit with it, including my sign-off) is
39+
maintained indefinitely and may be redistributed consistent
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
---
2+
key: samba-dco-1.0
3+
short_name: Samba Developer's Declaration, Version 1.0
4+
name: Samba Developer's Declaration, Version 1.0
5+
category: CLA
6+
owner: Samba
7+
homepage_url: https://github.com/samba-team/samba/blob/master/README.contributing
8+
spdx_license_key: LicenseRef-scancode-samba-dco-1.0
9+
text_urls:
10+
- https://github.com/samba-team/samba/blob/master/README.contributing
11+
minimum_coverage: 80
12+
ignorable_urls:
13+
- http://www.gnu.org/licenses/gpl-3.0.html
14+
- http://www.gnu.org/licenses/lgpl-3.0.html
15+
---
16+
17+
Samba Developer's Declaration, Version 1.0
18+
19+
By making a contribution to this project, I certify that:
20+
21+
(a) The contribution was created in whole or in part by me and I
22+
have the right to submit it under the appropriate
23+
version of the GNU General Public License; or
24+
25+
(b) The contribution is based upon previous work that, to the best
26+
of my knowledge, is covered under an appropriate open source
27+
license and I have the right under that license to submit that
28+
work with modifications, whether created in whole or in part
29+
by me, under the GNU General Public License, in the
30+
appropriate version; or
31+
32+
(c) The contribution was provided directly to me by some other
33+
person who certified (a) or (b) and I have not modified
34+
it.
35+
36+
(d) I understand and agree that this project and the
37+
contribution are public and that a record of the
38+
contribution (including all metadata and personal
39+
information I submit with it, including my sign-off) is
40+
maintained indefinitely and may be redistributed
41+
consistent with the Samba Team's policies and the
42+
requirements of the GNU GPL where they are relevant.
43+
44+
(e) I am granting this work to this project under the terms of both
45+
the GNU General Public License and the GNU Lesser General Public
46+
License as published by the Free Software Foundation; either version
47+
3 of these Licenses, or (at the option of the project) any later
48+
version.
49+
50+
http://www.gnu.org/licenses/gpl-3.0.html
51+
http://www.gnu.org/licenses/lgpl-3.0.html

0 commit comments

Comments
 (0)