Skip to content

Commit ac4f7dc

Browse files
committed
Improve Rule check with is_synthetic check
Also move test utilities to test module

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent c0590ca commit ac4f7dc

File tree

10 files changed

+304
-321
lines changed

10 files changed

+304
-321
lines changed

src/licensedcode/models.py

Lines changed: 31 additions & 104 deletions
Original file line number | Diff line number | Diff line change
@@ -1580,16 +1580,13 @@ def rule_url(self):
15801580
Return a string with the permanent URL to this rule on
15811581
scancode-toolkit github repository.
15821582
"""
1583-
if any([
1584-
'spdx-license-identifier' in self.identifier,
1585-
'package-manifest' in self.identifier,
1586-
'license-detection-unknown' in self.identifier
1587-
]):
1583+
if self.is_synthetic:
15881584
return None
1589-
elif self.is_from_license:
1585+
1586+
if self.is_from_license:
15901587
return SCANCODE_LICENSE_RULE_URL.format(self.identifier)
1591-
else:
1592-
return SCANCODE_RULE_URL.format(self.identifier)
1588+
1589+
return SCANCODE_RULE_URL.format(self.identifier)
15931590

15941591
def rule_file(
15951592
self,
@@ -1601,10 +1598,13 @@ def rule_file(
16011598
given the `rules_data_dir` directory or `licenses_data_dir`
16021599
if a license rule.
16031600
"""
1601+
if self.is_synthetic:
1602+
return None
1603+
16041604
if self.is_from_license:
16051605
return join(licenses_data_dir, self.identifier)
1606-
else:
1607-
return join(rules_data_dir, self.identifier)
1606+
1607+
return join(rules_data_dir, self.identifier)
16081608

16091609
def __attrs_post_init__(self, *args, **kwargs):
16101610
self.setup()
@@ -1932,75 +1932,9 @@ def from_file(cls, rule_file, is_builtin=True):
19321932
rule.load_data(rule_file=rule_file)
19331933
return rule
19341934

1935-
@classmethod
1936-
def _from_text_file_and_expression(
1937-
cls,
1938-
text_file,
1939-
license_expression=None,
1940-
identifier=None,
1941-
**kwargs,
1942-
):
1943-
"""
1944-
Return a new Rule object loaded from a ``text_file`` and a
1945-
``license_expression``. Used for testing only.
1946-
"""
1947-
license_expression = license_expression or 'mit'
1948-
if exists(text_file):
1949-
text = get_rule_text(location=text_file)
1950-
else:
1951-
text = ''
1952-
1953-
return cls._from_text_and_expression(
1954-
text=text,
1955-
license_expression=license_expression,
1956-
identifier=identifier,
1957-
**kwargs,
1958-
)
1959-
1960-
@classmethod
1961-
def _from_text_and_expression(
1962-
cls,
1963-
text=None,
1964-
license_expression=None,
1965-
identifier=None,
1966-
**kwargs,
1967-
):
1968-
"""
1969-
Return a new Rule object loaded from a ``text_file`` and a
1970-
``license_expression``. Used for testing only.
1971-
"""
1972-
license_expression = license_expression or 'mit'
1973-
text = text or ''
1974-
identifier = identifier or f'_tst_{time()}_{len(text)}_{license_expression}'
1975-
rule = Rule(
1976-
license_expression=license_expression,
1977-
text=text,
1978-
is_synthetic=True,
1979-
identifier=identifier,
1980-
**kwargs,
1981-
)
1982-
rule.setup()
1983-
return rule
1984-
1985-
@classmethod
1986-
def _from_expression(cls, license_expression=None, identifier=None, **kwargs):
1987-
"""
1988-
Return a new Rule object from a ``license_expression``. Used for testing only.
1989-
"""
1990-
license_expression = license_expression or 'mit'
1991-
identifier = identifier or f'_tst_{time()}_expr_{license_expression}'
1992-
rule = Rule(
1993-
identifier=identifier,
1994-
license_expression=license_expression,
1995-
text='',
1996-
is_synthetic=True,
1997-
)
1998-
rule.setup()
1999-
return rule
2000-
20011935
def compute_unique_id(self):
20021936
"""
2003-
Return a a unique id string based on this rule content.
1937+
Return a unique id string based on this rule content.
20041938
20051939
(This is a SHA1 checksum of the identifier expression and text, but this
20061940
is an implementation detail)
@@ -2245,36 +2179,29 @@ def set_relevance(self):
22452179
self.relevance = computed_relevance
22462180

22472181

2248-
def get_rule_object_from_match(license_match_mapping):
2182+
def get_rule_object_from_match(license_match):
22492183
"""
2250-
Return a rehydrated Rule object from a `license_match_mapping`
2184+
Return a rehydrated Rule object from a `license_match`
22512185
LicenseMatch mapping.
22522186
"""
2253-
license_expression = license_match_mapping["license_expression"]
2254-
text = license_match_mapping.get("matched_text", None)
2255-
length = license_match_mapping["matched_length"]
2256-
rule_identifier = license_match_mapping["rule_identifier"]
2257-
if 'spdx-license-identifier' in rule_identifier:
2258-
return SpdxRule(
2259-
license_expression=license_expression,
2260-
text=text,
2261-
length=length,
2262-
)
2263-
elif 'license-detection-unknown' in rule_identifier:
2264-
return UnknownRule(
2265-
license_expression=license_expression,
2266-
text=text,
2267-
length=length,
2268-
)
2269-
elif 'package-manifest' in rule_identifier:
2270-
return UnDetectedRule(
2271-
license_expression=license_expression,
2272-
text=text,
2273-
length=length,
2274-
)
2275-
else:
2276-
from licensedcode.cache import get_index
2277-
return get_index().rules_by_id[rule_identifier]
2187+
2188+
rule_subclass_by_identifier_prefix = {
2189+
"spdx-license-identifier": SpdxRule,
2190+
"license-detection-unknown": UnknownRule,
2191+
"package-manifest": UnDetectedRule,
2192+
}
2193+
2194+
rule_identifier = license_match["rule_identifier"]
2195+
for prefix, cls in rule_subclass_by_identifier_prefix.items():
2196+
if rule_identifier.startswith(prefix):
2197+
return cls(
2198+
license_expression=license_match["license_expression"],
2199+
text=license_match.get("matched_text", None),
2200+
length=license_match["matched_length"],
2201+
)
2202+
2203+
from licensedcode.cache import get_index
2204+
return get_index().rules_by_id[rule_identifier]
22782205

22792206

22802207
def compute_relevance(length):

src/licensedcode_test_utils.py

Lines changed: 52 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,18 +10,18 @@
1010
import io
1111
import os
1212
import traceback
13+
from time import time
1314

1415
import attr
15-
from license_expression import Licensing
1616
import pytest
1717
import saneyaml
18+
from license_expression import Licensing
1819

1920
from commoncode import text
2021
from commoncode.testcase import get_test_file_pairs
2122

2223
from scancode_config import REGEN_TEST_FIXTURES
2324

24-
2525
"""
2626
Data-driven tests using expectations stored in YAML files.
2727
"""
@@ -350,3 +350,53 @@ def query_tokens_with_unknowns(qry):
350350
yield token
351351
for _ in range(unknowns.get(pos, 0)):
352352
yield None
353+
354+
355+
def create_rule_from_text_file_and_expression(
356+
text_file,
357+
license_expression=None,
358+
identifier=None,
359+
**kwargs
360+
):
361+
"""
362+
Return a new Rule object from a ``text_file`` and a ``license_expression``.
363+
"""
364+
license_expression = license_expression or 'mit'
365+
if os.path.exists(text_file):
366+
from licensedcode.models import get_rule_text
367+
text = get_rule_text(location=text_file)
368+
else:
369+
text = ''
370+
371+
return create_rule_from_text_and_expression(
372+
text=text,
373+
license_expression=license_expression,
374+
identifier=identifier,
375+
**kwargs,
376+
)
377+
378+
379+
def create_rule_from_text_and_expression(
380+
text=None,
381+
license_expression=None,
382+
identifier=None,
383+
**kwargs,
384+
):
385+
"""
386+
Return a new Rule object from a ``text``, a ``license_expression`` and a
387+
rule ``identifier``.
388+
"""
389+
from licensedcode.models import Rule
390+
license_expression = license_expression or 'mit'
391+
text = text or ''
392+
identifier = identifier or f'_tst_{time()}_{len(text)}_{license_expression}'
393+
rule = Rule(
394+
license_expression=license_expression,
395+
text=text,
396+
is_synthetic=True,
397+
identifier=identifier,
398+
**kwargs,
399+
)
400+
rule.setup()
401+
return rule
402+

0 commit comments

Comments
 (0)