Skip to content

Commit a1fc5d8

Browse files
Add new sample_matches attribute to top-level detection
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent dbffe43 commit a1fc5d8

File tree

101 files changed

+8717
-316
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+8717
-316
lines changed

src/licensedcode/detection.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,7 @@ def to_dict(
670670

671671
if include_text:
672672
result['matched_text'] = matched_text
673+
673674
return result
674675

675676

@@ -917,7 +918,12 @@ def dict_fields(attr, value):
917918

918919
return True
919920

920-
return attr.asdict(self, filter=dict_fields)
921+
detection_mapping = attr.asdict(self, filter=dict_fields)
922+
detection_mapping["sample_matches"] = [
923+
match.to_dict(include_text=True)
924+
for match in self.matches
925+
]
926+
return detection_mapping
921927

922928
def get_license_detection_object(self):
923929
return LicenseDetection(

tests/cluecode/data/plugin_filter_clues/filtered-expected.json

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,23 @@
33
{
44
"identifier": "apache_1_1-1712efcb-d696-b5e4-214d-b2ab69680c2a",
55
"license_expression": "apache-1.1",
6-
"detection_count": 1
6+
"detection_count": 1,
7+
"sample_matches": [
8+
{
9+
"score": 96.07,
10+
"from_file": "LICENSE",
11+
"start_line": 7,
12+
"end_line": 70,
13+
"matched_length": 367,
14+
"match_coverage": 100.0,
15+
"matcher": "3-seq",
16+
"license_expression": "apache-1.1",
17+
"rule_identifier": "apache-1.1_63.RULE",
18+
"rule_relevance": 100,
19+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-1.1_63.RULE",
20+
"matched_text": null
21+
}
22+
]
723
}
824
],
925
"files": [

tests/cluecode/data/plugin_filter_clues/filtered-expected2.json

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,23 @@
33
{
44
"identifier": "pygres_2_2-04f085e8-3db1-f9e2-8dde-7ba8a7ba619a",
55
"license_expression": "pygres-2.2",
6-
"detection_count": 1
6+
"detection_count": 1,
7+
"sample_matches": [
8+
{
9+
"score": 100.0,
10+
"from_file": "LICENSE2",
11+
"start_line": 7,
12+
"end_line": 22,
13+
"matched_length": 145,
14+
"match_coverage": 100.0,
15+
"matcher": "2-aho",
16+
"license_expression": "pygres-2.2",
17+
"rule_identifier": "pygres-2.2_2.RULE",
18+
"rule_relevance": 100,
19+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pygres-2.2_2.RULE",
20+
"matched_text": null
21+
}
22+
]
723
}
824
],
925
"files": [

tests/cluecode/data/plugin_filter_clues/filtered-expected3.json

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,23 @@
33
{
44
"identifier": "pcre-c61d8210-7748-d787-5e3e-fd64c6cd6b6a",
55
"license_expression": "pcre",
6-
"detection_count": 1
6+
"detection_count": 1,
7+
"sample_matches": [
8+
{
9+
"score": 100.0,
10+
"from_file": "LICENSE3",
11+
"start_line": 1,
12+
"end_line": 47,
13+
"matched_length": 303,
14+
"match_coverage": 100.0,
15+
"matcher": "1-hash",
16+
"license_expression": "pcre",
17+
"rule_identifier": "pcre.LICENSE",
18+
"rule_relevance": 100,
19+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/pcre.LICENSE",
20+
"matched_text": null
21+
}
22+
]
723
}
824
],
925
"files": [

tests/formattedcode/data/common/manifests-expected.json

Lines changed: 198 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -472,52 +472,240 @@
472472
{
473473
"identifier": "apache_2_0-d66ab77d-a5cc-7104-e702-dc7df61fe9e8",
474474
"license_expression": "apache-2.0",
475-
"detection_count": 3
475+
"detection_count": 3,
476+
"sample_matches": [
477+
{
478+
"score": 100.0,
479+
"from_file": "manifests/npm-license-mapping/package.json",
480+
"start_line": 20,
481+
"end_line": 20,
482+
"matched_length": 3,
483+
"match_coverage": 100.0,
484+
"matcher": "2-aho",
485+
"license_expression": "apache-2.0",
486+
"rule_identifier": "spdx_license_id_apache-2.0_for_apache-2.0.RULE",
487+
"rule_relevance": 100,
488+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/spdx_license_id_apache-2.0_for_apache-2.0.RULE",
489+
"matched_text": null
490+
}
491+
]
476492
},
477493
{
478494
"identifier": "apache_2_0-ec759ae0-ea5a-f138-793e-388520e080c0",
479495
"license_expression": "apache-2.0",
480-
"detection_count": 1
496+
"detection_count": 1,
497+
"sample_matches": [
498+
{
499+
"score": 100.0,
500+
"from_file": "manifests/npm-license-mapping/package.json",
501+
"start_line": 6,
502+
"end_line": 6,
503+
"matched_length": 4,
504+
"match_coverage": 100.0,
505+
"matcher": "2-aho",
506+
"license_expression": "apache-2.0",
507+
"rule_identifier": "apache-2.0_65.RULE",
508+
"rule_relevance": 100,
509+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_65.RULE",
510+
"matched_text": null
511+
}
512+
]
481513
},
482514
{
483515
"identifier": "cddl_1_0-c6dbef4d-659c-289f-5ee9-1ca0278edad6",
484516
"license_expression": "cddl-1.0",
485-
"detection_count": 1
517+
"detection_count": 1,
518+
"sample_matches": [
519+
{
520+
"score": 16.0,
521+
"from_file": "manifests/maven/persistence-api-1.0.pom",
522+
"start_line": 17,
523+
"end_line": 19,
524+
"matched_length": 3,
525+
"match_coverage": 100.0,
526+
"matcher": "2-aho",
527+
"license_expression": "unknown-license-reference",
528+
"rule_identifier": "license-intro_72.RULE",
529+
"rule_relevance": 16,
530+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/license-intro_72.RULE",
531+
"matched_text": null
532+
},
533+
{
534+
"score": 82.35,
535+
"from_file": "manifests/maven/persistence-api-1.0.pom",
536+
"start_line": 18,
537+
"end_line": 20,
538+
"matched_length": 14,
539+
"match_coverage": 82.35,
540+
"matcher": "3-seq",
541+
"license_expression": "cddl-1.0",
542+
"rule_identifier": "cddl-1.0_32.RULE",
543+
"rule_relevance": 100,
544+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/cddl-1.0_32.RULE",
545+
"matched_text": null
546+
}
547+
]
486548
},
487549
{
488550
"identifier": "cddl_1_0-dd3dd7df-afca-6a5e-492c-f7b279fdd880",
489551
"license_expression": "cddl-1.0",
490-
"detection_count": 1
552+
"detection_count": 1,
553+
"sample_matches": [
554+
{
555+
"score": 100.0,
556+
"from_file": "manifests/maven/persistence-api-1.0.pom",
557+
"start_line": 1,
558+
"end_line": 1,
559+
"matched_length": 8,
560+
"match_coverage": 100.0,
561+
"matcher": "2-aho",
562+
"license_expression": "cddl-1.0",
563+
"rule_identifier": "cddl-1.0.RULE",
564+
"rule_relevance": 100,
565+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/cddl-1.0.RULE",
566+
"matched_text": "- name: Common Development and Distribution License (CDDL) v1.0"
567+
},
568+
{
569+
"score": 100.0,
570+
"from_file": "manifests/maven/persistence-api-1.0.pom",
571+
"start_line": 2,
572+
"end_line": 2,
573+
"matched_length": 7,
574+
"match_coverage": 100.0,
575+
"matcher": "2-aho",
576+
"license_expression": "cddl-1.0",
577+
"rule_identifier": "cddl-1.0_4.RULE",
578+
"rule_relevance": 100,
579+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/cddl-1.0_4.RULE",
580+
"matched_text": " url: http://www.sun.com/cddl/cddl.html"
581+
}
582+
]
491583
},
492584
{
493585
"identifier": "lgpl_3_0-272571eb-5e68-95b6-ddb0-71de2d8df321",
494586
"license_expression": "lgpl-3.0",
495-
"detection_count": 2
587+
"detection_count": 2,
588+
"sample_matches": [
589+
{
590+
"score": 100.0,
591+
"from_file": "manifests/pypi/bluepyopt_setup.py",
592+
"start_line": 74,
593+
"end_line": 75,
594+
"matched_length": 10,
595+
"match_coverage": 100.0,
596+
"matcher": "2-aho",
597+
"license_expression": "lgpl-3.0",
598+
"rule_identifier": "pypi_gnu_lesser_general_public_license_v3.RULE",
599+
"rule_relevance": 100,
600+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/pypi_gnu_lesser_general_public_license_v3.RULE",
601+
"matched_text": null
602+
}
603+
]
496604
},
497605
{
498606
"identifier": "lgpl_3_0-121be3c2-9c80-df84-d3da-8f674e4125c0",
499607
"license_expression": "lgpl-3.0",
500-
"detection_count": 1
608+
"detection_count": 1,
609+
"sample_matches": [
610+
{
611+
"score": 100.0,
612+
"from_file": "manifests/pypi/bluepyopt_setup.py",
613+
"start_line": 9,
614+
"end_line": 20,
615+
"matched_length": 106,
616+
"match_coverage": 100.0,
617+
"matcher": "2-aho",
618+
"license_expression": "lgpl-3.0",
619+
"rule_identifier": "lgpl-3.0_276.RULE",
620+
"rule_relevance": 100,
621+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/lgpl-3.0_276.RULE",
622+
"matched_text": null
623+
}
624+
]
501625
},
502626
{
503627
"identifier": "lgpl_3_0-2db87bcf-56b4-9d7d-7075-2effae31c631",
504628
"license_expression": "lgpl-3.0",
505-
"detection_count": 1
629+
"detection_count": 1,
630+
"sample_matches": [
631+
{
632+
"score": 100.0,
633+
"from_file": "manifests/pypi/bluepyopt_setup.py",
634+
"start_line": 65,
635+
"end_line": 65,
636+
"matched_length": 2,
637+
"match_coverage": 100.0,
638+
"matcher": "2-aho",
639+
"license_expression": "lgpl-3.0",
640+
"rule_identifier": "lgpl-3.0_152.RULE",
641+
"rule_relevance": 100,
642+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/lgpl-3.0_152.RULE",
643+
"matched_text": null
644+
}
645+
]
506646
},
507647
{
508648
"identifier": "lgpl_3_0-38174920-e8ed-7bda-41ec-94df7380b7d5",
509649
"license_expression": "lgpl-3.0",
510-
"detection_count": 1
650+
"detection_count": 1,
651+
"sample_matches": [
652+
{
653+
"score": 100.0,
654+
"from_file": "manifests/pypi/bluepyopt_setup.py",
655+
"start_line": 1,
656+
"end_line": 1,
657+
"matched_length": 1,
658+
"match_coverage": 100.0,
659+
"matcher": "1-hash",
660+
"license_expression": "lgpl-3.0",
661+
"rule_identifier": "lgpl-3.0_29.RULE",
662+
"rule_relevance": 100,
663+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/lgpl-3.0_29.RULE",
664+
"matched_text": "LGPLv3"
665+
}
666+
]
511667
},
512668
{
513669
"identifier": "mit-3fce6ea2-8abd-6c6b-3ede-a37af7c6efee",
514670
"license_expression": "mit",
515-
"detection_count": 1
671+
"detection_count": 1,
672+
"sample_matches": [
673+
{
674+
"score": 100.0,
675+
"from_file": "manifests/npm-license-string/package.json",
676+
"start_line": 4,
677+
"end_line": 4,
678+
"matched_length": 2,
679+
"match_coverage": 100.0,
680+
"matcher": "2-aho",
681+
"license_expression": "mit",
682+
"rule_identifier": "mit_30.RULE",
683+
"rule_relevance": 100,
684+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_30.RULE",
685+
"matched_text": null
686+
}
687+
]
516688
},
517689
{
518690
"identifier": "mit-a822f434-d61f-f2b1-c792-8b8cb9e7b9bf",
519691
"license_expression": "mit",
520-
"detection_count": 1
692+
"detection_count": 1,
693+
"sample_matches": [
694+
{
695+
"score": 100.0,
696+
"from_file": "manifests/npm-license-string/package.json",
697+
"start_line": 1,
698+
"end_line": 1,
699+
"matched_length": 1,
700+
"match_coverage": 100.0,
701+
"matcher": "1-spdx-id",
702+
"license_expression": "mit",
703+
"rule_identifier": "spdx-license-identifier-mit-5da48780aba670b0860c46d899ed42a0f243ff06",
704+
"rule_relevance": 100,
705+
"rule_url": null,
706+
"matched_text": "MIT"
707+
}
708+
]
521709
}
522710
],
523711
"files": [

0 commit comments

Comments
 (0)