Skip to content

Commit d672650

Browse files
Support LicenseDetection creation in all pipelines
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 206fe2b commit d672650

File tree

8 files changed

+137
-3
lines changed

8 files changed

+137
-3
lines changed

scanpipe/models.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2726,6 +2726,16 @@ def with_resources_count(self):
27262726
)
27272727
return self.annotate(resources_count=count_subquery)
27282728

2729+
def has_license_detections(self):
    """
    Return the objects of this queryset for which `license_detections` or
    `other_license_detections` is not an empty list.
    """
    detections_not_empty = ~Q(license_detections=[])
    other_detections_not_empty = ~Q(other_license_detections=[])
    return self.filter(detections_not_empty | other_detections_not_empty)
2733+
2734+
def has_no_license_detections(self):
    """
    Return the objects of this queryset for which both `license_detections`
    and `other_license_detections` are empty lists.
    """
    detections_empty = Q(license_detections=[])
    other_detections_empty = Q(other_license_detections=[])
    return self.filter(detections_empty & other_detections_empty)
2738+
27292739

27302740
class AbstractPackage(models.Model):
27312741
"""These fields should be kept in line with `packagedcode.models.PackageData`."""
@@ -3506,6 +3516,7 @@ class AbstractLicenseDetection(models.Model):
35063516
)
35073517

35083518
matches = models.JSONField(
3519+
_("Reference Matches"),
35093520
default=list,
35103521
blank=True,
35113522
help_text=_('List of license matches combined in this detection.'),
@@ -3547,13 +3558,18 @@ class DiscoveredLicense(
35473558
"""
35483559
license_expression_field = "license_expression"
35493560

3561+
# If this license was discovered in an extracted license statement
3562+
# this is True, and False if this was discovered in a file.
3563+
from_package = None
3564+
35503565
detection_count = models.BigIntegerField(
35513566
blank=True,
35523567
null=True,
35533568
help_text=_("Total number of this license detection discovered."),
35543569
)
35553570

35563571
file_regions = models.JSONField(
3572+
_("Detection Locations"),
35573573
default=list,
35583574
blank=True,
35593575
help_text=_(
@@ -3622,6 +3638,20 @@ def create_from_data(cls, project, detection_data):
36223638
discovered_license.save(save_error=False, capture_exception=False)
36233639
return discovered_license
36243640

3641+
def update_with_file_region(self, file_region):
    """
    Record `file_region` on this license detection when it is not known yet.

    The `file_region` is serialized with its `to_dict()` method. When the
    resulting mapping is not already part of the `file_regions` list, it is
    appended, `detection_count` is increased by 1 (an unset or zero count is
    treated as 0), and both fields are saved.
    Nothing is modified nor saved when the region is already recorded.
    """
    file_region_data = file_region.to_dict()
    if file_region_data in self.file_regions:
        # Already recorded: avoid double counting and a needless DB write.
        return

    self.file_regions.append(file_region_data)
    self.detection_count = (self.detection_count or 0) + 1
    self.save(update_fields=["detection_count", "file_regions"])
3654+
36253655

36263656
class WebhookSubscription(UUIDPKModel, ProjectRelatedModel):
36273657
target_url = models.URLField(_("Target URL"), max_length=1024)

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def steps(cls):
8383
cls.remove_packages_without_resources,
8484
cls.scan_unmapped_to_files,
8585
cls.scan_mapped_from_for_files,
86+
cls.collect_and_create_license_detections,
8687
cls.flag_deployed_from_resources_with_missing_license,
8788
cls.create_local_files_packages,
8889
)
@@ -286,6 +287,13 @@ def scan_mapped_from_for_files(self):
286287
scan_files = d2d.get_from_files_for_scanning(self.project.codebaseresources)
287288
scancode.scan_for_files(self.project, scan_files, progress_logger=self.log)
288289

290+
def collect_and_create_license_detections(self):
    """
    Create the unique license detections of the project, collected from
    its resources and package data.
    """
    project = self.project
    scancode.collect_and_create_license_detections(project=project)
296+
289297
def create_local_files_packages(self):
290298
"""Create local-files packages for codebase resources not part of a package."""
291299
d2d.create_local_files_packages(self.project)

scanpipe/pipelines/docker.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def steps(cls):
4242
cls.flag_ignored_resources,
4343
cls.scan_for_application_packages,
4444
cls.scan_for_files,
45+
cls.collect_and_create_license_detections,
4546
cls.analyze_scanned_files,
4647
cls.flag_not_analyzed_codebase_resources,
4748
)

scanpipe/pipelines/docker_windows.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def steps(cls):
4545
cls.flag_ignored_resources,
4646
cls.scan_for_application_packages,
4747
cls.scan_for_files,
48+
cls.collect_and_create_license_detections,
4849
cls.analyze_scanned_files,
4950
cls.flag_data_files_with_no_clues,
5051
cls.flag_not_analyzed_codebase_resources,

scanpipe/pipelines/root_filesystem.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def steps(cls):
4545
cls.scan_for_application_packages,
4646
cls.match_not_analyzed_to_system_packages,
4747
cls.scan_for_files,
48+
cls.collect_and_create_license_detections,
4849
cls.analyze_scanned_files,
4950
cls.flag_not_analyzed_codebase_resources,
5051
)
@@ -123,6 +124,13 @@ def scan_for_files(self):
123124
"""Scan unknown resources for copyrights, licenses, emails, and urls."""
124125
scancode.scan_for_files(self.project, progress_logger=self.log)
125126

127+
def collect_and_create_license_detections(self):
    """
    Gather the license detections found in resources and package data,
    then create the unique ones for the project.
    """
    current_project = self.project
    scancode.collect_and_create_license_detections(project=current_project)
133+
126134
def analyze_scanned_files(self):
127135
"""Analyze single file scan results for completeness."""
128136
flag.analyze_scanned_files(self.project)

scanpipe/pipelines/scan_codebase.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def steps(cls):
4545
cls.flag_ignored_resources,
4646
cls.scan_for_application_packages,
4747
cls.scan_for_files,
48+
cls.collect_and_create_license_detections,
4849
)
4950

5051
def copy_inputs_to_codebase_directory(self):
@@ -65,3 +66,10 @@ def scan_for_application_packages(self):
6566
def scan_for_files(self):
    """Scan unknown resources for copyrights, licenses, emails, and urls."""
    project, log = self.project, self.log
    scancode.scan_for_files(project, progress_logger=log)
69+
70+
def collect_and_create_license_detections(self):
    """
    Collect the license detections from resources and package data and
    create the unique ones on the project.
    """
    scanned_project = self.project
    scancode.collect_and_create_license_detections(project=scanned_project)

scanpipe/pipes/__init__.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,11 +246,18 @@ def update_or_create_dependency(
246246
return dependency
247247

248248

249-
def update_or_create_license_detection(project, detection_data):
249+
def update_or_create_license_detection(
250+
project, detection_data, resource_path, from_package=False,
251+
):
250252
"""
251253
Get, update or create a DiscoveredLicense object then return it.
252254
Use the `project` and `detection_data` mapping to lookup and creates the
253255
DiscoveredLicense using its detection identifier as a unique key.
256+
257+
Additionally, if `resource_path` is passed, add the file region where
258+
the license was detected to the DiscoveredLicense object, if not present
259+
already. `from_package` is True if the license detection was in an
260+
`extracted_license_statement` from package metadata.
254261
"""
255262
detection_identifier = detection_data["identifier"]
256263

@@ -267,13 +274,22 @@ def update_or_create_license_detection(project, detection_data):
267274
detection_data,
268275
)
269276

277+
if resource_path:
278+
file_region = scancode.get_file_region(
279+
detection_data=detection_data,
280+
resource_path=resource_path,
281+
)
282+
license_detection.update_with_file_region(file_region)
283+
284+
license_detection.from_package = from_package
270285
return license_detection
271286

272287

273288
def _clean_license_detection_data(detection_data):
274289
detection_data = detection_data.copy()
275-
matches = detection_data.pop("sample_matches")
276-
detection_data["matches"] = matches
290+
if "sample_matches" in detection_data:
291+
matches = detection_data.pop("sample_matches")
292+
detection_data["matches"] = matches
277293
return detection_data
278294

279295

scanpipe/pipes/scancode.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from extractcode import api as extractcode_api
4040
from packagedcode import get_package_handler
4141
from packagedcode import models as packagedcode_models
42+
from licensedcode.detection import FileRegion
4243
from scancode import Scanner
4344
from scancode import api as scancode_api
4445
from scancode import cli as scancode_cli
@@ -411,6 +412,67 @@ def add_resource_to_package(package_uid, resource, project):
411412
resource.discovered_packages.add(package)
412413

413414

415+
def collect_and_create_license_detections(project):
    """
    Create or update the DiscoveredLicense objects of `project` from the
    license detections stored on its CodebaseResources and from the
    `license_detections` and `other_license_detections` of their package data.
    """
    logger.info(f"Project {project} collect_license_detections:")

    resources_with_licenses = project.codebaseresources.has_license_detections()
    for resource in resources_with_licenses:
        logger.info(f" Processing: {resource.path} for licenses")

        for detection_data in resource.license_detections:
            pipes.update_or_create_license_detection(
                project=project,
                detection_data=detection_data,
                resource_path=resource.path,
            )

    resources_with_packages = project.codebaseresources.has_package_data()
    for resource in resources_with_packages:
        for package_mapping in resource.package_data:
            package_data = packagedcode_models.PackageData.from_dict(
                mapping=package_mapping,
            )

            # Keep the original order: the main detections first, then the
            # other detections of the same package data.
            package_detections = [
                *package_data.license_detections,
                *package_data.other_license_detections,
            ]
            for detection in package_detections:
                pipes.update_or_create_license_detection(
                    project=project,
                    detection_data=detection,
                    resource_path=resource.path,
                    from_package=True,
                )
455+
456+
457+
def get_file_region(detection_data, resource_path):
    """
    Return a FileRegion for the LicenseDetection mapping `detection_data`.

    The region is located at `resource_path` and spans from the smallest
    `start_line` to the largest `end_line` found in the "matches" of the
    detection.

    Raise a ValueError when the detection has no matches, as no line range
    can be computed in that case.
    """
    matches = detection_data["matches"]
    if not matches:
        # min()/max() would raise an opaque ValueError on an empty sequence;
        # fail with the same exception type but an actionable message.
        raise ValueError(
            f"Cannot compute a file region for {resource_path}: "
            "the license detection has no matches."
        )

    return FileRegion(
        path=resource_path,
        start_line=min(match["start_line"] for match in matches),
        end_line=max(match["end_line"] for match in matches),
    )
474+
475+
414476
def assemble_packages(project):
415477
"""
416478
Create instances of DiscoveredPackage and DiscoveredDependency for `project`

0 commit comments

Comments
 (0)