Skip to content

Commit 8491f06

Browse files
Support LicenseDetection creation in all pipelines
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent 52ad26a commit 8491f06

File tree

8 files changed

+137
-3
lines changed

8 files changed

+137
-3
lines changed

scanpipe/models.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2684,6 +2684,16 @@ def with_resources_count(self):
26842684
)
26852685
return self.annotate(resources_count=count_subquery)
26862686

2687+
def has_license_detections(self):
    """
    Return the objects for which `license_detections` or
    `other_license_detections` is not an empty list.
    """
    with_detections = ~Q(license_detections=[])
    with_other_detections = ~Q(other_license_detections=[])
    return self.filter(with_detections | with_other_detections)
2691+
2692+
def has_no_license_detections(self):
    """
    Return the objects for which both `license_detections` and
    `other_license_detections` are empty lists.
    """
    without_detections = Q(license_detections=[])
    without_other_detections = Q(other_license_detections=[])
    return self.filter(without_detections & without_other_detections)
2696+
26872697

26882698
class AbstractPackage(models.Model):
26892699
"""These fields should be kept in line with `packagedcode.models.PackageData`."""
@@ -3456,6 +3466,7 @@ class AbstractLicenseDetection(models.Model):
34563466
)
34573467

34583468
matches = models.JSONField(
3469+
_("Reference Matches"),
34593470
default=list,
34603471
blank=True,
34613472
help_text=_('List of license matches combined in this detection.'),
@@ -3497,13 +3508,18 @@ class DiscoveredLicense(
34973508
"""
34983509
license_expression_field = "license_expression"
34993510

3511+
# If this license was discovered in an extracted license statement
3512+
# this is True, and False if this was discovered in a file.
3513+
from_package = None
3514+
35003515
detection_count = models.BigIntegerField(
35013516
blank=True,
35023517
null=True,
35033518
help_text=_("Total number of this license detection discovered."),
35043519
)
35053520

35063521
file_regions = models.JSONField(
3522+
_("Detection Locations"),
35073523
default=list,
35083524
blank=True,
35093525
help_text=_(
@@ -3572,6 +3588,20 @@ def create_from_data(cls, project, detection_data):
35723588
discovered_license.save(save_error=False, capture_exception=False)
35733589
return discovered_license
35743590

3591+
def update_with_file_region(self, file_region):
    """
    Record `file_region` on this detection when it is not already known.

    `file_region` must expose a `to_dict()` method; the resulting mapping is
    compared against the entries of `self.file_regions`. When it is a new
    entry, it is appended to `file_regions`, `detection_count` is increased
    by 1 (an unset or zero count becomes 1), and both fields are saved.
    An already recorded region leaves the object untouched.
    """
    file_region_data = file_region.to_dict()
    if file_region_data in self.file_regions:
        # Known location: nothing to count and nothing to persist.
        return

    self.file_regions.append(file_region_data)
    # `detection_count` is nullable, treat a missing value as zero.
    self.detection_count = (self.detection_count or 0) + 1
    self.save(update_fields=["detection_count", "file_regions"])
3604+
35753605

35763606
class WebhookSubscription(UUIDPKModel, ProjectRelatedModel):
35773607
target_url = models.URLField(_("Target URL"), max_length=1024)

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def steps(cls):
8383
cls.remove_packages_without_resources,
8484
cls.scan_unmapped_to_files,
8585
cls.scan_mapped_from_for_files,
86+
cls.collect_and_create_license_detections,
8687
cls.flag_deployed_from_resources_with_missing_license,
8788
cls.create_local_files_packages,
8889
)
@@ -286,6 +287,13 @@ def scan_mapped_from_for_files(self):
286287
scan_files = d2d.get_from_files_for_scanning(self.project.codebaseresources)
287288
scancode.scan_for_files(self.project, scan_files, progress_logger=self.log)
288289

290+
def collect_and_create_license_detections(self):
    """
    Delegate to `scancode.collect_and_create_license_detections` to collect
    and create the unique license detections of this pipeline's project,
    from its resources and package data.
    """
    project = self.project
    scancode.collect_and_create_license_detections(project=project)
296+
289297
def create_local_files_packages(self):
290298
"""Create local-files packages for codebase resources not part of a package."""
291299
d2d.create_local_files_packages(self.project)

scanpipe/pipelines/docker.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def steps(cls):
4242
cls.flag_ignored_resources,
4343
cls.scan_for_application_packages,
4444
cls.scan_for_files,
45+
cls.collect_and_create_license_detections,
4546
cls.analyze_scanned_files,
4647
cls.flag_not_analyzed_codebase_resources,
4748
)

scanpipe/pipelines/docker_windows.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def steps(cls):
4545
cls.flag_ignored_resources,
4646
cls.scan_for_application_packages,
4747
cls.scan_for_files,
48+
cls.collect_and_create_license_detections,
4849
cls.analyze_scanned_files,
4950
cls.flag_data_files_with_no_clues,
5051
cls.flag_not_analyzed_codebase_resources,

scanpipe/pipelines/root_filesystem.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def steps(cls):
4545
cls.scan_for_application_packages,
4646
cls.match_not_analyzed_to_system_packages,
4747
cls.scan_for_files,
48+
cls.collect_and_create_license_detections,
4849
cls.analyze_scanned_files,
4950
cls.flag_not_analyzed_codebase_resources,
5051
)
@@ -123,6 +124,13 @@ def scan_for_files(self):
123124
"""Scan unknown resources for copyrights, licenses, emails, and urls."""
124125
scancode.scan_for_files(self.project, progress_logger=self.log)
125126

127+
def collect_and_create_license_detections(self):
    """
    Delegate to `scancode.collect_and_create_license_detections` to collect
    and create the unique license detections of this pipeline's project,
    from its resources and package data.
    """
    project = self.project
    scancode.collect_and_create_license_detections(project=project)
133+
126134
def analyze_scanned_files(self):
127135
"""Analyze single file scan results for completeness."""
128136
flag.analyze_scanned_files(self.project)

scanpipe/pipelines/scan_codebase.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def steps(cls):
4545
cls.flag_ignored_resources,
4646
cls.scan_for_application_packages,
4747
cls.scan_for_files,
48+
cls.collect_and_create_license_detections,
4849
)
4950

5051
def copy_inputs_to_codebase_directory(self):
@@ -65,3 +66,10 @@ def scan_for_application_packages(self):
6566
def scan_for_files(self):
6667
"""Scan unknown resources for copyrights, licenses, emails, and urls."""
6768
scancode.scan_for_files(self.project, progress_logger=self.log)
69+
70+
def collect_and_create_license_detections(self):
    """
    Delegate to `scancode.collect_and_create_license_detections` to collect
    and create the unique license detections of this pipeline's project,
    from its resources and package data.
    """
    project = self.project
    scancode.collect_and_create_license_detections(project=project)

scanpipe/pipes/__init__.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -238,11 +238,18 @@ def update_or_create_dependency(
238238
return dependency
239239

240240

241-
def update_or_create_license_detection(project, detection_data):
241+
def update_or_create_license_detection(
242+
project, detection_data, resource_path, from_package=False,
243+
):
242244
"""
243245
Get, update or create a DiscoveredLicense object then return it.
244246
Use the `project` and `detection_data` mapping to lookup and creates the
245247
DiscoveredLicense using its detection identifier as a unique key.
248+
249+
Additionally, if `resource_path` is passed, add the file region where
250+
the license was detected to the DiscoveredLicense object, if not present
251+
already. `from_package` is True if the license detection was in an
252+
`extracted_license_statement` from a package metadata.
246253
"""
247254
detection_identifier = detection_data["identifier"]
248255

@@ -259,13 +266,22 @@ def update_or_create_license_detection(project, detection_data):
259266
detection_data,
260267
)
261268

269+
if resource_path:
270+
file_region = scancode.get_file_region(
271+
detection_data=detection_data,
272+
resource_path=resource_path,
273+
)
274+
license_detection.update_with_file_region(file_region)
275+
276+
license_detection.from_package = from_package
262277
return license_detection
263278

264279

265280
def _clean_license_detection_data(detection_data):
266281
detection_data = detection_data.copy()
267-
matches = detection_data.pop("sample_matches")
268-
detection_data["matches"] = matches
282+
if "sample_matches" in detection_data:
283+
matches = detection_data.pop("sample_matches")
284+
detection_data["matches"] = matches
269285
return detection_data
270286

271287

scanpipe/pipes/scancode.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from extractcode import api as extractcode_api
4040
from packagedcode import get_package_handler
4141
from packagedcode import models as packagedcode_models
42+
from licensedcode.detection import FileRegion
4243
from scancode import Scanner
4344
from scancode import api as scancode_api
4445
from scancode import cli as scancode_cli
@@ -401,6 +402,67 @@ def add_resource_to_package(package_uid, resource, project):
401402
resource.discovered_packages.add(package)
402403

403404

405+
def collect_and_create_license_detections(project):
    """
    Create the DiscoveredLicense objects of `project` from the license
    detections stored on its CodebaseResources and from the license
    detections found in the package data of those resources.

    Resource-level detections are processed first, then the
    `license_detections` and `other_license_detections` of each package data
    mapping; the latter are flagged with `from_package=True`.
    """
    logger.info(f"Project {project} collect_license_detections:")

    resources_with_licenses = project.codebaseresources.has_license_detections()
    for resource in resources_with_licenses:
        logger.info(f" Processing: {resource.path} for licenses")

        for detection_data in resource.license_detections:
            pipes.update_or_create_license_detection(
                project=project,
                detection_data=detection_data,
                resource_path=resource.path,
            )

    resources_with_packages = project.codebaseresources.has_package_data()
    for resource in resources_with_packages:
        for package_mapping in resource.package_data:
            package_data = packagedcode_models.PackageData.from_dict(
                mapping=package_mapping,
            )

            # Declared detections first, then the "other" ones, so the
            # creation order matches two consecutive loops.
            detection_groups = (
                package_data.license_detections,
                package_data.other_license_detections,
            )
            for detection_group in detection_groups:
                for detection in detection_group:
                    pipes.update_or_create_license_detection(
                        project=project,
                        detection_data=detection,
                        resource_path=resource.path,
                        from_package=True,
                    )
445+
446+
447+
def get_file_region(detection_data, resource_path):
    """
    Return a FileRegion for the LicenseDetection mapping `detection_data`.

    The region is located at `resource_path` and spans from the smallest
    "start_line" to the largest "end_line" found in the entries of
    `detection_data["matches"]`.
    """
    matches = detection_data["matches"]
    first_line = min(match["start_line"] for match in matches)
    last_line = max(match["end_line"] for match in matches)
    return FileRegion(
        path=resource_path,
        start_line=first_line,
        end_line=last_line,
    )
464+
465+
404466
def assemble_packages(project):
405467
"""
406468
Create instances of DiscoveredPackage and DiscoveredDependency for `project`

0 commit comments

Comments
 (0)