-
Notifications
You must be signed in to change notification settings - Fork 109
add support to store packages/archives locally #1685
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
0cade5e
0515605
6d140c2
c845a20
ca3a1ac
993d80c
59fc6fa
dd80c83
3494bee
33fc3c7
ecf3a21
5c83405
5c452b0
d0f272f
ad6b14e
5ebe370
9384133
50fa74b
b117064
a773009
f4bad69
22c50af
7d553ca
81942b6
f818e62
18e66a3
04b6cef
e0a59f6
2b62163
5632bf7
d81ebd0
33dfe79
435b2d9
68bb105
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
# Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
||
from django.conf import settings | ||
from django.conf.urls.static import static | ||
from django.contrib.auth import views as auth_views | ||
from django.urls import include | ||
from django.urls import path | ||
|
@@ -54,6 +55,8 @@ | |
path("", RedirectView.as_view(url="project/")), | ||
] | ||
|
||
urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do not use Django's media storage for our archives; we are using our own storage mechanism instead. |
||
|
||
|
||
if settings.SCANCODEIO_ENABLE_ADMIN_SITE: | ||
urlpatterns.append(path("admin/", admin_site.urls)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -165,6 +165,7 @@ class Meta: | |
"pipeline", | ||
"execute_now", | ||
"selected_groups", | ||
"use_local_storage", | ||
] | ||
|
||
def __init__(self, *args, **kwargs): | ||
|
@@ -178,6 +179,12 @@ def __init__(self, *args, **kwargs): | |
pipeline_choices = scanpipe_app.get_pipeline_choices(include_addon=False) | ||
self.fields["pipeline"].choices = pipeline_choices | ||
|
||
self.fields["use_local_storage"].label = "Store packages locally" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Leave the form changes for later. |
||
self.fields[ | ||
"use_local_storage" | ||
].help_text = "If checked, packages will be stored on the local filesystem." | ||
self.fields["use_local_storage"].widget.attrs.update({"class": "checkbox"}) | ||
|
||
def clean_name(self): | ||
return " ".join(self.cleaned_data["name"].split()) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
# Generated by Django 5.1.1 on 2025-07-09 | ||
|
||
import django.db.models.deletion | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are no model changes, so no migrations are needed. |
||
import uuid | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """
    Create the ``PackageArchive`` and ``DownloadedPackage`` tables and add the
    ``use_local_storage`` flag to ``Project``.

    The operations below mirror the model declarations: every field and
    constraint declared on the models is present here, so that the database
    schema and the model state agree once this migration is applied.
    """

    dependencies = [
        ('scanpipe', '0067_discoveredpackage_notes'),
    ]

    operations = [
        migrations.CreateModel(
            name='PackageArchive',
            fields=[
                ('uuid', models.UUIDField(
                    db_index=True, default=uuid.uuid4, editable=False, primary_key=True,
                    serialize=False, verbose_name='UUID'
                )),
                ('checksum_sha256', models.CharField(
                    db_index=True, help_text='SHA256 checksum of the package archive file.',
                    max_length=64, unique=True
                )),
                ('storage_path', models.CharField(
                    blank=True, help_text='Path to the stored archive file', max_length=1024
                )),
                ('created_date', models.DateTimeField(
                    auto_now_add=True, help_text='Date when the archive was added to storage.'
                )),
                ('package_file', models.FileField(
                    blank=True, help_text='The actual package archive file ( ZIP or TAR).',
                    null=True, upload_to='packages/'
                )),
            ],
            # No extra index is declared here: the PackageArchive model's Meta
            # defines none, and ``checksum_sha256`` is already unique/indexed.
        ),

        migrations.CreateModel(
            name='DownloadedPackage',
            fields=[
                ('uuid', models.UUIDField(
                    db_index=True, default=uuid.uuid4, editable=False, primary_key=True,
                    serialize=False, verbose_name='UUID'
                )),
                ('url', models.URLField(
                    blank=True, db_index=True,
                    help_text='URL from which the package was downloaded, if applicable.',
                    max_length=1024
                )),
                ('filename', models.CharField(
                    help_text='Name of the package file.', max_length=255
                )),
                ('download_date', models.DateTimeField(
                    auto_now_add=True, help_text='Date when the package was downloaded or added.'
                )),
                ('scan_log', models.TextField(
                    blank=True, help_text='Log output from scanning the package.'
                )),
                ('scan_date', models.DateTimeField(
                    blank=True, help_text='Date when the package was scanned.', null=True
                )),
                # The two fields below are declared on the DownloadedPackage
                # model and must exist as columns for the model to be usable.
                ('scancode_version', models.CharField(
                    blank=True, help_text='ScanCode version used for scanning.', max_length=50
                )),
                ('pipeline_name', models.CharField(
                    blank=True, help_text='Pipeline used to process the package.', max_length=100
                )),
                ('project', models.ForeignKey(
                    editable=False, on_delete=django.db.models.deletion.CASCADE,
                    related_name='downloadedpackages', to='scanpipe.project'
                )),
                ('package_archive', models.ForeignKey(
                    help_text='The stored archive file associated with this package.',
                    on_delete=django.db.models.deletion.CASCADE, to='scanpipe.packagearchive'
                )),
            ],
            options={
                'indexes': [models.Index(fields=['url'], name='url_idx')],
                'constraints': [
                    models.UniqueConstraint(
                        condition=models.Q(('url__gt', '')),
                        fields=('url', 'project'),
                        name='scanpipe_downloadedpackage_unique_url_project'
                    ),
                    # Matches the model's second constraint: one row per
                    # (project, archive) pair.
                    models.UniqueConstraint(
                        fields=('project', 'package_archive'),
                        name='scanpipe_downloadedpackage_unique_project_archive'
                    ),
                ],
            },
        ),

        migrations.AddField(
            model_name='project',
            name='use_local_storage',
            field=models.BooleanField(
                default=False, help_text='Store packages locally if enabled.'
            ),
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -585,6 +585,7 @@ class Project(UUIDPKModel, ExtraDataFieldMixin, UpdateMixin, models.Model): | |
) | ||
notes = models.TextField(blank=True) | ||
settings = models.JSONField(default=dict, blank=True) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Based on the latest review, we do not need models yet. |
||
labels = TaggableManager(through=UUIDTaggedItem, ordering=["name"]) | ||
purl = models.CharField( | ||
max_length=2048, | ||
|
@@ -597,6 +598,10 @@ class Project(UUIDPKModel, ExtraDataFieldMixin, UpdateMixin, models.Model): | |
), | ||
) | ||
|
||
labels = TaggableManager(through=UUIDTaggedItem) | ||
use_local_storage = models.BooleanField( | ||
default=False, help_text="Store packages locally if enabled." | ||
) | ||
objects = ProjectQuerySet.as_manager() | ||
|
||
class Meta: | ||
|
@@ -4393,6 +4398,116 @@ def success(self): | |
return self.response_status_code in (200, 201, 202) | ||
|
||
|
||
class PackageArchive(UUIDPKModel):
    """
    Stores metadata about a package archive file stored in the project's storage.
    Each archive is uniquely identified by its SHA256 checksum.
    """

    # Hex digest of the archive content; unique across all stored archives.
    checksum_sha256 = models.CharField(
        max_length=64,
        unique=True,
        db_index=True,
        help_text=_("SHA256 checksum of the package archive file."),
    )
    # Optional textual location of the archive; may be left blank.
    storage_path = models.CharField(
        max_length=1024,
        blank=True,
        help_text=_("Path to the stored archive file"),
    )
    # Optional file handle for the archive, uploaded under "packages/".
    package_file = models.FileField(
        upload_to="packages/",
        null=True,
        blank=True,
        help_text=_("The actual package archive file ( ZIP or TAR)."),
    )
    # Set automatically when the row is first created.
    created_date = models.DateTimeField(
        auto_now_add=True,
        help_text=_("Date when the archive was added to storage."),
    )

    class Meta:
        pass

    def __str__(self):
        """Return a short label: checksum prefix and where the archive lives."""
        # Prefer the explicit storage path, falling back to the file field name.
        location = self.storage_path or self.package_file.name
        # A single space separates "at" from the location.
        return f"Archive {self.checksum_sha256[:8]} at {location}"
|
||
|
||
class DownloadedPackage(UUIDPKModel):
    """
    Record of a package that was downloaded, or supplied as an input, for a
    given project. Every record points at the PackageArchive holding the file
    and keeps the source URL (when there is one) along with scan details.
    """

    # Owning project; rows are removed together with the project.
    project = models.ForeignKey(
        Project,
        on_delete=models.CASCADE,
        related_name="downloadedpackages",
        editable=False,
    )
    # Source URL; blank when the package was not fetched from a URL.
    url = models.URLField(
        blank=True,
        db_index=True,
        max_length=1024,
        help_text=_("URL from which the package was downloaded, if applicable."),
    )
    filename = models.CharField(
        help_text=_("Name of the package file."),
        max_length=255,
    )
    # Filled in automatically on creation.
    download_date = models.DateTimeField(
        help_text=_("Date when the package was downloaded or added."),
        auto_now_add=True,
    )
    scan_log = models.TextField(
        help_text=_("Log output from scanning the package."),
        blank=True,
    )
    scan_date = models.DateTimeField(
        help_text=_("Date when the package was scanned."),
        blank=True,
        null=True,
    )
    # Archive backing this package; rows are removed together with the archive.
    package_archive = models.ForeignKey(
        PackageArchive,
        help_text=_("The stored archive file associated with this package."),
        on_delete=models.CASCADE,
    )
    scancode_version = models.CharField(
        help_text=_("ScanCode version used for scanning."),
        blank=True,
        max_length=50,
    )
    pipeline_name = models.CharField(
        help_text=_("Pipeline used to process the package."),
        blank=True,
        max_length=100,
    )

    class Meta:
        # NOTE(review): ``url`` also sets db_index=True, so this explicit index
        # looks redundant; confirm before keeping both.
        indexes = [models.Index(fields=["url"], name="url_idx")]
        constraints = [
            # A non-empty URL may appear only once per project.
            models.UniqueConstraint(
                name="%(app_label)s_%(class)s_unique_url_project",
                fields=["url", "project"],
                condition=Q(url__gt=""),
            ),
            # A given archive may be attached to a project only once.
            models.UniqueConstraint(
                name="%(app_label)s_%(class)s_unique_project_archive",
                fields=["project", "package_archive"],
            ),
        ]

    def __str__(self):
        """Return the file name together with the owning project's name."""
        return "{} for project {}".format(self.filename, self.project.name)
|
||
|
||
@receiver(models.signals.post_save, sender=settings.AUTH_USER_MODEL) | ||
def create_auth_token(sender, instance=None, created=False, **kwargs): | ||
"""Create an API key token on user creation, using the signal system.""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,9 +20,15 @@ | |
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
||
import logging | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's instead modify the superclass, rather than modifying each of the pipelines. |
||
from pathlib import Path | ||
|
||
from scanpipe.pipelines.analyze_root_filesystem import RootFS | ||
from scanpipe.pipes import docker | ||
from scanpipe.pipes import rootfs | ||
from scanpipe.pipes.fetch import store_package_archive | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Docker(RootFS): | ||
|
@@ -36,6 +42,7 @@ def steps(cls): | |
cls.find_images_os_and_distro, | ||
cls.collect_images_information, | ||
cls.collect_and_create_codebase_resources, | ||
cls.store_package_archives, | ||
cls.collect_and_create_system_packages, | ||
cls.flag_uninteresting_codebase_resources, | ||
cls.flag_empty_files, | ||
|
@@ -74,6 +81,40 @@ def collect_and_create_codebase_resources(self): | |
"""Collect and labels all image files as CodebaseResources.""" | ||
for image in self.images: | ||
docker.create_codebase_resources(self.project, image) | ||
self.package_files = [] | ||
for resource in self.project.codebaseresources.filter(extension=".deb"): | ||
self.package_files.append(resource.path) | ||
logger.debug(f"Found package file: {resource.path}") | ||
|
||
def store_package_archives(self):
    """
    Store the package archives listed in ``self.package_files``.

    Does nothing and returns an empty list when the project has
    ``use_local_storage`` disabled. Otherwise each path that exists on disk
    is passed to ``store_package_archive``; missing paths and storage
    failures are logged and skipped so one bad file does not stop the rest.

    Returns the list of values returned by ``store_package_archive`` for the
    files that were stored successfully.
    """
    project = self.project

    if not project.use_local_storage:
        # The trailing space keeps the two sentences from running together.
        logger.info(
            f"Local storage is disabled for project: {project.name}. "
            "Skipping package storage."
        )
        return []

    # Both literals need the ``f`` prefix, otherwise the file list is logged
    # as the literal text "{self.package_files}".
    logger.info(
        f"Storing package archives for project: {project.name}, "
        f"files: {self.package_files}"
    )

    stored_files = []
    for package_path in self.package_files:
        if not Path(package_path).exists():
            logger.error(f"Invalid or missing package path: {package_path}")
            continue

        package_path_str = str(package_path)
        logger.info(f"Storing package archive: {package_path_str}")
        try:
            result = store_package_archive(
                project, url=None, file_path=package_path_str
            )
        except Exception as e:
            # Best effort: keep going with the remaining files, but record the
            # traceback so the failure can be diagnosed.
            logger.exception(f"Failed to store {package_path_str}: {e}")
        else:
            logger.info(f"Stored package archive {package_path_str}: {result}")
            stored_files.append(result)

    return stored_files
|
||
def collect_and_create_system_packages(self): | ||
"""Collect installed system packages for each layer based on the distro.""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on our review session, what about using this instead?