-
Notifications
You must be signed in to change notification settings - Fork 109
add support to store packages/archives locally #1685
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
0cade5e
0515605
6d140c2
c845a20
ca3a1ac
993d80c
59fc6fa
dd80c83
3494bee
33fc3c7
ecf3a21
5c83405
5c452b0
d0f272f
ad6b14e
5ebe370
9384133
50fa74b
b117064
a773009
f4bad69
22c50af
7d553ca
81942b6
f818e62
18e66a3
04b6cef
e0a59f6
2b62163
5632bf7
d81ebd0
33dfe79
435b2d9
68bb105
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
# Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
||
from django.conf import settings | ||
from django.conf.urls.static import static | ||
from django.contrib.auth import views as auth_views | ||
from django.urls import include | ||
from django.urls import path | ||
|
@@ -54,6 +55,8 @@ | |
path("", RedirectView.as_view(url="project/")), | ||
] | ||
|
||
urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do not use media for our storage; instead, we are using our own thing. |
||
|
||
|
||
if settings.SCANCODEIO_ENABLE_ADMIN_SITE: | ||
urlpatterns.append(path("admin/", admin_site.urls)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -165,6 +165,7 @@ class Meta: | |
"pipeline", | ||
"execute_now", | ||
"selected_groups", | ||
"use_local_storage", | ||
] | ||
|
||
def __init__(self, *args, **kwargs): | ||
|
@@ -178,6 +179,11 @@ def __init__(self, *args, **kwargs): | |
pipeline_choices = scanpipe_app.get_pipeline_choices(include_addon=False) | ||
self.fields["pipeline"].choices = pipeline_choices | ||
|
||
self.fields["use_local_storage"].label = "Store packages locally" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Leave forms for later. |
||
self.fields["use_local_storage"].help_text = "If checked, " \ | ||
"packages will be stored on the local filesystem." | ||
self.fields["use_local_storage"].widget.attrs.update({"class": "checkbox"}) | ||
|
||
def clean_name(self): | ||
return " ".join(self.cleaned_data["name"].split()) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Generated by Django 5.1.1 on 2025-05-10 06:55 | ||
|
||
import django.db.models.deletion | ||
import uuid | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration): | ||
|
||
dependencies = [ | ||
('scanpipe', '0067_discoveredpackage_notes'), | ||
] | ||
|
||
operations = [ | ||
migrations.CreateModel( | ||
name='PackageArchive', | ||
fields=[ | ||
('uuid', models.UUIDField(db_index=True, default=uuid.uuid4, editable=False, primary_key=True, serialize=False, verbose_name='UUID')), | ||
('checksum_sha256', models.CharField(db_index=True, help_text='SHA256 checksum of the package archive file.', max_length=64, unique=True)), | ||
('storage_path', models.CharField(help_text='Path to the stored archive file (e.g., file:///path/to/file).', max_length=1024)), | ||
('created_date', models.DateTimeField(auto_now_add=True, help_text='Date when the archive was added to storage.')), | ||
], | ||
options={ | ||
'indexes': [models.Index(fields=['checksum_sha256'], name='checksum_idx')], | ||
}, | ||
), | ||
migrations.CreateModel( | ||
name='DownloadedPackage', | ||
fields=[ | ||
('uuid', models.UUIDField(db_index=True, default=uuid.uuid4, editable=False, primary_key=True, serialize=False, verbose_name='UUID')), | ||
('url', models.URLField(blank=True, db_index=True, help_text='URL from which the package was downloaded, if applicable.', max_length=1024)), | ||
('filename', models.CharField(help_text='Name of the package file.', max_length=255)), | ||
('download_date', models.DateTimeField(auto_now_add=True, help_text='Date when the package was downloaded or added.')), | ||
('scan_log', models.TextField(blank=True, help_text='Log output from scanning the package.')), | ||
('scan_date', models.DateTimeField(blank=True, help_text='Date when the package was scanned.', null=True)), | ||
('project', models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='downloadedpackages', to='scanpipe.project')), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure we want to put projects […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to think about how we are going to handle the same package archive used in two different projects using different pipelines, or done with different SCIO versions. For example, the same package archive is scanned with […] Additionally, we need to look into having a help text show up with the projects that were run on the same package. Consider this when you build the models, but we can also update them later, as this is preliminary anyway. |
||
('package_archive', models.ForeignKey(help_text='The stored archive file associated with this package.', on_delete=django.db.models.deletion.CASCADE, to='scanpipe.packagearchive')), | ||
], | ||
options={ | ||
'indexes': [models.Index(fields=['url'], name='url_idx')], | ||
'constraints': [models.UniqueConstraint(condition=models.Q(('url__gt', '')), fields=('url', 'project'), name='scanpipe_downloadedpackage_unique_url_project')], | ||
}, | ||
), | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Generated by Django 5.1.1 on 2025-05-12 09:41 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you please merge your migrations into one file, since they are for the same fields? |
||
class Migration(migrations.Migration): | ||
|
||
dependencies = [ | ||
('scanpipe', '0068_packagearchive_downloadedpackage'), | ||
] | ||
|
||
operations = [ | ||
migrations.AddField( | ||
model_name='packagearchive', | ||
name='package_file', | ||
field=models.FileField(blank=True, help_text='The actual package archive file (e.g., ZIP or TAR).', null=True, upload_to='packages/'), | ||
), | ||
migrations.AlterField( | ||
model_name='packagearchive', | ||
name='storage_path', | ||
field=models.CharField(blank=True, help_text='Path to the stored archive file (e.g., file:///path/to/file).', max_length=1024), | ||
), | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Generated by Django 5.1.1 on 2025-05-26 09:19 | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you please merge your migrations into one file, since they are for the same fields? |
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration): | ||
|
||
dependencies = [ | ||
('scanpipe', '0069_packagearchive_package_file_and_more'), | ||
] | ||
|
||
operations = [ | ||
migrations.AddField( | ||
model_name='project', | ||
name='use_local_storage', | ||
field=models.BooleanField(default=False, help_text='Store packages locally if enabled.'), | ||
), | ||
migrations.AlterField( | ||
model_name='packagearchive', | ||
name='package_file', | ||
field=models.FileField(blank=True, help_text='The actual package archive file ( ZIP or TAR).', null=True, upload_to='packages/'), | ||
), | ||
migrations.AlterField( | ||
model_name='packagearchive', | ||
name='storage_path', | ||
field=models.CharField(blank=True, help_text='Path to the stored archive file', max_length=1024), | ||
), | ||
] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,9 +20,15 @@ | |
# ScanCode.io is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/aboutcode-org/scancode.io for support and download. | ||
|
||
from scanpipe.pipelines.analyze_root_filesystem import RootFS | ||
import logging | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's instead use a modification of the superclass, not a modification to each of the pipelines. |
||
from pathlib import Path | ||
|
||
from scanpipe.pipelines import RootFS | ||
from scanpipe.pipes import docker | ||
from scanpipe.pipes import rootfs | ||
from scanpipe.pipes.fetch import store_package_archive | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Docker(RootFS): | ||
|
@@ -36,6 +42,7 @@ def steps(cls): | |
cls.find_images_os_and_distro, | ||
cls.collect_images_information, | ||
cls.collect_and_create_codebase_resources, | ||
cls.store_package_archives, | ||
cls.collect_and_create_system_packages, | ||
cls.flag_uninteresting_codebase_resources, | ||
cls.flag_empty_files, | ||
|
@@ -74,6 +81,38 @@ def collect_and_create_codebase_resources(self): | |
"""Collect and labels all image files as CodebaseResources.""" | ||
for image in self.images: | ||
docker.create_codebase_resources(self.project, image) | ||
self.package_files = [] | ||
for resource in self.project.codebaseresources.filter(extension=".deb"): | ||
self.package_files.append(resource.path) | ||
logger.debug(f"Found package file: {resource.path}") | ||
|
||
def store_package_archives(self):
    """
    Store the package archives listed in ``self.package_files``.

    Return an empty list without storing anything when
    ``project.use_local_storage`` is disabled. Otherwise call
    ``store_package_archive`` once per path that exists on disk and return
    the list of values it returned. Missing paths and storage failures are
    logged and skipped so that one bad archive does not abort the step.
    """
    project = self.project
    if not project.use_local_storage:
        logger.info(
            f"Local storage is disabled for project: {project.name}. "
            "Skipping package storage."
        )
        return []

    # ``package_files`` is filled by ``collect_and_create_codebase_resources``;
    # fall back to an empty list if that step did not set it.
    package_files = getattr(self, "package_files", [])
    logger.info(
        f"Storing package archives for project: {project.name}, "
        f"files: {package_files}"
    )

    stored_files = []
    for package_path in package_files:
        # NOTE(review): these values come from ``resource.path``; confirm they
        # resolve on disk from the current working directory, otherwise every
        # archive will be reported as missing here.
        if not Path(package_path).exists():
            logger.error(f"Invalid or missing package path: {package_path}")
            continue

        package_path_str = str(package_path)
        logger.info(f"Storing package archive: {package_path_str}")
        try:
            result = store_package_archive(
                project, url=None, file_path=package_path_str
            )
        except Exception:
            # Deliberately best-effort: keep going with the remaining
            # archives, but record the traceback for diagnosis.
            logger.exception(f"Failed to store {package_path_str}")
        else:
            logger.info(f"Stored package archive {package_path_str}: {result}")
            stored_files.append(result)
    return stored_files
|
||
def collect_and_create_system_packages(self): | ||
"""Collect installed system packages for each layer based on the distro.""" | ||
|
Uh oh!
There was an error while loading. Please reload this page.