From a5343b1f0ea0865397c2272e11f7baa6cafb0741 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Thu, 12 Jun 2025 15:51:19 +0530 Subject: [PATCH 01/14] Add support for storing top-level paths of the codebase Signed-off-by: Aayush Kumar --- scanpipe/pipes/rootfs.py | 7 +++++++ scanpipe/tests/test_pipelines.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/scanpipe/pipes/rootfs.py b/scanpipe/pipes/rootfs.py index 9c623491a..144b8239d 100644 --- a/scanpipe/pipes/rootfs.py +++ b/scanpipe/pipes/rootfs.py @@ -139,6 +139,13 @@ def get_res(parent, fname): rootfs_path=rootfs_path, ) + if with_dir: + rootfs_path = pipes.normalize_path("") + yield Resource( + location=location, + rootfs_path=rootfs_path, + ) + for top, dirs, files in os.walk(location): for f in files: yield get_res(parent=top, fname=f) diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 0852bc841..854c396f5 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -1209,7 +1209,7 @@ def test_scanpipe_rootfs_pipeline_integration(self): exitcode, out = pipeline.execute() self.assertEqual(0, exitcode, msg=out) - self.assertEqual(16, project1.codebaseresources.count()) + self.assertEqual(17, project1.codebaseresources.count()) self.assertEqual(2, project1.discoveredpackages.count()) self.assertEqual(0, project1.discovereddependencies.count()) From dfab26b60bf17064cf1920b59d5d36f43c8f786b Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Thu, 12 Jun 2025 18:36:59 +0530 Subject: [PATCH 02/14] Add `ancestor` field to CodebaseResource to track parent path of a resource Signed-off-by: Aayush Kumar --- ...0073_codebaseresource_ancestor_and_more.py | 22 +++++++++++++++++++ scanpipe/models.py | 9 ++++++++ scanpipe/pipes/__init__.py | 2 ++ 3 files changed, 33 insertions(+) create mode 100644 scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py diff --git a/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py 
b/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py new file mode 100644 index 000000000..6d6952e6e --- /dev/null +++ b/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 5.1.9 on 2025-06-12 10:32 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0072_discovereddependency_uuid_unique'), + ] + + operations = [ + migrations.AddField( + model_name='codebaseresource', + name='ancestor', + field=models.CharField(blank=True, help_text="Path of the immediate parent directory of this resource. Its '.' for top-level resources.", max_length=2000, null=True), + ), + migrations.AddIndex( + model_name='codebaseresource', + index=models.Index(fields=['project', 'ancestor'], name='scanpipe_co_project_f1a160_idx'), + ), + ] diff --git a/scanpipe/models.py b/scanpipe/models.py index 7bc6d1207..cdcd94c3f 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2695,6 +2695,14 @@ class CodebaseResource( 'Eg.: "/usr/bin/bash" for a path of "tarball-extract/rootfs/usr/bin/bash"' ), ) + + ancestor = models.CharField( + max_length=2000, + null=True, + blank=True, + help_text="Path of the immediate parent directory of this resource. Its '.' 
for top-level resources.", + ) + status = models.CharField( blank=True, max_length=50, @@ -2788,6 +2796,7 @@ class Meta: models.Index(fields=["compliance_alert"]), models.Index(fields=["is_binary"]), models.Index(fields=["is_text"]), + models.Index(fields=["project", "ancestor"]), ] constraints = [ models.UniqueConstraint( diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index 18a5d72c7..d5e0cdd73 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -71,6 +71,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): from scanpipe.pipes import flag relative_path = Path(location).relative_to(project.codebase_path) + parent_path = str(relative_path.parent) try: resource_data = scancode.get_resource_info(location=str(location)) except OSError as error: @@ -91,6 +92,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): codebase_resource = CodebaseResource( project=project, path=relative_path, + ancestor=parent_path, **resource_data, ) From 71ab4d7b98c07c5353c899dc4bab45dafc0607b6 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Fri, 13 Jun 2025 16:03:24 +0530 Subject: [PATCH 03/14] fix line too long error in scanpipe/models.py Signed-off-by: Aayush Kumar --- scanpipe/models.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index cdcd94c3f..f68e508e6 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2700,7 +2700,10 @@ class CodebaseResource( max_length=2000, null=True, blank=True, - help_text="Path of the immediate parent directory of this resource. Its '.' for top-level resources.", + help_text=_( + "Path of the immediate parent directory of a resource. 
" + "For top level resources the value is '.'" + ), ) status = models.CharField( From e417f9deab8440e0c3fe37fafc15ee52f57671ca Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Fri, 13 Jun 2025 17:15:02 +0530 Subject: [PATCH 04/14] update tests Signed-off-by: Aayush Kumar --- .../rootfs/basic-rootfs_root_filesystems.json | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json b/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json index 970d67200..b64c4115f 100644 --- a/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json +++ b/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json @@ -340,6 +340,42 @@ ], "dependencies": [], "files": [ + { + "path": "basic-rootfs.tar.gz-extract", + "type": "directory", + "name": "basic-rootfs.tar.gz-extract", + "status": "scanned", + "for_packages": [], + "tag": "", + "extension": ".tar.gz-extract", + "programming_language": "", + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "md5": "", + "sha1": "", + "sha256": "", + "sha512": "", + "sha1_git": "", + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_legal": false, + "is_manifest": false, + "is_readme": false, + "is_top_level": true, + "is_key_file": false, + "extra_data": {} + }, { "path": "basic-rootfs.tar.gz-extract/etc", "type": "directory", From ecb4ab67c8a9e261ac8fb28d6621b299cef927e5 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Sat, 14 Jun 2025 17:51:30 +0530 Subject: [PATCH 05/14] rename `ancestor` field to `parent_directory_path` Signed-off-by: Aayush Kumar --- ...0073_codebaseresource_ancestor_and_more.py | 22 ------------------- ...resource_parent_directory_path_and_more.py | 22 +++++++++++++++++++ 
scanpipe/models.py | 6 ++--- scanpipe/pipes/__init__.py | 6 ++++- 4 files changed, 30 insertions(+), 26 deletions(-) delete mode 100644 scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py create mode 100644 scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py diff --git a/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py b/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py deleted file mode 100644 index 6d6952e6e..000000000 --- a/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 5.1.9 on 2025-06-12 10:32 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('scanpipe', '0072_discovereddependency_uuid_unique'), - ] - - operations = [ - migrations.AddField( - model_name='codebaseresource', - name='ancestor', - field=models.CharField(blank=True, help_text="Path of the immediate parent directory of this resource. Its '.' for top-level resources.", max_length=2000, null=True), - ), - migrations.AddIndex( - model_name='codebaseresource', - index=models.Index(fields=['project', 'ancestor'], name='scanpipe_co_project_f1a160_idx'), - ), - ] diff --git a/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py b/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py new file mode 100644 index 000000000..561a459c5 --- /dev/null +++ b/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 5.1.9 on 2025-06-14 10:11 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0072_discovereddependency_uuid_unique'), + ] + + operations = [ + migrations.AddField( + model_name='codebaseresource', + name='parent_directory_path', + field=models.CharField(blank=True, help_text='Path of the immediate parent directory of a resource. 
For top level resources the value is set to None', max_length=2000, null=True), + ), + migrations.AddIndex( + model_name='codebaseresource', + index=models.Index(fields=['project', 'parent_directory_path'], name='scanpipe_co_project_f4a24b_idx'), + ), + ] diff --git a/scanpipe/models.py b/scanpipe/models.py index f68e508e6..6f01f469e 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2696,13 +2696,13 @@ class CodebaseResource( ), ) - ancestor = models.CharField( + parent_directory_path = models.CharField( max_length=2000, null=True, blank=True, help_text=_( "Path of the immediate parent directory of a resource. " - "For top level resources the value is '.'" + "For top level resources the value is set to None" ), ) @@ -2799,7 +2799,7 @@ class Meta: models.Index(fields=["compliance_alert"]), models.Index(fields=["is_binary"]), models.Index(fields=["is_text"]), - models.Index(fields=["project", "ancestor"]), + models.Index(fields=["project", "parent_directory_path"]), ] constraints = [ models.UniqueConstraint( diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index d5e0cdd73..b8aa36cac 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -72,6 +72,10 @@ def make_codebase_resource(project, location, save=True, **extra_fields): relative_path = Path(location).relative_to(project.codebase_path) parent_path = str(relative_path.parent) + + if parent_path == ".": + parent_path = None + try: resource_data = scancode.get_resource_info(location=str(location)) except OSError as error: @@ -92,7 +96,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): codebase_resource = CodebaseResource( project=project, path=relative_path, - ancestor=parent_path, + parent_directory_path=parent_path, **resource_data, ) From 94c276f1d44775cfdbd15ca7e9dab402c88ea68e Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Sat, 14 Jun 2025 23:59:01 +0530 Subject: [PATCH 06/14] add save() method to CodebaseResource to ensure 
`parent_directory_path` is always set Signed-off-by: Aayush Kumar --- scanpipe/models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scanpipe/models.py b/scanpipe/models.py index 6f01f469e..a98a36681 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2812,6 +2812,11 @@ class Meta: def __str__(self): return self.path + def save(self, *args, **kwargs): + if self.path and not self.parent_directory_path: + self.parent_directory_path = parent_directory(str(self.path), with_trail=False) + super().save(*args, **kwargs) + def get_absolute_url(self): return reverse("resource_detail", args=[self.project.slug, self.path]) From 08687b0db3cfa20dc6d965e7405bc882b23ab862 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Sun, 15 Jun 2025 00:23:59 +0530 Subject: [PATCH 07/14] add tests Signed-off-by: Aayush Kumar --- scanpipe/tests/test_models.py | 10 ++++++ scanpipe/tests/test_pipelines.py | 58 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 67601d601..2a877a227 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -1645,6 +1645,16 @@ def test_scanpipe_can_compute_compliance_alert_for_license_exceptions(self): resource.update(detected_license_expression=license_expression) self.assertEqual("warning", resource.compute_compliance_alert()) + def test_scanpipe_codebase_root_parent_directory_path(self): + resource1 = self.project1.codebaseresources.create(path="file") + + self.assertEqual("", resource1.parent_directory_path) + + def test_scanpipe_codebase_regular_parent_directory_path(self): + resource2 = self.project1.codebaseresources.create(path="dir1/dir2/file") + + self.assertEqual("dir1/dir2", resource2.parent_directory_path) + def test_scanpipe_scan_fields_model_mixin_methods(self): expected = [ "detected_license_expression", diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 854c396f5..ff251d709 
100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -863,6 +863,64 @@ def test_scanpipe_scan_codebase_pipeline_integration(self): expected_file = self.data / "scancode" / "is-npm-1.0.0_scan_codebase.json" self.assertPipelineResultEqual(expected_file, result_file) + def test_scanpipe_scan_codebase_creates_top_level_paths(self): + pipeline_name = "scan_codebase" + project1 = make_project() + + filename = "is-npm-1.0.0.tgz" + input_location = self.data / "scancode" / filename + project1.copy_input_from(input_location) + + run = project1.add_pipeline(pipeline_name) + pipeline = run.make_pipeline_instance() + + exitcode, out = pipeline.execute() + self.assertEqual(0, exitcode, msg=out) + + expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] + + top_level_resources = project1.codebaseresources.filter( + parent_directory_path=None + ) + top_level_paths = [res.path for res in top_level_resources] + + self.assertListEqual(top_level_paths, expected_top_level_paths) + + def test_scanpipe_scan_codebase_creates_parent_directory_path_field(self): + pipeline_name = "scan_codebase" + project1 = make_project() + + filename = "is-npm-1.0.0.tgz" + input_location = self.data / "scancode" / filename + project1.copy_input_from(input_location) + + run = project1.add_pipeline(pipeline_name) + pipeline = run.make_pipeline_instance() + + exitcode, out = pipeline.execute() + self.assertEqual(0, exitcode, msg=out) + + expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] + expected_nested_paths = [ + "is-npm-1.0.0.tgz-extract/package/index.js", + "is-npm-1.0.0.tgz-extract/package/package.json", + "is-npm-1.0.0.tgz-extract/package/readme.md", + ] + + top_level_resources = project1.codebaseresources.filter( + parent_directory_path=None + ) + top_level_paths = [res.path for res in top_level_resources] + + self.assertListEqual(top_level_paths, expected_top_level_paths) + + nested_resources = 
project1.codebaseresources.filter( + parent_directory_path="is-npm-1.0.0.tgz-extract/package" + ) + nested_paths = [res.path for res in nested_resources] + + self.assertListEqual(nested_paths, expected_nested_paths) + def test_scanpipe_inspect_packages_creates_packages_npm(self): pipeline_name = "inspect_packages" project1 = make_project() From 474ce179340b14f0a66ab1668580478661f447db Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Sun, 15 Jun 2025 00:29:18 +0530 Subject: [PATCH 08/14] fix code format Signed-off-by: Aayush Kumar --- scanpipe/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index a98a36681..964acd7fd 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2814,7 +2814,9 @@ def __str__(self): def save(self, *args, **kwargs): if self.path and not self.parent_directory_path: - self.parent_directory_path = parent_directory(str(self.path), with_trail=False) + self.parent_directory_path = parent_directory( + str(self.path), with_trail=False + ) super().save(*args, **kwargs) def get_absolute_url(self): From 8476d63942a415993c63af9ed9be6d646fd5b993 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 17 Jun 2025 00:35:47 +0530 Subject: [PATCH 09/14] rename parent_directory_path field to parent_path Signed-off-by: Aayush Kumar --- ...e.py => 0073_codebaseresource_parent_path_and_more.py} | 6 +++--- scanpipe/models.py | 8 ++++---- scanpipe/pipes/__init__.py | 2 +- scanpipe/tests/test_models.py | 8 ++++---- scanpipe/tests/test_pipelines.py | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) rename scanpipe/migrations/{0073_codebaseresource_parent_directory_path_and_more.py => 0073_codebaseresource_parent_path_and_more.py} (73%) diff --git a/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py b/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py similarity index 73% rename from 
scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py rename to scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py index 561a459c5..ee1f5dd4a 100644 --- a/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py +++ b/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1.9 on 2025-06-14 10:11 +# Generated by Django 5.1.9 on 2025-06-16 17:42 from django.db import migrations, models @@ -12,11 +12,11 @@ class Migration(migrations.Migration): operations = [ migrations.AddField( model_name='codebaseresource', - name='parent_directory_path', + name='parent_path', field=models.CharField(blank=True, help_text='Path of the immediate parent directory of a resource. For top level resources the value is set to None', max_length=2000, null=True), ), migrations.AddIndex( model_name='codebaseresource', - index=models.Index(fields=['project', 'parent_directory_path'], name='scanpipe_co_project_f4a24b_idx'), + index=models.Index(fields=['project', 'parent_path'], name='scanpipe_co_project_008448_idx'), ), ] diff --git a/scanpipe/models.py b/scanpipe/models.py index 964acd7fd..41afebbd9 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2696,7 +2696,7 @@ class CodebaseResource( ), ) - parent_directory_path = models.CharField( + parent_path = models.CharField( max_length=2000, null=True, blank=True, @@ -2799,7 +2799,7 @@ class Meta: models.Index(fields=["compliance_alert"]), models.Index(fields=["is_binary"]), models.Index(fields=["is_text"]), - models.Index(fields=["project", "parent_directory_path"]), + models.Index(fields=["project", "parent_path"]), ] constraints = [ models.UniqueConstraint( @@ -2813,8 +2813,8 @@ def __str__(self): return self.path def save(self, *args, **kwargs): - if self.path and not self.parent_directory_path: - self.parent_directory_path = parent_directory( + if self.path and not self.parent_path: + self.parent_path = parent_directory( 
str(self.path), with_trail=False ) super().save(*args, **kwargs) diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index b8aa36cac..63dc4e4c2 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -96,7 +96,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): codebase_resource = CodebaseResource( project=project, path=relative_path, - parent_directory_path=parent_path, + parent_path=parent_path, **resource_data, ) diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 2a877a227..1912f0ba1 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -1645,15 +1645,15 @@ def test_scanpipe_can_compute_compliance_alert_for_license_exceptions(self): resource.update(detected_license_expression=license_expression) self.assertEqual("warning", resource.compute_compliance_alert()) - def test_scanpipe_codebase_root_parent_directory_path(self): + def test_scanpipe_codebase_root_parent_path(self): resource1 = self.project1.codebaseresources.create(path="file") - self.assertEqual("", resource1.parent_directory_path) + self.assertEqual("", resource1.parent_path) - def test_scanpipe_codebase_regular_parent_directory_path(self): + def test_scanpipe_codebase_regular_parent_path(self): resource2 = self.project1.codebaseresources.create(path="dir1/dir2/file") - self.assertEqual("dir1/dir2", resource2.parent_directory_path) + self.assertEqual("dir1/dir2", resource2.parent_path) def test_scanpipe_scan_fields_model_mixin_methods(self): expected = [ diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index ff251d709..ffc368ad5 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -880,13 +880,13 @@ def test_scanpipe_scan_codebase_creates_top_level_paths(self): expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] top_level_resources = project1.codebaseresources.filter( - 
parent_directory_path=None + parent_path=None ) top_level_paths = [res.path for res in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) - def test_scanpipe_scan_codebase_creates_parent_directory_path_field(self): + def test_scanpipe_scan_codebase_creates_parent_path_field(self): pipeline_name = "scan_codebase" project1 = make_project() @@ -908,14 +908,14 @@ def test_scanpipe_scan_codebase_creates_parent_directory_path_field(self): ] top_level_resources = project1.codebaseresources.filter( - parent_directory_path=None + parent_path=None ) top_level_paths = [res.path for res in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) nested_resources = project1.codebaseresources.filter( - parent_directory_path="is-npm-1.0.0.tgz-extract/package" + parent_path="is-npm-1.0.0.tgz-extract/package" ) nested_paths = [res.path for res in nested_resources] From 62e94622fbe761ab5a05111cc32429a6fb9dd65c Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 17 Jun 2025 00:38:49 +0530 Subject: [PATCH 10/14] fix code format Signed-off-by: Aayush Kumar --- scanpipe/models.py | 4 +--- scanpipe/tests/test_pipelines.py | 8 ++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index 41afebbd9..f176e69ad 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2814,9 +2814,7 @@ def __str__(self): def save(self, *args, **kwargs): if self.path and not self.parent_path: - self.parent_path = parent_directory( - str(self.path), with_trail=False - ) + self.parent_path = parent_directory(str(self.path), with_trail=False) super().save(*args, **kwargs) def get_absolute_url(self): diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index ffc368ad5..14cdc117a 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -879,9 +879,7 @@ def test_scanpipe_scan_codebase_creates_top_level_paths(self): 
expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] - top_level_resources = project1.codebaseresources.filter( - parent_path=None - ) + top_level_resources = project1.codebaseresources.filter(parent_path=None) top_level_paths = [res.path for res in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) @@ -907,9 +905,7 @@ def test_scanpipe_scan_codebase_creates_parent_path_field(self): "is-npm-1.0.0.tgz-extract/package/readme.md", ] - top_level_resources = project1.codebaseresources.filter( - parent_path=None - ) + top_level_resources = project1.codebaseresources.filter(parent_path=None) top_level_paths = [res.path for res in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) From 15296e9676c27d1451c887306e6dd3df305b95e7 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 17 Jun 2025 17:49:33 +0530 Subject: [PATCH 11/14] minor fixes and adjustments following review feedback Signed-off-by: Aayush Kumar --- .../0073_codebaseresource_parent_path_and_more.py | 2 +- scanpipe/models.py | 10 ++++++---- scanpipe/pipes/rootfs.py | 3 ++- scanpipe/tests/test_models.py | 2 +- scanpipe/tests/test_pipelines.py | 6 +++--- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py b/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py index ee1f5dd4a..b5bd8fc44 100644 --- a/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py +++ b/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py @@ -13,7 +13,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='codebaseresource', name='parent_path', - field=models.CharField(blank=True, help_text='Path of the immediate parent directory of a resource. For top level resources the value is set to None', max_length=2000, null=True), + field=models.CharField(blank=True, help_text='The path of the resource\'s parent directory. 
Set to None for top-level (root) resources. Used to efficiently retrieve a directory\'s contents.', max_length=2000, null=True), ), migrations.AddIndex( model_name='codebaseresource', diff --git a/scanpipe/models.py b/scanpipe/models.py index f176e69ad..0e0d6f5db 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2701,8 +2701,9 @@ class CodebaseResource( null=True, blank=True, help_text=_( - "Path of the immediate parent directory of a resource. " - "For top level resources the value is set to None" + "The path of the resource's parent directory. " + "Set to None for top-level (root) resources. " + "Used to efficiently retrieve a directory's contents." ), ) @@ -2814,7 +2815,7 @@ def __str__(self): def save(self, *args, **kwargs): if self.path and not self.parent_path: - self.parent_path = parent_directory(str(self.path), with_trail=False) + self.parent_path = self.parent_directory() super().save(*args, **kwargs) def get_absolute_url(self): @@ -2887,7 +2888,8 @@ def get_path_segments_with_subpath(self): def parent_directory(self): """Return the parent path for this CodebaseResource or None.""" - return parent_directory(self.path, with_trail=False) + parent_path = parent_directory(str(self.path), with_trail=False) + return None if parent_path == "" else parent_path def has_parent(self): """ diff --git a/scanpipe/pipes/rootfs.py b/scanpipe/pipes/rootfs.py index 144b8239d..95325d38d 100644 --- a/scanpipe/pipes/rootfs.py +++ b/scanpipe/pipes/rootfs.py @@ -139,8 +139,9 @@ def get_res(parent, fname): rootfs_path=rootfs_path, ) + # Explicitly yields the root directory as a resource when `with_dir` is True if with_dir: - rootfs_path = pipes.normalize_path("") + rootfs_path = "/" yield Resource( location=location, rootfs_path=rootfs_path, diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 1912f0ba1..142e46a98 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -1648,7 +1648,7 @@ def 
test_scanpipe_can_compute_compliance_alert_for_license_exceptions(self): def test_scanpipe_codebase_root_parent_path(self): resource1 = self.project1.codebaseresources.create(path="file") - self.assertEqual("", resource1.parent_path) + self.assertIsNone(resource1.parent_path) def test_scanpipe_codebase_regular_parent_path(self): resource2 = self.project1.codebaseresources.create(path="dir1/dir2/file") diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 14cdc117a..10643282c 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -880,7 +880,7 @@ def test_scanpipe_scan_codebase_creates_top_level_paths(self): expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] top_level_resources = project1.codebaseresources.filter(parent_path=None) - top_level_paths = [res.path for res in top_level_resources] + top_level_paths = [resource.path for resource in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) @@ -906,14 +906,14 @@ def test_scanpipe_scan_codebase_creates_parent_path_field(self): ] top_level_resources = project1.codebaseresources.filter(parent_path=None) - top_level_paths = [res.path for res in top_level_resources] + top_level_paths = [resource.path for resource in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) nested_resources = project1.codebaseresources.filter( parent_path="is-npm-1.0.0.tgz-extract/package" ) - nested_paths = [res.path for res in nested_resources] + nested_paths = [resource.path for resource in nested_resources] self.assertListEqual(nested_paths, expected_nested_paths) From 5bebfb2288bc290a899648c4d167a2e0e30295ad Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 17 Jun 2025 18:26:14 +0530 Subject: [PATCH 12/14] Simplify return statement in `parent_directory` for better readability Signed-off-by: Aayush Kumar --- scanpipe/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/scanpipe/models.py b/scanpipe/models.py index 0e0d6f5db..5448ccc38 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2889,7 +2889,7 @@ def get_path_segments_with_subpath(self): def parent_directory(self): """Return the parent path for this CodebaseResource or None.""" parent_path = parent_directory(str(self.path), with_trail=False) - return None if parent_path == "" else parent_path + return parent_path or None def has_parent(self): """ From d1635776f937bf32ae7d31b4c85dd40985e6010b Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 8 Jul 2025 20:03:32 +0530 Subject: [PATCH 13/14] bump migration Signed-off-by: Aayush Kumar --- ...nd_more.py => 0074_codebaseresource_parent_path_and_more.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename scanpipe/migrations/{0073_codebaseresource_parent_path_and_more.py => 0074_codebaseresource_parent_path_and_more.py} (91%) diff --git a/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py b/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py similarity index 91% rename from scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py rename to scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py index b5bd8fc44..332bf3faa 100644 --- a/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py +++ b/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('scanpipe', '0072_discovereddependency_uuid_unique'), + ('scanpipe', '0073_add_sha1_git_checksum'), ] operations = [ From 2b5b2f7548c71917ed18e81f26515b6e45579a7e Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Wed, 9 Jul 2025 01:22:06 +0530 Subject: [PATCH 14/14] update parent_path to display root files on empty string instead of None to align with the code format Signed-off-by: Aayush Kumar --- .../migrations/0074_codebaseresource_parent_path_and_more.py | 2 +- scanpipe/models.py | 3 +-- 
scanpipe/pipes/__init__.py | 2 +- scanpipe/tests/test_models.py | 2 +- scanpipe/tests/test_pipelines.py | 4 ++-- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py b/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py index 332bf3faa..efd41fe53 100644 --- a/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py +++ b/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py @@ -13,7 +13,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='codebaseresource', name='parent_path', - field=models.CharField(blank=True, help_text='The path of the resource\'s parent directory. Set to None for top-level (root) resources. Used to efficiently retrieve a directory\'s contents.', max_length=2000, null=True), + field=models.CharField(blank=True, help_text='The path of the resource\'s parent directory. Set to None for top-level (root) resources. Used to efficiently retrieve a directory\'s contents.', max_length=2000), ), migrations.AddIndex( model_name='codebaseresource', diff --git a/scanpipe/models.py b/scanpipe/models.py index 5448ccc38..944446d5a 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2698,7 +2698,6 @@ class CodebaseResource( parent_path = models.CharField( max_length=2000, - null=True, blank=True, help_text=_( "The path of the resource's parent directory. 
" @@ -2815,7 +2814,7 @@ def __str__(self): def save(self, *args, **kwargs): if self.path and not self.parent_path: - self.parent_path = self.parent_directory() + self.parent_path = self.parent_directory() or "" super().save(*args, **kwargs) def get_absolute_url(self): diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index 63dc4e4c2..1352c6d59 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -74,7 +74,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): parent_path = str(relative_path.parent) if parent_path == ".": - parent_path = None + parent_path = "" try: resource_data = scancode.get_resource_info(location=str(location)) diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 142e46a98..1912f0ba1 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -1648,7 +1648,7 @@ def test_scanpipe_can_compute_compliance_alert_for_license_exceptions(self): def test_scanpipe_codebase_root_parent_path(self): resource1 = self.project1.codebaseresources.create(path="file") - self.assertIsNone(resource1.parent_path) + self.assertEqual("", resource1.parent_path) def test_scanpipe_codebase_regular_parent_path(self): resource2 = self.project1.codebaseresources.create(path="dir1/dir2/file") diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 10643282c..40b567ba0 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -879,7 +879,7 @@ def test_scanpipe_scan_codebase_creates_top_level_paths(self): expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] - top_level_resources = project1.codebaseresources.filter(parent_path=None) + top_level_resources = project1.codebaseresources.filter(parent_path="") top_level_paths = [resource.path for resource in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) @@ -905,7 +905,7 @@ def 
test_scanpipe_scan_codebase_creates_parent_path_field(self): "is-npm-1.0.0.tgz-extract/package/readme.md", ] - top_level_resources = project1.codebaseresources.filter(parent_path=None) + top_level_resources = project1.codebaseresources.filter(parent_path="") top_level_paths = [resource.path for resource in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths)