Skip to content

Commit 267518b

Browse files
authored
Merge pull request #3042 from nexB/datafilehandler-yield-package-first
Update DatafileHandler default methods
2 parents 6cf0585 + 78547be commit 267518b

File tree

4 files changed

+73
-33
lines changed

4 files changed

+73
-33
lines changed

CHANGELOG.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@ Important API changes:
6060
under the ``venv`` subdirectory. You must be aware of this if you use ScanCode
6161
from a git clone
6262

63+
- ``DatafileHandler.assemble()``, ``DatafileHandler.assemble_from_many()``, and
64+
the other ``.assemble()`` methods from the other Package handlers from
65+
packagedcode, have been updated to yield Package items before Dependency or
66+
Resource items. This is particularly important in the case where we are calling
67+
the ``assemble()`` method outside of the scancode-toolkit context, where we
68+
need to ensure that a Package exists before we associate a Resource or
69+
Dependency to it.
6370

6471
Copyright detection:
6572
~~~~~~~~~~~~~~~~~~~~

src/packagedcode/alpine.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -81,16 +81,6 @@ def assemble(cls, package_data, resource, codebase, package_adder):
8181

8282
package.license_expression = cls.compute_normalized_license(package)
8383

84-
85-
dependent_packages = package_data.dependencies
86-
if dependent_packages:
87-
yield from models.Dependency.from_dependent_packages(
88-
dependent_packages=dependent_packages,
89-
datafile_path=resource.path,
90-
datasource_id=package_data.datasource_id,
91-
package_uid=package_uid,
92-
)
93-
9484
root_path = Path(root_resource.path)
9585
# a file ref extends from the root of the filesystem
9686
file_references_by_path = {
@@ -118,6 +108,15 @@ def assemble(cls, package_data, resource, codebase, package_adder):
118108
yield package
119109
yield from resources
120110

111+
dependent_packages = package_data.dependencies
112+
if dependent_packages:
113+
yield from models.Dependency.from_dependent_packages(
114+
dependent_packages=dependent_packages,
115+
datafile_path=resource.path,
116+
datasource_id=package_data.datasource_id,
117+
package_uid=package_uid,
118+
)
119+
121120

122121
class AlpineApkbuildHandler(models.DatafileHandler):
123122
datasource_id = 'alpine_apkbuild'

src/packagedcode/debian.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -241,14 +241,18 @@ def assemble(cls, package_data, resource, codebase, package_adder):
241241
package_file_references.extend(package_data.file_references)
242242
package_uid = package.package_uid
243243

244+
dependencies = []
244245
dependent_packages = package_data.dependencies
245246
if dependent_packages:
246-
yield from models.Dependency.from_dependent_packages(
247-
dependent_packages=dependent_packages,
248-
datafile_path=resource.path,
249-
datasource_id=package_data.datasource_id,
250-
package_uid=package_uid,
247+
deps = list(
248+
models.Dependency.from_dependent_packages(
249+
dependent_packages=dependent_packages,
250+
datafile_path=resource.path,
251+
datasource_id=package_data.datasource_id,
252+
package_uid=package_uid,
253+
)
251254
)
255+
dependencies.extend(deps)
252256

253257
# Multi-Arch can be: "foreign", "same", "allowed", "all", "optional" or
254258
# empty/non-present. See https://wiki.debian.org/Multiarch/HOWTO
@@ -312,12 +316,15 @@ def assemble(cls, package_data, resource, codebase, package_adder):
312316
# yield possible dependencies
313317
dependent_packages = package_data.dependencies
314318
if dependent_packages:
315-
yield from models.Dependency.from_dependent_packages(
316-
dependent_packages=dependent_packages,
317-
datafile_path=res.path,
318-
datasource_id=package_data.datasource_id,
319-
package_uid=package_uid,
319+
deps = list(
320+
models.Dependency.from_dependent_packages(
321+
dependent_packages=dependent_packages,
322+
datafile_path=res.path,
323+
datasource_id=package_data.datasource_id,
324+
package_uid=package_uid,
325+
)
320326
)
327+
dependencies.extend(deps)
321328

322329
resources.append(res)
323330

@@ -353,6 +360,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
353360

354361
yield package
355362
yield from resources
363+
yield from dependencies
356364

357365

358366
class DebianDistrolessInstalledDatabaseHandler(models.DatafileHandler):

src/packagedcode/models.py

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,12 @@ def assemble(cls, package_data, resource, codebase, package_adder=add_to_package
914914
not be further processed,
915915
- a Dependency to add to top-level dependencies
916916
917+
Package items must be yielded before Dependency or Resource items. This
918+
is to ensure that a Package is created before we associate a Resource or
919+
Dependency to a Package. This is particularly important in the case where
920+
we are calling the `assemble()` method outside of the scancode-toolkit
921+
context.
922+
917923
The approach is to find and process all the neighboring related datafiles
918924
to this datafile at once.
919925
@@ -938,14 +944,14 @@ def assemble(cls, package_data, resource, codebase, package_adder=add_to_package
938944
if not package.license_expression:
939945
package.license_expression = cls.compute_normalized_license(package)
940946

947+
yield package
948+
941949
cls.assign_package_to_resources(
942950
package=package,
943951
resource=resource,
944952
codebase=codebase,
945953
package_adder=package_adder,
946954
)
947-
948-
yield package
949955
else:
950956
# we have no package, so deps are not for a specific package uid
951957
package_uid = None
@@ -1038,6 +1044,13 @@ def assemble_from_many(cls, pkgdata_resources, codebase, package_adder=add_to_pa
10381044
This is a convenience method that subclasses can reuse when overriding
10391045
`assemble()`
10401046
1047+
Like in ``DatafileHandler.assemble()``, Package items must be yielded
1048+
before Dependency or Resource items. This is to ensure that a Package is
1049+
created before we associate a Resource or Dependency to a Package. This
1050+
is particularly important in the case where we are calling the
1051+
``assemble()`` method outside of the scancode-toolkit context, as
1052+
``assemble()`` can call ``assemble_from_many()``.
1053+
10411054
NOTE: ATTENTION!: this may not work well for datafile that yield
10421055
multiple PackageData for unrelated Packages
10431056
"""
@@ -1047,6 +1060,12 @@ def assemble_from_many(cls, pkgdata_resources, codebase, package_adder=add_to_pa
10471060

10481061
# process each package in sequence. The first item creates a package and
10491062
# the other only update
1063+
# We are saving the Packages, Dependencies, and Resources in lists until
1064+
# after we go through `pkgdata_resources` for all Package data, then we
1065+
# yield Packages, then Dependencies, then Resources.
1066+
dependencies = []
1067+
resources = []
1068+
resources_from_package = []
10501069
for package_data, resource in pkgdata_resources:
10511070
if not base_resource:
10521071
base_resource = resource
@@ -1059,8 +1078,6 @@ def assemble_from_many(cls, pkgdata_resources, codebase, package_adder=add_to_pa
10591078
datafile_path=resource.path,
10601079
)
10611080
package_uid = package.package_uid
1062-
if package_uid:
1063-
package_adder(package_uid, resource, codebase)
10641081
else:
10651082
# FIXME: What if the package_data is NOT for the same package as package?
10661083
# FIXME: What if the update did not do anything? (it does return True or False)
@@ -1069,31 +1086,40 @@ def assemble_from_many(cls, pkgdata_resources, codebase, package_adder=add_to_pa
10691086
package_data=package_data,
10701087
datafile_path=resource.path,
10711088
)
1072-
if package_uid:
1073-
package_adder(package_uid, resource, codebase)
1089+
1090+
if package_uid:
1091+
resources_from_package.append((package_uid, resource,))
10741092

10751093
# in all cases yield possible dependencies
10761094
dependent_packages = package_data.dependencies
10771095
if dependent_packages:
1078-
yield from Dependency.from_dependent_packages(
1096+
p_deps = Dependency.from_dependent_packages(
10791097
dependent_packages=dependent_packages,
10801098
datafile_path=resource.path,
10811099
datasource_id=package_data.datasource_id,
10821100
package_uid=package_uid,
10831101
)
1102+
dependencies.extend(list(p_deps))
10841103

10851104
# we yield this as we do not want this further processed
1086-
yield resource
1087-
1088-
# the whole parent subtree of the base_resource is for this package
1089-
if package_uid:
1090-
for res in base_resource.walk(codebase):
1091-
package_adder(package_uid, res, codebase)
1105+
resources.append(resource)
10921106

1107+
# Yield Packages, Dependencies, and Resources
10931108
if package:
10941109
if not package.license_expression:
10951110
package.license_expression = cls.compute_normalized_license(package)
10961111
yield package
1112+
yield from dependencies
1113+
yield from resources
1114+
1115+
# Associate Package to Resources once they have been yielded
1116+
for package_uid, resource in resources_from_package:
1117+
package_adder(package_uid, resource, codebase)
1118+
1119+
# the whole parent subtree of the base_resource is for this package
1120+
if package_uid:
1121+
for res in base_resource.walk(codebase):
1122+
package_adder(package_uid, res, codebase)
10971123

10981124
@classmethod
10991125
def assemble_from_many_datafiles(

0 commit comments

Comments
 (0)