9
9
10
10
import os
11
11
import logging
12
+ from collections import Counter
12
13
from pathlib import Path
13
14
14
15
from commoncode import fileutils
@@ -137,6 +138,7 @@ def parse(cls, location):
137
138
debian_data = get_paragraph_data_from_file (location = location ),
138
139
datasource_id = cls .datasource_id ,
139
140
package_type = cls .default_package_type ,
141
+ distro = 'debian' ,
140
142
)
141
143
142
144
@classmethod
@@ -157,15 +159,19 @@ class DebianControlFileInSourceHandler(models.DatafileHandler):
157
159
158
160
@classmethod
159
161
def parse(cls, location):
    """
    Yield the package data built from each paragraph of the file at
    `location`, after passing them through populate_debian_namespace().
    """
    # NOTE: a control file in a source repo or debian.tar tarball can
    # contain more than one package
    paragraphs = get_paragraphs_data_from_file(location=location)
    debian_packages = [
        build_package_data(
            debian_data=paragraph,
            datasource_id=cls.datasource_id,
            package_type=cls.default_package_type,
        )
        for paragraph in paragraphs
    ]

    yield from populate_debian_namespace(debian_packages)
174
+
169
175
@classmethod
170
176
def assign_package_to_resources (cls , package , resource , codebase , package_adder ):
171
177
# two levels up
@@ -191,11 +197,19 @@ def parse(cls, location):
191
197
location = location ,
192
198
remove_pgp_signature = True ,
193
199
)
194
- yield build_package_data (
200
+
201
+ package_data_from_file = build_package_data_from_package_filename (
202
+ filename = os .path .basename (location ),
203
+ datasource_id = cls .datasource_id ,
204
+ package_type = cls .default_package_type ,
205
+ )
206
+ package_data = build_package_data (
195
207
debian_data = debian_data ,
196
208
datasource_id = cls .datasource_id ,
197
209
package_type = cls .default_package_type ,
198
210
)
211
+ package_data .update_purl_fields (package_data = package_data_from_file )
212
+ yield package_data
199
213
200
214
@classmethod
201
215
def assign_package_to_resources (cls , package , resource , codebase , package_adder ):
@@ -214,13 +228,18 @@ class DebianInstalledStatusDatabaseHandler(models.DatafileHandler):
214
228
def parse(cls, location):
    """
    Yield the package data built from each paragraph of the file at
    `location`, after passing them through populate_debian_namespace().
    """
    # note that we do not know yet the distro at this stage
    # we could get it... but we get that later during assemble()
    paragraphs = get_paragraphs_data_from_file(location=location)
    debian_packages = [
        build_package_data(
            debian_data=paragraph,
            datasource_id=cls.datasource_id,
            package_type=cls.default_package_type,
        )
        for paragraph in paragraphs
    ]

    yield from populate_debian_namespace(debian_packages)
242
+
224
243
@classmethod
225
244
def assemble (cls , package_data , resource , codebase , package_adder ):
226
245
# get the root resource of the rootfs
@@ -260,7 +279,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
260
279
261
280
# We only need to adjust the md5sum/list path in the case of `same`
262
281
qualifiers = package_data .qualifiers or {}
263
- architecture = qualifiers .get ('architecture ' )
282
+ architecture = qualifiers .get ('arch ' )
264
283
265
284
multi_arch = package_data .extra_data .get ('multi_arch' )
266
285
@@ -305,6 +324,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
305
324
package .update (
306
325
package_data = package_data ,
307
326
datafile_path = res .path ,
327
+ check_compatible = False ,
308
328
replace = False ,
309
329
include_version = False ,
310
330
include_qualifiers = False ,
@@ -379,14 +399,18 @@ def parse(cls, location):
379
399
rootfs installation. distroless is derived from Debian but each package
380
400
has its own status file.
381
401
"""
382
- for debian_data in get_paragraphs_data_from_file (location ):
383
- yield build_package_data (
384
- debian_data ,
385
- datasource_id = cls .datasource_id ,
386
- package_type = cls .default_package_type ,
387
- distro = 'distroless' ,
402
+ debian_packages = []
403
+ for debian_data in get_paragraphs_data_from_file (location = location ):
404
+ debian_packages .append (
405
+ build_package_data (
406
+ debian_data = debian_data ,
407
+ datasource_id = cls .datasource_id ,
408
+ package_type = cls .default_package_type ,
409
+ )
388
410
)
389
411
412
+ yield from populate_debian_namespace (debian_packages )
413
+
390
414
@classmethod
391
415
def assemble (cls , package_data , resource , codebase , package_adder ):
392
416
# get the root resource of the rootfs
@@ -523,6 +547,9 @@ def build_package_data_from_package_filename(filename, datasource_id, package_ty
523
547
"""
524
548
525
549
# TODO: we cannot know the distro from the name only
550
+ # A PURL without a namespace is invalid, so we need to
551
+ # have a default value for this
552
+ distro = 'debian'
526
553
deb = DebArchive .from_filename (filename = filename )
527
554
528
555
if deb .architecture :
@@ -538,6 +565,7 @@ def build_package_data_from_package_filename(filename, datasource_id, package_ty
538
565
datasource_id = datasource_id ,
539
566
type = package_type ,
540
567
name = deb .name ,
568
+ namespace = distro ,
541
569
version = version ,
542
570
qualifiers = qualifiers ,
543
571
)
@@ -598,7 +626,7 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No
598
626
qualifiers = {}
599
627
architecture = debian_data .get ('architecture' )
600
628
if architecture :
601
- qualifiers ['architecture ' ] = architecture
629
+ qualifiers ['arch ' ] = architecture
602
630
603
631
extra_data = {}
604
632
# Multi-Arch can be: "foreign", "same", "allowed", "all", "optional" or
@@ -628,13 +656,27 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No
628
656
if keyword :
629
657
keywords .append (keyword )
630
658
659
+ # Get distro/namespace information from clues in package data
660
+ if not distro :
661
+ if version :
662
+ for clue , namespace in version_clues_for_namespace .items ():
663
+ if clue in version :
664
+ distro = namespace
665
+ break
666
+
667
+ if maintainer :
668
+ for clue , namespace in maintainer_clues_for_namespace .items ():
669
+ if clue in maintainer :
670
+ distro = namespace
671
+ break
672
+
631
673
source_packages = []
632
674
source = debian_data .get ('source' )
633
675
if source :
634
676
source_pkg_purl = PackageURL (
635
677
type = package_type ,
636
678
name = source ,
637
- namespace = distro
679
+ namespace = distro ,
638
680
).to_string ()
639
681
640
682
source_packages .append (source_pkg_purl )
@@ -656,6 +698,46 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No
656
698
)
657
699
658
700
701
def populate_debian_namespace(packages):
    """
    For an iterable of debian `packages`, yield each package after setting
    the namespace of every package without one to the most frequently
    occurring namespace among the packages that do have one, or to the
    default namespace 'debian' when no package has a namespace.

    Packages that already have a namespace are yielded unchanged.
    Nothing is yielded for an empty or None `packages`.
    """
    if not packages:
        return

    # Two passes are needed (count, then assign): materialize so that a
    # one-shot iterator is not exhausted by the counting pass.
    packages = list(packages)

    # Count only real namespaces. Counting the missing ones would let
    # "no namespace" win the vote whenever most packages lack one, and the
    # detected namespace would then be ignored in favor of the default.
    namespaces_with_count = Counter(
        package.namespace
        for package in packages
        if package.namespace
    )
    if namespaces_with_count:
        distro = max(namespaces_with_count, key=namespaces_with_count.get)
    else:
        distro = 'debian'

    for package in packages:
        if not package.namespace:
            package.namespace = distro
        yield package
722
+
723
+
724
# Mapping of a substring to look for in a package version to the
# distro namespace that this substring is a clue for.
version_clues_for_namespace = dict(
    deb='debian',
    ubuntu='ubuntu',
)
728
+
729
+
730
# Mapping of a substring to look for in a package maintainer to the
# distro namespace that this substring is a clue for.
maintainer_clues_for_namespace = {
    **dict.fromkeys(
        (
            'packages.debian.org',
            'lists.debian.org',
            'lists.alioth.debian.org',
            '@debian.org',
            'debian-init-diversity@',
        ),
        'debian',
    ),
    **dict.fromkeys(
        (
            'lists.ubuntu.com',
            '@canonical.com',
        ),
        'ubuntu',
    ),
}
739
+
740
+
659
741
ignored_root_dirs = {
660
742
'/.' ,
661
743
'/bin' ,
0 commit comments