aboutcode-org · VarshaUN · Jan 6, 2025 · Feb 26, 2025 · Feb 26, 2025 · Feb 26, 2025
diff --git a/src/packagedcode/pypi.py b/src/packagedcode/pypi.py
@@ -45,6 +45,7 @@
 from packagedcode.utils import yield_dependencies_from_package_data
 from packagedcode.utils import yield_dependencies_from_package_resource
 from packagedcode.utils import get_base_purl
+from packagedcode.utils import is_private_package
 
 try:
     from zipfile import Path as ZipPath
@@ -473,7 +474,8 @@ def parse(cls, location, package_only=False):
         description = project_data.get('description') or ''
         description = description.strip()
 
-        urls, extra_data = get_urls(metainfo=project_data, name=name, version=version)
+        is_private = is_private_package(project_data.get('classifiers', []))
+        urls, extra_data = get_urls(metainfo=project_data, name=name, version=version, is_private=is_private)
 
         extracted_license_statement, license_file = get_declared_license(project_data)
         if license_file:
@@ -504,12 +506,13 @@ def parse(cls, location, package_only=False):
             keywords=get_keywords(project_data),
             parties=get_pyproject_toml_parties(project_data),
             dependencies=dependencies,
+            is_private=is_private,
             extra_data=extra_data,
             **urls,
         )
         yield models.PackageData.from_data(package_data, package_only)
 
 
 def is_poetry_pyproject_toml(location):
     with open(location, 'r') as file:
         data = file.read()
@@ -698,6 +701,8 @@ def parse(cls, location, package_only=False):
                 )
                 dependencies.append(dependency.to_dict())
 
+        is_private = is_private_package(poetry_data.get('classifiers', []))
+
         package_data = dict(
             datasource_id=cls.datasource_id,
             type=cls.default_package_type,
@@ -709,6 +714,7 @@ def parse(cls, location, package_only=False):
             keywords=get_keywords(poetry_data),
             parties=get_pyproject_toml_parties(poetry_data),
             extra_data=extra_data,
+            is_private=is_private,
             dependencies=dependencies,
             **urls,
         )
@@ -976,6 +982,9 @@ def parse_metadata(location, datasource_id, package_type, package_only=False):
     if license_file:
         extra_data['license_file'] = license_file
 
+    classifiers = get_attribute(meta, 'Classifier', multiple=True)
+    is_private = is_private_package(classifiers)
+
     # FIXME: We are getting dependencies from other sibling files, this is duplicated
     # data at the package_data level, is this necessary? We also have the entire dependency
     # relationships here at requires.txt present in ``.egg-info`` should we store these
@@ -996,6 +1005,7 @@ def parse_metadata(location, datasource_id, package_type, package_only=False):
         dependencies=dependencies,
         file_references=file_references,
         extra_data=extra_data,
+        is_private=is_private,
         **urls,
     )
     return models.PackageData.from_data(package_data, package_only)
@@ -1161,7 +1171,9 @@ def parse(cls, location, package_only=False):
             # search for possible dunder versions here and elsewhere
             version = detect_version_attribute(location)
 
-        urls, extra_data = get_urls(metainfo=setup_args, name=name, version=version)
+        is_private = is_private_package(setup_args.get('classifiers', []))
+
+        urls, extra_data = get_urls(metainfo=setup_args, name=name, version=version, is_private=is_private)
 
         dependencies = get_setup_py_dependencies(setup_args)
         python_requires = get_setup_py_python_requires(setup_args)
@@ -1171,6 +1183,7 @@ def parse(cls, location, package_only=False):
         if license_file:
             extra_data['license_file'] = license_file
 
+
         package_data = dict(
             datasource_id=cls.datasource_id,
             type=cls.default_package_type,
@@ -1182,6 +1195,7 @@ def parse(cls, location, package_only=False):
             extracted_license_statement=extracted_license_statement,
             dependencies=dependencies,
             keywords=get_keywords(setup_args),
+            is_private=is_private,
             extra_data=extra_data,
             **urls,
         )
@@ -1300,6 +1314,9 @@ def parse(cls, location, package_only=False):
                 extracted_license_statement = ''
             extracted_license_statement += f" license_files: {license_file_references}"
 
+        classifiers = parser.get('metadata', 'classifiers', fallback='').splitlines()
+        is_private = is_private_package(classifiers)
+
         package_data = dict(
             datasource_id=cls.datasource_id,
             type=cls.default_package_type,
@@ -1309,6 +1326,7 @@ def parse(cls, location, package_only=False):
             homepage_url=metadata.get('url'),
             primary_language=cls.default_primary_language,
             dependencies=dependent_packages,
+            is_private=is_private,
             extracted_license_statement=extracted_license_statement,
         )
         yield models.PackageData.from_data(package_data, package_only)
@@ -2243,7 +2261,7 @@ def get_pypi_urls(name, version, **kwargs):
     )
 
 
-def get_urls(metainfo, name, version, poetry=False):
+def get_urls(metainfo, name, version, poetry=False, is_private=False):
     """
     Return a mapping of standard URLs and a mapping of extra-data URls for URLs
     of this package:
@@ -2285,6 +2303,9 @@ def get_urls(metainfo, name, version, poetry=False):
     # Project-URL: Say Thanks!
 
     extra_data = {}
+    if is_private:
+        return {}, {}
+
     urls = get_pypi_urls(name, version)
 
     def add_url(_url, _utype=None, _attribute=None):

diff --git a/src/packagedcode/utils.py b/src/packagedcode/utils.py
@@ -304,3 +304,22 @@ def is_simple_path(path):
 
 def is_simple_path_pattern(path):
     return path.endswith('*') and path.count('*') == 1
+
+
+def is_private_package(classifiers):
+    """
+    Return True if any of the ``classifiers`` strings starts with the
+    "Private ::" prefix, such as "Private :: Do Not Upload".
+
+    PyPI rejects uploads of packages that have a classifier beginning with
+    "Private ::", so the marker is matched as a leading prefix and not
+    anywhere inside a classifier. ``classifiers`` may be None or empty; a
+    single string is read as one classifier per line; items that are not
+    strings are ignored.
+    """
+    if isinstance(classifiers, str):
+        classifiers = classifiers.splitlines()
+    return any(
+        isinstance(classifier, str) and classifier.strip().startswith('Private ::')
+        for classifier in classifiers or ()
+    )
diff --git a/tests/packagedcode/data/pypi/develop/PKG-INFO b/tests/packagedcode/data/pypi/develop/PKG-INFO
@@ -0,0 +1,5 @@
+Metadata-Version: 2.1
+Name: example_egg
+Version: 1.0.0
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Private :: Do Not Upload
diff --git a/tests/packagedcode/data/pypi/develop/private-classifier-egg-info-expected.json b/tests/packagedcode/data/pypi/develop/private-classifier-egg-info-expected.json
@@ -0,0 +1,49 @@
+[
+  {
+    "type": "pypi",
+    "namespace": null,
+    "name": "example_egg",
+    "version": "1.0.0",
+    "qualifiers": {},
+    "subpath": null,
+    "primary_language": "Python",
+    "description": "",
+    "release_date": null,
+    "parties": [],
+    "keywords": [
+      "Development Status :: 5 - Production/Stable",
+      "Private :: Do Not Upload"
+    ],
+    "homepage_url": null,
+    "download_url": null,
+    "size": null,
+    "sha1": null,
+    "md5": null,
+    "sha256": null,
+    "sha512": null,
+    "bug_tracking_url": null,
+    "code_view_url": null,
+    "vcs_url": null,
+    "copyright": null,
+    "holder": null,
+    "declared_license_expression": null,
+    "declared_license_expression_spdx": null,
+    "license_detections": [],
+    "other_license_expression": null,
+    "other_license_expression_spdx": null,
+    "other_license_detections": [],
+    "extracted_license_statement": null,
+    "notice_text": null,
+    "source_packages": [],
+    "file_references": [],
+    "is_private": true,
+    "is_virtual": false,
+    "extra_data": {},
+    "dependencies": [],
+    "repository_homepage_url": "https://pypi.org/project/example_egg",
+    "repository_download_url": "https://pypi.org/packages/source/e/example_egg/example_egg-1.0.0.tar.gz",
+    "api_data_url": "https://pypi.org/pypi/example_egg/1.0.0/json",
+    "datasource_id": "pypi_egg_info",
+    "purl": "pkg:pypi/example-egg@1.0.0"
+  }
+]
diff --git a/...gedcode/data/pypi/pyproject-toml/standard/private-classifier-pyproject.toml-expected.json b/...gedcode/data/pypi/pyproject-toml/standard/private-classifier-pyproject.toml-expected.json
@@ -0,0 +1,114 @@
+[
+  {
+    "type": "pypi",
+    "namespace": null,
+    "name": "titanic_ml",
+    "version": "0.1.0",
+    "qualifiers": {},
+    "subpath": null,
+    "primary_language": "Python",
+    "description": "titanic_ml example package",
+    "release_date": null,
+    "parties": [
+      {
+        "type": "person",
+        "role": "author",
+        "name": "Niels Zeilemaker",
+        "email": "nielszeilemaker@xebia.com",
+        "url": null
+      }
+    ],
+    "keywords": [
+      "Development Status :: 4 - Beta",
+      "Intended Audience :: Developers",
+      "Private :: Do Not Upload"
+    ],
+    "homepage_url": null,
+    "download_url": null,
+    "size": null,
+    "sha1": null,
+    "md5": null,
+    "sha256": null,
+    "sha512": null,
+    "bug_tracking_url": null,
+    "code_view_url": null,
+    "vcs_url": null,
+    "copyright": null,
+    "holder": null,
+    "declared_license_expression": null,
+    "declared_license_expression_spdx": null,
+    "license_detections": [],
+    "other_license_expression": null,
+    "other_license_expression_spdx": null,
+    "other_license_detections": [],
+    "extracted_license_statement": null,
+    "notice_text": null,
+    "source_packages": [],
+    "file_references": [],
+    "is_private": true,
+    "is_virtual": false,
+    "extra_data": {},
+    "dependencies": [
+      {
+        "purl": "pkg:pypi/pyspark",
+        "extracted_requirement": null,
+        "scope": "install",
+        "is_runtime": true,
+        "is_optional": false,
+        "is_pinned": false,
+        "is_direct": true,
+        "resolved_package": {},
+        "extra_data": {}
+      },
+      {
+        "purl": "pkg:pypi/sklearn",
+        "extracted_requirement": null,
+        "scope": "install",
+        "is_runtime": true,
+        "is_optional": false,
+        "is_pinned": false,
+        "is_direct": true,
+        "resolved_package": {},
+        "extra_data": {}
+      },
+      {
+        "purl": "pkg:pypi/tox",
+        "extracted_requirement": null,
+        "scope": "dev",
+        "is_runtime": true,
+        "is_optional": true,
+        "is_pinned": false,
+        "is_direct": true,
+        "resolved_package": {},
+        "extra_data": {}
+      },
+      {
+        "purl": "pkg:pypi/pre-commit",
+        "extracted_requirement": null,
+        "scope": "dev",
+        "is_runtime": true,
+        "is_optional": true,
+        "is_pinned": false,
+        "is_direct": true,
+        "resolved_package": {},
+        "extra_data": {}
+      },
+      {
+        "purl": "pkg:pypi/bump2version",
+        "extracted_requirement": null,
+        "scope": "dev",
+        "is_runtime": true,
+        "is_optional": true,
+        "is_pinned": false,
+        "is_direct": true,
+        "resolved_package": {},
+        "extra_data": {}
+      }
+    ],
+    "repository_homepage_url": null,
+    "repository_download_url": null,
+    "api_data_url": null,
+    "datasource_id": "pypi_pyproject_toml",
+    "purl": "pkg:pypi/titanic-ml@0.1.0"
+  }
+]
diff --git a/...s/packagedcode/data/pypi/pyproject-toml/standard/python-private-classifier/pyproject.toml b/...s/packagedcode/data/pypi/pyproject-toml/standard/python-private-classifier/pyproject.toml
@@ -0,0 +1,29 @@
+# Taken from: https://xebia.com/blog/minimal-pyproject-toml-example/
+
+[project]
+name = "titanic_ml"
+description = "titanic_ml example package"
+version = "0.1.0"
+authors = [
+    { name = "Niels Zeilemaker", email = "nielszeilemaker@xebia.com" }
+]
+dependencies = [
+    "pyspark[ml]",
+    "sklearn"
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "Private :: Do Not Upload"
+]
+
+[project.optional-dependencies]
+dev = [
+    "tox",
+    "pre-commit",
+    "bump2version"
+]
+
+[build-system]
+build-backend = "flit_core.buildapi"
+requires = ["flit_core >=3.2,<4"]
diff --git a/tests/packagedcode/data/pypi/setup.py/private-classifier-setup.py b/tests/packagedcode/data/pypi/setup.py/private-classifier-setup.py
@@ -0,0 +1,10 @@
+from setuptools import setup
+
+setup(
+    name="example_setup",
+    version="1.0.0",
+    classifiers=[
+        "Development Status :: 5 - Production/Stable",
+        "Private :: Do Not Upload",
+    ],
+)