From 088228942759fbed22150d36ff3f755f060390ea Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Wed, 9 Apr 2025 18:30:01 +0800 Subject: [PATCH 1/6] #596 - Added code for golang collector and updated the miner code for golang Signed-off-by: Chin Yeung Li --- minecode/collectors/golang.py | 170 ++++++++++++++++++++++++++++++++++ minecode/miners/golang.py | 42 +++++++++ 2 files changed, 212 insertions(+) create mode 100644 minecode/collectors/golang.py diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py new file mode 100644 index 00000000..f355d785 --- /dev/null +++ b/minecode/collectors/golang.py @@ -0,0 +1,170 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import logging + +import requests + +from packageurl import PackageURL + +from minecode import priority_router +from minecode.collectors.generic import map_fetchcode_supported_package +from minecode.miners.golang import build_packages_from_gitlab + + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +def extract_golang_purl(purl): + """ + Extract the name, namespace and version of a given purl. + """ + # Strip "pkg:golang/" + purl_body = purl[len("pkg:golang/") :] + + # Extract namespace, name, and version + parts = purl_body.split("/") + version = parts[-1].split("@")[-1] + namespace = parts[1] + name = parts[2].partition("@")[0] + + return namespace, name, version + + +def gitlab_get_package_json(namespace, name): + """ + Return the contents of the JSON file of the package. + """ + # Create URLs using purl fields + url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}" + + try: + response = requests.get(url) + response.raise_for_status() + return response.json() + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") + + +def gitlab_get_all_package_version_author(namespace, name): + """ + Return a list of all version numbers along with author and author email + for the package. + """ + repo_tags = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/tags" + try: + response = requests.get(repo_tags) + response.raise_for_status() + data = response.json() + version_author_list = [] + # Get all available versions + for item in data: + if not item["release"]: + continue + version = item["release"]["tag_name"] + author = item["commit"]["author_name"] + author_email = item["commit"]["author_email"] + version_author_list.append((version, author, author_email)) + return version_author_list + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") + + +def map_golang_package(package_url, package_json, pipelines, priority=0): + """ + Add a pypi `package_url` to the PackageDB. + + Return an error string if any errors are encountered during the process + """ + from minecode.model_utils import add_package_to_scan_queue + from minecode.model_utils import merge_or_create_package + + error = "" + + if not package_json: + error = f"Package does not exist on PyPI: {package_url}" + logger.error(error) + return error + + packages = build_packages_from_gitlab(package_json, package_url) + + for package in packages: + db_package, _, _, error = merge_or_create_package(package, visit_level=0) + if error: + break + + # Submit package for scanning + if db_package: + add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority) + + return error + + +# It may need indexing GitHub PURLs that requires a GitHub API token. +# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. +@priority_router.route("pkg:golang/.*") +def process_request_dir_listed(purl_str, **kwargs): + """ + Process `priority_resource_uri` containing a GitHub Package URL (PURL). + + This involves obtaining Package information for the PURL using + https://github.com/aboutcode-org/fetchcode and using it to create a new + PackageDB entry. The package is then added to the scan queue afterwards. + """ + from minecode.model_utils import DEFAULT_PIPELINES + + addon_pipelines = kwargs.get("addon_pipelines", []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get("priority", 0) + + try: + # FIXME: This is not working for some reasons. + # It'll work if I input the same updated_purl_str in the UI + namespace, name, version = extract_golang_purl(purl_str) + if purl_str.startswith("pkg:golang/github"): + # Construct the GitHub purl + github_purl = f"pkg:github/{namespace}/{name}@{version}" + package_url = PackageURL.from_string(github_purl) + error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) + if error_msg: + return error_msg + elif purl_str.startswith("pkg:golang/gitlab"): + package_url = PackageURL.from_string(purl_str) + package_json = gitlab_get_package_json(namespace, name) + repo_version_author_list = gitlab_get_all_package_version_author(namespace, name) + if version: + for repo_version, author, email in repo_version_author_list: + # Check the version along with stripping the first + # character 'v' in the repo_version + if version == repo_version or version == repo_version[1:]: + download_url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/archive.zip?sha={repo_version}" + response = requests.head(download_url, allow_redirects=True) + redirected_download_url = response.url + package_json["download_url"] = redirected_download_url + package_json["author"] = author + package_json["email"] = email + error_msg = map_golang_package( + package_url, package_json, pipelines, priority + ) + break + else: + for repo_version, author, email in repo_version_author_list: + download_url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/archive.zip?sha={repo_version}" + response = requests.head(download_url, allow_redirects=True) + redirected_download_url = response.url + package_json["download_url"] = redirected_download_url + package_json["author"] = author + package_json["email"] = email + error_msg = map_golang_package(package_url, package_json, pipelines, priority) + + except ValueError as e: + error = f"error occurred when parsing {purl_str}: {e}" + return error diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index e54593b6..52164549 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -237,3 +237,45 @@ def build_golang_package(package_data, purl): vcs_url=vcs_url, ) return package + + +def build_packages_from_gitlab(metadata_dict, purl): + """ + Yield ScannedPackage built from Gitlab. + + The metadata_dict is a dictionary. + + purl: String value of the package url of the ResourceURI object + """ + id = metadata_dict["id"] + name = metadata_dict["name"] + version = purl.version + description = metadata_dict["description"] + repository_homepage_url = metadata_dict["http_url_to_repo"] + download_url = metadata_dict["download_url"] + author = metadata_dict["author"] + email = metadata_dict["email"] + + license_url = f"https://gitlab.com/api/v4/projects/{id}/repository/files/LICENSE/raw" + extracted_license_statement = [license_url] + + common_data = dict( + name=name, + version=version, + description=description, + repository_homepage_url=repository_homepage_url, + extracted_license_statement=extracted_license_statement, + download_url=download_url, + ) + + if author: + parties = common_data.get("parties") + if not parties: + common_data["parties"] = [] + common_data["parties"].append(scan_models.Party(name=author, role="author", email=email)) + + package = scan_models.PackageData.from_data(common_data) + + package.datasource_id = "golang_api_metadata" + package.set_purl(purl) + yield package From 1c1fbd78c68003a16f9fb7895e109cb83adc73c9 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Fri, 11 Apr 2025 16:15:38 +0800 Subject: [PATCH 2/6] #596 - Working in progress to handle bitbucket.org (It contains a lot of test code that will need to be removed). Signed-off-by: Chin Yeung Li --- minecode/collectors/golang.py | 198 ++++++++++++++++++++++++++-------- minecode/miners/bitbucket.py | 37 +++++++ minecode/miners/gitlab.py | 57 ++++++++++ minecode/miners/golang.py | 43 +------- minecode/model_utils.py | 7 +- 5 files changed, 253 insertions(+), 89 deletions(-) diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index f355d785..f46bd095 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -14,8 +14,8 @@ from minecode import priority_router from minecode.collectors.generic import map_fetchcode_supported_package -from minecode.miners.golang import build_packages_from_gitlab - +from minecode.miners.gitlab import build_packages_from_json_golang +from minecode.miners.bitbucket import build_bitbucket_packages logger = logging.getLogger(__name__) handler = logging.StreamHandler() @@ -23,28 +23,49 @@ logger.setLevel(logging.INFO) -def extract_golang_purl(purl): +def extract_golang__subset_purl(purl_str): """ - Extract the name, namespace and version of a given purl. + Extract the first two swgments after github.com or bitbucket.org and + version For instance, pkg:golang/github.com/rickar/cal/v2/aa@2.1.23 + Return + subset_path: rickar/cal + version: 2.1.23 """ # Strip "pkg:golang/" - purl_body = purl[len("pkg:golang/") :] + purl_body = purl_str[len("pkg:golang/") :] # Extract namespace, name, and version parts = purl_body.split("/") - version = parts[-1].split("@")[-1] - namespace = parts[1] - name = parts[2].partition("@")[0] + version = "" + if "@" in purl_str: + version = purl_str.rpartition("@")[2] + subset_path = parts[1] + "/" + parts[2] + + return subset_path, version - return namespace, name, version + +def gitlab_updated_purl(purl_str): + """ + Return the path between "pkg:golang/gitlab.com/" and version with + replacing "/" with "%2F" and version + """ + version = "" + if "@" in purl_str: + version = purl_str.rpartition("@")[2] + subset = purl_str.partition("pkg:golang/gitlab.com/")[2].partition("@")[0] + subset_path = subset.replace("/", "%2F") + return subset_path, version -def gitlab_get_package_json(namespace, name): +def get_package_json(subset_path, type): """ Return the contents of the JSON file of the package. """ # Create URLs using purl fields - url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}" + if type == "gitlab": + url = f"https://gitlab.com/api/v4/projects/{subset_path}" + elif type == "bitbucket": + url = f"https://api.bitbucket.org/2.0/repositories/{subset_path}" try: response = requests.get(url) @@ -54,12 +75,12 @@ def gitlab_get_package_json(namespace, name): logger.error(f"HTTP error occurred: {err}") -def gitlab_get_all_package_version_author(namespace, name): +def gitlab_get_all_package_version_author(subset_path): """ Return a list of all version numbers along with author and author email for the package. """ - repo_tags = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/tags" + repo_tags = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/tags" try: response = requests.get(repo_tags) response.raise_for_status() @@ -67,9 +88,7 @@ def gitlab_get_all_package_version_author(namespace, name): version_author_list = [] # Get all available versions for item in data: - if not item["release"]: - continue - version = item["release"]["tag_name"] + version = item["name"] author = item["commit"]["author_name"] author_email = item["commit"]["author_email"] version_author_list.append((version, author, author_email)) @@ -78,9 +97,33 @@ def gitlab_get_all_package_version_author(namespace, name): logger.error(f"HTTP error occurred: {err}") -def map_golang_package(package_url, package_json, pipelines, priority=0): +def bitbucket_get_all_package_version_author(subset_path): + """ + Return a list of all version numbers along with author for the package. + """ + repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags" + try: + response = requests.get(repo_tags) + response.raise_for_status() + data = response.json() + version_author_list = [] + if data["size"] > 0: + # Get all available versions + for item in data["values"]: + version = item["name"] + print(version) + author = "" + if item["tagger"]["type"] == "author": + author = item["tagger"]["raw"] + version_author_list.append((version, author)) + return version_author_list + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") + + +def map_golang_package(package_url, package_json, pipelines, priority=0, filename=None): """ - Add a pypi `package_url` to the PackageDB. + Add a golang `package_url` to the PackageDB. Return an error string if any errors are encountered during the process """ @@ -90,14 +133,18 @@ def map_golang_package(package_url, package_json, pipelines, priority=0): error = "" if not package_json: - error = f"Package does not exist on PyPI: {package_url}" + error = f"Package does not exist: {package_url}" logger.error(error) return error - packages = build_packages_from_gitlab(package_json, package_url) + purl_str = package_url.to_string() + if purl_str.startswith("pkg:golang/gitlab"): + packages = build_packages_from_json_golang(package_json, package_url) + elif purl_str.startswith("pkg:golang/bitbucket"): + packages = build_bitbucket_packages(package_json, package_url) for package in packages: - db_package, _, _, error = merge_or_create_package(package, visit_level=0) + db_package, _, _, error = merge_or_create_package(package, visit_level=0, filename=filename) if error: break @@ -108,10 +155,31 @@ def map_golang_package(package_url, package_json, pipelines, priority=0): return error +def process_download_metadata(download_url, package_json): + """ + Return the download_url and the filename + """ + response = requests.head(download_url, allow_redirects=True) + redirected_download_url = response.url + # Sometimes, the filename obtained from a + # downloaded URL, even after following a redirect, + # does not match the actual name of the downloaded + # file. To retrieve the correct filename, it is + # necessary to examine the "Content-Disposition" + # header. + content_disposition = response.headers.get("Content-Disposition") + if content_disposition: + filename = content_disposition.split("filename=")[-1].strip('"') + else: + filename = redirected_download_url.rpartition("/")[2] + package_json["download_url"] = redirected_download_url + + return package_json, filename + # It may need indexing GitHub PURLs that requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route("pkg:golang/.*") -def process_request_dir_listed(purl_str, **kwargs): +def process_requests(purl_str, **kwargs): """ Process `priority_resource_uri` containing a GitHub Package URL (PURL). @@ -127,43 +195,83 @@ def process_request_dir_listed(purl_str, **kwargs): try: # FIXME: This is not working for some reasons. - # It'll work if I input the same updated_purl_str in the UI - namespace, name, version = extract_golang_purl(purl_str) + # It'll work if I input the same github_purl in the UI if purl_str.startswith("pkg:golang/github"): + subset_path, version = extract_golang__subset_purl(purl_str) # Construct the GitHub purl - github_purl = f"pkg:github/{namespace}/{name}@{version}" + github_purl = f"pkg:github/{subset_path}@{version}" package_url = PackageURL.from_string(github_purl) error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) if error_msg: return error_msg elif purl_str.startswith("pkg:golang/gitlab"): package_url = PackageURL.from_string(purl_str) - package_json = gitlab_get_package_json(namespace, name) - repo_version_author_list = gitlab_get_all_package_version_author(namespace, name) - if version: + subset_path, version = gitlab_updated_purl(purl_str) + package_json = get_package_json(subset_path, "gitlab") + if not package_json: + error = f"package not found: {purl_str}" + return error + repo_version_author_list = gitlab_get_all_package_version_author(subset_path) + if repo_version_author_list: for repo_version, author, email in repo_version_author_list: # Check the version along with stripping the first # character 'v' in the repo_version - if version == repo_version or version == repo_version[1:]: - download_url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/archive.zip?sha={repo_version}" - response = requests.head(download_url, allow_redirects=True) - redirected_download_url = response.url - package_json["download_url"] = redirected_download_url - package_json["author"] = author - package_json["email"] = email + if not version or version in {repo_version, repo_version[1:]}: + download_url = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip?sha={repo_version}" + updated_json, filename = process_download_metadata(download_url, package_json) + updated_json["author"] = author + updated_json["email"] = email error_msg = map_golang_package( - package_url, package_json, pipelines, priority + package_url, updated_json, pipelines, priority, filename=filename ) - break + if version: + break else: - for repo_version, author, email in repo_version_author_list: - download_url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/archive.zip?sha={repo_version}" - response = requests.head(download_url, allow_redirects=True) - redirected_download_url = response.url - package_json["download_url"] = redirected_download_url - package_json["author"] = author - package_json["email"] = email - error_msg = map_golang_package(package_url, package_json, pipelines, priority) + # The repo does not have any tag (i.e. it only has one version) + download_url = ( + f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip" + ) + updated_json, filename = process_download_metadata(download_url, package_json) + error_msg = map_golang_package( + package_url, package_json, pipelines, priority, filename=filename + ) + elif purl_str.startswith("pkg:golang/bitbucket"): + package_url = PackageURL.from_string(purl_str) + subset_path, version = extract_golang__subset_purl(purl_str) + package_json = get_package_json(subset_path, "bitbucket") + if not package_json: + error = f"package not found: {purl_str}" + return error + repo_version_author_list = bitbucket_get_all_package_version_author(subset_path) + if repo_version_author_list: + for repo_version, author in repo_version_author_list: + # Check the version along with stripping the first + # character 'v' in the repo_version + if not version or version in {repo_version, repo_version[1:]}: + download_url = f"https://bitbucket.org/{subset_path}/get/{repo_version}.zip" + updated_json, filename = process_download_metadata(download_url, package_json) + updated_json["author"] = author + if repo_version.startswith("v"): + collected_version = repo_version[1:] + else: + collected_version = repo_version + updated_json["version"] = collected_version + + error_msg = map_golang_package( + package_url, updated_json, pipelines, priority, filename=filename + ) + if version: + break + else: + # The repo does not have any tag (i.e. it only has one version) + # Get the main branch name for the download url + main_branch = package_json["mainbranch"]["name"] + download_url = f"https://bitbucket.org/{subset_path}/get/{main_branch}.zip" + updated_json, filename = process_download_metadata(download_url, package_json) + + error_msg = map_golang_package( + package_url, package_json, pipelines, priority, filename=filename + ) except ValueError as e: error = f"error occurred when parsing {purl_str}: {e}" diff --git a/minecode/miners/bitbucket.py b/minecode/miners/bitbucket.py index be3f968f..7413deb9 100644 --- a/minecode/miners/bitbucket.py +++ b/minecode/miners/bitbucket.py @@ -300,3 +300,40 @@ def build_bitbucket_repo_package(repo_data, purl): ) package.set_purl(purl) return package + + +def build_bitbucket_packages(metadata_dict, purl): + """ + Yield ScannedPackage built from Bitbucket. + The metadata_dict is a dictionary. + purl: String value of the package url of the ResourceURI object + """ + name = metadata_dict["name"] + description = metadata_dict["description"] + homepage_url = metadata_dict["links"]["html"]["href"] + version = metadata_dict["version"] + size = metadata_dict["size"] + primary_language = metadata_dict["language"] + + common_data = dict( + name=name, + version=version, + description=description, + homepage_url=homepage_url, + size=size, + primary_language=primary_language, + ) + + download_data = dict( + datasource_id="bitbucket_pkginfo", + download_url=metadata_dict["download_url"], + ) + + common_data.update(download_data) + print("COMMON DICT") + print(common_data) + package = scan_models.PackageData.from_data(common_data) + + package.datasource_id = "bitbucket_api_metadata" + package.set_purl(purl) + yield package diff --git a/minecode/miners/gitlab.py b/minecode/miners/gitlab.py index 8a27f98c..87ef76bb 100644 --- a/minecode/miners/gitlab.py +++ b/minecode/miners/gitlab.py @@ -134,3 +134,60 @@ def build_packages_from_json(metadata, purl=None): package = scan_models.Package(**common_data) package.set_purl(purl) yield package + + +def build_packages_from_json_golang(content, purl=None): + """ + Yield Package built from gitlab json content + metadata: Json metadata content + purl: String value of the package url of the ResourceURI object + """ + import requests + + id = content.get("id") + name = content.get("name") + repository_homepage_url = content.get("http_url_to_repo") + version = "" + if purl: + version = purl.version + + author = "" + email = "" + if "author" in content: + author = content.get("author") + if "email" in content: + email = content.get("email") + + license_url = f"https://gitlab.com/api/v4/projects/{id}/repository/files/LICENSE/raw" + response = requests.get(license_url) + extracted_license_statement = [] + if response.status_code == 200: + extracted_license_statement = [response.text] + + common_data = dict( + name=name, + version=version, + description=content.get("description"), + homepage_url=content.get("web_url"), + repository_homepage_url=repository_homepage_url, + extracted_license_statement=extracted_license_statement, + download_url=content.get("download_url"), + ) + + if repository_homepage_url: + repository_homepage_url = form_vcs_url("git", repository_homepage_url) + common_data["vcs_url"] = repository_homepage_url + common_data["code_view_url"] = repository_homepage_url + common_data["release_date"] = parse_date(content.get("created_at")) + + if author: + parties = common_data.get("parties") + if not parties: + common_data["parties"] = [] + common_data["parties"].append(scan_models.Party(name=author, role="author", email=email)) + + package = scan_models.PackageData.from_data(common_data) + + package.datasource_id = "golang_api_metadata" + package.set_purl(purl) + yield package diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index 52164549..66a8726b 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -19,6 +19,7 @@ from minecode.miners import Mapper from minecode.miners import NonPersistentHttpVisitor from minecode.utils import form_vcs_url +from minecode.utils import parse_date class GoLangSeed(seed.Seeder): @@ -237,45 +238,3 @@ def build_golang_package(package_data, purl): vcs_url=vcs_url, ) return package - - -def build_packages_from_gitlab(metadata_dict, purl): - """ - Yield ScannedPackage built from Gitlab. - - The metadata_dict is a dictionary. - - purl: String value of the package url of the ResourceURI object - """ - id = metadata_dict["id"] - name = metadata_dict["name"] - version = purl.version - description = metadata_dict["description"] - repository_homepage_url = metadata_dict["http_url_to_repo"] - download_url = metadata_dict["download_url"] - author = metadata_dict["author"] - email = metadata_dict["email"] - - license_url = f"https://gitlab.com/api/v4/projects/{id}/repository/files/LICENSE/raw" - extracted_license_statement = [license_url] - - common_data = dict( - name=name, - version=version, - description=description, - repository_homepage_url=repository_homepage_url, - extracted_license_statement=extracted_license_statement, - download_url=download_url, - ) - - if author: - parties = common_data.get("parties") - if not parties: - common_data["parties"] = [] - common_data["parties"].append(scan_models.Party(name=author, role="author", email=email)) - - package = scan_models.PackageData.from_data(common_data) - - package.datasource_id = "golang_api_metadata" - package.set_purl(purl) - yield package diff --git a/minecode/model_utils.py b/minecode/model_utils.py index f31245ab..50f04800 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -228,7 +228,7 @@ def merge_packages(existing_package, new_package_data, replace=False): return updated_fields -def merge_or_create_package(scanned_package, visit_level, override=False): +def merge_or_create_package(scanned_package, visit_level, override=False, filename=None): """ Update Package from ``scanned_package`` instance if `visit_level` is greater than the mining level of the existing package. @@ -348,10 +348,13 @@ def merge_or_create_package(scanned_package, visit_level, override=False): existing_related_package = existing_related_packages.first() package_content = scanned_package.extra_data.get("package_content") + if not filename: + filename = fileutils.file_name(package_uri) + package_data = dict( # FIXME: we should get the file_name in the # PackageData object instead. - filename=fileutils.file_name(package_uri), + filename=filename, # TODO: update the PackageDB model release_date=scanned_package.release_date, mining_level=mining_level, From b0f1a22b657186c82ff35bfe632c98cc48a49d25 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Mon, 14 Apr 2025 18:52:07 +0800 Subject: [PATCH 3/6] #596 - Added the following data collection for golang ``` pkg:golang/github.com/* pkg:golang/gitlab.com/* pkg:golang/bitbucket.org/* ``` Signed-off-by: Chin Yeung Li --- minecode/collectors/bitbucket.py | 49 +++++++++++ minecode/collectors/generic.py | 5 +- minecode/collectors/github.py | 34 ++++++++ minecode/collectors/gitlab.py | 44 ++++++++++ minecode/collectors/golang.py | 143 ++++++++++++++++--------------- minecode/miners/bitbucket.py | 52 ++++++++++- minecode/miners/gitlab.py | 4 +- minecode/miners/golang.py | 1 - 8 files changed, 253 insertions(+), 79 deletions(-) create mode 100644 minecode/collectors/bitbucket.py create mode 100644 minecode/collectors/gitlab.py diff --git a/minecode/collectors/bitbucket.py b/minecode/collectors/bitbucket.py new file mode 100644 index 00000000..420299d1 --- /dev/null +++ b/minecode/collectors/bitbucket.py @@ -0,0 +1,49 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging + +import requests + + +""" +Collect bitbucket packages from bitbucket registries. +""" + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +def bitbucket_get_all_package_version_author(subset_path): + """ + Return a list of all version numbers along with author for the package. + """ + repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags" + version_author_list = [] + try: + while repo_tags: + response = requests.get(repo_tags) + response.raise_for_status() + data = response.json() + if data["size"] > 0: + # Get all available versions + for item in data["values"]: + version = item["name"] + author = "" + if "tagger" in item and item["tagger"]: + if item["tagger"]["type"] == "author": + author = item["tagger"]["raw"] + version_author_list.append((version, author)) + # Handle pagination + repo_tags = data.get("next", None) + return version_author_list + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") diff --git a/minecode/collectors/generic.py b/minecode/collectors/generic.py index 032be878..6080e516 100644 --- a/minecode/collectors/generic.py +++ b/minecode/collectors/generic.py @@ -100,7 +100,7 @@ def packagedata_from_dict(package_data): return PackageData.from_data(cleaned_package_data) -def map_fetchcode_supported_package(package_url, pipelines, priority=0): +def map_fetchcode_supported_package(package_url, pipelines, priority=0, from_go_lang=False): """ Add a `package_url` supported by fetchcode to the PackageDB. @@ -116,6 +116,9 @@ def map_fetchcode_supported_package(package_url, pipelines, priority=0): logger.error(error) return error + if from_go_lang: + packages[0].type = "golang" + packages[0].namespace = "github.com/" + packages[0].namespace package_data = packages[0].to_dict() # Remove obsolete Package fields see https://github.com/aboutcode-org/fetchcode/issues/108 diff --git a/minecode/collectors/github.py b/minecode/collectors/github.py index fbf6337e..63ec3b63 100644 --- a/minecode/collectors/github.py +++ b/minecode/collectors/github.py @@ -13,6 +13,40 @@ from minecode.collectors.generic import map_fetchcode_supported_package +def github_get_all_versions(subset_path): + """ + Fetch all versions (tags) from a GitHub repository using the API + Returns a list of all version tags in the repository + """ + import requests + + url = f"https://api.github.com/repos/{subset_path}/tags" + version_list = [] + page = 1 + + while True: + response = requests.get( + url, + params={"page": page, "per_page": 100}, # Max 100 per page + headers={"Accept": "application/vnd.github.v3+json"}, + ) + response.raise_for_status() + + data = response.json() + if not data: + break + + for tag in data: + version_list.append(tag["name"]) + page += 1 + + # Check if we've reached the last page + if "next" not in response.links: + break + + return version_list + + # Indexing GitHub PURLs requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route("pkg:github/.*") diff --git a/minecode/collectors/gitlab.py b/minecode/collectors/gitlab.py new file mode 100644 index 00000000..a57ca1bd --- /dev/null +++ b/minecode/collectors/gitlab.py @@ -0,0 +1,44 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging + +import requests + + +""" +Collect gitlab packages from gitlab registries. +""" + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +def gitlab_get_all_package_version_author(subset_path): + """ + Return a list of all version numbers along with author and author email + for the package. + """ + repo_tags = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/tags" + try: + response = requests.get(repo_tags) + response.raise_for_status() + data = response.json() + version_author_list = [] + # Get all available versions + for item in data: + version = item["name"] + author = item["commit"]["author_name"] + author_email = item["commit"]["author_email"] + version_author_list.append((version, author, author_email)) + return version_author_list + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index f46bd095..6560a112 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -14,6 +14,10 @@ from minecode import priority_router from minecode.collectors.generic import map_fetchcode_supported_package +from minecode.collectors.gitlab import gitlab_get_all_package_version_author +from minecode.collectors.github import github_get_all_versions +from minecode.collectors.bitbucket import bitbucket_get_all_package_version_author + from minecode.miners.gitlab import build_packages_from_json_golang from minecode.miners.bitbucket import build_bitbucket_packages @@ -23,7 +27,7 @@ logger.setLevel(logging.INFO) -def extract_golang__subset_purl(purl_str): +def extract_golang_subset_purl(purl_str): """ Extract the first two swgments after github.com or bitbucket.org and version For instance, pkg:golang/github.com/rickar/cal/v2/aa@2.1.23 @@ -39,7 +43,7 @@ def extract_golang__subset_purl(purl_str): version = "" if "@" in purl_str: version = purl_str.rpartition("@")[2] - subset_path = parts[1] + "/" + parts[2] + subset_path = parts[1] + "/" + parts[2].partition("@")[0] return subset_path, version @@ -75,52 +79,6 @@ def get_package_json(subset_path, type): logger.error(f"HTTP error occurred: {err}") -def gitlab_get_all_package_version_author(subset_path): - """ - Return a list of all version numbers along with author and author email - for the package. - """ - repo_tags = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/tags" - try: - response = requests.get(repo_tags) - response.raise_for_status() - data = response.json() - version_author_list = [] - # Get all available versions - for item in data: - version = item["name"] - author = item["commit"]["author_name"] - author_email = item["commit"]["author_email"] - version_author_list.append((version, author, author_email)) - return version_author_list - except requests.exceptions.HTTPError as err: - logger.error(f"HTTP error occurred: {err}") - - -def bitbucket_get_all_package_version_author(subset_path): - """ - Return a list of all version numbers along with author for the package. - """ - repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags" - try: - response = requests.get(repo_tags) - response.raise_for_status() - data = response.json() - version_author_list = [] - if data["size"] > 0: - # Get all available versions - for item in data["values"]: - version = item["name"] - print(version) - author = "" - if item["tagger"]["type"] == "author": - author = item["tagger"]["raw"] - version_author_list.append((version, author)) - return version_author_list - except requests.exceptions.HTTPError as err: - logger.error(f"HTTP error occurred: {err}") - - def map_golang_package(package_url, package_json, pipelines, priority=0, filename=None): """ Add a golang `package_url` to the PackageDB. @@ -176,6 +134,7 @@ def process_download_metadata(download_url, package_json): return package_json, filename + # It may need indexing GitHub PURLs that requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route("pkg:golang/.*") @@ -194,16 +153,33 @@ def process_requests(purl_str, **kwargs): priority = kwargs.get("priority", 0) try: - # FIXME: This is not working for some reasons. - # It'll work if I input the same github_purl in the UI if purl_str.startswith("pkg:golang/github"): - subset_path, version = extract_golang__subset_purl(purl_str) - # Construct the GitHub purl - github_purl = f"pkg:github/{subset_path}@{version}" - package_url = PackageURL.from_string(github_purl) - error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) - if error_msg: - return error_msg + subset_path, version = extract_golang_subset_purl(purl_str) + if version: + # Construct the GitHub purl + github_purl = f"pkg:github/{subset_path}@{version}" + package_url = PackageURL.from_string(github_purl) + error_msg = map_fetchcode_supported_package( + package_url, pipelines, priority, from_go_lang=True + ) + if error_msg: + return error_msg + else: + version_list = github_get_all_versions(subset_path) + for v in version_list: + # Construct the GitHub purl + # Strip the 'version' or 'v' from the collected version + if v.startswith("version"): + v = v.partition("version")[2] + elif v.startswith("v"): + v = v[1:] + github_purl = f"pkg:github/{subset_path}@{v}" + package_url = PackageURL.from_string(github_purl) + error_msg = map_fetchcode_supported_package( + package_url, pipelines, priority, from_go_lang=True + ) + if error_msg: + return error_msg elif purl_str.startswith("pkg:golang/gitlab"): package_url = PackageURL.from_string(purl_str) subset_path, version = gitlab_updated_purl(purl_str) @@ -218,13 +194,28 @@ def process_requests(purl_str, **kwargs): # character 'v' in the repo_version if not version or version in {repo_version, repo_version[1:]}: download_url = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip?sha={repo_version}" - updated_json, filename = process_download_metadata(download_url, package_json) + updated_json, filename = process_download_metadata( + download_url, package_json + ) updated_json["author"] = author updated_json["email"] = email - error_msg = map_golang_package( - package_url, updated_json, pipelines, priority, filename=filename - ) - if version: + if not version: + if repo_version.startswith("v"): + updated_purl_str = ( + PackageURL.to_string(package_url) + "@" + repo_version[1:] + ) + else: + updated_purl_str = ( + PackageURL.to_string(package_url) + "@" + repo_version + ) + updated_purl = PackageURL.from_string(updated_purl_str) + error_msg = map_golang_package( + updated_purl, updated_json, pipelines, priority, filename=filename + ) + else: + error_msg = map_golang_package( + package_url, updated_json, pipelines, priority, filename=filename + ) break else: # The repo does not have any tag (i.e. it only has one version) @@ -233,35 +224,45 @@ def process_requests(purl_str, **kwargs): ) updated_json, filename = process_download_metadata(download_url, package_json) error_msg = map_golang_package( - package_url, package_json, pipelines, priority, filename=filename + package_url, updated_json, pipelines, priority, filename=filename ) elif purl_str.startswith("pkg:golang/bitbucket"): package_url = PackageURL.from_string(purl_str) - subset_path, version = extract_golang__subset_purl(purl_str) + subset_path, version = extract_golang_subset_purl(purl_str) package_json = get_package_json(subset_path, "bitbucket") if not package_json: error = f"package not found: {purl_str}" return error repo_version_author_list = bitbucket_get_all_package_version_author(subset_path) + package_json["repo_workspace_name"] = subset_path if repo_version_author_list: + found_match = False for repo_version, author in repo_version_author_list: # Check the version along with stripping the first # character 'v' in the repo_version if not version or version in {repo_version, repo_version[1:]}: + found_match = True download_url = f"https://bitbucket.org/{subset_path}/get/{repo_version}.zip" - updated_json, filename = process_download_metadata(download_url, package_json) + updated_json, filename = process_download_metadata( + download_url, package_json + ) updated_json["author"] = author - if repo_version.startswith("v"): - collected_version = repo_version[1:] - else: - collected_version = repo_version - updated_json["version"] = collected_version + if not version: + if repo_version.startswith("v"): + collected_version = repo_version[1:] + else: + collected_version = repo_version + updated_purl_str = purl_str + "@" + collected_version + package_url = PackageURL.from_string(updated_purl_str) error_msg = map_golang_package( package_url, updated_json, pipelines, priority, filename=filename ) if version: break + if not found_match: + error_msg = f"The package version not found: {version}" + return error_msg else: # The repo does not have any tag (i.e. it only has one version) # Get the main branch name for the download url @@ -270,7 +271,7 @@ def process_requests(purl_str, **kwargs): updated_json, filename = process_download_metadata(download_url, package_json) error_msg = map_golang_package( - package_url, package_json, pipelines, priority, filename=filename + package_url, updated_json, pipelines, priority, filename=filename ) except ValueError as e: diff --git a/minecode/miners/bitbucket.py b/minecode/miners/bitbucket.py index 7413deb9..e69ad327 100644 --- a/minecode/miners/bitbucket.py +++ b/minecode/miners/bitbucket.py @@ -4,6 +4,7 @@ import json import logging +import requests from packagedcode import models as scan_models from packageurl import PackageURL @@ -302,6 +303,45 @@ def build_bitbucket_repo_package(repo_data, purl): return package +def get_bitbucket_license_info(repo_path): + """ + Fetch license information from a Bitbucket repository. + Returns the detected license text based on the common license filenames + """ + + # Bitbucket API endpoint for repository sources (where license file typically is) + url = f"https://api.bitbucket.org/2.0/repositories/{repo_path}/src" + + try: + while url: + response = requests.get(url) + response.raise_for_status() + + # Check common license file names + common_license_file_name = [ + "LICENSE", + "LICENSE.MD", + "LICENSE.TXT", + "COPYING", + "COPYING.TXT", + ] + data = response.json() + # Search for license files in the root directory + for item in data["values"]: + if item["path"].upper() in common_license_file_name: + # Found a license file - fetch its content + license_url = f"https://api.bitbucket.org/2.0/repositories/{repo_path}/src/HEAD/{item['path']}" + license_response = requests.get(license_url) + license_response.raise_for_status() + return license_response.text + # Handle pagination + url = data.get("next", None) + return None # No license file found + except requests.exceptions.RequestException as e: + print(f"Error fetching license info: {e}") + return None + + def build_bitbucket_packages(metadata_dict, purl): """ Yield ScannedPackage built from Bitbucket. @@ -311,17 +351,23 @@ def build_bitbucket_packages(metadata_dict, purl): name = metadata_dict["name"] description = metadata_dict["description"] homepage_url = metadata_dict["links"]["html"]["href"] - version = metadata_dict["version"] size = metadata_dict["size"] primary_language = metadata_dict["language"] + if "repo_workspace_name" in metadata_dict: + repo_path = metadata_dict["repo_workspace_name"] + else: + repo_path = "" + license_text = get_bitbucket_license_info(repo_path) + extracted_license_statement = [license_text] + common_data = dict( name=name, - version=version, description=description, homepage_url=homepage_url, size=size, primary_language=primary_language, + extracted_license_statement=extracted_license_statement, ) download_data = dict( @@ -330,8 +376,6 @@ def build_bitbucket_packages(metadata_dict, purl): ) common_data.update(download_data) - print("COMMON DICT") - print(common_data) package = scan_models.PackageData.from_data(common_data) package.datasource_id = "bitbucket_api_metadata" diff --git a/minecode/miners/gitlab.py b/minecode/miners/gitlab.py index 87ef76bb..f26662df 100644 --- a/minecode/miners/gitlab.py +++ b/minecode/miners/gitlab.py @@ -8,6 +8,7 @@ # import json +import requests import packagedcode.models as scan_models from packageurl import PackageURL @@ -142,8 +143,6 @@ def build_packages_from_json_golang(content, purl=None): metadata: Json metadata content purl: String value of the package url of the ResourceURI object """ - import requests - id = content.get("id") name = content.get("name") repository_homepage_url = content.get("http_url_to_repo") @@ -172,6 +171,7 @@ def build_packages_from_json_golang(content, purl=None): repository_homepage_url=repository_homepage_url, extracted_license_statement=extracted_license_statement, download_url=content.get("download_url"), + primary_language="go", ) if repository_homepage_url: diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index 66a8726b..e54593b6 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -19,7 +19,6 @@ from minecode.miners import Mapper from minecode.miners import NonPersistentHttpVisitor from minecode.utils import form_vcs_url -from minecode.utils import parse_date class GoLangSeed(seed.Seeder): From 82cb1fb08236d217cef039146e655b3eaf30e10c Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 15 Apr 2025 16:26:01 +0800 Subject: [PATCH 4/6] Correct typo Signed-off-by: Chin Yeung Li --- minecode/collectors/golang.py | 88 +++++++++++++++++++++++- minecode/miners/golang.py | 26 +++++++ minecode/tests/collectors/test_cargo.py | 2 +- minecode/tests/collectors/test_golang.py | 88 ++++++++++++++++++++++++ 4 files changed, 202 insertions(+), 2 deletions(-) create mode 100644 minecode/tests/collectors/test_golang.py diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index 6560a112..6d02c97e 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -7,20 +7,24 @@ # See https://aboutcode.org for more information about nexB OSS projects. # import logging - import requests +from bs4 import BeautifulSoup + from packageurl import PackageURL from minecode import priority_router + from minecode.collectors.generic import map_fetchcode_supported_package from minecode.collectors.gitlab import gitlab_get_all_package_version_author from minecode.collectors.github import github_get_all_versions from minecode.collectors.bitbucket import bitbucket_get_all_package_version_author from minecode.miners.gitlab import build_packages_from_json_golang +from minecode.miners.golang import build_golang_generic_package from minecode.miners.bitbucket import build_bitbucket_packages + logger = logging.getLogger(__name__) handler = logging.StreamHandler() logger.addHandler(handler) @@ -100,9 +104,12 @@ def map_golang_package(package_url, package_json, pipelines, priority=0, filenam packages = build_packages_from_json_golang(package_json, package_url) elif purl_str.startswith("pkg:golang/bitbucket"): packages = build_bitbucket_packages(package_json, package_url) + else: + packages = build_golang_generic_package(package_json, package_url) for package in packages: db_package, _, _, error = merge_or_create_package(package, visit_level=0, filename=filename) + print(db_package) if error: break @@ -135,6 +142,57 @@ def process_download_metadata(download_url, package_json): return package_json, filename +def scrape_go_package(repo_path, version): + """ + Access the repository on pkg.go.dev and extract the project's metadata. + """ + url = f"https://pkg.go.dev/{repo_path}@v{version}" + try: + response = requests.get(url) + response.raise_for_status() + + # Parse HTML content + soup = BeautifulSoup(response.text, "html.parser") + + # Find the tag with the specific text + license_tag = soup.find("a", {"data-test-id": "UnitHeader-license"}) + license_text = license_tag.text if license_tag else "" + + # Find the tag inside the UnitMeta-repo div + repo_tag = soup.find("div", class_="UnitMeta-repo").find("a") + repo_url = repo_tag["href"] if repo_tag else "" + + download_url = f"https://proxy.golang.org/{repo_path}/@v/v{version}.zip" + + return { + "license_text": license_text, + "repository_homepage_url": repo_url, + "download_url": download_url, + } + + except requests.exceptions.RequestException as e: + return {"error": f"Request failed: {str(e)}"} + except Exception as e: + return {"error": f"An error occurred: {str(e)}"} + + +def scrape_package_versions(repo_path): + """ + Return all the version of a repo as a list that is fetched from pkg.go.dev. + """ + url = f"https://pkg.go.dev/{repo_path}?tab=versions" + response = requests.get(url) + + if response.status_code == 200: + soup = BeautifulSoup(response.text, "html.parser") + version_divs = soup.find_all("div", class_="Version-tag") + versions = [div.get_text(strip=True) for div in version_divs] + return versions + else: + print(f"Error fetching page: {response.status_code}") + return [] + + # It may need indexing GitHub PURLs that requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route("pkg:golang/.*") @@ -153,6 +211,10 @@ def process_requests(purl_str, **kwargs): priority = kwargs.get("priority", 0) try: + """ + We retrieve metadata from APIs for GitHub, GitLab, and Bitbucket. + For the other cases, we will scrape data from pkg.go.dev + """ if purl_str.startswith("pkg:golang/github"): subset_path, version = extract_golang_subset_purl(purl_str) if version: @@ -273,6 +335,30 @@ def process_requests(purl_str, **kwargs): error_msg = map_golang_package( package_url, updated_json, pipelines, priority, filename=filename ) + else: + subset_path = "" + version = "" + subset_path = purl_str.partition("pkg:golang/")[2].partition("@")[0] + if "@" in purl_str: + version = purl_str.rpartition("@")[2] + if not version: + version_list = scrape_package_versions(subset_path) + for ver in version_list: + if ver.startswith("version"): + ver = ver.partition("version")[2] + elif ver.startswith("v"): + ver = ver[1:] + updated_purl_str = purl_str + "@" + ver + package_url = PackageURL.from_string(updated_purl_str) + package_json = scrape_go_package(subset_path, ver) + error_msg = map_golang_package(package_url, package_json, pipelines, priority) + else: + print("HAVE VERSION") + print(subset_path) + print(version) + package_url = PackageURL.from_string(purl_str) + package_json = scrape_go_package(subset_path, version) + error_msg = map_golang_package(package_url, package_json, pipelines, priority) except ValueError as e: error = f"error occurred when parsing {purl_str}: {e}" diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index e54593b6..b512d28a 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -237,3 +237,29 @@ def build_golang_package(package_data, purl): vcs_url=vcs_url, ) return package + + +def build_golang_generic_package(package_data, package_url): + """Return a single Golang package""" + homepage_url = "/".join(["https:/", package_url.namespace, package_url.name]) + license_text = package_data.get("license_text") + extracted_license_statement = [license_text] + print("NAME") + print(package_url.name) + print(package_url.namespace) + print(package_url.type) + + common_data = dict( + name=package_url.name, + namespace=package_url.namespace, + type=package_url.type, + primary_language="go", + repository_homepage_url=package_data.get("repository_homepage_url"), + homepage_url=homepage_url, + extracted_license_statement=extracted_license_statement, + download_url=package_data.get("download_url"), + ) + + package = scan_models.PackageData.from_data(common_data) + package.set_purl(package_url) + yield package diff --git a/minecode/tests/collectors/test_cargo.py b/minecode/tests/collectors/test_cargo.py index 63ccb3aa..ebaab2fb 100644 --- a/minecode/tests/collectors/test_cargo.py +++ b/minecode/tests/collectors/test_cargo.py @@ -41,7 +41,7 @@ def test_get_package_json(self): self.assertListEqual(list(keys), expected_list) self.assertEqual(json_contents["crate"]["id"], "sam") - def test_map_npm_package(self): + def test_map_cargo_package(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) package_url = PackageURL.from_string("pkg:cargo/sam@0.3.1") diff --git a/minecode/tests/collectors/test_golang.py b/minecode/tests/collectors/test_golang.py new file mode 100644 index 00000000..7d2c439e --- /dev/null +++ b/minecode/tests/collectors/test_golang.py @@ -0,0 +1,88 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import json +import os + +from django.test import TestCase as DjangoTestCase + +from packageurl import PackageURL + +import packagedb +from minecode.collectors import golang +from minecode.utils_test import JsonBasedTesting + + +class GoLangPriorityQueueTests(JsonBasedTesting, DjangoTestCase): + test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") + + def setUp(self): + super().setUp() + self.expected_json_loc = self.get_test_loc("golang/glog.json") + with open(self.expected_json_loc) as f: + self.expected_json_contents = json.load(f) + + def test_extract_golang_subset_purl(self): + test1 = "pkg:golang/rickar/cal@2.1.23" + test2 = "pkg:golang/rickar/cal" + expected_path1 = "rickar/cal" + expected_version1 = "2.1.23" + + result_path1, result_version1 = golang.extract_golang_subset_purl(test1) + result_path2, result_version2 = golang.extract_golang_subset_purl(test2) + + self.assertEqual(expected_path1, result_path1) + self.assertEqual(result_version1, expected_version1) + + self.assertEqual(expected_path1, result_path2) + self.assertEqual(result_version2, "") + + def test_gitlab_updated_purl(self): + test1 = "pkg:golang/gitlab.com/gitlab-org/api/client-go@0.127.0" + test2 = "pkg:golang/gitlab.com/gitlab-org/api/client-go" + expected_path1 = "gitlab-org%2Fapi%2Fclient-go" + expected_version1 = "0.127.0" + + result_path1, result_version1 = golang.extract_golang_subset_purl(test1) + result_path2, result_version2 = golang.extract_golang_subset_purl(test2) + + self.assertEqual(expected_path1, result_path1) + self.assertEqual(result_version1, expected_version1) + + self.assertEqual(expected_path1, result_path2) + self.assertEqual(result_version2, "") + + def test_get_package_json_gitlab(self): + json_contents = golang.get_package_json.get_package_json("xx_network%2Fprimitives", "gitlab") + expected_id = "20321795" + expected_name = "primitives" + + self.assertEqual(json_contents.get("id"), expected_id) + self.assertEqual(json_contents.get("name"), expected_name) + + def test_get_package_json_bitbucket(self): + json_contents = golang.get_package_json.get_package_json("lebronto_kerovol/gwerror", "bitbucket") + expected_full_name = "lebronto_kerovol/gwerror" + expected_name = "gwerror" + + self.assertEqual(json_contents.get("full_name"), expected_full_name) + self.assertEqual(json_contents.get("name"), expected_name) + + def test_map_go_package(self): + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(0, package_count) + package_url = PackageURL.from_string("pkg:cargo/sam@0.3.1") + cargo.map_cargo_package(package_url, ("test_pipeline")) + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(1, package_count) + package = packagedb.models.Package.objects.all().first() + expected_purl_str = "pkg:cargo/sam@0.3.1" + expected_download_url = "https://static.crates.io/crates/sam/sam-0.3.1.crate" + self.assertEqual(expected_purl_str, package.purl) + self.assertEqual(expected_download_url, package.download_url) From 18f94132c8cdd2fc543c5486f15d4dc61110b1b3 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 15 Apr 2025 18:52:39 +0800 Subject: [PATCH 5/6] #596 - Add on-demand package data collection for golang * Collect metadata from API for the following "namespace" ``` pkg:golang/github.com/* pkg:golang/gitlab.com/* pkg:golang/bitbucket.org/* ``` * Add tests * Add "golang" in the "supported_ecosystems" list in the api.py Signed-off-by: Chin Yeung Li --- minecode/collectors/bitbucket.py | 7 +- minecode/collectors/golang.py | 7 +- minecode/miners/bitbucket.py | 2 +- minecode/miners/golang.py | 4 - minecode/tests/collectors/test_bitbucket.py | 41 ++++++ minecode/tests/collectors/test_github.py | 38 +++++ minecode/tests/collectors/test_gitlab.py | 33 +++++ minecode/tests/collectors/test_golang.py | 88 +++++++++--- .../testfiles/golang/client-go_0.127.0.json | 41 ++++++ .../testfiles/golang/gg-core_0.3.64.json | 130 ++++++++++++++++++ packagedb/api.py | 1 + 11 files changed, 361 insertions(+), 31 deletions(-) create mode 100644 minecode/tests/collectors/test_bitbucket.py create mode 100644 minecode/tests/collectors/test_github.py create mode 100644 minecode/tests/collectors/test_gitlab.py create mode 100644 minecode/tests/testfiles/golang/client-go_0.127.0.json create mode 100644 minecode/tests/testfiles/golang/gg-core_0.3.64.json diff --git a/minecode/collectors/bitbucket.py b/minecode/collectors/bitbucket.py index 420299d1..81b60126 100644 --- a/minecode/collectors/bitbucket.py +++ b/minecode/collectors/bitbucket.py @@ -38,9 +38,10 @@ def bitbucket_get_all_package_version_author(subset_path): for item in data["values"]: version = item["name"] author = "" - if "tagger" in item and item["tagger"]: - if item["tagger"]["type"] == "author": - author = item["tagger"]["raw"] + if "target" in item and item["target"]: + if "author" in item["target"] and item["target"]["author"]: + if item["target"]["author"]["type"] == "author": + author = item["target"]["author"]["user"]["display_name"] version_author_list.append((version, author)) # Handle pagination repo_tags = data.get("next", None) diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index 6d02c97e..6567cde9 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -24,6 +24,7 @@ from minecode.miners.golang import build_golang_generic_package from minecode.miners.bitbucket import build_bitbucket_packages +from packagedb.models import PackageContentType logger = logging.getLogger(__name__) handler = logging.StreamHandler() @@ -108,8 +109,8 @@ def map_golang_package(package_url, package_json, pipelines, priority=0, filenam packages = build_golang_generic_package(package_json, package_url) for package in packages: + package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE db_package, _, _, error = merge_or_create_package(package, visit_level=0, filename=filename) - print(db_package) if error: break @@ -316,7 +317,6 @@ def process_requests(purl_str, **kwargs): collected_version = repo_version updated_purl_str = purl_str + "@" + collected_version package_url = PackageURL.from_string(updated_purl_str) - error_msg = map_golang_package( package_url, updated_json, pipelines, priority, filename=filename ) @@ -353,9 +353,6 @@ def process_requests(purl_str, **kwargs): package_json = scrape_go_package(subset_path, ver) error_msg = map_golang_package(package_url, package_json, pipelines, priority) else: - print("HAVE VERSION") - print(subset_path) - print(version) package_url = PackageURL.from_string(purl_str) package_json = scrape_go_package(subset_path, version) error_msg = map_golang_package(package_url, package_json, pipelines, priority) diff --git a/minecode/miners/bitbucket.py b/minecode/miners/bitbucket.py index e69ad327..3c6f47aa 100644 --- a/minecode/miners/bitbucket.py +++ b/minecode/miners/bitbucket.py @@ -338,7 +338,7 @@ def get_bitbucket_license_info(repo_path): url = data.get("next", None) return None # No license file found except requests.exceptions.RequestException as e: - print(f"Error fetching license info: {e}") + logger.error(f"Error fetching license info: {e}") return None diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index b512d28a..225581e5 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -244,10 +244,6 @@ def build_golang_generic_package(package_data, package_url): homepage_url = "/".join(["https:/", package_url.namespace, package_url.name]) license_text = package_data.get("license_text") extracted_license_statement = [license_text] - print("NAME") - print(package_url.name) - print(package_url.namespace) - print(package_url.type) common_data = dict( name=package_url.name, diff --git a/minecode/tests/collectors/test_bitbucket.py b/minecode/tests/collectors/test_bitbucket.py new file mode 100644 index 00000000..304af4ee --- /dev/null +++ b/minecode/tests/collectors/test_bitbucket.py @@ -0,0 +1,41 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from django.test import TestCase as DjangoTestCase + +from minecode.collectors import bitbucket +from minecode.utils_test import JsonBasedTesting + + +class BitbucketPriorityQueueTests(JsonBasedTesting, DjangoTestCase): + test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") + + def test_bitbucket_get_all_package_version_author(self): + repo_path = "creachadair/stringset" + version_author_list = bitbucket.bitbucket_get_all_package_version_author(repo_path) + expected = [ + ("v0.0.1", "M. J. Fromberger"), + ("v0.0.10", "M. J. Fromberger"), + ("v0.0.11", "M. J. Fromberger"), + ("v0.0.12", "M. J. Fromberger"), + ("v0.0.13", "M. J. Fromberger"), + ("v0.0.14", "M. J. Fromberger"), + ("v0.0.2", "M. J. Fromberger"), + ("v0.0.3", "M. J. Fromberger"), + ("v0.0.4", "M. J. Fromberger"), + ("v0.0.5", "M. J. Fromberger"), + ("v0.0.6", "M. J. Fromberger"), + ("v0.0.7", "M. J. Fromberger"), + ("v0.0.8", "M. J. Fromberger"), + ("v0.0.9", "M. J. Fromberger"), + ] + for item in version_author_list: + self.assertIn(item, expected) diff --git a/minecode/tests/collectors/test_github.py b/minecode/tests/collectors/test_github.py new file mode 100644 index 00000000..eb5d4525 --- /dev/null +++ b/minecode/tests/collectors/test_github.py @@ -0,0 +1,38 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from django.test import TestCase as DjangoTestCase + +from minecode.collectors import github +from minecode.utils_test import JsonBasedTesting + + +class GithubPriorityQueueTests(JsonBasedTesting, DjangoTestCase): + test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") + + def test_github_get_all_versions(self): + repo_path = "aboutcode-org/purldb" + versions = github.github_get_all_versions(repo_path) + expected = [ + "v6.0.0", + "v5.0.1", + "v5.0.0", + "v3.0.0", + "v2.0.0", + "purldb-toolkit-v0.1.0", + "purl2vcs-v2.0.0", + "purl2vcs-v1.0.2", + "pre-scan-queue-update", + "matchcode-toolkit-v3.0.0", + "matchcode-toolkit-v1.1.1", + ] + for item in versions: + self.assertIn(item, expected) diff --git a/minecode/tests/collectors/test_gitlab.py b/minecode/tests/collectors/test_gitlab.py new file mode 100644 index 00000000..5bfedbd5 --- /dev/null +++ b/minecode/tests/collectors/test_gitlab.py @@ -0,0 +1,33 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from django.test import TestCase as DjangoTestCase + +from minecode.collectors import gitlab +from minecode.utils_test import JsonBasedTesting + + +class GitlabPriorityQueueTests(JsonBasedTesting, DjangoTestCase): + test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") + + def test_gitlab_get_all_package_version_author(self): + repo_path = "xx_network%2Fprimitives" + version_author_list = gitlab.gitlab_get_all_package_version_author(repo_path) + expected = [ + ("v0.0.5", "Richard T. Carback III", "rick.carback@gmail.com"), + ("v0.0.4", "Richard T. Carback III", "rick.carback@gmail.com"), + ("v0.0.3", "Benjamin Wenger", "ben@privategrity.com"), + ("v0.0.2", "Richard T. Carback III", "rick.carback@gmail.com"), + ("v0.0.1", "Jonathan Wenger", "jono@elixxir.io"), + ("v0.0.0", "Sydney Anne Erickson", "sydney@elixxir.io"), + ] + for item in version_author_list: + self.assertIn(item, expected) diff --git a/minecode/tests/collectors/test_golang.py b/minecode/tests/collectors/test_golang.py index 7d2c439e..068dda53 100644 --- a/minecode/tests/collectors/test_golang.py +++ b/minecode/tests/collectors/test_golang.py @@ -22,15 +22,9 @@ class GoLangPriorityQueueTests(JsonBasedTesting, DjangoTestCase): test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") - def setUp(self): - super().setUp() - self.expected_json_loc = self.get_test_loc("golang/glog.json") - with open(self.expected_json_loc) as f: - self.expected_json_contents = json.load(f) - def test_extract_golang_subset_purl(self): - test1 = "pkg:golang/rickar/cal@2.1.23" - test2 = "pkg:golang/rickar/cal" + test1 = "pkg:golang/github.com/rickar/cal/v2/aa@2.1.23" + test2 = "pkg:golang/github.com/rickar/cal/v2/aa" expected_path1 = "rickar/cal" expected_version1 = "2.1.23" @@ -49,8 +43,8 @@ def test_gitlab_updated_purl(self): expected_path1 = "gitlab-org%2Fapi%2Fclient-go" expected_version1 = "0.127.0" - result_path1, result_version1 = golang.extract_golang_subset_purl(test1) - result_path2, result_version2 = golang.extract_golang_subset_purl(test2) + result_path1, result_version1 = golang.gitlab_updated_purl(test1) + result_path2, result_version2 = golang.gitlab_updated_purl(test2) self.assertEqual(expected_path1, result_path1) self.assertEqual(result_version1, expected_version1) @@ -59,30 +53,88 @@ def test_gitlab_updated_purl(self): self.assertEqual(result_version2, "") def test_get_package_json_gitlab(self): - json_contents = golang.get_package_json.get_package_json("xx_network%2Fprimitives", "gitlab") - expected_id = "20321795" + json_contents = golang.get_package_json("xx_network%2Fprimitives", "gitlab") + expected_id = 20321795 expected_name = "primitives" self.assertEqual(json_contents.get("id"), expected_id) self.assertEqual(json_contents.get("name"), expected_name) def test_get_package_json_bitbucket(self): - json_contents = golang.get_package_json.get_package_json("lebronto_kerovol/gwerror", "bitbucket") + json_contents = golang.get_package_json("lebronto_kerovol/gwerror", "bitbucket") expected_full_name = "lebronto_kerovol/gwerror" expected_name = "gwerror" self.assertEqual(json_contents.get("full_name"), expected_full_name) self.assertEqual(json_contents.get("name"), expected_name) - def test_map_go_package(self): + def test_map_go_package_gitlab(self): + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(0, package_count) + package_url = PackageURL.from_string( + "pkg:golang/gitlab.com/gitlab-org/api/client-go@0.127.0" + ) + + with open(self.get_test_loc("golang/client-go_0.127.0.json")) as file: + package_json = json.load(file) + golang.map_golang_package(package_url, package_json, ("test_pipeline")) + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(1, package_count) + package = packagedb.models.Package.objects.all().first() + expected_purl_str = "pkg:golang/gitlab.com/gitlab-org/api/client-go@0.127.0" + expected_download_url = "https://gitlab.com/api/v4/projects/gitlab-org%2Fapi%2Fclient-go/repository/archive.zip?sha=v0.127.0" + self.assertEqual(expected_purl_str, package.purl) + self.assertEqual(expected_download_url, package.download_url) + + def test_map_go_package_bitbucket(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) - package_url = PackageURL.from_string("pkg:cargo/sam@0.3.1") - cargo.map_cargo_package(package_url, ("test_pipeline")) + package_url = PackageURL.from_string("pkg:golang/bitbucket.org/digi-sense/gg-core@0.3.64") + + with open(self.get_test_loc("golang/gg-core_0.3.64.json")) as file: + package_json = json.load(file) + golang.map_golang_package(package_url, package_json, ("test_pipeline")) + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(1, package_count) + package = packagedb.models.Package.objects.all().first() + expected_purl_str = "pkg:golang/bitbucket.org/digi-sense/gg-core@0.3.64" + expected_download_url = "https://bitbucket.org/digi-sense/gg-core/get/v0.3.64.zip" + self.assertEqual(expected_purl_str, package.purl) + self.assertEqual(expected_download_url, package.download_url) + + def test_map_go_package_others(self): + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(0, package_count) + package_url = PackageURL.from_string("pkg:golang/golang.org/x/oauth2@0.29.0") + + package_json = golang.scrape_go_package("golang.org/x/oauth2", "0.29.0") + golang.map_golang_package(package_url, package_json, ("test_pipeline")) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() - expected_purl_str = "pkg:cargo/sam@0.3.1" - expected_download_url = "https://static.crates.io/crates/sam/sam-0.3.1.crate" + expected_purl_str = "pkg:golang/golang.org/x/oauth2@0.29.0" + expected_download_url = "https://proxy.golang.org/golang.org/x/oauth2/@v/v0.29.0.zip" self.assertEqual(expected_purl_str, package.purl) self.assertEqual(expected_download_url, package.download_url) + + def test_process_download_metadata(self): + url = "https://bitbucket.org/digi-sense/gg-core/get/v0.3.64.zip" + _package_json, filename = golang.process_download_metadata(url, {}) + exprected_filename = "digi-sense-gg-core-9d3dfdc43161.zip" + self.assertEqual(exprected_filename, filename) + + def test_scrape_go_package(self): + metadata_dict = golang.scrape_go_package("golang.org/x/oauth2", "0.29.0") + expected_dict = { + "license_text": "BSD-3-Clause", + "repository_homepage_url": "https://cs.opensource.google/go/x/oauth2", + "download_url": "https://proxy.golang.org/golang.org/x/oauth2/@v/v0.29.0.zip", + } + self.assertEqual(expected_dict, metadata_dict) + + def test_scrape_package_versions(self): + versions = golang.scrape_package_versions("golang.org/x/oauth2") + # The version list may expand overtime, as of writing the test, + # there are 29 releases + expected_version_len = 29 + self.assertGreaterEqual(len(versions), expected_version_len) diff --git a/minecode/tests/testfiles/golang/client-go_0.127.0.json b/minecode/tests/testfiles/golang/client-go_0.127.0.json new file mode 100644 index 00000000..bf4d2f99 --- /dev/null +++ b/minecode/tests/testfiles/golang/client-go_0.127.0.json @@ -0,0 +1,41 @@ +{ + "id": 65271576, + "description": "GitLab Go SDK\r\n\r\nThis project has been migrated from `github.com/xanzy/go-gitlab`.", + "name": "client-go", + "name_with_namespace": "GitLab.org / API / client-go", + "path": "client-go", + "path_with_namespace": "gitlab-org/api/client-go", + "created_at": "2024-12-10T14:14:37.357Z", + "default_branch": "main", + "tag_list": [ + "gitlab", + "go", + "sdk" + ], + "topics": [ + "gitlab", + "go", + "sdk" + ], + "ssh_url_to_repo": "git@gitlab.com:gitlab-org/api/client-go.git", + "http_url_to_repo": "https://gitlab.com/gitlab-org/api/client-go.git", + "web_url": "https://gitlab.com/gitlab-org/api/client-go", + "readme_url": "https://gitlab.com/gitlab-org/api/client-go/-/blob/main/README.md", + "forks_count": 36, + "avatar_url": null, + "star_count": 54, + "last_activity_at": "2025-04-15T04:41:12.062Z", + "namespace": { + "id": 98461187, + "name": "API", + "path": "api", + "kind": "group", + "full_path": "gitlab-org/api", + "parent_id": 9970, + "avatar_url": null, + "web_url": "https://gitlab.com/groups/gitlab-org/api" + }, + "download_url": "https://gitlab.com/api/v4/projects/gitlab-org%2Fapi%2Fclient-go/repository/archive.zip?sha=v0.127.0", + "author": "Patrick Rice", + "email": "patrick.rice@kingland.com" + } diff --git a/minecode/tests/testfiles/golang/gg-core_0.3.64.json b/minecode/tests/testfiles/golang/gg-core_0.3.64.json new file mode 100644 index 00000000..2e5c6fef --- /dev/null +++ b/minecode/tests/testfiles/golang/gg-core_0.3.64.json @@ -0,0 +1,130 @@ +{ + "type": "repository", + "full_name": "digi-sense/gg-core", + "links": { + "self": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core" + }, + "html": { + "href": "https://bitbucket.org/digi-sense/gg-core" + }, + "avatar": { + "href": "https://bytebucket.org/ravatar/%7B1bc29c37-3104-464d-ba27-bffa8847bcd6%7D?ts=3152224" + }, + "pullrequests": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/pullrequests" + }, + "commits": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/commits" + }, + "forks": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/forks" + }, + "watchers": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/watchers" + }, + "branches": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/refs/branches" + }, + "tags": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/refs/tags" + }, + "downloads": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/downloads" + }, + "source": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/src" + }, + "clone": [ + { + "name": "https", + "href": "https://bitbucket.org/digi-sense/gg-core.git" + }, + { + "name": "ssh", + "href": "git@bitbucket.org:digi-sense/gg-core.git" + } + ], + "hooks": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/hooks" + } + }, + "name": "gg-core", + "slug": "gg-core", + "description": "", + "scm": "git", + "website": null, + "owner": { + "display_name": "Digi Sense", + "links": { + "self": { + "href": "https://api.bitbucket.org/2.0/workspaces/%7B2b6cccfa-727a-4d9a-82e0-bd8a163847a0%7D" + }, + "avatar": { + "href": "https://bitbucket.org/account/digi-sense/avatar/" + }, + "html": { + "href": "https://bitbucket.org/%7B2b6cccfa-727a-4d9a-82e0-bd8a163847a0%7D/" + } + }, + "type": "team", + "uuid": "{2b6cccfa-727a-4d9a-82e0-bd8a163847a0}", + "username": "digi-sense" + }, + "workspace": { + "type": "workspace", + "uuid": "{2b6cccfa-727a-4d9a-82e0-bd8a163847a0}", + "name": "Digi Sense", + "slug": "digi-sense", + "links": { + "avatar": { + "href": "https://bitbucket.org/workspaces/digi-sense/avatar/?ts=1732269844" + }, + "html": { + "href": "https://bitbucket.org/digi-sense/" + }, + "self": { + "href": "https://api.bitbucket.org/2.0/workspaces/digi-sense" + } + } + }, + "is_private": false, + "project": { + "type": "project", + "key": "BASE", + "uuid": "{32414877-738a-4002-96a4-893a906d88c8}", + "name": "base", + "links": { + "self": { + "href": "https://api.bitbucket.org/2.0/workspaces/digi-sense/projects/BASE" + }, + "html": { + "href": "https://bitbucket.org/digi-sense/workspace/projects/BASE" + }, + "avatar": { + "href": "https://bitbucket.org/digi-sense/workspace/projects/BASE/avatar/32?ts=1632311978" + } + } + }, + "fork_policy": "allow_forks", + "created_on": "2022-07-08T14:40:46.166913+00:00", + "updated_on": "2025-04-14T16:01:39.490611+00:00", + "size": 47007088, + "language": "go", + "uuid": "{1bc29c37-3104-464d-ba27-bffa8847bcd6}", + "mainbranch": { + "name": "master", + "type": "branch" + }, + "override_settings": { + "default_merge_strategy": true, + "branching_model": true + }, + "parent": null, + "enforced_signed_commits": null, + "has_issues": false, + "has_wiki": false, + "repo_workspace_name": "digi-sense/gg-core", + "download_url": "https://bitbucket.org/digi-sense/gg-core/get/v0.3.64.zip", + "author": "" + } diff --git a/packagedb/api.py b/packagedb/api.py index 62262735..04c1e41c 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -975,6 +975,7 @@ def _reindex_package(package, reindexed_packages, **kwargs): "pypi", "cargo", "gem", + "golang", ] unique_packages, unsupported_packages, unsupported_vers = get_resolved_packages( From 905195af68a6615b8d00b8f3e7a37eebbd4ec8d3 Mon Sep 17 00:00:00 2001 From: Chin Yeung Date: Wed, 16 Apr 2025 15:04:14 +0800 Subject: [PATCH 6/6] Update minecode/collectors/golang.py Signed-off-by: Chin Yeung Li Co-authored-by: Jono Yang --- minecode/collectors/golang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index 6567cde9..6a5f9bf2 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -34,7 +34,7 @@ def extract_golang_subset_purl(purl_str): """ - Extract the first two swgments after github.com or bitbucket.org and + Extract the first two segments after github.com or bitbucket.org and version For instance, pkg:golang/github.com/rickar/cal/v2/aa@2.1.23 Return subset_path: rickar/cal