From 088228942759fbed22150d36ff3f755f060390ea Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Wed, 9 Apr 2025 18:30:01 +0800 Subject: [PATCH 01/19] #596 - Added code for golang collector and updated the miner code for golang Signed-off-by: Chin Yeung Li --- minecode/collectors/golang.py | 170 ++++++++++++++++++++++++++++++++++ minecode/miners/golang.py | 42 +++++++++ 2 files changed, 212 insertions(+) create mode 100644 minecode/collectors/golang.py diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py new file mode 100644 index 00000000..f355d785 --- /dev/null +++ b/minecode/collectors/golang.py @@ -0,0 +1,170 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import logging + +import requests + +from packageurl import PackageURL + +from minecode import priority_router +from minecode.collectors.generic import map_fetchcode_supported_package +from minecode.miners.golang import build_packages_from_gitlab + + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +def extract_golang_purl(purl): + """ + Extract the name, namespace and version of a given purl. + """ + # Strip "pkg:golang/" + purl_body = purl[len("pkg:golang/") :] + + # Extract namespace, name, and version + parts = purl_body.split("/") + version = parts[-1].split("@")[-1] + namespace = parts[1] + name = parts[2].partition("@")[0] + + return namespace, name, version + + +def gitlab_get_package_json(namespace, name): + """ + Return the contents of the JSON file of the package. + """ + # Create URLs using purl fields + url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}" + + try: + response = requests.get(url) + response.raise_for_status() + return response.json() + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") + + +def gitlab_get_all_package_version_author(namespace, name): + """ + Return a list of all version numbers along with author and author email + for the package. + """ + repo_tags = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/tags" + try: + response = requests.get(repo_tags) + response.raise_for_status() + data = response.json() + version_author_list = [] + # Get all available versions + for item in data: + if not item["release"]: + continue + version = item["release"]["tag_name"] + author = item["commit"]["author_name"] + author_email = item["commit"]["author_email"] + version_author_list.append((version, author, author_email)) + return version_author_list + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") + + +def map_golang_package(package_url, package_json, pipelines, priority=0): + """ + Add a pypi `package_url` to the PackageDB. + + Return an error string if any errors are encountered during the process + """ + from minecode.model_utils import add_package_to_scan_queue + from minecode.model_utils import merge_or_create_package + + error = "" + + if not package_json: + error = f"Package does not exist on PyPI: {package_url}" + logger.error(error) + return error + + packages = build_packages_from_gitlab(package_json, package_url) + + for package in packages: + db_package, _, _, error = merge_or_create_package(package, visit_level=0) + if error: + break + + # Submit package for scanning + if db_package: + add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority) + + return error + + +# It may need indexing GitHub PURLs that requires a GitHub API token. +# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. +@priority_router.route("pkg:golang/.*") +def process_request_dir_listed(purl_str, **kwargs): + """ + Process `priority_resource_uri` containing a GitHub Package URL (PURL). + + This involves obtaining Package information for the PURL using + https://github.com/aboutcode-org/fetchcode and using it to create a new + PackageDB entry. The package is then added to the scan queue afterwards. + """ + from minecode.model_utils import DEFAULT_PIPELINES + + addon_pipelines = kwargs.get("addon_pipelines", []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + priority = kwargs.get("priority", 0) + + try: + # FIXME: This is not working for some reasons. + # It'll work if I input the same updated_purl_str in the UI + namespace, name, version = extract_golang_purl(purl_str) + if purl_str.startswith("pkg:golang/github"): + # Construct the GitHub purl + github_purl = f"pkg:github/{namespace}/{name}@{version}" + package_url = PackageURL.from_string(github_purl) + error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) + if error_msg: + return error_msg + elif purl_str.startswith("pkg:golang/gitlab"): + package_url = PackageURL.from_string(purl_str) + package_json = gitlab_get_package_json(namespace, name) + repo_version_author_list = gitlab_get_all_package_version_author(namespace, name) + if version: + for repo_version, author, email in repo_version_author_list: + # Check the version along with stripping the first + # character 'v' in the repo_version + if version == repo_version or version == repo_version[1:]: + download_url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/archive.zip?sha={repo_version}" + response = requests.head(download_url, allow_redirects=True) + redirected_download_url = response.url + package_json["download_url"] = redirected_download_url + package_json["author"] = author + package_json["email"] = email + error_msg = map_golang_package( + package_url, package_json, pipelines, priority + ) + break + else: + for repo_version, author, email in repo_version_author_list: + download_url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/archive.zip?sha={repo_version}" + response = requests.head(download_url, allow_redirects=True) + redirected_download_url = response.url + package_json["download_url"] = redirected_download_url + package_json["author"] = author + package_json["email"] = email + error_msg = map_golang_package(package_url, package_json, pipelines, priority) + + except ValueError as e: + error = f"error occurred when parsing {purl_str}: {e}" + return error diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index e54593b6..52164549 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -237,3 +237,45 @@ def build_golang_package(package_data, purl): vcs_url=vcs_url, ) return package + + +def build_packages_from_gitlab(metadata_dict, purl): + """ + Yield ScannedPackage built from Gitlab. + + The metadata_dict is a dictionary. + + purl: String value of the package url of the ResourceURI object + """ + id = metadata_dict["id"] + name = metadata_dict["name"] + version = purl.version + description = metadata_dict["description"] + repository_homepage_url = metadata_dict["http_url_to_repo"] + download_url = metadata_dict["download_url"] + author = metadata_dict["author"] + email = metadata_dict["email"] + + license_url = f"https://gitlab.com/api/v4/projects/{id}/repository/files/LICENSE/raw" + extracted_license_statement = [license_url] + + common_data = dict( + name=name, + version=version, + description=description, + repository_homepage_url=repository_homepage_url, + extracted_license_statement=extracted_license_statement, + download_url=download_url, + ) + + if author: + parties = common_data.get("parties") + if not parties: + common_data["parties"] = [] + common_data["parties"].append(scan_models.Party(name=author, role="author", email=email)) + + package = scan_models.PackageData.from_data(common_data) + + package.datasource_id = "golang_api_metadata" + package.set_purl(purl) + yield package From 1c1fbd78c68003a16f9fb7895e109cb83adc73c9 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Fri, 11 Apr 2025 16:15:38 +0800 Subject: [PATCH 02/19] #596 - Working in progress to handle bitbucket.org (It contains a lot of test code that will need to be removed). Signed-off-by: Chin Yeung Li --- minecode/collectors/golang.py | 198 ++++++++++++++++++++++++++-------- minecode/miners/bitbucket.py | 37 +++++++ minecode/miners/gitlab.py | 57 ++++++++++ minecode/miners/golang.py | 43 +------- minecode/model_utils.py | 7 +- 5 files changed, 253 insertions(+), 89 deletions(-) diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index f355d785..f46bd095 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -14,8 +14,8 @@ from minecode import priority_router from minecode.collectors.generic import map_fetchcode_supported_package -from minecode.miners.golang import build_packages_from_gitlab - +from minecode.miners.gitlab import build_packages_from_json_golang +from minecode.miners.bitbucket import build_bitbucket_packages logger = logging.getLogger(__name__) handler = logging.StreamHandler() @@ -23,28 +23,49 @@ logger.setLevel(logging.INFO) -def extract_golang_purl(purl): +def extract_golang__subset_purl(purl_str): """ - Extract the name, namespace and version of a given purl. + Extract the first two swgments after github.com or bitbucket.org and + version For instance, pkg:golang/github.com/rickar/cal/v2/aa@2.1.23 + Return + subset_path: rickar/cal + version: 2.1.23 """ # Strip "pkg:golang/" - purl_body = purl[len("pkg:golang/") :] + purl_body = purl_str[len("pkg:golang/") :] # Extract namespace, name, and version parts = purl_body.split("/") - version = parts[-1].split("@")[-1] - namespace = parts[1] - name = parts[2].partition("@")[0] + version = "" + if "@" in purl_str: + version = purl_str.rpartition("@")[2] + subset_path = parts[1] + "/" + parts[2] + + return subset_path, version - return namespace, name, version + +def gitlab_updated_purl(purl_str): + """ + Return the path between "pkg:golang/gitlab.com/" and version with + replacing "/" with "%2F" and version + """ + version = "" + if "@" in purl_str: + version = purl_str.rpartition("@")[2] + subset = purl_str.partition("pkg:golang/gitlab.com/")[2].partition("@")[0] + subset_path = subset.replace("/", "%2F") + return subset_path, version -def gitlab_get_package_json(namespace, name): +def get_package_json(subset_path, type): """ Return the contents of the JSON file of the package. """ # Create URLs using purl fields - url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}" + if type == "gitlab": + url = f"https://gitlab.com/api/v4/projects/{subset_path}" + elif type == "bitbucket": + url = f"https://api.bitbucket.org/2.0/repositories/{subset_path}" try: response = requests.get(url) @@ -54,12 +75,12 @@ def gitlab_get_package_json(namespace, name): logger.error(f"HTTP error occurred: {err}") -def gitlab_get_all_package_version_author(namespace, name): +def gitlab_get_all_package_version_author(subset_path): """ Return a list of all version numbers along with author and author email for the package. """ - repo_tags = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/tags" + repo_tags = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/tags" try: response = requests.get(repo_tags) response.raise_for_status() @@ -67,9 +88,7 @@ def gitlab_get_all_package_version_author(namespace, name): version_author_list = [] # Get all available versions for item in data: - if not item["release"]: - continue - version = item["release"]["tag_name"] + version = item["name"] author = item["commit"]["author_name"] author_email = item["commit"]["author_email"] version_author_list.append((version, author, author_email)) @@ -78,9 +97,33 @@ def gitlab_get_all_package_version_author(namespace, name): logger.error(f"HTTP error occurred: {err}") -def map_golang_package(package_url, package_json, pipelines, priority=0): +def bitbucket_get_all_package_version_author(subset_path): + """ + Return a list of all version numbers along with author for the package. + """ + repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags" + try: + response = requests.get(repo_tags) + response.raise_for_status() + data = response.json() + version_author_list = [] + if data["size"] > 0: + # Get all available versions + for item in data["values"]: + version = item["name"] + print(version) + author = "" + if item["tagger"]["type"] == "author": + author = item["tagger"]["raw"] + version_author_list.append((version, author)) + return version_author_list + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") + + +def map_golang_package(package_url, package_json, pipelines, priority=0, filename=None): """ - Add a pypi `package_url` to the PackageDB. + Add a golang `package_url` to the PackageDB. Return an error string if any errors are encountered during the process """ @@ -90,14 +133,18 @@ def map_golang_package(package_url, package_json, pipelines, priority=0): error = "" if not package_json: - error = f"Package does not exist on PyPI: {package_url}" + error = f"Package does not exist: {package_url}" logger.error(error) return error - packages = build_packages_from_gitlab(package_json, package_url) + purl_str = package_url.to_string() + if purl_str.startswith("pkg:golang/gitlab"): + packages = build_packages_from_json_golang(package_json, package_url) + elif purl_str.startswith("pkg:golang/bitbucket"): + packages = build_bitbucket_packages(package_json, package_url) for package in packages: - db_package, _, _, error = merge_or_create_package(package, visit_level=0) + db_package, _, _, error = merge_or_create_package(package, visit_level=0, filename=filename) if error: break @@ -108,10 +155,31 @@ def map_golang_package(package_url, package_json, pipelines, priority=0): return error +def process_download_metadata(download_url, package_json): + """ + Return the download_url and the filename + """ + response = requests.head(download_url, allow_redirects=True) + redirected_download_url = response.url + # Sometimes, the filename obtained from a + # downloaded URL, even after following a redirect, + # does not match the actual name of the downloaded + # file. To retrieve the correct filename, it is + # necessary to examine the "Content-Disposition" + # header. + content_disposition = response.headers.get("Content-Disposition") + if content_disposition: + filename = content_disposition.split("filename=")[-1].strip('"') + else: + filename = redirected_download_url.rpartition("/")[2] + package_json["download_url"] = redirected_download_url + + return package_json, filename + # It may need indexing GitHub PURLs that requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route("pkg:golang/.*") -def process_request_dir_listed(purl_str, **kwargs): +def process_requests(purl_str, **kwargs): """ Process `priority_resource_uri` containing a GitHub Package URL (PURL). @@ -127,43 +195,83 @@ def process_request_dir_listed(purl_str, **kwargs): try: # FIXME: This is not working for some reasons. - # It'll work if I input the same updated_purl_str in the UI - namespace, name, version = extract_golang_purl(purl_str) + # It'll work if I input the same github_purl in the UI if purl_str.startswith("pkg:golang/github"): + subset_path, version = extract_golang__subset_purl(purl_str) # Construct the GitHub purl - github_purl = f"pkg:github/{namespace}/{name}@{version}" + github_purl = f"pkg:github/{subset_path}@{version}" package_url = PackageURL.from_string(github_purl) error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) if error_msg: return error_msg elif purl_str.startswith("pkg:golang/gitlab"): package_url = PackageURL.from_string(purl_str) - package_json = gitlab_get_package_json(namespace, name) - repo_version_author_list = gitlab_get_all_package_version_author(namespace, name) - if version: + subset_path, version = gitlab_updated_purl(purl_str) + package_json = get_package_json(subset_path, "gitlab") + if not package_json: + error = f"package not found: {purl_str}" + return error + repo_version_author_list = gitlab_get_all_package_version_author(subset_path) + if repo_version_author_list: for repo_version, author, email in repo_version_author_list: # Check the version along with stripping the first # character 'v' in the repo_version - if version == repo_version or version == repo_version[1:]: - download_url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/archive.zip?sha={repo_version}" - response = requests.head(download_url, allow_redirects=True) - redirected_download_url = response.url - package_json["download_url"] = redirected_download_url - package_json["author"] = author - package_json["email"] = email + if not version or version in {repo_version, repo_version[1:]}: + download_url = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip?sha={repo_version}" + updated_json, filename = process_download_metadata(download_url, package_json) + updated_json["author"] = author + updated_json["email"] = email error_msg = map_golang_package( - package_url, package_json, pipelines, priority + package_url, updated_json, pipelines, priority, filename=filename ) - break + if version: + break else: - for repo_version, author, email in repo_version_author_list: - download_url = f"https://gitlab.com/api/v4/projects/{namespace}%2F{name}/repository/archive.zip?sha={repo_version}" - response = requests.head(download_url, allow_redirects=True) - redirected_download_url = response.url - package_json["download_url"] = redirected_download_url - package_json["author"] = author - package_json["email"] = email - error_msg = map_golang_package(package_url, package_json, pipelines, priority) + # The repo does not have any tag (i.e. it only has one version) + download_url = ( + f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip" + ) + updated_json, filename = process_download_metadata(download_url, package_json) + error_msg = map_golang_package( + package_url, package_json, pipelines, priority, filename=filename + ) + elif purl_str.startswith("pkg:golang/bitbucket"): + package_url = PackageURL.from_string(purl_str) + subset_path, version = extract_golang__subset_purl(purl_str) + package_json = get_package_json(subset_path, "bitbucket") + if not package_json: + error = f"package not found: {purl_str}" + return error + repo_version_author_list = bitbucket_get_all_package_version_author(subset_path) + if repo_version_author_list: + for repo_version, author in repo_version_author_list: + # Check the version along with stripping the first + # character 'v' in the repo_version + if not version or version in {repo_version, repo_version[1:]}: + download_url = f"https://bitbucket.org/{subset_path}/get/{repo_version}.zip" + updated_json, filename = process_download_metadata(download_url, package_json) + updated_json["author"] = author + if repo_version.startswith("v"): + collected_version = repo_version[1:] + else: + collected_version = repo_version + updated_json["version"] = collected_version + + error_msg = map_golang_package( + package_url, updated_json, pipelines, priority, filename=filename + ) + if version: + break + else: + # The repo does not have any tag (i.e. it only has one version) + # Get the main branch name for the download url + main_branch = package_json["mainbranch"]["name"] + download_url = f"https://bitbucket.org/{subset_path}/get/{main_branch}.zip" + updated_json, filename = process_download_metadata(download_url, package_json) + + error_msg = map_golang_package( + package_url, package_json, pipelines, priority, filename=filename + ) except ValueError as e: error = f"error occurred when parsing {purl_str}: {e}" diff --git a/minecode/miners/bitbucket.py b/minecode/miners/bitbucket.py index be3f968f..7413deb9 100644 --- a/minecode/miners/bitbucket.py +++ b/minecode/miners/bitbucket.py @@ -300,3 +300,40 @@ def build_bitbucket_repo_package(repo_data, purl): ) package.set_purl(purl) return package + + +def build_bitbucket_packages(metadata_dict, purl): + """ + Yield ScannedPackage built from Bitbucket. + The metadata_dict is a dictionary. + purl: String value of the package url of the ResourceURI object + """ + name = metadata_dict["name"] + description = metadata_dict["description"] + homepage_url = metadata_dict["links"]["html"]["href"] + version = metadata_dict["version"] + size = metadata_dict["size"] + primary_language = metadata_dict["language"] + + common_data = dict( + name=name, + version=version, + description=description, + homepage_url=homepage_url, + size=size, + primary_language=primary_language, + ) + + download_data = dict( + datasource_id="bitbucket_pkginfo", + download_url=metadata_dict["download_url"], + ) + + common_data.update(download_data) + print("COMMON DICT") + print(common_data) + package = scan_models.PackageData.from_data(common_data) + + package.datasource_id = "bitbucket_api_metadata" + package.set_purl(purl) + yield package diff --git a/minecode/miners/gitlab.py b/minecode/miners/gitlab.py index 8a27f98c..87ef76bb 100644 --- a/minecode/miners/gitlab.py +++ b/minecode/miners/gitlab.py @@ -134,3 +134,60 @@ def build_packages_from_json(metadata, purl=None): package = scan_models.Package(**common_data) package.set_purl(purl) yield package + + +def build_packages_from_json_golang(content, purl=None): + """ + Yield Package built from gitlab json content + metadata: Json metadata content + purl: String value of the package url of the ResourceURI object + """ + import requests + + id = content.get("id") + name = content.get("name") + repository_homepage_url = content.get("http_url_to_repo") + version = "" + if purl: + version = purl.version + + author = "" + email = "" + if "author" in content: + author = content.get("author") + if "email" in content: + email = content.get("email") + + license_url = f"https://gitlab.com/api/v4/projects/{id}/repository/files/LICENSE/raw" + response = requests.get(license_url) + extracted_license_statement = [] + if response.status_code == 200: + extracted_license_statement = [response.text] + + common_data = dict( + name=name, + version=version, + description=content.get("description"), + homepage_url=content.get("web_url"), + repository_homepage_url=repository_homepage_url, + extracted_license_statement=extracted_license_statement, + download_url=content.get("download_url"), + ) + + if repository_homepage_url: + repository_homepage_url = form_vcs_url("git", repository_homepage_url) + common_data["vcs_url"] = repository_homepage_url + common_data["code_view_url"] = repository_homepage_url + common_data["release_date"] = parse_date(content.get("created_at")) + + if author: + parties = common_data.get("parties") + if not parties: + common_data["parties"] = [] + common_data["parties"].append(scan_models.Party(name=author, role="author", email=email)) + + package = scan_models.PackageData.from_data(common_data) + + package.datasource_id = "golang_api_metadata" + package.set_purl(purl) + yield package diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index 52164549..66a8726b 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -19,6 +19,7 @@ from minecode.miners import Mapper from minecode.miners import NonPersistentHttpVisitor from minecode.utils import form_vcs_url +from minecode.utils import parse_date class GoLangSeed(seed.Seeder): @@ -237,45 +238,3 @@ def build_golang_package(package_data, purl): vcs_url=vcs_url, ) return package - - -def build_packages_from_gitlab(metadata_dict, purl): - """ - Yield ScannedPackage built from Gitlab. - - The metadata_dict is a dictionary. - - purl: String value of the package url of the ResourceURI object - """ - id = metadata_dict["id"] - name = metadata_dict["name"] - version = purl.version - description = metadata_dict["description"] - repository_homepage_url = metadata_dict["http_url_to_repo"] - download_url = metadata_dict["download_url"] - author = metadata_dict["author"] - email = metadata_dict["email"] - - license_url = f"https://gitlab.com/api/v4/projects/{id}/repository/files/LICENSE/raw" - extracted_license_statement = [license_url] - - common_data = dict( - name=name, - version=version, - description=description, - repository_homepage_url=repository_homepage_url, - extracted_license_statement=extracted_license_statement, - download_url=download_url, - ) - - if author: - parties = common_data.get("parties") - if not parties: - common_data["parties"] = [] - common_data["parties"].append(scan_models.Party(name=author, role="author", email=email)) - - package = scan_models.PackageData.from_data(common_data) - - package.datasource_id = "golang_api_metadata" - package.set_purl(purl) - yield package diff --git a/minecode/model_utils.py b/minecode/model_utils.py index f31245ab..50f04800 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -228,7 +228,7 @@ def merge_packages(existing_package, new_package_data, replace=False): return updated_fields -def merge_or_create_package(scanned_package, visit_level, override=False): +def merge_or_create_package(scanned_package, visit_level, override=False, filename=None): """ Update Package from ``scanned_package`` instance if `visit_level` is greater than the mining level of the existing package. @@ -348,10 +348,13 @@ def merge_or_create_package(scanned_package, visit_level, override=False): existing_related_package = existing_related_packages.first() package_content = scanned_package.extra_data.get("package_content") + if not filename: + filename = fileutils.file_name(package_uri) + package_data = dict( # FIXME: we should get the file_name in the # PackageData object instead. - filename=fileutils.file_name(package_uri), + filename=filename, # TODO: update the PackageDB model release_date=scanned_package.release_date, mining_level=mining_level, From b0f1a22b657186c82ff35bfe632c98cc48a49d25 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Mon, 14 Apr 2025 18:52:07 +0800 Subject: [PATCH 03/19] #596 - Added the following data collection for golang ``` pkg:golang/github.com/* pkg:golang/gitlab.com/* pkg:golang/bitbucket.org/* ``` Signed-off-by: Chin Yeung Li --- minecode/collectors/bitbucket.py | 49 +++++++++++ minecode/collectors/generic.py | 5 +- minecode/collectors/github.py | 34 ++++++++ minecode/collectors/gitlab.py | 44 ++++++++++ minecode/collectors/golang.py | 143 ++++++++++++++++--------------- minecode/miners/bitbucket.py | 52 ++++++++++- minecode/miners/gitlab.py | 4 +- minecode/miners/golang.py | 1 - 8 files changed, 253 insertions(+), 79 deletions(-) create mode 100644 minecode/collectors/bitbucket.py create mode 100644 minecode/collectors/gitlab.py diff --git a/minecode/collectors/bitbucket.py b/minecode/collectors/bitbucket.py new file mode 100644 index 00000000..420299d1 --- /dev/null +++ b/minecode/collectors/bitbucket.py @@ -0,0 +1,49 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging + +import requests + + +""" +Collect bitbucket packages from bitbucket registries. +""" + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +def bitbucket_get_all_package_version_author(subset_path): + """ + Return a list of all version numbers along with author for the package. + """ + repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags" + version_author_list = [] + try: + while repo_tags: + response = requests.get(repo_tags) + response.raise_for_status() + data = response.json() + if data["size"] > 0: + # Get all available versions + for item in data["values"]: + version = item["name"] + author = "" + if "tagger" in item and item["tagger"]: + if item["tagger"]["type"] == "author": + author = item["tagger"]["raw"] + version_author_list.append((version, author)) + # Handle pagination + repo_tags = data.get("next", None) + return version_author_list + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") diff --git a/minecode/collectors/generic.py b/minecode/collectors/generic.py index 032be878..6080e516 100644 --- a/minecode/collectors/generic.py +++ b/minecode/collectors/generic.py @@ -100,7 +100,7 @@ def packagedata_from_dict(package_data): return PackageData.from_data(cleaned_package_data) -def map_fetchcode_supported_package(package_url, pipelines, priority=0): +def map_fetchcode_supported_package(package_url, pipelines, priority=0, from_go_lang=False): """ Add a `package_url` supported by fetchcode to the PackageDB. @@ -116,6 +116,9 @@ def map_fetchcode_supported_package(package_url, pipelines, priority=0): logger.error(error) return error + if from_go_lang: + packages[0].type = "golang" + packages[0].namespace = "github.com/" + packages[0].namespace package_data = packages[0].to_dict() # Remove obsolete Package fields see https://github.com/aboutcode-org/fetchcode/issues/108 diff --git a/minecode/collectors/github.py b/minecode/collectors/github.py index fbf6337e..63ec3b63 100644 --- a/minecode/collectors/github.py +++ b/minecode/collectors/github.py @@ -13,6 +13,40 @@ from minecode.collectors.generic import map_fetchcode_supported_package +def github_get_all_versions(subset_path): + """ + Fetch all versions (tags) from a GitHub repository using the API + Returns a list of all version tags in the repository + """ + import requests + + url = f"https://api.github.com/repos/{subset_path}/tags" + version_list = [] + page = 1 + + while True: + response = requests.get( + url, + params={"page": page, "per_page": 100}, # Max 100 per page + headers={"Accept": "application/vnd.github.v3+json"}, + ) + response.raise_for_status() + + data = response.json() + if not data: + break + + for tag in data: + version_list.append(tag["name"]) + page += 1 + + # Check if we've reached the last page + if "next" not in response.links: + break + + return version_list + + # Indexing GitHub PURLs requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route("pkg:github/.*") diff --git a/minecode/collectors/gitlab.py b/minecode/collectors/gitlab.py new file mode 100644 index 00000000..a57ca1bd --- /dev/null +++ b/minecode/collectors/gitlab.py @@ -0,0 +1,44 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging + +import requests + + +""" +Collect gitlab packages from gitlab registries. +""" + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +def gitlab_get_all_package_version_author(subset_path): + """ + Return a list of all version numbers along with author and author email + for the package. + """ + repo_tags = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/tags" + try: + response = requests.get(repo_tags) + response.raise_for_status() + data = response.json() + version_author_list = [] + # Get all available versions + for item in data: + version = item["name"] + author = item["commit"]["author_name"] + author_email = item["commit"]["author_email"] + version_author_list.append((version, author, author_email)) + return version_author_list + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index f46bd095..6560a112 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -14,6 +14,10 @@ from minecode import priority_router from minecode.collectors.generic import map_fetchcode_supported_package +from minecode.collectors.gitlab import gitlab_get_all_package_version_author +from minecode.collectors.github import github_get_all_versions +from minecode.collectors.bitbucket import bitbucket_get_all_package_version_author + from minecode.miners.gitlab import build_packages_from_json_golang from minecode.miners.bitbucket import build_bitbucket_packages @@ -23,7 +27,7 @@ logger.setLevel(logging.INFO) -def extract_golang__subset_purl(purl_str): +def extract_golang_subset_purl(purl_str): """ Extract the first two swgments after github.com or bitbucket.org and version For instance, pkg:golang/github.com/rickar/cal/v2/aa@2.1.23 @@ -39,7 +43,7 @@ def extract_golang__subset_purl(purl_str): version = "" if "@" in purl_str: version = purl_str.rpartition("@")[2] - subset_path = parts[1] + "/" + parts[2] + subset_path = parts[1] + "/" + parts[2].partition("@")[0] return subset_path, version @@ -75,52 +79,6 @@ def get_package_json(subset_path, type): logger.error(f"HTTP error occurred: {err}") -def gitlab_get_all_package_version_author(subset_path): - """ - Return a list of all version numbers along with author and author email - for the package. - """ - repo_tags = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/tags" - try: - response = requests.get(repo_tags) - response.raise_for_status() - data = response.json() - version_author_list = [] - # Get all available versions - for item in data: - version = item["name"] - author = item["commit"]["author_name"] - author_email = item["commit"]["author_email"] - version_author_list.append((version, author, author_email)) - return version_author_list - except requests.exceptions.HTTPError as err: - logger.error(f"HTTP error occurred: {err}") - - -def bitbucket_get_all_package_version_author(subset_path): - """ - Return a list of all version numbers along with author for the package. - """ - repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags" - try: - response = requests.get(repo_tags) - response.raise_for_status() - data = response.json() - version_author_list = [] - if data["size"] > 0: - # Get all available versions - for item in data["values"]: - version = item["name"] - print(version) - author = "" - if item["tagger"]["type"] == "author": - author = item["tagger"]["raw"] - version_author_list.append((version, author)) - return version_author_list - except requests.exceptions.HTTPError as err: - logger.error(f"HTTP error occurred: {err}") - - def map_golang_package(package_url, package_json, pipelines, priority=0, filename=None): """ Add a golang `package_url` to the PackageDB. @@ -176,6 +134,7 @@ def process_download_metadata(download_url, package_json): return package_json, filename + # It may need indexing GitHub PURLs that requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route("pkg:golang/.*") @@ -194,16 +153,33 @@ def process_requests(purl_str, **kwargs): priority = kwargs.get("priority", 0) try: - # FIXME: This is not working for some reasons. - # It'll work if I input the same github_purl in the UI if purl_str.startswith("pkg:golang/github"): - subset_path, version = extract_golang__subset_purl(purl_str) - # Construct the GitHub purl - github_purl = f"pkg:github/{subset_path}@{version}" - package_url = PackageURL.from_string(github_purl) - error_msg = map_fetchcode_supported_package(package_url, pipelines, priority) - if error_msg: - return error_msg + subset_path, version = extract_golang_subset_purl(purl_str) + if version: + # Construct the GitHub purl + github_purl = f"pkg:github/{subset_path}@{version}" + package_url = PackageURL.from_string(github_purl) + error_msg = map_fetchcode_supported_package( + package_url, pipelines, priority, from_go_lang=True + ) + if error_msg: + return error_msg + else: + version_list = github_get_all_versions(subset_path) + for v in version_list: + # Construct the GitHub purl + # Strip the 'version' or 'v' from the collected version + if v.startswith("version"): + v = v.partition("version")[2] + elif v.startswith("v"): + v = v[1:] + github_purl = f"pkg:github/{subset_path}@{v}" + package_url = PackageURL.from_string(github_purl) + error_msg = map_fetchcode_supported_package( + package_url, pipelines, priority, from_go_lang=True + ) + if error_msg: + return error_msg elif purl_str.startswith("pkg:golang/gitlab"): package_url = PackageURL.from_string(purl_str) subset_path, version = gitlab_updated_purl(purl_str) @@ -218,13 +194,28 @@ def process_requests(purl_str, **kwargs): # character 'v' in the repo_version if not version or version in {repo_version, repo_version[1:]}: download_url = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip?sha={repo_version}" - updated_json, filename = process_download_metadata(download_url, package_json) + updated_json, filename = process_download_metadata( + download_url, package_json + ) updated_json["author"] = author updated_json["email"] = email - error_msg = map_golang_package( - package_url, updated_json, pipelines, priority, filename=filename - ) - if version: + if not version: + if repo_version.startswith("v"): + updated_purl_str = ( + PackageURL.to_string(package_url) + "@" + repo_version[1:] + ) + else: + updated_purl_str = ( + PackageURL.to_string(package_url) + "@" + repo_version + ) + updated_purl = PackageURL.from_string(updated_purl_str) + error_msg = map_golang_package( + updated_purl, updated_json, pipelines, priority, filename=filename + ) + else: + error_msg = map_golang_package( + package_url, updated_json, pipelines, priority, filename=filename + ) break else: # The repo does not have any tag (i.e. it only has one version) @@ -233,35 +224,45 @@ def process_requests(purl_str, **kwargs): ) updated_json, filename = process_download_metadata(download_url, package_json) error_msg = map_golang_package( - package_url, package_json, pipelines, priority, filename=filename + package_url, updated_json, pipelines, priority, filename=filename ) elif purl_str.startswith("pkg:golang/bitbucket"): package_url = PackageURL.from_string(purl_str) - subset_path, version = extract_golang__subset_purl(purl_str) + subset_path, version = extract_golang_subset_purl(purl_str) package_json = get_package_json(subset_path, "bitbucket") if not package_json: error = f"package not found: {purl_str}" return error repo_version_author_list = bitbucket_get_all_package_version_author(subset_path) + package_json["repo_workspace_name"] = subset_path if repo_version_author_list: + found_match = False for repo_version, author in repo_version_author_list: # Check the version along with stripping the first # character 'v' in the repo_version if not version or version in {repo_version, repo_version[1:]}: + found_match = True download_url = f"https://bitbucket.org/{subset_path}/get/{repo_version}.zip" - updated_json, filename = process_download_metadata(download_url, package_json) + updated_json, filename = process_download_metadata( + download_url, package_json + ) updated_json["author"] = author - if repo_version.startswith("v"): - collected_version = repo_version[1:] - else: - collected_version = repo_version - updated_json["version"] = collected_version + if not version: + if repo_version.startswith("v"): + collected_version = repo_version[1:] + else: + collected_version = repo_version + updated_purl_str = purl_str + "@" + collected_version + package_url = PackageURL.from_string(updated_purl_str) error_msg = map_golang_package( package_url, updated_json, pipelines, priority, filename=filename ) if version: break + if not found_match: + error_msg = f"The package version not found: {version}" + return error_msg else: # The repo does not have any tag (i.e. it only has one version) # Get the main branch name for the download url @@ -270,7 +271,7 @@ def process_requests(purl_str, **kwargs): updated_json, filename = process_download_metadata(download_url, package_json) error_msg = map_golang_package( - package_url, package_json, pipelines, priority, filename=filename + package_url, updated_json, pipelines, priority, filename=filename ) except ValueError as e: diff --git a/minecode/miners/bitbucket.py b/minecode/miners/bitbucket.py index 7413deb9..e69ad327 100644 --- a/minecode/miners/bitbucket.py +++ b/minecode/miners/bitbucket.py @@ -4,6 +4,7 @@ import json import logging +import requests from packagedcode import models as scan_models from packageurl import PackageURL @@ -302,6 +303,45 @@ def build_bitbucket_repo_package(repo_data, purl): return package +def get_bitbucket_license_info(repo_path): + """ + Fetch license information from a Bitbucket repository. + Returns the detected license text based on the common license filenames + """ + + # Bitbucket API endpoint for repository sources (where license file typically is) + url = f"https://api.bitbucket.org/2.0/repositories/{repo_path}/src" + + try: + while url: + response = requests.get(url) + response.raise_for_status() + + # Check common license file names + common_license_file_name = [ + "LICENSE", + "LICENSE.MD", + "LICENSE.TXT", + "COPYING", + "COPYING.TXT", + ] + data = response.json() + # Search for license files in the root directory + for item in data["values"]: + if item["path"].upper() in common_license_file_name: + # Found a license file - fetch its content + license_url = f"https://api.bitbucket.org/2.0/repositories/{repo_path}/src/HEAD/{item['path']}" + license_response = requests.get(license_url) + license_response.raise_for_status() + return license_response.text + # Handle pagination + url = data.get("next", None) + return None # No license file found + except requests.exceptions.RequestException as e: + print(f"Error fetching license info: {e}") + return None + + def build_bitbucket_packages(metadata_dict, purl): """ Yield ScannedPackage built from Bitbucket. @@ -311,17 +351,23 @@ def build_bitbucket_packages(metadata_dict, purl): name = metadata_dict["name"] description = metadata_dict["description"] homepage_url = metadata_dict["links"]["html"]["href"] - version = metadata_dict["version"] size = metadata_dict["size"] primary_language = metadata_dict["language"] + if "repo_workspace_name" in metadata_dict: + repo_path = metadata_dict["repo_workspace_name"] + else: + repo_path = "" + license_text = get_bitbucket_license_info(repo_path) + extracted_license_statement = [license_text] + common_data = dict( name=name, - version=version, description=description, homepage_url=homepage_url, size=size, primary_language=primary_language, + extracted_license_statement=extracted_license_statement, ) download_data = dict( @@ -330,8 +376,6 @@ def build_bitbucket_packages(metadata_dict, purl): ) common_data.update(download_data) - print("COMMON DICT") - print(common_data) package = scan_models.PackageData.from_data(common_data) package.datasource_id = "bitbucket_api_metadata" diff --git a/minecode/miners/gitlab.py b/minecode/miners/gitlab.py index 87ef76bb..f26662df 100644 --- a/minecode/miners/gitlab.py +++ b/minecode/miners/gitlab.py @@ -8,6 +8,7 @@ # import json +import requests import packagedcode.models as scan_models from packageurl import PackageURL @@ -142,8 +143,6 @@ def build_packages_from_json_golang(content, purl=None): metadata: Json metadata content purl: String value of the package url of the ResourceURI object """ - import requests - id = content.get("id") name = content.get("name") repository_homepage_url = content.get("http_url_to_repo") @@ -172,6 +171,7 @@ def build_packages_from_json_golang(content, purl=None): repository_homepage_url=repository_homepage_url, extracted_license_statement=extracted_license_statement, download_url=content.get("download_url"), + primary_language="go", ) if repository_homepage_url: diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index 66a8726b..e54593b6 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -19,7 +19,6 @@ from minecode.miners import Mapper from minecode.miners import NonPersistentHttpVisitor from minecode.utils import form_vcs_url -from minecode.utils import parse_date class GoLangSeed(seed.Seeder): From 82cb1fb08236d217cef039146e655b3eaf30e10c Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 15 Apr 2025 16:26:01 +0800 Subject: [PATCH 04/19] Correct typo Signed-off-by: Chin Yeung Li --- minecode/collectors/golang.py | 88 +++++++++++++++++++++++- minecode/miners/golang.py | 26 +++++++ minecode/tests/collectors/test_cargo.py | 2 +- minecode/tests/collectors/test_golang.py | 88 ++++++++++++++++++++++++ 4 files changed, 202 insertions(+), 2 deletions(-) create mode 100644 minecode/tests/collectors/test_golang.py diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index 6560a112..6d02c97e 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -7,20 +7,24 @@ # See https://aboutcode.org for more information about nexB OSS projects. # import logging - import requests +from bs4 import BeautifulSoup + from packageurl import PackageURL from minecode import priority_router + from minecode.collectors.generic import map_fetchcode_supported_package from minecode.collectors.gitlab import gitlab_get_all_package_version_author from minecode.collectors.github import github_get_all_versions from minecode.collectors.bitbucket import bitbucket_get_all_package_version_author from minecode.miners.gitlab import build_packages_from_json_golang +from minecode.miners.golang import build_golang_generic_package from minecode.miners.bitbucket import build_bitbucket_packages + logger = logging.getLogger(__name__) handler = logging.StreamHandler() logger.addHandler(handler) @@ -100,9 +104,12 @@ def map_golang_package(package_url, package_json, pipelines, priority=0, filenam packages = build_packages_from_json_golang(package_json, package_url) elif purl_str.startswith("pkg:golang/bitbucket"): packages = build_bitbucket_packages(package_json, package_url) + else: + packages = build_golang_generic_package(package_json, package_url) for package in packages: db_package, _, _, error = merge_or_create_package(package, visit_level=0, filename=filename) + print(db_package) if error: break @@ -135,6 +142,57 @@ def process_download_metadata(download_url, package_json): return package_json, filename +def scrape_go_package(repo_path, version): + """ + Access the repository on pkg.go.dev and extract the project's metadata. + """ + url = f"https://pkg.go.dev/{repo_path}@v{version}" + try: + response = requests.get(url) + response.raise_for_status() + + # Parse HTML content + soup = BeautifulSoup(response.text, "html.parser") + + # Find the tag with the specific text + license_tag = soup.find("a", {"data-test-id": "UnitHeader-license"}) + license_text = license_tag.text if license_tag else "" + + # Find the tag inside the UnitMeta-repo div + repo_tag = soup.find("div", class_="UnitMeta-repo").find("a") + repo_url = repo_tag["href"] if repo_tag else "" + + download_url = f"https://proxy.golang.org/{repo_path}/@v/v{version}.zip" + + return { + "license_text": license_text, + "repository_homepage_url": repo_url, + "download_url": download_url, + } + + except requests.exceptions.RequestException as e: + return {"error": f"Request failed: {str(e)}"} + except Exception as e: + return {"error": f"An error occurred: {str(e)}"} + + +def scrape_package_versions(repo_path): + """ + Return all the version of a repo as a list that is fetched from pkg.go.dev. + """ + url = f"https://pkg.go.dev/{repo_path}?tab=versions" + response = requests.get(url) + + if response.status_code == 200: + soup = BeautifulSoup(response.text, "html.parser") + version_divs = soup.find_all("div", class_="Version-tag") + versions = [div.get_text(strip=True) for div in version_divs] + return versions + else: + print(f"Error fetching page: {response.status_code}") + return [] + + # It may need indexing GitHub PURLs that requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route("pkg:golang/.*") @@ -153,6 +211,10 @@ def process_requests(purl_str, **kwargs): priority = kwargs.get("priority", 0) try: + """ + We retrieve metadata from APIs for GitHub, GitLab, and Bitbucket. + For the other cases, we will scrape data from pkg.go.dev + """ if purl_str.startswith("pkg:golang/github"): subset_path, version = extract_golang_subset_purl(purl_str) if version: @@ -273,6 +335,30 @@ def process_requests(purl_str, **kwargs): error_msg = map_golang_package( package_url, updated_json, pipelines, priority, filename=filename ) + else: + subset_path = "" + version = "" + subset_path = purl_str.partition("pkg:golang/")[2].partition("@")[0] + if "@" in purl_str: + version = purl_str.rpartition("@")[2] + if not version: + version_list = scrape_package_versions(subset_path) + for ver in version_list: + if ver.startswith("version"): + ver = ver.partition("version")[2] + elif ver.startswith("v"): + ver = ver[1:] + updated_purl_str = purl_str + "@" + ver + package_url = PackageURL.from_string(updated_purl_str) + package_json = scrape_go_package(subset_path, ver) + error_msg = map_golang_package(package_url, package_json, pipelines, priority) + else: + print("HAVE VERSION") + print(subset_path) + print(version) + package_url = PackageURL.from_string(purl_str) + package_json = scrape_go_package(subset_path, version) + error_msg = map_golang_package(package_url, package_json, pipelines, priority) except ValueError as e: error = f"error occurred when parsing {purl_str}: {e}" diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index e54593b6..b512d28a 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -237,3 +237,29 @@ def build_golang_package(package_data, purl): vcs_url=vcs_url, ) return package + + +def build_golang_generic_package(package_data, package_url): + """Return a single Golang package""" + homepage_url = "/".join(["https:/", package_url.namespace, package_url.name]) + license_text = package_data.get("license_text") + extracted_license_statement = [license_text] + print("NAME") + print(package_url.name) + print(package_url.namespace) + print(package_url.type) + + common_data = dict( + name=package_url.name, + namespace=package_url.namespace, + type=package_url.type, + primary_language="go", + repository_homepage_url=package_data.get("repository_homepage_url"), + homepage_url=homepage_url, + extracted_license_statement=extracted_license_statement, + download_url=package_data.get("download_url"), + ) + + package = scan_models.PackageData.from_data(common_data) + package.set_purl(package_url) + yield package diff --git a/minecode/tests/collectors/test_cargo.py b/minecode/tests/collectors/test_cargo.py index 63ccb3aa..ebaab2fb 100644 --- a/minecode/tests/collectors/test_cargo.py +++ b/minecode/tests/collectors/test_cargo.py @@ -41,7 +41,7 @@ def test_get_package_json(self): self.assertListEqual(list(keys), expected_list) self.assertEqual(json_contents["crate"]["id"], "sam") - def test_map_npm_package(self): + def test_map_cargo_package(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) package_url = PackageURL.from_string("pkg:cargo/sam@0.3.1") diff --git a/minecode/tests/collectors/test_golang.py b/minecode/tests/collectors/test_golang.py new file mode 100644 index 00000000..7d2c439e --- /dev/null +++ b/minecode/tests/collectors/test_golang.py @@ -0,0 +1,88 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import json +import os + +from django.test import TestCase as DjangoTestCase + +from packageurl import PackageURL + +import packagedb +from minecode.collectors import golang +from minecode.utils_test import JsonBasedTesting + + +class GoLangPriorityQueueTests(JsonBasedTesting, DjangoTestCase): + test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") + + def setUp(self): + super().setUp() + self.expected_json_loc = self.get_test_loc("golang/glog.json") + with open(self.expected_json_loc) as f: + self.expected_json_contents = json.load(f) + + def test_extract_golang_subset_purl(self): + test1 = "pkg:golang/rickar/cal@2.1.23" + test2 = "pkg:golang/rickar/cal" + expected_path1 = "rickar/cal" + expected_version1 = "2.1.23" + + result_path1, result_version1 = golang.extract_golang_subset_purl(test1) + result_path2, result_version2 = golang.extract_golang_subset_purl(test2) + + self.assertEqual(expected_path1, result_path1) + self.assertEqual(result_version1, expected_version1) + + self.assertEqual(expected_path1, result_path2) + self.assertEqual(result_version2, "") + + def test_gitlab_updated_purl(self): + test1 = "pkg:golang/gitlab.com/gitlab-org/api/client-go@0.127.0" + test2 = "pkg:golang/gitlab.com/gitlab-org/api/client-go" + expected_path1 = "gitlab-org%2Fapi%2Fclient-go" + expected_version1 = "0.127.0" + + result_path1, result_version1 = golang.extract_golang_subset_purl(test1) + result_path2, result_version2 = golang.extract_golang_subset_purl(test2) + + self.assertEqual(expected_path1, result_path1) + self.assertEqual(result_version1, expected_version1) + + self.assertEqual(expected_path1, result_path2) + self.assertEqual(result_version2, "") + + def test_get_package_json_gitlab(self): + json_contents = golang.get_package_json.get_package_json("xx_network%2Fprimitives", "gitlab") + expected_id = "20321795" + expected_name = "primitives" + + self.assertEqual(json_contents.get("id"), expected_id) + self.assertEqual(json_contents.get("name"), expected_name) + + def test_get_package_json_bitbucket(self): + json_contents = golang.get_package_json.get_package_json("lebronto_kerovol/gwerror", "bitbucket") + expected_full_name = "lebronto_kerovol/gwerror" + expected_name = "gwerror" + + self.assertEqual(json_contents.get("full_name"), expected_full_name) + self.assertEqual(json_contents.get("name"), expected_name) + + def test_map_go_package(self): + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(0, package_count) + package_url = PackageURL.from_string("pkg:cargo/sam@0.3.1") + cargo.map_cargo_package(package_url, ("test_pipeline")) + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(1, package_count) + package = packagedb.models.Package.objects.all().first() + expected_purl_str = "pkg:cargo/sam@0.3.1" + expected_download_url = "https://static.crates.io/crates/sam/sam-0.3.1.crate" + self.assertEqual(expected_purl_str, package.purl) + self.assertEqual(expected_download_url, package.download_url) From 18f94132c8cdd2fc543c5486f15d4dc61110b1b3 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 15 Apr 2025 18:52:39 +0800 Subject: [PATCH 05/19] #596 - Add on-demand package data collection for golang * Collect metadata from API for the following "namespace" ``` pkg:golang/github.com/* pkg:golang/gitlab.com/* pkg:golang/bitbucket.org/* ``` * Add tests * Add "golang" in the "supported_ecosystems" list in the api.py Signed-off-by: Chin Yeung Li --- minecode/collectors/bitbucket.py | 7 +- minecode/collectors/golang.py | 7 +- minecode/miners/bitbucket.py | 2 +- minecode/miners/golang.py | 4 - minecode/tests/collectors/test_bitbucket.py | 41 ++++++ minecode/tests/collectors/test_github.py | 38 +++++ minecode/tests/collectors/test_gitlab.py | 33 +++++ minecode/tests/collectors/test_golang.py | 88 +++++++++--- .../testfiles/golang/client-go_0.127.0.json | 41 ++++++ .../testfiles/golang/gg-core_0.3.64.json | 130 ++++++++++++++++++ packagedb/api.py | 1 + 11 files changed, 361 insertions(+), 31 deletions(-) create mode 100644 minecode/tests/collectors/test_bitbucket.py create mode 100644 minecode/tests/collectors/test_github.py create mode 100644 minecode/tests/collectors/test_gitlab.py create mode 100644 minecode/tests/testfiles/golang/client-go_0.127.0.json create mode 100644 minecode/tests/testfiles/golang/gg-core_0.3.64.json diff --git a/minecode/collectors/bitbucket.py b/minecode/collectors/bitbucket.py index 420299d1..81b60126 100644 --- a/minecode/collectors/bitbucket.py +++ b/minecode/collectors/bitbucket.py @@ -38,9 +38,10 @@ def bitbucket_get_all_package_version_author(subset_path): for item in data["values"]: version = item["name"] author = "" - if "tagger" in item and item["tagger"]: - if item["tagger"]["type"] == "author": - author = item["tagger"]["raw"] + if "target" in item and item["target"]: + if "author" in item["target"] and item["target"]["author"]: + if item["target"]["author"]["type"] == "author": + author = item["target"]["author"]["user"]["display_name"] version_author_list.append((version, author)) # Handle pagination repo_tags = data.get("next", None) diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index 6d02c97e..6567cde9 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -24,6 +24,7 @@ from minecode.miners.golang import build_golang_generic_package from minecode.miners.bitbucket import build_bitbucket_packages +from packagedb.models import PackageContentType logger = logging.getLogger(__name__) handler = logging.StreamHandler() @@ -108,8 +109,8 @@ def map_golang_package(package_url, package_json, pipelines, priority=0, filenam packages = build_golang_generic_package(package_json, package_url) for package in packages: + package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE db_package, _, _, error = merge_or_create_package(package, visit_level=0, filename=filename) - print(db_package) if error: break @@ -316,7 +317,6 @@ def process_requests(purl_str, **kwargs): collected_version = repo_version updated_purl_str = purl_str + "@" + collected_version package_url = PackageURL.from_string(updated_purl_str) - error_msg = map_golang_package( package_url, updated_json, pipelines, priority, filename=filename ) @@ -353,9 +353,6 @@ def process_requests(purl_str, **kwargs): package_json = scrape_go_package(subset_path, ver) error_msg = map_golang_package(package_url, package_json, pipelines, priority) else: - print("HAVE VERSION") - print(subset_path) - print(version) package_url = PackageURL.from_string(purl_str) package_json = scrape_go_package(subset_path, version) error_msg = map_golang_package(package_url, package_json, pipelines, priority) diff --git a/minecode/miners/bitbucket.py b/minecode/miners/bitbucket.py index e69ad327..3c6f47aa 100644 --- a/minecode/miners/bitbucket.py +++ b/minecode/miners/bitbucket.py @@ -338,7 +338,7 @@ def get_bitbucket_license_info(repo_path): url = data.get("next", None) return None # No license file found except requests.exceptions.RequestException as e: - print(f"Error fetching license info: {e}") + logger.error(f"Error fetching license info: {e}") return None diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index b512d28a..225581e5 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -244,10 +244,6 @@ def build_golang_generic_package(package_data, package_url): homepage_url = "/".join(["https:/", package_url.namespace, package_url.name]) license_text = package_data.get("license_text") extracted_license_statement = [license_text] - print("NAME") - print(package_url.name) - print(package_url.namespace) - print(package_url.type) common_data = dict( name=package_url.name, diff --git a/minecode/tests/collectors/test_bitbucket.py b/minecode/tests/collectors/test_bitbucket.py new file mode 100644 index 00000000..304af4ee --- /dev/null +++ b/minecode/tests/collectors/test_bitbucket.py @@ -0,0 +1,41 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from django.test import TestCase as DjangoTestCase + +from minecode.collectors import bitbucket +from minecode.utils_test import JsonBasedTesting + + +class BitbucketPriorityQueueTests(JsonBasedTesting, DjangoTestCase): + test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") + + def test_bitbucket_get_all_package_version_author(self): + repo_path = "creachadair/stringset" + version_author_list = bitbucket.bitbucket_get_all_package_version_author(repo_path) + expected = [ + ("v0.0.1", "M. J. Fromberger"), + ("v0.0.10", "M. J. Fromberger"), + ("v0.0.11", "M. J. Fromberger"), + ("v0.0.12", "M. J. Fromberger"), + ("v0.0.13", "M. J. Fromberger"), + ("v0.0.14", "M. J. Fromberger"), + ("v0.0.2", "M. J. Fromberger"), + ("v0.0.3", "M. J. Fromberger"), + ("v0.0.4", "M. J. Fromberger"), + ("v0.0.5", "M. J. Fromberger"), + ("v0.0.6", "M. J. Fromberger"), + ("v0.0.7", "M. J. Fromberger"), + ("v0.0.8", "M. J. Fromberger"), + ("v0.0.9", "M. J. Fromberger"), + ] + for item in version_author_list: + self.assertIn(item, expected) diff --git a/minecode/tests/collectors/test_github.py b/minecode/tests/collectors/test_github.py new file mode 100644 index 00000000..eb5d4525 --- /dev/null +++ b/minecode/tests/collectors/test_github.py @@ -0,0 +1,38 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from django.test import TestCase as DjangoTestCase + +from minecode.collectors import github +from minecode.utils_test import JsonBasedTesting + + +class GithubPriorityQueueTests(JsonBasedTesting, DjangoTestCase): + test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") + + def test_github_get_all_versions(self): + repo_path = "aboutcode-org/purldb" + versions = github.github_get_all_versions(repo_path) + expected = [ + "v6.0.0", + "v5.0.1", + "v5.0.0", + "v3.0.0", + "v2.0.0", + "purldb-toolkit-v0.1.0", + "purl2vcs-v2.0.0", + "purl2vcs-v1.0.2", + "pre-scan-queue-update", + "matchcode-toolkit-v3.0.0", + "matchcode-toolkit-v1.1.1", + ] + for item in versions: + self.assertIn(item, expected) diff --git a/minecode/tests/collectors/test_gitlab.py b/minecode/tests/collectors/test_gitlab.py new file mode 100644 index 00000000..5bfedbd5 --- /dev/null +++ b/minecode/tests/collectors/test_gitlab.py @@ -0,0 +1,33 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from django.test import TestCase as DjangoTestCase + +from minecode.collectors import gitlab +from minecode.utils_test import JsonBasedTesting + + +class GitlabPriorityQueueTests(JsonBasedTesting, DjangoTestCase): + test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") + + def test_gitlab_get_all_package_version_author(self): + repo_path = "xx_network%2Fprimitives" + version_author_list = gitlab.gitlab_get_all_package_version_author(repo_path) + expected = [ + ("v0.0.5", "Richard T. Carback III", "rick.carback@gmail.com"), + ("v0.0.4", "Richard T. Carback III", "rick.carback@gmail.com"), + ("v0.0.3", "Benjamin Wenger", "ben@privategrity.com"), + ("v0.0.2", "Richard T. Carback III", "rick.carback@gmail.com"), + ("v0.0.1", "Jonathan Wenger", "jono@elixxir.io"), + ("v0.0.0", "Sydney Anne Erickson", "sydney@elixxir.io"), + ] + for item in version_author_list: + self.assertIn(item, expected) diff --git a/minecode/tests/collectors/test_golang.py b/minecode/tests/collectors/test_golang.py index 7d2c439e..068dda53 100644 --- a/minecode/tests/collectors/test_golang.py +++ b/minecode/tests/collectors/test_golang.py @@ -22,15 +22,9 @@ class GoLangPriorityQueueTests(JsonBasedTesting, DjangoTestCase): test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") - def setUp(self): - super().setUp() - self.expected_json_loc = self.get_test_loc("golang/glog.json") - with open(self.expected_json_loc) as f: - self.expected_json_contents = json.load(f) - def test_extract_golang_subset_purl(self): - test1 = "pkg:golang/rickar/cal@2.1.23" - test2 = "pkg:golang/rickar/cal" + test1 = "pkg:golang/github.com/rickar/cal/v2/aa@2.1.23" + test2 = "pkg:golang/github.com/rickar/cal/v2/aa" expected_path1 = "rickar/cal" expected_version1 = "2.1.23" @@ -49,8 +43,8 @@ def test_gitlab_updated_purl(self): expected_path1 = "gitlab-org%2Fapi%2Fclient-go" expected_version1 = "0.127.0" - result_path1, result_version1 = golang.extract_golang_subset_purl(test1) - result_path2, result_version2 = golang.extract_golang_subset_purl(test2) + result_path1, result_version1 = golang.gitlab_updated_purl(test1) + result_path2, result_version2 = golang.gitlab_updated_purl(test2) self.assertEqual(expected_path1, result_path1) self.assertEqual(result_version1, expected_version1) @@ -59,30 +53,88 @@ def test_gitlab_updated_purl(self): self.assertEqual(result_version2, "") def test_get_package_json_gitlab(self): - json_contents = golang.get_package_json.get_package_json("xx_network%2Fprimitives", "gitlab") - expected_id = "20321795" + json_contents = golang.get_package_json("xx_network%2Fprimitives", "gitlab") + expected_id = 20321795 expected_name = "primitives" self.assertEqual(json_contents.get("id"), expected_id) self.assertEqual(json_contents.get("name"), expected_name) def test_get_package_json_bitbucket(self): - json_contents = golang.get_package_json.get_package_json("lebronto_kerovol/gwerror", "bitbucket") + json_contents = golang.get_package_json("lebronto_kerovol/gwerror", "bitbucket") expected_full_name = "lebronto_kerovol/gwerror" expected_name = "gwerror" self.assertEqual(json_contents.get("full_name"), expected_full_name) self.assertEqual(json_contents.get("name"), expected_name) - def test_map_go_package(self): + def test_map_go_package_gitlab(self): + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(0, package_count) + package_url = PackageURL.from_string( + "pkg:golang/gitlab.com/gitlab-org/api/client-go@0.127.0" + ) + + with open(self.get_test_loc("golang/client-go_0.127.0.json")) as file: + package_json = json.load(file) + golang.map_golang_package(package_url, package_json, ("test_pipeline")) + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(1, package_count) + package = packagedb.models.Package.objects.all().first() + expected_purl_str = "pkg:golang/gitlab.com/gitlab-org/api/client-go@0.127.0" + expected_download_url = "https://gitlab.com/api/v4/projects/gitlab-org%2Fapi%2Fclient-go/repository/archive.zip?sha=v0.127.0" + self.assertEqual(expected_purl_str, package.purl) + self.assertEqual(expected_download_url, package.download_url) + + def test_map_go_package_bitbucket(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) - package_url = PackageURL.from_string("pkg:cargo/sam@0.3.1") - cargo.map_cargo_package(package_url, ("test_pipeline")) + package_url = PackageURL.from_string("pkg:golang/bitbucket.org/digi-sense/gg-core@0.3.64") + + with open(self.get_test_loc("golang/gg-core_0.3.64.json")) as file: + package_json = json.load(file) + golang.map_golang_package(package_url, package_json, ("test_pipeline")) + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(1, package_count) + package = packagedb.models.Package.objects.all().first() + expected_purl_str = "pkg:golang/bitbucket.org/digi-sense/gg-core@0.3.64" + expected_download_url = "https://bitbucket.org/digi-sense/gg-core/get/v0.3.64.zip" + self.assertEqual(expected_purl_str, package.purl) + self.assertEqual(expected_download_url, package.download_url) + + def test_map_go_package_others(self): + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(0, package_count) + package_url = PackageURL.from_string("pkg:golang/golang.org/x/oauth2@0.29.0") + + package_json = golang.scrape_go_package("golang.org/x/oauth2", "0.29.0") + golang.map_golang_package(package_url, package_json, ("test_pipeline")) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() - expected_purl_str = "pkg:cargo/sam@0.3.1" - expected_download_url = "https://static.crates.io/crates/sam/sam-0.3.1.crate" + expected_purl_str = "pkg:golang/golang.org/x/oauth2@0.29.0" + expected_download_url = "https://proxy.golang.org/golang.org/x/oauth2/@v/v0.29.0.zip" self.assertEqual(expected_purl_str, package.purl) self.assertEqual(expected_download_url, package.download_url) + + def test_process_download_metadata(self): + url = "https://bitbucket.org/digi-sense/gg-core/get/v0.3.64.zip" + _package_json, filename = golang.process_download_metadata(url, {}) + exprected_filename = "digi-sense-gg-core-9d3dfdc43161.zip" + self.assertEqual(exprected_filename, filename) + + def test_scrape_go_package(self): + metadata_dict = golang.scrape_go_package("golang.org/x/oauth2", "0.29.0") + expected_dict = { + "license_text": "BSD-3-Clause", + "repository_homepage_url": "https://cs.opensource.google/go/x/oauth2", + "download_url": "https://proxy.golang.org/golang.org/x/oauth2/@v/v0.29.0.zip", + } + self.assertEqual(expected_dict, metadata_dict) + + def test_scrape_package_versions(self): + versions = golang.scrape_package_versions("golang.org/x/oauth2") + # The version list may expand overtime, as of writing the test, + # there are 29 releases + expected_version_len = 29 + self.assertGreaterEqual(len(versions), expected_version_len) diff --git a/minecode/tests/testfiles/golang/client-go_0.127.0.json b/minecode/tests/testfiles/golang/client-go_0.127.0.json new file mode 100644 index 00000000..bf4d2f99 --- /dev/null +++ b/minecode/tests/testfiles/golang/client-go_0.127.0.json @@ -0,0 +1,41 @@ +{ + "id": 65271576, + "description": "GitLab Go SDK\r\n\r\nThis project has been migrated from `github.com/xanzy/go-gitlab`.", + "name": "client-go", + "name_with_namespace": "GitLab.org / API / client-go", + "path": "client-go", + "path_with_namespace": "gitlab-org/api/client-go", + "created_at": "2024-12-10T14:14:37.357Z", + "default_branch": "main", + "tag_list": [ + "gitlab", + "go", + "sdk" + ], + "topics": [ + "gitlab", + "go", + "sdk" + ], + "ssh_url_to_repo": "git@gitlab.com:gitlab-org/api/client-go.git", + "http_url_to_repo": "https://gitlab.com/gitlab-org/api/client-go.git", + "web_url": "https://gitlab.com/gitlab-org/api/client-go", + "readme_url": "https://gitlab.com/gitlab-org/api/client-go/-/blob/main/README.md", + "forks_count": 36, + "avatar_url": null, + "star_count": 54, + "last_activity_at": "2025-04-15T04:41:12.062Z", + "namespace": { + "id": 98461187, + "name": "API", + "path": "api", + "kind": "group", + "full_path": "gitlab-org/api", + "parent_id": 9970, + "avatar_url": null, + "web_url": "https://gitlab.com/groups/gitlab-org/api" + }, + "download_url": "https://gitlab.com/api/v4/projects/gitlab-org%2Fapi%2Fclient-go/repository/archive.zip?sha=v0.127.0", + "author": "Patrick Rice", + "email": "patrick.rice@kingland.com" + } diff --git a/minecode/tests/testfiles/golang/gg-core_0.3.64.json b/minecode/tests/testfiles/golang/gg-core_0.3.64.json new file mode 100644 index 00000000..2e5c6fef --- /dev/null +++ b/minecode/tests/testfiles/golang/gg-core_0.3.64.json @@ -0,0 +1,130 @@ +{ + "type": "repository", + "full_name": "digi-sense/gg-core", + "links": { + "self": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core" + }, + "html": { + "href": "https://bitbucket.org/digi-sense/gg-core" + }, + "avatar": { + "href": "https://bytebucket.org/ravatar/%7B1bc29c37-3104-464d-ba27-bffa8847bcd6%7D?ts=3152224" + }, + "pullrequests": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/pullrequests" + }, + "commits": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/commits" + }, + "forks": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/forks" + }, + "watchers": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/watchers" + }, + "branches": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/refs/branches" + }, + "tags": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/refs/tags" + }, + "downloads": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/downloads" + }, + "source": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/src" + }, + "clone": [ + { + "name": "https", + "href": "https://bitbucket.org/digi-sense/gg-core.git" + }, + { + "name": "ssh", + "href": "git@bitbucket.org:digi-sense/gg-core.git" + } + ], + "hooks": { + "href": "https://api.bitbucket.org/2.0/repositories/digi-sense/gg-core/hooks" + } + }, + "name": "gg-core", + "slug": "gg-core", + "description": "", + "scm": "git", + "website": null, + "owner": { + "display_name": "Digi Sense", + "links": { + "self": { + "href": "https://api.bitbucket.org/2.0/workspaces/%7B2b6cccfa-727a-4d9a-82e0-bd8a163847a0%7D" + }, + "avatar": { + "href": "https://bitbucket.org/account/digi-sense/avatar/" + }, + "html": { + "href": "https://bitbucket.org/%7B2b6cccfa-727a-4d9a-82e0-bd8a163847a0%7D/" + } + }, + "type": "team", + "uuid": "{2b6cccfa-727a-4d9a-82e0-bd8a163847a0}", + "username": "digi-sense" + }, + "workspace": { + "type": "workspace", + "uuid": "{2b6cccfa-727a-4d9a-82e0-bd8a163847a0}", + "name": "Digi Sense", + "slug": "digi-sense", + "links": { + "avatar": { + "href": "https://bitbucket.org/workspaces/digi-sense/avatar/?ts=1732269844" + }, + "html": { + "href": "https://bitbucket.org/digi-sense/" + }, + "self": { + "href": "https://api.bitbucket.org/2.0/workspaces/digi-sense" + } + } + }, + "is_private": false, + "project": { + "type": "project", + "key": "BASE", + "uuid": "{32414877-738a-4002-96a4-893a906d88c8}", + "name": "base", + "links": { + "self": { + "href": "https://api.bitbucket.org/2.0/workspaces/digi-sense/projects/BASE" + }, + "html": { + "href": "https://bitbucket.org/digi-sense/workspace/projects/BASE" + }, + "avatar": { + "href": "https://bitbucket.org/digi-sense/workspace/projects/BASE/avatar/32?ts=1632311978" + } + } + }, + "fork_policy": "allow_forks", + "created_on": "2022-07-08T14:40:46.166913+00:00", + "updated_on": "2025-04-14T16:01:39.490611+00:00", + "size": 47007088, + "language": "go", + "uuid": "{1bc29c37-3104-464d-ba27-bffa8847bcd6}", + "mainbranch": { + "name": "master", + "type": "branch" + }, + "override_settings": { + "default_merge_strategy": true, + "branching_model": true + }, + "parent": null, + "enforced_signed_commits": null, + "has_issues": false, + "has_wiki": false, + "repo_workspace_name": "digi-sense/gg-core", + "download_url": "https://bitbucket.org/digi-sense/gg-core/get/v0.3.64.zip", + "author": "" + } diff --git a/packagedb/api.py b/packagedb/api.py index 62262735..04c1e41c 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -975,6 +975,7 @@ def _reindex_package(package, reindexed_packages, **kwargs): "pypi", "cargo", "gem", + "golang", ] unique_packages, unsupported_packages, unsupported_vers = get_resolved_packages( From 905195af68a6615b8d00b8f3e7a37eebbd4ec8d3 Mon Sep 17 00:00:00 2001 From: Chin Yeung Date: Wed, 16 Apr 2025 15:04:14 +0800 Subject: [PATCH 06/19] Update minecode/collectors/golang.py Signed-off-by: Chin Yeung Li Co-authored-by: Jono Yang --- minecode/collectors/golang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index 6567cde9..6a5f9bf2 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -34,7 +34,7 @@ def extract_golang_subset_purl(purl_str): """ - Extract the first two swgments after github.com or bitbucket.org and + Extract the first two segments after github.com or bitbucket.org and version For instance, pkg:golang/github.com/rickar/cal/v2/aa@2.1.23 Return subset_path: rickar/cal From cb18cb31dbb63259bb48fede561f3dd28bd53f45 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Mon, 28 Jul 2025 16:29:16 +0800 Subject: [PATCH 07/19] change from web scrapping to fetch from deps.dev #596 Signed-off-by: Chin Yeung Li --- minecode/collectors/golang.py | 338 +++++++++--------- minecode/miners/golang.py | 23 +- minecode/tests/collectors/test_golang.py | 67 ++-- .../tests/testfiles/golang/oauth2_0.29.0.json | 21 ++ 4 files changed, 241 insertions(+), 208 deletions(-) create mode 100644 minecode/tests/testfiles/golang/oauth2_0.29.0.json diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index 6a5f9bf2..efb88cac 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -9,23 +9,21 @@ import logging import requests -from bs4 import BeautifulSoup - -from packageurl import PackageURL - from minecode import priority_router - from minecode.collectors.generic import map_fetchcode_supported_package from minecode.collectors.gitlab import gitlab_get_all_package_version_author from minecode.collectors.github import github_get_all_versions from minecode.collectors.bitbucket import bitbucket_get_all_package_version_author - from minecode.miners.gitlab import build_packages_from_json_golang from minecode.miners.golang import build_golang_generic_package from minecode.miners.bitbucket import build_bitbucket_packages +from packageurl import PackageURL + from packagedb.models import PackageContentType +from urllib.parse import quote + logger = logging.getLogger(__name__) handler = logging.StreamHandler() logger.addHandler(handler) @@ -41,7 +39,7 @@ def extract_golang_subset_purl(purl_str): version: 2.1.23 """ # Strip "pkg:golang/" - purl_body = purl_str[len("pkg:golang/") :] + purl_body = purl_str[len("pkg:golang/"):] # Extract namespace, name, and version parts = purl_body.split("/") @@ -62,11 +60,30 @@ def gitlab_updated_purl(purl_str): if "@" in purl_str: version = purl_str.rpartition("@")[2] subset = purl_str.partition("pkg:golang/gitlab.com/")[2].partition("@")[0] - subset_path = subset.replace("/", "%2F") + subset_path = quote(subset, safe="") return subset_path, version -def get_package_json(subset_path, type): +def get_package_versions(namespace_name): + """ + Return all the version of the package fetched from "api.deps.dev" in a + list + """ + version_list = [] + url = f"https://api.deps.dev/v3/systems/GO/packages/{namespace_name}" + try: + response = requests.get(url) + response.raise_for_status() + context = response.json() + versions = context["versions"] + for version in versions: + version_list.append(version["versionKey"]["version"]) + return version_list + except requests.exceptions.HTTPError as err: + logger.error(f"HTTP error occurred: {err}") + + +def get_package_json(subset_path, type=None, version=None): """ Return the contents of the JSON file of the package. """ @@ -75,10 +92,17 @@ def get_package_json(subset_path, type): url = f"https://gitlab.com/api/v4/projects/{subset_path}" elif type == "bitbucket": url = f"https://api.bitbucket.org/2.0/repositories/{subset_path}" - + else: + if version: + if version.startswith('v'): + url = f"https://api.deps.dev/v3/systems/GO/packages/{subset_path}/versions/{version}" + else: + url = f"https://api.deps.dev/v3/systems/GO/packages/{subset_path}/versions/v{version}" try: response = requests.get(url) response.raise_for_status() + print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") + print(response.json()) return response.json() except requests.exceptions.HTTPError as err: logger.error(f"HTTP error occurred: {err}") @@ -110,13 +134,15 @@ def map_golang_package(package_url, package_json, pipelines, priority=0, filenam for package in packages: package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE - db_package, _, _, error = merge_or_create_package(package, visit_level=0, filename=filename) + db_package, _, _, error = merge_or_create_package( + package, visit_level=0, filename=filename) if error: break # Submit package for scanning if db_package: - add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority) + add_package_to_scan_queue( + package=db_package, pipelines=pipelines, priority=priority) return error @@ -143,63 +169,12 @@ def process_download_metadata(download_url, package_json): return package_json, filename -def scrape_go_package(repo_path, version): - """ - Access the repository on pkg.go.dev and extract the project's metadata. - """ - url = f"https://pkg.go.dev/{repo_path}@v{version}" - try: - response = requests.get(url) - response.raise_for_status() - - # Parse HTML content - soup = BeautifulSoup(response.text, "html.parser") - - # Find the tag with the specific text - license_tag = soup.find("a", {"data-test-id": "UnitHeader-license"}) - license_text = license_tag.text if license_tag else "" - - # Find the tag inside the UnitMeta-repo div - repo_tag = soup.find("div", class_="UnitMeta-repo").find("a") - repo_url = repo_tag["href"] if repo_tag else "" - - download_url = f"https://proxy.golang.org/{repo_path}/@v/v{version}.zip" - - return { - "license_text": license_text, - "repository_homepage_url": repo_url, - "download_url": download_url, - } - - except requests.exceptions.RequestException as e: - return {"error": f"Request failed: {str(e)}"} - except Exception as e: - return {"error": f"An error occurred: {str(e)}"} - - -def scrape_package_versions(repo_path): - """ - Return all the version of a repo as a list that is fetched from pkg.go.dev. - """ - url = f"https://pkg.go.dev/{repo_path}?tab=versions" - response = requests.get(url) - - if response.status_code == 200: - soup = BeautifulSoup(response.text, "html.parser") - version_divs = soup.find_all("div", class_="Version-tag") - versions = [div.get_text(strip=True) for div in version_divs] - return versions - else: - print(f"Error fetching page: {response.status_code}") - return [] - - # It may need indexing GitHub PURLs that requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route("pkg:golang/.*") def process_requests(purl_str, **kwargs): """ - Process `priority_resource_uri` containing a GitHub Package URL (PURL). + Process `priority_resource_uri` containing a golang Package URL (PURL). This involves obtaining Package information for the PURL using https://github.com/aboutcode-org/fetchcode and using it to create a new @@ -214,8 +189,10 @@ def process_requests(purl_str, **kwargs): try: """ We retrieve metadata from APIs for GitHub, GitLab, and Bitbucket. - For the other cases, we will scrape data from pkg.go.dev + For the other cases (or failing cases), we will get the data from + deps.dev """ + processed = False if purl_str.startswith("pkg:golang/github"): subset_path, version = extract_golang_subset_purl(purl_str) if version: @@ -226,7 +203,9 @@ def process_requests(purl_str, **kwargs): package_url, pipelines, priority, from_go_lang=True ) if error_msg: - return error_msg + print(error_msg) + else: + processed = True else: version_list = github_get_all_versions(subset_path) for v in version_list: @@ -242,120 +221,151 @@ def process_requests(purl_str, **kwargs): package_url, pipelines, priority, from_go_lang=True ) if error_msg: - return error_msg + print(error_msg) + else: + processed = True elif purl_str.startswith("pkg:golang/gitlab"): package_url = PackageURL.from_string(purl_str) subset_path, version = gitlab_updated_purl(purl_str) package_json = get_package_json(subset_path, "gitlab") if not package_json: error = f"package not found: {purl_str}" - return error - repo_version_author_list = gitlab_get_all_package_version_author(subset_path) - if repo_version_author_list: - for repo_version, author, email in repo_version_author_list: - # Check the version along with stripping the first - # character 'v' in the repo_version - if not version or version in {repo_version, repo_version[1:]}: - download_url = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip?sha={repo_version}" - updated_json, filename = process_download_metadata( - download_url, package_json - ) - updated_json["author"] = author - updated_json["email"] = email - if not version: - if repo_version.startswith("v"): - updated_purl_str = ( - PackageURL.to_string(package_url) + "@" + repo_version[1:] - ) - else: - updated_purl_str = ( - PackageURL.to_string(package_url) + "@" + repo_version - ) - updated_purl = PackageURL.from_string(updated_purl_str) - error_msg = map_golang_package( - updated_purl, updated_json, pipelines, priority, filename=filename - ) - else: - error_msg = map_golang_package( - package_url, updated_json, pipelines, priority, filename=filename - ) - break else: - # The repo does not have any tag (i.e. it only has one version) - download_url = ( - f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip" - ) - updated_json, filename = process_download_metadata(download_url, package_json) - error_msg = map_golang_package( - package_url, updated_json, pipelines, priority, filename=filename - ) + repo_version_author_list = gitlab_get_all_package_version_author( + subset_path) + if repo_version_author_list: + for repo_version, author, email in repo_version_author_list: + # Check the version along with stripping the first + # character 'v' in the repo_version + if not version or version in {repo_version, repo_version[1:]}: + download_url = f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip?sha={repo_version}" + updated_json, filename = process_download_metadata( + download_url, package_json + ) + updated_json["author"] = author + updated_json["email"] = email + if not version: + if repo_version.startswith("v"): + updated_purl_str = ( + PackageURL.to_string( + package_url) + "@" + repo_version[1:]) + else: + updated_purl_str = ( + PackageURL.to_string( + package_url) + "@" + repo_version) + updated_purl = PackageURL.from_string( + updated_purl_str) + error_msg = map_golang_package( + updated_purl, updated_json, pipelines, priority, filename=filename) + if error_msg: + print(error_msg) + else: + processed = True + else: + error_msg = map_golang_package( + package_url, updated_json, pipelines, priority, filename=filename) + if error_msg: + print(error_msg) + else: + processed = True + break + else: + # The repo does not have any tag (i.e. it only has one version) + download_url = ( + f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip" + ) + updated_json, filename = process_download_metadata( + download_url, package_json) + error_msg = map_golang_package( + package_url, updated_json, pipelines, priority, filename=filename + ) + if error_msg: + print(error_msg) + else: + processed = True elif purl_str.startswith("pkg:golang/bitbucket"): package_url = PackageURL.from_string(purl_str) subset_path, version = extract_golang_subset_purl(purl_str) package_json = get_package_json(subset_path, "bitbucket") if not package_json: error = f"package not found: {purl_str}" - return error - repo_version_author_list = bitbucket_get_all_package_version_author(subset_path) - package_json["repo_workspace_name"] = subset_path - if repo_version_author_list: - found_match = False - for repo_version, author in repo_version_author_list: - # Check the version along with stripping the first - # character 'v' in the repo_version - if not version or version in {repo_version, repo_version[1:]}: - found_match = True - download_url = f"https://bitbucket.org/{subset_path}/get/{repo_version}.zip" - updated_json, filename = process_download_metadata( - download_url, package_json - ) - updated_json["author"] = author - if not version: - if repo_version.startswith("v"): - collected_version = repo_version[1:] - else: - collected_version = repo_version - updated_purl_str = purl_str + "@" + collected_version - package_url = PackageURL.from_string(updated_purl_str) - error_msg = map_golang_package( - package_url, updated_json, pipelines, priority, filename=filename - ) - if version: - break - if not found_match: - error_msg = f"The package version not found: {version}" - return error_msg else: - # The repo does not have any tag (i.e. it only has one version) - # Get the main branch name for the download url - main_branch = package_json["mainbranch"]["name"] - download_url = f"https://bitbucket.org/{subset_path}/get/{main_branch}.zip" - updated_json, filename = process_download_metadata(download_url, package_json) - - error_msg = map_golang_package( - package_url, updated_json, pipelines, priority, filename=filename - ) - else: - subset_path = "" - version = "" - subset_path = purl_str.partition("pkg:golang/")[2].partition("@")[0] - if "@" in purl_str: - version = purl_str.rpartition("@")[2] - if not version: - version_list = scrape_package_versions(subset_path) - for ver in version_list: - if ver.startswith("version"): - ver = ver.partition("version")[2] - elif ver.startswith("v"): - ver = ver[1:] - updated_purl_str = purl_str + "@" + ver - package_url = PackageURL.from_string(updated_purl_str) - package_json = scrape_go_package(subset_path, ver) - error_msg = map_golang_package(package_url, package_json, pipelines, priority) + repo_version_author_list = bitbucket_get_all_package_version_author( + subset_path) + package_json["repo_workspace_name"] = subset_path + if repo_version_author_list: + found_match = False + for repo_version, author in repo_version_author_list: + # Check the version along with stripping the first + # character 'v' in the repo_version + if not version or version in {repo_version, repo_version[1:]}: + found_match = True + download_url = f"https://bitbucket.org/{subset_path}/get/{repo_version}.zip" + updated_json, filename = process_download_metadata( + download_url, package_json + ) + updated_json["author"] = author + if not version: + if repo_version.startswith("v"): + collected_version = repo_version[1:] + else: + collected_version = repo_version + updated_purl_str = purl_str + "@" + collected_version + package_url = PackageURL.from_string( + updated_purl_str) + error_msg = map_golang_package( + package_url, updated_json, pipelines, priority, filename=filename + ) + if error_msg: + print(error_msg) + if version: + break + if not found_match: + error_msg = f"The package version not found: {version}" + else: + processed = True + else: + # The repo does not have any tag (i.e. it only has one version) + # Get the main branch name for the download url + main_branch = package_json["mainbranch"]["name"] + download_url = f"https://bitbucket.org/{subset_path}/get/{main_branch}.zip" + updated_json, filename = process_download_metadata( + download_url, package_json) + + error_msg = map_golang_package( + package_url, updated_json, pipelines, priority, filename=filename + ) + if error_msg: + print(error_msg) + else: + processed = True + if not processed: + # Handle case which no version is in the input purl + if '@' not in purl_str: + namespace_name = purl_str.partition("pkg:golang/")[2] + encoded_namespace_name = quote(namespace_name, safe="") + version_list = get_package_versions(encoded_namespace_name) + + for version in version_list: + purl_str_version = purl_str + "@" + version + package_url = PackageURL.from_string(purl_str_version) + package_json = get_package_json( + encoded_namespace_name, version=version) + error_msg = map_golang_package( + package_url, package_json, pipelines, priority) + if error_msg: + print(error_msg) else: + namespace_name = purl_str.partition( + "pkg:golang/")[2].rpartition("@")[0] + encoded_namespace_name = quote(namespace_name, safe="") package_url = PackageURL.from_string(purl_str) - package_json = scrape_go_package(subset_path, version) - error_msg = map_golang_package(package_url, package_json, pipelines, priority) + package_json = get_package_json( + encoded_namespace_name, version=package_url.version) + error_msg = map_golang_package( + package_url, package_json, pipelines, priority) + if error_msg: + print(error_msg) except ValueError as e: error = f"error occurred when parsing {purl_str}: {e}" diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index 225581e5..abd3767b 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -41,13 +41,15 @@ def get_uris(self, content): # note the addition of a * at the end of the search string... # without this the returned data are sparse - details_url = "https://api.godoc.org/search?q={path}*".format(**locals()) + details_url = "https://api.godoc.org/search?q={path}*".format( + **locals()) host = get_well_known_host(path) # If the path belongs github/bitbucket, yield a repo too if host: # keep github, bitbucket... as type: repo_type, _, _ = host.lower().partition(".") # NOQA - repo_url = "https://{namespace}/{name}".format(**package_url.to_dict()) + repo_url = "https://{namespace}/{name}".format( + **package_url.to_dict()) repo_purl = PackageURL( type=repo_type, namespace=package_url.namespace, @@ -206,7 +208,8 @@ def parse_package_path(path): path = "/".join(segments) - package_url = PackageURL(type="golang", namespace=namespace, name=name, qualifiers=qualifiers) + package_url = PackageURL( + type="golang", namespace=namespace, name=name, qualifiers=qualifiers) return package_url, path @@ -222,7 +225,8 @@ def build_golang_package(package_data, purl): """Return a single Golang package""" package_url = PackageURL.from_string(purl) vcs_url = package_url.qualifiers.get("vcs_repository") - homepage_url = "/".join(["https:/", package_url.namespace, package_url.name]) + homepage_url = "/".join(["https:/", + package_url.namespace, package_url.name]) vcs_tool = "git" if "github.com" in package_url.namespace else None if vcs_tool: vcs_url = form_vcs_url(vcs_tool, vcs_url) @@ -241,19 +245,22 @@ def build_golang_package(package_data, purl): def build_golang_generic_package(package_data, package_url): """Return a single Golang package""" - homepage_url = "/".join(["https:/", package_url.namespace, package_url.name]) - license_text = package_data.get("license_text") + homepage_url = "/".join(["https:/", + package_url.namespace, package_url.name]) + license_text = package_data.get("licenses") extracted_license_statement = [license_text] + download_url = "/".join(["https://proxy.golang.org", package_url.namespace, + package_url.name, "@v"]) + "/v" + package_url.version + ".zip" + common_data = dict( name=package_url.name, namespace=package_url.namespace, type=package_url.type, primary_language="go", - repository_homepage_url=package_data.get("repository_homepage_url"), homepage_url=homepage_url, extracted_license_statement=extracted_license_statement, - download_url=package_data.get("download_url"), + download_url=download_url, ) package = scan_models.PackageData.from_data(common_data) diff --git a/minecode/tests/collectors/test_golang.py b/minecode/tests/collectors/test_golang.py index 068dda53..30224a71 100644 --- a/minecode/tests/collectors/test_golang.py +++ b/minecode/tests/collectors/test_golang.py @@ -20,16 +20,19 @@ class GoLangPriorityQueueTests(JsonBasedTesting, DjangoTestCase): - test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") + test_data_dir = os.path.join(os.path.dirname( + os.path.dirname(__file__)), "testfiles") def test_extract_golang_subset_purl(self): - test1 = "pkg:golang/github.com/rickar/cal/v2/aa@2.1.23" - test2 = "pkg:golang/github.com/rickar/cal/v2/aa" + test1 = "pkg:golang/github.com/rickar/cal/v2@2.1.23" + test2 = "pkg:golang/github.com/rickar/cal/v2" expected_path1 = "rickar/cal" expected_version1 = "2.1.23" - result_path1, result_version1 = golang.extract_golang_subset_purl(test1) - result_path2, result_version2 = golang.extract_golang_subset_purl(test2) + result_path1, result_version1 = golang.extract_golang_subset_purl( + test1) + result_path2, result_version2 = golang.extract_golang_subset_purl( + test2) self.assertEqual(expected_path1, result_path1) self.assertEqual(result_version1, expected_version1) @@ -53,7 +56,8 @@ def test_gitlab_updated_purl(self): self.assertEqual(result_version2, "") def test_get_package_json_gitlab(self): - json_contents = golang.get_package_json("xx_network%2Fprimitives", "gitlab") + json_contents = golang.get_package_json( + "xx_network%2Fprimitives", "gitlab") expected_id = 20321795 expected_name = "primitives" @@ -61,7 +65,8 @@ def test_get_package_json_gitlab(self): self.assertEqual(json_contents.get("name"), expected_name) def test_get_package_json_bitbucket(self): - json_contents = golang.get_package_json("lebronto_kerovol/gwerror", "bitbucket") + json_contents = golang.get_package_json( + "lebronto_kerovol/gwerror", "bitbucket") expected_full_name = "lebronto_kerovol/gwerror" expected_name = "gwerror" @@ -77,7 +82,8 @@ def test_map_go_package_gitlab(self): with open(self.get_test_loc("golang/client-go_0.127.0.json")) as file: package_json = json.load(file) - golang.map_golang_package(package_url, package_json, ("test_pipeline")) + golang.map_golang_package( + package_url, package_json, ("test_pipeline")) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() @@ -89,11 +95,13 @@ def test_map_go_package_gitlab(self): def test_map_go_package_bitbucket(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) - package_url = PackageURL.from_string("pkg:golang/bitbucket.org/digi-sense/gg-core@0.3.64") + package_url = PackageURL.from_string( + "pkg:golang/bitbucket.org/digi-sense/gg-core@0.3.64") with open(self.get_test_loc("golang/gg-core_0.3.64.json")) as file: package_json = json.load(file) - golang.map_golang_package(package_url, package_json, ("test_pipeline")) + golang.map_golang_package( + package_url, package_json, ("test_pipeline")) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() @@ -105,36 +113,23 @@ def test_map_go_package_bitbucket(self): def test_map_go_package_others(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) - package_url = PackageURL.from_string("pkg:golang/golang.org/x/oauth2@0.29.0") + package_url = PackageURL.from_string( + "pkg:golang/golang.org/x/oauth2@0.29.0") - package_json = golang.scrape_go_package("golang.org/x/oauth2", "0.29.0") - golang.map_golang_package(package_url, package_json, ("test_pipeline")) - package_count = packagedb.models.Package.objects.all().count() - self.assertEqual(1, package_count) - package = packagedb.models.Package.objects.all().first() - expected_purl_str = "pkg:golang/golang.org/x/oauth2@0.29.0" - expected_download_url = "https://proxy.golang.org/golang.org/x/oauth2/@v/v0.29.0.zip" - self.assertEqual(expected_purl_str, package.purl) - self.assertEqual(expected_download_url, package.download_url) + with open(self.get_test_loc("golang/oauth2_0.29.0.json")) as file: + package_json = json.load(file) + golang.map_golang_package( + package_url, package_json, ("test_pipeline")) + package_count = packagedb.models.Package.objects.all().count() + self.assertEqual(1, package_count) + package = packagedb.models.Package.objects.all().first() + expected_purl_str = "pkg:golang/golang.org/x/oauth2@0.29.0" + expected_download_url = "https://proxy.golang.org/golang.org/x/oauth2/@v/v0.29.0.zip" + self.assertEqual(expected_purl_str, package.purl) + self.assertEqual(expected_download_url, package.download_url) def test_process_download_metadata(self): url = "https://bitbucket.org/digi-sense/gg-core/get/v0.3.64.zip" _package_json, filename = golang.process_download_metadata(url, {}) exprected_filename = "digi-sense-gg-core-9d3dfdc43161.zip" self.assertEqual(exprected_filename, filename) - - def test_scrape_go_package(self): - metadata_dict = golang.scrape_go_package("golang.org/x/oauth2", "0.29.0") - expected_dict = { - "license_text": "BSD-3-Clause", - "repository_homepage_url": "https://cs.opensource.google/go/x/oauth2", - "download_url": "https://proxy.golang.org/golang.org/x/oauth2/@v/v0.29.0.zip", - } - self.assertEqual(expected_dict, metadata_dict) - - def test_scrape_package_versions(self): - versions = golang.scrape_package_versions("golang.org/x/oauth2") - # The version list may expand overtime, as of writing the test, - # there are 29 releases - expected_version_len = 29 - self.assertGreaterEqual(len(versions), expected_version_len) diff --git a/minecode/tests/testfiles/golang/oauth2_0.29.0.json b/minecode/tests/testfiles/golang/oauth2_0.29.0.json new file mode 100644 index 00000000..d06c80a0 --- /dev/null +++ b/minecode/tests/testfiles/golang/oauth2_0.29.0.json @@ -0,0 +1,21 @@ +{ + "versionKey": { + "system": "GO", + "name": "golang.org/x/oauth2", + "version": "v0.29.0" + }, + "publishedAt": "2025-03-19T22:59:26Z", + "isDefault": "FALSE", + "licenses": ["BSD-3-Clause"], + "advisoryKeys": [], + "links": [ + { + "label": "SOURCE_REPO", + "url": "https://go.googlesource.com/oauth2" + } + ], + "slsaProvenances": [], + "attestations": [], + "registries": [], + "relatedProjects": [] +} From 1b5034ecf9f7f4b3670303640bf298b88ac19e6c Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Mon, 28 Jul 2025 16:45:29 +0800 Subject: [PATCH 08/19] Better error handling and remove test code #596 Signed-off-by: Chin Yeung Li --- minecode/collectors/golang.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index efb88cac..8922def5 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -93,19 +93,18 @@ def get_package_json(subset_path, type=None, version=None): elif type == "bitbucket": url = f"https://api.bitbucket.org/2.0/repositories/{subset_path}" else: - if version: - if version.startswith('v'): - url = f"https://api.deps.dev/v3/systems/GO/packages/{subset_path}/versions/{version}" - else: - url = f"https://api.deps.dev/v3/systems/GO/packages/{subset_path}/versions/v{version}" + if version.startswith('v'): + url = f"https://api.deps.dev/v3/systems/GO/packages/{subset_path}/versions/{version}" + else: + url = f"https://api.deps.dev/v3/systems/GO/packages/{subset_path}/versions/v{version}" try: response = requests.get(url) response.raise_for_status() - print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") - print(response.json()) return response.json() except requests.exceptions.HTTPError as err: logger.error(f"HTTP error occurred: {err}") + except AttributeError: + logger.error("The PURL is missing a version.") def map_golang_package(package_url, package_json, pipelines, priority=0, filename=None): From 7adce31147004fa71e3fbe0e34531a612a886d7e Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Mon, 28 Jul 2025 17:00:52 +0800 Subject: [PATCH 09/19] Correct ci_code_style #596 Signed-off-by: Chin Yeung Li --- minecode/collectors/golang.py | 71 ++++++++++++------------ minecode/miners/golang.py | 23 ++++---- minecode/tests/collectors/test_golang.py | 30 ++++------ 3 files changed, 56 insertions(+), 68 deletions(-) diff --git a/minecode/collectors/golang.py b/minecode/collectors/golang.py index 8922def5..34de52b6 100644 --- a/minecode/collectors/golang.py +++ b/minecode/collectors/golang.py @@ -39,7 +39,7 @@ def extract_golang_subset_purl(purl_str): version: 2.1.23 """ # Strip "pkg:golang/" - purl_body = purl_str[len("pkg:golang/"):] + purl_body = purl_str[len("pkg:golang/") :] # Extract namespace, name, and version parts = purl_body.split("/") @@ -93,7 +93,7 @@ def get_package_json(subset_path, type=None, version=None): elif type == "bitbucket": url = f"https://api.bitbucket.org/2.0/repositories/{subset_path}" else: - if version.startswith('v'): + if version.startswith("v"): url = f"https://api.deps.dev/v3/systems/GO/packages/{subset_path}/versions/{version}" else: url = f"https://api.deps.dev/v3/systems/GO/packages/{subset_path}/versions/v{version}" @@ -133,15 +133,13 @@ def map_golang_package(package_url, package_json, pipelines, priority=0, filenam for package in packages: package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE - db_package, _, _, error = merge_or_create_package( - package, visit_level=0, filename=filename) + db_package, _, _, error = merge_or_create_package(package, visit_level=0, filename=filename) if error: break # Submit package for scanning if db_package: - add_package_to_scan_queue( - package=db_package, pipelines=pipelines, priority=priority) + add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority) return error @@ -230,8 +228,7 @@ def process_requests(purl_str, **kwargs): if not package_json: error = f"package not found: {purl_str}" else: - repo_version_author_list = gitlab_get_all_package_version_author( - subset_path) + repo_version_author_list = gitlab_get_all_package_version_author(subset_path) if repo_version_author_list: for repo_version, author, email in repo_version_author_list: # Check the version along with stripping the first @@ -246,23 +243,32 @@ def process_requests(purl_str, **kwargs): if not version: if repo_version.startswith("v"): updated_purl_str = ( - PackageURL.to_string( - package_url) + "@" + repo_version[1:]) + PackageURL.to_string(package_url) + "@" + repo_version[1:] + ) else: updated_purl_str = ( - PackageURL.to_string( - package_url) + "@" + repo_version) - updated_purl = PackageURL.from_string( - updated_purl_str) + PackageURL.to_string(package_url) + "@" + repo_version + ) + updated_purl = PackageURL.from_string(updated_purl_str) error_msg = map_golang_package( - updated_purl, updated_json, pipelines, priority, filename=filename) + updated_purl, + updated_json, + pipelines, + priority, + filename=filename, + ) if error_msg: print(error_msg) else: processed = True else: error_msg = map_golang_package( - package_url, updated_json, pipelines, priority, filename=filename) + package_url, + updated_json, + pipelines, + priority, + filename=filename, + ) if error_msg: print(error_msg) else: @@ -273,8 +279,7 @@ def process_requests(purl_str, **kwargs): download_url = ( f"https://gitlab.com/api/v4/projects/{subset_path}/repository/archive.zip" ) - updated_json, filename = process_download_metadata( - download_url, package_json) + updated_json, filename = process_download_metadata(download_url, package_json) error_msg = map_golang_package( package_url, updated_json, pipelines, priority, filename=filename ) @@ -289,8 +294,7 @@ def process_requests(purl_str, **kwargs): if not package_json: error = f"package not found: {purl_str}" else: - repo_version_author_list = bitbucket_get_all_package_version_author( - subset_path) + repo_version_author_list = bitbucket_get_all_package_version_author(subset_path) package_json["repo_workspace_name"] = subset_path if repo_version_author_list: found_match = False @@ -299,7 +303,9 @@ def process_requests(purl_str, **kwargs): # character 'v' in the repo_version if not version or version in {repo_version, repo_version[1:]}: found_match = True - download_url = f"https://bitbucket.org/{subset_path}/get/{repo_version}.zip" + download_url = ( + f"https://bitbucket.org/{subset_path}/get/{repo_version}.zip" + ) updated_json, filename = process_download_metadata( download_url, package_json ) @@ -310,8 +316,7 @@ def process_requests(purl_str, **kwargs): else: collected_version = repo_version updated_purl_str = purl_str + "@" + collected_version - package_url = PackageURL.from_string( - updated_purl_str) + package_url = PackageURL.from_string(updated_purl_str) error_msg = map_golang_package( package_url, updated_json, pipelines, priority, filename=filename ) @@ -328,8 +333,7 @@ def process_requests(purl_str, **kwargs): # Get the main branch name for the download url main_branch = package_json["mainbranch"]["name"] download_url = f"https://bitbucket.org/{subset_path}/get/{main_branch}.zip" - updated_json, filename = process_download_metadata( - download_url, package_json) + updated_json, filename = process_download_metadata(download_url, package_json) error_msg = map_golang_package( package_url, updated_json, pipelines, priority, filename=filename @@ -340,7 +344,7 @@ def process_requests(purl_str, **kwargs): processed = True if not processed: # Handle case which no version is in the input purl - if '@' not in purl_str: + if "@" not in purl_str: namespace_name = purl_str.partition("pkg:golang/")[2] encoded_namespace_name = quote(namespace_name, safe="") version_list = get_package_versions(encoded_namespace_name) @@ -348,21 +352,16 @@ def process_requests(purl_str, **kwargs): for version in version_list: purl_str_version = purl_str + "@" + version package_url = PackageURL.from_string(purl_str_version) - package_json = get_package_json( - encoded_namespace_name, version=version) - error_msg = map_golang_package( - package_url, package_json, pipelines, priority) + package_json = get_package_json(encoded_namespace_name, version=version) + error_msg = map_golang_package(package_url, package_json, pipelines, priority) if error_msg: print(error_msg) else: - namespace_name = purl_str.partition( - "pkg:golang/")[2].rpartition("@")[0] + namespace_name = purl_str.partition("pkg:golang/")[2].rpartition("@")[0] encoded_namespace_name = quote(namespace_name, safe="") package_url = PackageURL.from_string(purl_str) - package_json = get_package_json( - encoded_namespace_name, version=package_url.version) - error_msg = map_golang_package( - package_url, package_json, pipelines, priority) + package_json = get_package_json(encoded_namespace_name, version=package_url.version) + error_msg = map_golang_package(package_url, package_json, pipelines, priority) if error_msg: print(error_msg) diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index abd3767b..a882925a 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -41,15 +41,13 @@ def get_uris(self, content): # note the addition of a * at the end of the search string... # without this the returned data are sparse - details_url = "https://api.godoc.org/search?q={path}*".format( - **locals()) + details_url = "https://api.godoc.org/search?q={path}*".format(**locals()) host = get_well_known_host(path) # If the path belongs github/bitbucket, yield a repo too if host: # keep github, bitbucket... as type: repo_type, _, _ = host.lower().partition(".") # NOQA - repo_url = "https://{namespace}/{name}".format( - **package_url.to_dict()) + repo_url = "https://{namespace}/{name}".format(**package_url.to_dict()) repo_purl = PackageURL( type=repo_type, namespace=package_url.namespace, @@ -208,8 +206,7 @@ def parse_package_path(path): path = "/".join(segments) - package_url = PackageURL( - type="golang", namespace=namespace, name=name, qualifiers=qualifiers) + package_url = PackageURL(type="golang", namespace=namespace, name=name, qualifiers=qualifiers) return package_url, path @@ -225,8 +222,7 @@ def build_golang_package(package_data, purl): """Return a single Golang package""" package_url = PackageURL.from_string(purl) vcs_url = package_url.qualifiers.get("vcs_repository") - homepage_url = "/".join(["https:/", - package_url.namespace, package_url.name]) + homepage_url = "/".join(["https:/", package_url.namespace, package_url.name]) vcs_tool = "git" if "github.com" in package_url.namespace else None if vcs_tool: vcs_url = form_vcs_url(vcs_tool, vcs_url) @@ -245,13 +241,16 @@ def build_golang_package(package_data, purl): def build_golang_generic_package(package_data, package_url): """Return a single Golang package""" - homepage_url = "/".join(["https:/", - package_url.namespace, package_url.name]) + homepage_url = "/".join(["https:/", package_url.namespace, package_url.name]) license_text = package_data.get("licenses") extracted_license_statement = [license_text] - download_url = "/".join(["https://proxy.golang.org", package_url.namespace, - package_url.name, "@v"]) + "/v" + package_url.version + ".zip" + download_url = ( + "/".join(["https://proxy.golang.org", package_url.namespace, package_url.name, "@v"]) + + "/v" + + package_url.version + + ".zip" + ) common_data = dict( name=package_url.name, diff --git a/minecode/tests/collectors/test_golang.py b/minecode/tests/collectors/test_golang.py index 30224a71..dea621a8 100644 --- a/minecode/tests/collectors/test_golang.py +++ b/minecode/tests/collectors/test_golang.py @@ -20,8 +20,7 @@ class GoLangPriorityQueueTests(JsonBasedTesting, DjangoTestCase): - test_data_dir = os.path.join(os.path.dirname( - os.path.dirname(__file__)), "testfiles") + test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles") def test_extract_golang_subset_purl(self): test1 = "pkg:golang/github.com/rickar/cal/v2@2.1.23" @@ -29,10 +28,8 @@ def test_extract_golang_subset_purl(self): expected_path1 = "rickar/cal" expected_version1 = "2.1.23" - result_path1, result_version1 = golang.extract_golang_subset_purl( - test1) - result_path2, result_version2 = golang.extract_golang_subset_purl( - test2) + result_path1, result_version1 = golang.extract_golang_subset_purl(test1) + result_path2, result_version2 = golang.extract_golang_subset_purl(test2) self.assertEqual(expected_path1, result_path1) self.assertEqual(result_version1, expected_version1) @@ -56,8 +53,7 @@ def test_gitlab_updated_purl(self): self.assertEqual(result_version2, "") def test_get_package_json_gitlab(self): - json_contents = golang.get_package_json( - "xx_network%2Fprimitives", "gitlab") + json_contents = golang.get_package_json("xx_network%2Fprimitives", "gitlab") expected_id = 20321795 expected_name = "primitives" @@ -65,8 +61,7 @@ def test_get_package_json_gitlab(self): self.assertEqual(json_contents.get("name"), expected_name) def test_get_package_json_bitbucket(self): - json_contents = golang.get_package_json( - "lebronto_kerovol/gwerror", "bitbucket") + json_contents = golang.get_package_json("lebronto_kerovol/gwerror", "bitbucket") expected_full_name = "lebronto_kerovol/gwerror" expected_name = "gwerror" @@ -82,8 +77,7 @@ def test_map_go_package_gitlab(self): with open(self.get_test_loc("golang/client-go_0.127.0.json")) as file: package_json = json.load(file) - golang.map_golang_package( - package_url, package_json, ("test_pipeline")) + golang.map_golang_package(package_url, package_json, ("test_pipeline")) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() @@ -95,13 +89,11 @@ def test_map_go_package_gitlab(self): def test_map_go_package_bitbucket(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) - package_url = PackageURL.from_string( - "pkg:golang/bitbucket.org/digi-sense/gg-core@0.3.64") + package_url = PackageURL.from_string("pkg:golang/bitbucket.org/digi-sense/gg-core@0.3.64") with open(self.get_test_loc("golang/gg-core_0.3.64.json")) as file: package_json = json.load(file) - golang.map_golang_package( - package_url, package_json, ("test_pipeline")) + golang.map_golang_package(package_url, package_json, ("test_pipeline")) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() @@ -113,13 +105,11 @@ def test_map_go_package_bitbucket(self): def test_map_go_package_others(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) - package_url = PackageURL.from_string( - "pkg:golang/golang.org/x/oauth2@0.29.0") + package_url = PackageURL.from_string("pkg:golang/golang.org/x/oauth2@0.29.0") with open(self.get_test_loc("golang/oauth2_0.29.0.json")) as file: package_json = json.load(file) - golang.map_golang_package( - package_url, package_json, ("test_pipeline")) + golang.map_golang_package(package_url, package_json, ("test_pipeline")) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() From fae4ff06ccd822b1642e5a1a4f60d13284c9b1cd Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 29 Jul 2025 15:30:58 +0800 Subject: [PATCH 10/19] Upgrade to use packageurl-python 0.17.2 #596 Signed-off-by: Chin Yeung Li --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 85224639..1351919a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -101,7 +101,7 @@ normality==2.6.1 numpy==2.2.6 openpyxl==3.1.5 packagedcode-msitools==0.101.210706 -packageurl-python==0.17.1 +packageurl-python==0.17.2 packaging==25.0 packvers==21.5 parameter-expansion-patched==0.3.1 From 140de5b1e998bb73e14deeae880c25f2dfd42aca Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Wed, 30 Jul 2025 15:40:08 +0800 Subject: [PATCH 11/19] Use "build_golang_download_url" to build the download url for go packages #596 Signed-off-by: Chin Yeung Li --- minecode/miners/golang.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/minecode/miners/golang.py b/minecode/miners/golang.py index a882925a..6ea252ee 100644 --- a/minecode/miners/golang.py +++ b/minecode/miners/golang.py @@ -11,6 +11,7 @@ from packagedcode import models as scan_models from packageurl import PackageURL +from packageurl.contrib.purl2url import build_golang_download_url from minecode import map_router from minecode import seed @@ -245,12 +246,8 @@ def build_golang_generic_package(package_data, package_url): license_text = package_data.get("licenses") extracted_license_statement = [license_text] - download_url = ( - "/".join(["https://proxy.golang.org", package_url.namespace, package_url.name, "@v"]) - + "/v" - + package_url.version - + ".zip" - ) + purl_str = package_url.to_string() + download_url = build_golang_download_url(purl_str) common_data = dict( name=package_url.name, From 674691d702dc8bdd36776910e3ccf86b0232bf1a Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Wed, 30 Jul 2025 13:00:11 -0700 Subject: [PATCH 12/19] Use branch of scancode.io #596 * This is so we can use the updated packageurl-python library Signed-off-by: Jono Yang --- minecode/tests/collectors/test_github.py | 1 + .../cpan/expected_release_search.json | 14 +- ...pected_release_search_author_MIYAGAWA.json | 126 ++++----- .../testfiles/cran/mapper_ANN2_expected.json | 231 +++++----------- .../testfiles/cran/mapper_abe_expected.json | 248 +++++------------- .../maven/mapper/axis-1.4.pom.package.json | 4 +- .../commons-jaxrs-1.21.pom.package.json | 14 +- .../maven-all-1.0-RELEASE.pom.package.json | 14 +- ...sql-connector-java-5.1.27.pom.package.json | 8 +- .../common-object-1.0.2.pom.package.json | 4 +- .../empty/osgl-http-1.1.2.pom.package.json | 4 +- .../parse/jds-2.17.0718b.pom.package.json | 4 +- .../parsing/parse/jds-3.0.1.pom.package.json | 4 +- .../testfiles/model_utils/after_merge.json | 6 +- .../model_utils/created_package.json | 6 +- .../model_utils/expected_updated_fields.json | 6 +- .../nuget/nuget_mapper_log4net_expected.json | 60 ++--- .../pypi/expected-boolean.py-2.0.dev3.json | 18 +- .../repomd_parser/centos/expected.json | 6 +- .../repomd_parser/cloudera2/expected.json | 12 +- .../sourceforge/mapper_omonoql_expected.json | 8 +- .../mapper_openstunts_expected.json | 8 +- ...csearch-scripting-painless-spi-6.8.15.json | 4 +- requirements.txt | 16 +- setup.cfg | 6 +- 25 files changed, 304 insertions(+), 528 deletions(-) diff --git a/minecode/tests/collectors/test_github.py b/minecode/tests/collectors/test_github.py index eb5d4525..dbb345fc 100644 --- a/minecode/tests/collectors/test_github.py +++ b/minecode/tests/collectors/test_github.py @@ -22,6 +22,7 @@ def test_github_get_all_versions(self): repo_path = "aboutcode-org/purldb" versions = github.github_get_all_versions(repo_path) expected = [ + "v7.0.0", "v6.0.0", "v5.0.1", "v5.0.0", diff --git a/minecode/tests/testfiles/cpan/expected_release_search.json b/minecode/tests/testfiles/cpan/expected_release_search.json index 6570101f..639124a5 100644 --- a/minecode/tests/testfiles/cpan/expected_release_search.json +++ b/minecode/tests/testfiles/cpan/expected_release_search.json @@ -992,17 +992,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, diff --git a/minecode/tests/testfiles/cpan/expected_release_search_author_MIYAGAWA.json b/minecode/tests/testfiles/cpan/expected_release_search_author_MIYAGAWA.json index 656b80bc..925c3d6d 100644 --- a/minecode/tests/testfiles/cpan/expected_release_search_author_MIYAGAWA.json +++ b/minecode/tests/testfiles/cpan/expected_release_search_author_MIYAGAWA.json @@ -992,17 +992,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, @@ -2019,17 +2019,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, @@ -2098,17 +2098,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, @@ -2177,17 +2177,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, @@ -2256,17 +2256,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, @@ -2414,17 +2414,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, @@ -2493,17 +2493,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, @@ -2572,17 +2572,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, @@ -2651,17 +2651,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-60d25d9573996bcd1c02ac0df23ea095e03a886c", - "matched_text":"license - unknown" + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", + "matched_text":"license unknown" } ], - "identifier":"unknown-6b0f0e5d-bddc-c231-45de-646115b29dbc" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, diff --git a/minecode/tests/testfiles/cran/mapper_ANN2_expected.json b/minecode/tests/testfiles/cran/mapper_ANN2_expected.json index aff66902..50c5a338 100644 --- a/minecode/tests/testfiles/cran/mapper_ANN2_expected.json +++ b/minecode/tests/testfiles/cran/mapper_ANN2_expected.json @@ -38,45 +38,30 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"gpl-1.0-plus AND gpl-3.0", - "declared_license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "declared_license_expression":"gpl-3.0", + "declared_license_expression_spdx":"GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, @@ -139,45 +124,30 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"gpl-1.0-plus AND gpl-3.0", - "declared_license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "declared_license_expression":"gpl-3.0", + "declared_license_expression_spdx":"GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, @@ -240,45 +210,30 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"gpl-1.0-plus AND gpl-3.0", - "declared_license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "declared_license_expression":"gpl-3.0", + "declared_license_expression_spdx":"GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, @@ -341,45 +296,30 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"gpl-1.0-plus AND gpl-3.0", - "declared_license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "declared_license_expression":"gpl-3.0", + "declared_license_expression_spdx":"GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, @@ -442,45 +382,30 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"gpl-1.0-plus AND gpl-3.0", - "declared_license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "declared_license_expression":"gpl-3.0", + "declared_license_expression_spdx":"GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, @@ -543,45 +468,30 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"gpl-1.0-plus AND gpl-3.0", - "declared_license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "declared_license_expression":"gpl-3.0", + "declared_license_expression_spdx":"GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, @@ -644,45 +554,30 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"gpl-1.0-plus AND gpl-3.0", - "declared_license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "declared_license_expression":"gpl-3.0", + "declared_license_expression_spdx":"GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, diff --git a/minecode/tests/testfiles/cran/mapper_abe_expected.json b/minecode/tests/testfiles/cran/mapper_abe_expected.json index 568f1a81..1d7988df 100644 --- a/minecode/tests/testfiles/cran/mapper_abe_expected.json +++ b/minecode/tests/testfiles/cran/mapper_abe_expected.json @@ -38,82 +38,52 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"(gpl-1.0-plus AND gpl-2.0) AND (gpl-1.0-plus AND gpl-3.0)", - "declared_license_expression_spdx":"(GPL-1.0-or-later AND GPL-2.0-only) AND (GPL-1.0-or-later AND GPL-3.0-only)", + "declared_license_expression":"gpl-2.0 AND gpl-3.0", + "declared_license_expression_spdx":"GPL-2.0-only AND GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-2.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-2.0-only", + "license_expression":"gpl-2.0", + "license_expression_spdx":"GPL-2.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-2" - }, { "license_expression":"gpl-2.0", "license_expression_spdx":"GPL-2.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-2.0_620.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_620.RULE", + "rule_identifier":"gpl-2.0_561.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_561.RULE", "matched_text":"../../licenses/GPL-2" } ], - "identifier":"gpl_1_0_plus_and_gpl_2_0-5a62df7a-cc3f-6037-d483-7a77916d1dce" + "identifier":"gpl_2_0-35656954-a0b9-19ca-0df7-d9898dc6a723" }, { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, @@ -176,82 +146,52 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"(gpl-1.0-plus AND gpl-2.0) AND (gpl-1.0-plus AND gpl-3.0)", - "declared_license_expression_spdx":"(GPL-1.0-or-later AND GPL-2.0-only) AND (GPL-1.0-or-later AND GPL-3.0-only)", + "declared_license_expression":"gpl-2.0 AND gpl-3.0", + "declared_license_expression_spdx":"GPL-2.0-only AND GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-2.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-2.0-only", + "license_expression":"gpl-2.0", + "license_expression_spdx":"GPL-2.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-2" - }, { "license_expression":"gpl-2.0", "license_expression_spdx":"GPL-2.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-2.0_620.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_620.RULE", + "rule_identifier":"gpl-2.0_561.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_561.RULE", "matched_text":"../../licenses/GPL-2" } ], - "identifier":"gpl_1_0_plus_and_gpl_2_0-5a62df7a-cc3f-6037-d483-7a77916d1dce" + "identifier":"gpl_2_0-35656954-a0b9-19ca-0df7-d9898dc6a723" }, { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, @@ -314,82 +254,52 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"(gpl-1.0-plus AND gpl-2.0) AND (gpl-1.0-plus AND gpl-3.0)", - "declared_license_expression_spdx":"(GPL-1.0-or-later AND GPL-2.0-only) AND (GPL-1.0-or-later AND GPL-3.0-only)", + "declared_license_expression":"gpl-2.0 AND gpl-3.0", + "declared_license_expression_spdx":"GPL-2.0-only AND GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-2.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-2.0-only", + "license_expression":"gpl-2.0", + "license_expression_spdx":"GPL-2.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-2" - }, { "license_expression":"gpl-2.0", "license_expression_spdx":"GPL-2.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-2.0_620.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_620.RULE", + "rule_identifier":"gpl-2.0_561.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_561.RULE", "matched_text":"../../licenses/GPL-2" } ], - "identifier":"gpl_1_0_plus_and_gpl_2_0-5a62df7a-cc3f-6037-d483-7a77916d1dce" + "identifier":"gpl_2_0-35656954-a0b9-19ca-0df7-d9898dc6a723" }, { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, @@ -452,82 +362,52 @@ "vcs_url":null, "copyright":null, "holder":null, - "declared_license_expression":"(gpl-1.0-plus AND gpl-2.0) AND (gpl-1.0-plus AND gpl-3.0)", - "declared_license_expression_spdx":"(GPL-1.0-or-later AND GPL-2.0-only) AND (GPL-1.0-or-later AND GPL-3.0-only)", + "declared_license_expression":"gpl-2.0 AND gpl-3.0", + "declared_license_expression_spdx":"GPL-2.0-only AND GPL-3.0-only", "license_detections":[ { - "license_expression":"gpl-1.0-plus AND gpl-2.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-2.0-only", + "license_expression":"gpl-2.0", + "license_expression_spdx":"GPL-2.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-2" - }, { "license_expression":"gpl-2.0", "license_expression_spdx":"GPL-2.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-2.0_620.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_620.RULE", + "rule_identifier":"gpl-2.0_561.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_561.RULE", "matched_text":"../../licenses/GPL-2" } ], - "identifier":"gpl_1_0_plus_and_gpl_2_0-5a62df7a-cc3f-6037-d483-7a77916d1dce" + "identifier":"gpl_2_0-35656954-a0b9-19ca-0df7-d9898dc6a723" }, { - "license_expression":"gpl-1.0-plus AND gpl-3.0", - "license_expression_spdx":"GPL-1.0-or-later AND GPL-3.0-only", + "license_expression":"gpl-3.0", + "license_expression_spdx":"GPL-3.0-only", "matches":[ - { - "license_expression":"gpl-1.0-plus", - "license_expression_spdx":"GPL-1.0-or-later", - "from_file":null, - "start_line":1, - "end_line":1, - "matcher":"2-aho", - "score":99.0, - "matched_length":2, - "match_coverage":100.0, - "rule_relevance":99, - "rule_identifier":"gpl-1.0-plus_154.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-1.0-plus_154.RULE", - "matched_text":"../../licenses/GPL-3" - }, { "license_expression":"gpl-3.0", "license_expression_spdx":"GPL-3.0-only", "from_file":null, "start_line":1, "end_line":1, - "matcher":"2-aho", + "matcher":"1-hash", "score":100.0, - "matched_length":2, + "matched_length":3, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-3.0_25.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_25.RULE", + "rule_identifier":"gpl-3.0_rdesc_1.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0_rdesc_1.RULE", "matched_text":"../../licenses/GPL-3" } ], - "identifier":"gpl_1_0_plus_and_gpl_3_0-05e046e8-bd9c-deec-52b8-168b9ab1c398" + "identifier":"gpl_3_0-64a75311-b031-a62c-778b-bd6cdd689c55" } ], "other_license_expression":null, diff --git a/minecode/tests/testfiles/maven/mapper/axis-1.4.pom.package.json b/minecode/tests/testfiles/maven/mapper/axis-1.4.pom.package.json index 41db24ef..1297d15c 100644 --- a/minecode/tests/testfiles/maven/mapper/axis-1.4.pom.package.json +++ b/minecode/tests/testfiles/maven/mapper/axis-1.4.pom.package.json @@ -42,7 +42,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_40.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_40.RULE", - "matched_text":"- name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt" + "matched_text":"name: The Apache Software License, Version 2.0\nurl: http://www.apache.org/licenses/LICENSE-2.0.txt" } ], "identifier":"apache_2_0-bfa9e97a-62d3-0076-c881-8443e5e95192" @@ -51,7 +51,7 @@ "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", + "extracted_license_statement":"- license:\n name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", "notice_text":null, "source_packages":[ "pkg:maven/axis/axis@1.4?classifier=sources" diff --git a/minecode/tests/testfiles/maven/mapper/commons-jaxrs-1.21.pom.package.json b/minecode/tests/testfiles/maven/mapper/commons-jaxrs-1.21.pom.package.json index 84016b63..6f344fe8 100644 --- a/minecode/tests/testfiles/maven/mapper/commons-jaxrs-1.21.pom.package.json +++ b/minecode/tests/testfiles/maven/mapper/commons-jaxrs-1.21.pom.package.json @@ -85,7 +85,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_182.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_182.RULE", - "matched_text":"- name: The Apache License, Version 2.0" + "matched_text":"name: The Apache License, Version 2.0" }, { "license_expression":"apache-2.0", @@ -95,21 +95,21 @@ "end_line":2, "matcher":"2-aho", "score":100.0, - "matched_length":9, + "matched_length":10, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_42.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_42.RULE", - "matched_text":" url: http://www.apache.org/licenses/LICENSE-2.0.txt" + "rule_identifier":"apache-2.0_1317.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_1317.RULE", + "matched_text":"url: http://www.apache.org/licenses/LICENSE-2.0.txt" } ], - "identifier":"apache_2_0-e8c40f40-3205-ccee-fa1e-76154bd59d16" + "identifier":"apache_2_0-82b98cc4-34fe-2658-e07e-839e12d32ec7" } ], "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: The Apache License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", + "extracted_license_statement":"- license:\n name: The Apache License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", "notice_text":null, "source_packages":[ "pkg:maven/edu.psu.swe.commons/commons-jaxrs@1.21?classifier=sources" diff --git a/minecode/tests/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom.package.json b/minecode/tests/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom.package.json index b23dee60..18619ba8 100644 --- a/minecode/tests/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom.package.json +++ b/minecode/tests/testfiles/maven/mapper/maven-all-1.0-RELEASE.pom.package.json @@ -50,7 +50,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_182.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_182.RULE", - "matched_text":"- name: The Apache License, Version 2.0" + "matched_text":"name: The Apache License, Version 2.0" }, { "license_expression":"apache-2.0", @@ -60,21 +60,21 @@ "end_line":2, "matcher":"2-aho", "score":100.0, - "matched_length":9, + "matched_length":10, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_42.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_42.RULE", - "matched_text":" url: http://www.apache.org/licenses/LICENSE-2.0.txt" + "rule_identifier":"apache-2.0_1317.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_1317.RULE", + "matched_text":"url: http://www.apache.org/licenses/LICENSE-2.0.txt" } ], - "identifier":"apache_2_0-e8c40f40-3205-ccee-fa1e-76154bd59d16" + "identifier":"apache_2_0-82b98cc4-34fe-2658-e07e-839e12d32ec7" } ], "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: The Apache License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", + "extracted_license_statement":"- license:\n name: The Apache License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", "notice_text":null, "source_packages":[ "pkg:maven/date.yetao.maven/maven-all@1.0-RELEASE?classifier=sources" diff --git a/minecode/tests/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom.package.json b/minecode/tests/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom.package.json index 235e332f..d4480beb 100644 --- a/minecode/tests/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom.package.json +++ b/minecode/tests/testfiles/maven/mapper/mysql-connector-java-5.1.27.pom.package.json @@ -50,7 +50,7 @@ "rule_relevance":100, "rule_identifier":"gpl-2.0_660.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_660.RULE", - "matched_text":"- name: The GNU General Public License, Version 2" + "matched_text":"name: The GNU General Public License, Version 2" }, { "license_expression":"gpl-2.0", @@ -65,7 +65,7 @@ "rule_relevance":100, "rule_identifier":"gpl-2.0_78.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_78.RULE", - "matched_text":" url: http://www.gnu.org/licenses/old-licenses/gpl-2.0.html" + "matched_text":"url: http://www.gnu.org/licenses/old-licenses/gpl-2.0.html" }, { "license_expression":"gpl-2.0 WITH mysql-linking-exception-2018", @@ -80,7 +80,7 @@ "rule_relevance":100, "rule_identifier":"gpl-2.0_with_mysql-linking-exception-2018_4.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_with_mysql-linking-exception-2018_4.RULE", - "matched_text":" MySQL Connector/J contains exceptions to GPL requirements when linking with other components\n that are licensed under OSI-approved open source licenses, see EXCEPTIONS-CONNECTOR-J\n in this distribution for more details." + "matched_text":" MySQL Connector/J contains exceptions to GPL requirements when linking with other components\n that are licensed under OSI-approved open source licenses, see EXCEPTIONS-CONNECTOR-J\n in this distribution for more details." } ], "identifier":"gpl_2_0_and_gpl_2_0_with_mysql_linking_exception_2018-0cd7a411-f08b-91fd-75be-37baafbb81a9" @@ -89,7 +89,7 @@ "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: The GNU General Public License, Version 2\n url: http://www.gnu.org/licenses/old-licenses/gpl-2.0.html\n comments: |\n MySQL Connector/J contains exceptions to GPL requirements when linking with other components\n that are licensed under OSI-approved open source licenses, see EXCEPTIONS-CONNECTOR-J\n in this distribution for more details.\n", + "extracted_license_statement":"- license:\n name: The GNU General Public License, Version 2\n url: http://www.gnu.org/licenses/old-licenses/gpl-2.0.html\n comments: |\n MySQL Connector/J contains exceptions to GPL requirements when linking with other components\n that are licensed under OSI-approved open source licenses, see EXCEPTIONS-CONNECTOR-J\n in this distribution for more details.\n", "notice_text":null, "source_packages":[ "pkg:maven/mysql/mysql-connector-java@5.1.27?classifier=sources" diff --git a/minecode/tests/testfiles/maven/parsing/empty/common-object-1.0.2.pom.package.json b/minecode/tests/testfiles/maven/parsing/empty/common-object-1.0.2.pom.package.json index 86deebb1..39abec5a 100644 --- a/minecode/tests/testfiles/maven/parsing/empty/common-object-1.0.2.pom.package.json +++ b/minecode/tests/testfiles/maven/parsing/empty/common-object-1.0.2.pom.package.json @@ -50,7 +50,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_40.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_40.RULE", - "matched_text":"- name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt" + "matched_text":"name: The Apache Software License, Version 2.0\nurl: http://www.apache.org/licenses/LICENSE-2.0.txt" } ], "identifier":"apache_2_0-bfa9e97a-62d3-0076-c881-8443e5e95192" @@ -59,7 +59,7 @@ "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", + "extracted_license_statement":"- license:\n name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", "notice_text":null, "source_packages":[ "pkg:maven/org.xson/common-object@1.0.2?classifier=sources" diff --git a/minecode/tests/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom.package.json b/minecode/tests/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom.package.json index 8687cb05..3c9ad3cb 100644 --- a/minecode/tests/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom.package.json +++ b/minecode/tests/testfiles/maven/parsing/empty/osgl-http-1.1.2.pom.package.json @@ -42,7 +42,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_40.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_40.RULE", - "matched_text":"- name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt" + "matched_text":"name: The Apache Software License, Version 2.0\nurl: http://www.apache.org/licenses/LICENSE-2.0.txt" } ], "identifier":"apache_2_0-bfa9e97a-62d3-0076-c881-8443e5e95192" @@ -51,7 +51,7 @@ "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", + "extracted_license_statement":"- license:\n name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", "notice_text":null, "source_packages":[ "pkg:maven/org.osgl/osgl-http@1.1.2?classifier=sources" diff --git a/minecode/tests/testfiles/maven/parsing/parse/jds-2.17.0718b.pom.package.json b/minecode/tests/testfiles/maven/parsing/parse/jds-2.17.0718b.pom.package.json index fb1aa383..8d572afd 100644 --- a/minecode/tests/testfiles/maven/parsing/parse/jds-2.17.0718b.pom.package.json +++ b/minecode/tests/testfiles/maven/parsing/parse/jds-2.17.0718b.pom.package.json @@ -50,7 +50,7 @@ "rule_relevance":100, "rule_identifier":"bsd-new_358.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_358.RULE", - "matched_text":"- name: The 3-Clause BSD License\n url: https://opensource.org/licenses/BSD-3-Clause" + "matched_text":"name: The 3-Clause BSD License\nurl: https://opensource.org/licenses/BSD-3-Clause" } ], "identifier":"bsd_new-16562f16-7bf2-63a5-7b03-5327f109350b" @@ -59,7 +59,7 @@ "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: The 3-Clause BSD License\n url: https://opensource.org/licenses/BSD-3-Clause\n", + "extracted_license_statement":"- license:\n name: The 3-Clause BSD License\n url: https://opensource.org/licenses/BSD-3-Clause\n", "notice_text":null, "source_packages":[ "pkg:maven/io.github.subiyacryolite/jds@2.17.0718b?classifier=sources" diff --git a/minecode/tests/testfiles/maven/parsing/parse/jds-3.0.1.pom.package.json b/minecode/tests/testfiles/maven/parsing/parse/jds-3.0.1.pom.package.json index 83d0c265..6a4cba1b 100644 --- a/minecode/tests/testfiles/maven/parsing/parse/jds-3.0.1.pom.package.json +++ b/minecode/tests/testfiles/maven/parsing/parse/jds-3.0.1.pom.package.json @@ -50,7 +50,7 @@ "rule_relevance":100, "rule_identifier":"bsd-new_358.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_358.RULE", - "matched_text":"- name: The 3-Clause BSD License\n url: https://opensource.org/licenses/BSD-3-Clause" + "matched_text":"name: The 3-Clause BSD License\nurl: https://opensource.org/licenses/BSD-3-Clause" } ], "identifier":"bsd_new-16562f16-7bf2-63a5-7b03-5327f109350b" @@ -59,7 +59,7 @@ "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: The 3-Clause BSD License\n url: https://opensource.org/licenses/BSD-3-Clause\n", + "extracted_license_statement":"- license:\n name: The 3-Clause BSD License\n url: https://opensource.org/licenses/BSD-3-Clause\n", "notice_text":null, "source_packages":[ "pkg:maven/io.github.subiyacryolite/jds@3.0.1?classifier=sources" diff --git a/minecode/tests/testfiles/model_utils/after_merge.json b/minecode/tests/testfiles/model_utils/after_merge.json index 5edd08cc..0cc1e736 100644 --- a/minecode/tests/testfiles/model_utils/after_merge.json +++ b/minecode/tests/testfiles/model_utils/after_merge.json @@ -58,7 +58,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_48.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_48.RULE", - "matched_text":"- name: Apache License, Version 2.0" + "matched_text":"name: Apache License, Version 2.0" }, { "license_expression":"apache-2.0", @@ -73,7 +73,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_216.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_216.RULE", - "matched_text":" url: https://www.apache.org/licenses/LICENSE-2.0.txt" + "matched_text":"url: https://www.apache.org/licenses/LICENSE-2.0.txt" } ], "identifier":"apache_2_0-4571b303-7bd2-686b-401c-e805e9f4700e" @@ -82,7 +82,7 @@ "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: Apache License, Version 2.0\n url: https://www.apache.org/licenses/LICENSE-2.0.txt\n", + "extracted_license_statement":"- license:\n name: Apache License, Version 2.0\n url: https://www.apache.org/licenses/LICENSE-2.0.txt\n", "notice_text":null, "source_packages":[ "pkg:maven/org.apache.pulsar/pulsar@2.5.1?classifier=sources" diff --git a/minecode/tests/testfiles/model_utils/created_package.json b/minecode/tests/testfiles/model_utils/created_package.json index 80acbc1e..ab2037e3 100644 --- a/minecode/tests/testfiles/model_utils/created_package.json +++ b/minecode/tests/testfiles/model_utils/created_package.json @@ -58,7 +58,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_48.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_48.RULE", - "matched_text":"- name: Apache License, Version 2.0" + "matched_text":"name: Apache License, Version 2.0" }, { "license_expression":"apache-2.0", @@ -73,7 +73,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_216.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_216.RULE", - "matched_text":" url: https://www.apache.org/licenses/LICENSE-2.0.txt" + "matched_text":"url: https://www.apache.org/licenses/LICENSE-2.0.txt" } ], "identifier":"apache_2_0-4571b303-7bd2-686b-401c-e805e9f4700e" @@ -82,7 +82,7 @@ "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: Apache License, Version 2.0\n url: https://www.apache.org/licenses/LICENSE-2.0.txt\n", + "extracted_license_statement":"- license:\n name: Apache License, Version 2.0\n url: https://www.apache.org/licenses/LICENSE-2.0.txt\n", "notice_text":null, "source_packages":[ "pkg:maven/org.apache.pulsar/pulsar@2.5.1?classifier=sources" diff --git a/minecode/tests/testfiles/model_utils/expected_updated_fields.json b/minecode/tests/testfiles/model_utils/expected_updated_fields.json index c9201186..233a9ad2 100644 --- a/minecode/tests/testfiles/model_utils/expected_updated_fields.json +++ b/minecode/tests/testfiles/model_utils/expected_updated_fields.json @@ -100,7 +100,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_48.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_48.RULE", - "matched_text":"- name: Apache License, Version 2.0" + "matched_text":"name: Apache License, Version 2.0" }, { "license_expression":"apache-2.0", @@ -115,7 +115,7 @@ "rule_relevance":100, "rule_identifier":"apache-2.0_216.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_216.RULE", - "matched_text":" url: https://www.apache.org/licenses/LICENSE-2.0.txt" + "matched_text":"url: https://www.apache.org/licenses/LICENSE-2.0.txt" } ], "identifier":"apache_2_0-4571b303-7bd2-686b-401c-e805e9f4700e" @@ -125,7 +125,7 @@ { "field":"extracted_license_statement", "old_value":null, - "new_value":"- name: Apache License, Version 2.0\n url: https://www.apache.org/licenses/LICENSE-2.0.txt\n" + "new_value":"- license:\n name: Apache License, Version 2.0\n url: https://www.apache.org/licenses/LICENSE-2.0.txt\n" }, { "field":"source_packages", diff --git a/minecode/tests/testfiles/nuget/nuget_mapper_log4net_expected.json b/minecode/tests/testfiles/nuget/nuget_mapper_log4net_expected.json index 309875f9..ca244588 100644 --- a/minecode/tests/testfiles/nuget/nuget_mapper_log4net_expected.json +++ b/minecode/tests/testfiles/nuget/nuget_mapper_log4net_expected.json @@ -41,12 +41,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, @@ -112,12 +112,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, @@ -183,12 +183,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, @@ -254,12 +254,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, @@ -325,12 +325,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, @@ -396,12 +396,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, @@ -467,12 +467,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, @@ -538,12 +538,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, @@ -609,12 +609,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, @@ -680,12 +680,12 @@ "matched_length":4, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"apache-2.0_176.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_176.RULE", + "rule_identifier":"apache-2.0_required_phrase_14.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_required_phrase_14.RULE", "matched_text":"Apache-2.0 License" } ], - "identifier":"apache_2_0-c303006c-0c7c-913e-6e1a-d71a3c906ed1" + "identifier":"apache_2_0-17f6b2e2-96ba-f4b4-c167-193580bce138" } ], "other_license_expression":null, diff --git a/minecode/tests/testfiles/pypi/expected-boolean.py-2.0.dev3.json b/minecode/tests/testfiles/pypi/expected-boolean.py-2.0.dev3.json index 622c19d0..bce14ef8 100644 --- a/minecode/tests/testfiles/pypi/expected-boolean.py-2.0.dev3.json +++ b/minecode/tests/testfiles/pypi/expected-boolean.py-2.0.dev3.json @@ -50,16 +50,16 @@ "start_line":1, "end_line":1, "matcher":"1-hash", - "score":90.0, + "score":100.0, "matched_length":3, "match_coverage":100.0, - "rule_relevance":90, + "rule_relevance":100, "rule_identifier":"bsd-new_708.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_708.RULE", "matched_text":"revised BSD license" } ], - "identifier":"bsd_new-9d4149ba-60ed-9e49-34a8-20576feb3295" + "identifier":"bsd_new-98c737b4-e505-9919-75a7-94b39533f450" } ], "other_license_expression":null, @@ -130,16 +130,16 @@ "start_line":1, "end_line":1, "matcher":"1-hash", - "score":90.0, + "score":100.0, "matched_length":3, "match_coverage":100.0, - "rule_relevance":90, + "rule_relevance":100, "rule_identifier":"bsd-new_708.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_708.RULE", "matched_text":"revised BSD license" } ], - "identifier":"bsd_new-9d4149ba-60ed-9e49-34a8-20576feb3295" + "identifier":"bsd_new-98c737b4-e505-9919-75a7-94b39533f450" } ], "other_license_expression":null, @@ -210,16 +210,16 @@ "start_line":1, "end_line":1, "matcher":"1-hash", - "score":90.0, + "score":100.0, "matched_length":3, "match_coverage":100.0, - "rule_relevance":90, + "rule_relevance":100, "rule_identifier":"bsd-new_708.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_708.RULE", "matched_text":"revised BSD license" } ], - "identifier":"bsd_new-9d4149ba-60ed-9e49-34a8-20576feb3295" + "identifier":"bsd_new-98c737b4-e505-9919-75a7-94b39533f450" } ], "other_license_expression":null, diff --git a/minecode/tests/testfiles/repodata_rpms/repomd_parser/centos/expected.json b/minecode/tests/testfiles/repodata_rpms/repomd_parser/centos/expected.json index 86391dac..2246ec1e 100644 --- a/minecode/tests/testfiles/repodata_rpms/repomd_parser/centos/expected.json +++ b/minecode/tests/testfiles/repodata_rpms/repomd_parser/centos/expected.json @@ -171,16 +171,16 @@ "start_line":1, "end_line":1, "matcher":"2-aho", - "score":99.0, + "score":100.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":99, + "rule_relevance":100, "rule_identifier":"mit_366.RULE", "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_366.RULE", "matched_text":"MIT/X11/XFree86/many others" } ], - "identifier":"mit-f0a83ed6-696b-c78a-6990-0e10a45015e4" + "identifier":"mit-5b0474ee-95b1-e25a-6b45-d6d8905a7b1c" } ], "other_license_expression":null, diff --git a/minecode/tests/testfiles/repodata_rpms/repomd_parser/cloudera2/expected.json b/minecode/tests/testfiles/repodata_rpms/repomd_parser/cloudera2/expected.json index 4aae3186..8176a0cd 100644 --- a/minecode/tests/testfiles/repodata_rpms/repomd_parser/cloudera2/expected.json +++ b/minecode/tests/testfiles/repodata_rpms/repomd_parser/cloudera2/expected.json @@ -36,17 +36,17 @@ "from_file":null, "start_line":1, "end_line":1, - "matcher":"5-undetected", - "score":100.0, + "matcher":"1-hash", + "score":60.0, "matched_length":2, "match_coverage":100.0, - "rule_relevance":100, - "rule_identifier":"package-manifest-unknown-ddb1bcfb5ee6486356d14ee1a5a069d77a773026", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/package-manifest-unknown-ddb1bcfb5ee6486356d14ee1a5a069d77a773026", + "rule_relevance":60, + "rule_identifier":"unknown_kernel4.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown_kernel4.RULE", "matched_text":"license unknown" } ], - "identifier":"unknown-3d894fcd-3b01-c17b-5c8a-8d492abc4264" + "identifier":"unknown-6b6b5f84-672b-0a16-0eaa-1c7798736e44" } ], "other_license_expression":null, diff --git a/minecode/tests/testfiles/sourceforge/mapper_omonoql_expected.json b/minecode/tests/testfiles/sourceforge/mapper_omonoql_expected.json index afcc3c45..616ae5cd 100644 --- a/minecode/tests/testfiles/sourceforge/mapper_omonoql_expected.json +++ b/minecode/tests/testfiles/sourceforge/mapper_omonoql_expected.json @@ -84,11 +84,11 @@ "end_line":1, "matcher":"2-aho", "score":100.0, - "matched_length":6, + "matched_length":7, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-2.0_39.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_39.RULE", + "rule_identifier":"gpl-2.0_required_phrase_32.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_required_phrase_32.RULE", "matched_text":"GNU General Public License version 2.0 (GPLv2)" }, { @@ -107,7 +107,7 @@ "matched_text":"GNU General Public License version 2.0 (GPLv2)" } ], - "identifier":"gpl_2_0-c577222f-9147-72be-dd68-bd319655699a" + "identifier":"gpl_2_0-7eddb2db-e746-4576-b08b-dcfd30f4d17f" } ], "other_license_expression":null, diff --git a/minecode/tests/testfiles/sourceforge/mapper_openstunts_expected.json b/minecode/tests/testfiles/sourceforge/mapper_openstunts_expected.json index a63ec8d1..64c2ec9e 100644 --- a/minecode/tests/testfiles/sourceforge/mapper_openstunts_expected.json +++ b/minecode/tests/testfiles/sourceforge/mapper_openstunts_expected.json @@ -70,11 +70,11 @@ "end_line":1, "matcher":"2-aho", "score":100.0, - "matched_length":6, + "matched_length":7, "match_coverage":100.0, "rule_relevance":100, - "rule_identifier":"gpl-2.0_39.RULE", - "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_39.RULE", + "rule_identifier":"gpl-2.0_required_phrase_32.RULE", + "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-2.0_required_phrase_32.RULE", "matched_text":"GNU General Public License version 2.0 (GPLv2)" }, { @@ -93,7 +93,7 @@ "matched_text":"GNU General Public License version 2.0 (GPLv2)" } ], - "identifier":"gpl_2_0-c577222f-9147-72be-dd68-bd319655699a" + "identifier":"gpl_2_0-7eddb2db-e746-4576-b08b-dcfd30f4d17f" } ], "other_license_expression":null, diff --git a/packagedb/tests/testfiles/api/elasticsearch-scripting-painless-spi-6.8.15.json b/packagedb/tests/testfiles/api/elasticsearch-scripting-painless-spi-6.8.15.json index 8b2919ba..91080e15 100644 --- a/packagedb/tests/testfiles/api/elasticsearch-scripting-painless-spi-6.8.15.json +++ b/packagedb/tests/testfiles/api/elasticsearch-scripting-painless-spi-6.8.15.json @@ -48,7 +48,7 @@ "rule_url":"https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_40.RULE", "from_file":null, "start_line":1, - "matched_text":"- name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt", + "matched_text":"name: The Apache Software License, Version 2.0\nurl: http://www.apache.org/licenses/LICENSE-2.0.txt", "match_coverage":100.0, "matched_length":18, "rule_relevance":100, @@ -65,7 +65,7 @@ "other_license_expression":null, "other_license_expression_spdx":null, "other_license_detections":[], - "extracted_license_statement":"- name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", + "extracted_license_statement":"- license:\n name: The Apache Software License, Version 2.0\n url: http://www.apache.org/licenses/LICENSE-2.0.txt\n", "notice_text":null, "source_packages":[ "pkg:maven/org.elasticsearch.plugin/elasticsearch-scripting-painless-spi@6.8.15?classifier=sources" diff --git a/requirements.txt b/requirements.txt index 1351919a..f0bccea9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -66,7 +66,7 @@ ftputil==5.1.0 fusepy==3.0.1 gemfileparser2==0.9.4 gitdb==4.0.12 -GitPython==3.1.44 +GitPython==3.1.45 go-inspector==0.5.0 gunicorn==23.0.0 html5lib==1.1 @@ -128,14 +128,14 @@ pyelftools==0.32 PyGithub==2.6.1 pygmars==0.9.0 Pygments==2.19.2 -pyinstrument==5.0.2 +pyinstrument==5.0.3 PyJWT==2.10.1 pymaven-patch==0.3.2 PyNaCl==1.5.0 pyparsing==3.2.3 python-dateutil==2.9.0.post0 python-dotenv==1.1.1 -python-inspector==0.14.0 +python-inspector==0.14.3 pytz==2025.2 PyYAML==6.0.2 rdflib==7.1.4 @@ -147,14 +147,14 @@ requests==2.32.4 resolvelib==1.2.0 rpds-py==0.25.1 rpm-inspector-rpm==4.16.1.3.210404 -rq==2.4.0 +rq==2.4.1 rq-scheduler==0.14.0 rubymarshal==1.0.3 rust-inspector==0.1.0 samecode==0.5.1 saneyaml==0.6.1 -scancode-toolkit==32.3.3 -scancodeio==35.0.0 +scancode-toolkit==32.4.0 +scancodeio==35.1.0 scipy==1.15.3 semantic-version==2.10.0 semver==3.0.4 @@ -163,7 +163,7 @@ six==1.17.0 smmap==5.0.2 sortedcontainers==2.4.0 soupsieve==2.7 -source-inspector==0.6.1 +source-inspector==0.7.0 spdx-tools==0.8.2 sqlparse==0.5.3 symbolic==10.2.1 @@ -188,7 +188,7 @@ typecode_libmagic_system_provided==33.0.0 types-python-dateutil==2.9.0.20250516 typing-inspection==0.4.1 typing_extensions==4.14.0 -univers==30.12.1 +univers==31.0.0 uritemplate==4.2.0 uritools==5.0.0 urllib3==2.5.0 diff --git a/setup.cfg b/setup.cfg index 1e8c763f..500c9f00 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,12 +57,12 @@ install_requires = reppy2 >= 0.3.6 rq-scheduler >= 0.14.0 rubymarshal == 1.0.3 - scancode-toolkit[packages] >= 32.3.3 + scancode-toolkit[packages] >= 32.4.3 urlpy >= 0.5 matchcode-toolkit >= 7.2.2 purl2vcs >= 2.0.0 - univers >= 30.12.1 - scancodeio >= 35.0.0 + univers >= 31.0.0 + scancodeio @ git+https://github.com/aboutcode-org/scancode.io@a3517d0a1bcda9582db7e1e12493d736e7fa8624 GitPython >= 3.1.44 samecode >= 0.5.1 # FederatedCode integration From dbfac90eb0e395af0f8bdab50049383df5f9e78e Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Thu, 31 Jul 2025 11:50:40 +0800 Subject: [PATCH 13/19] update to use scancode-toolkit 32.4.0 #596 * purldb depends on scancodeio which depends on sctk 32.4.0 (scancodeio 35.1.0 depends on scancode-toolkit==32.4.0) Signed-off-by: Chin Yeung Li --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 500c9f00..52ec075a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,7 +57,7 @@ install_requires = reppy2 >= 0.3.6 rq-scheduler >= 0.14.0 rubymarshal == 1.0.3 - scancode-toolkit[packages] >= 32.4.3 + scancode-toolkit[packages] >= 32.4.0 urlpy >= 0.5 matchcode-toolkit >= 7.2.2 purl2vcs >= 2.0.0 From fdd3b51b78ec893829f43c4bb1442128b7fd11b6 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Fri, 1 Aug 2025 13:44:13 +0800 Subject: [PATCH 14/19] Update packageurl-python to 0.17.3 #596 Signed-off-by: Chin Yeung Li --- requirements.txt | 2 +- setup.cfg | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index f0bccea9..96e177c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -101,7 +101,7 @@ normality==2.6.1 numpy==2.2.6 openpyxl==3.1.5 packagedcode-msitools==0.101.210706 -packageurl-python==0.17.2 +packageurl-python==0.17.3 packaging==25.0 packvers==21.5 parameter-expansion-patched==0.3.1 diff --git a/setup.cfg b/setup.cfg index 52ec075a..c86f4d26 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,7 +51,7 @@ install_requires = jawa >= 2.2.0 markdown >= 3.8.2 natsort >= 8.4.0 - packageurl-python >= 0.17.1 + packageurl-python >= 0.17.3 psycopg[binary] >= 3.2.9 PyGithub >= 2.6.1 reppy2 >= 0.3.6 @@ -62,7 +62,7 @@ install_requires = matchcode-toolkit >= 7.2.2 purl2vcs >= 2.0.0 univers >= 31.0.0 - scancodeio @ git+https://github.com/aboutcode-org/scancode.io@a3517d0a1bcda9582db7e1e12493d736e7fa8624 + scancodeio @ git+https://github.com/aboutcode-org/scancode.io@21f00bb62049504f23bf09d22412ff5d56e4f66c GitPython >= 3.1.44 samecode >= 0.5.1 # FederatedCode integration From 1e204a6951954652b2c9183ca61c9280bc7472e6 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Wed, 3 Sep 2025 14:09:10 +0800 Subject: [PATCH 15/19] Update packages dependencies Signed-off-by: Chin Yeung Li --- requirements.txt | 10 +++++----- setup.cfg | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 96e177c4..ee0a971a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,9 +42,9 @@ django-crispy-forms==2.4 django-environ==0.12.0 django-filter==25.1 django-probes==1.7.0 -django-rq==3.0.1 +django-rq==3.1 django-taggit==6.1.0 -djangorestframework==3.16.0 +djangorestframework==3.16.1 dockerfile-parse==2.0.1 dparse2==0.7.0 drf-spectacular==0.28.0 @@ -139,7 +139,7 @@ python-inspector==0.14.3 pytz==2025.2 PyYAML==6.0.2 rdflib==7.1.4 -redis==6.2.0 +redis==6.4.0 referencing==0.36.2 regipy==5.2.0 reppy2==0.3.6 @@ -147,14 +147,14 @@ requests==2.32.4 resolvelib==1.2.0 rpds-py==0.25.1 rpm-inspector-rpm==4.16.1.3.210404 -rq==2.4.1 +rq==2.5.0 rq-scheduler==0.14.0 rubymarshal==1.0.3 rust-inspector==0.1.0 samecode==0.5.1 saneyaml==0.6.1 scancode-toolkit==32.4.0 -scancodeio==35.1.0 +scancodeio==35.3.0 scipy==1.15.3 semantic-version==2.10.0 semver==3.0.4 diff --git a/setup.cfg b/setup.cfg index c86f4d26..f3fda32b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -62,7 +62,7 @@ install_requires = matchcode-toolkit >= 7.2.2 purl2vcs >= 2.0.0 univers >= 31.0.0 - scancodeio @ git+https://github.com/aboutcode-org/scancode.io@21f00bb62049504f23bf09d22412ff5d56e4f66c + scancodeio >= 35.3.0 GitPython >= 3.1.44 samecode >= 0.5.1 # FederatedCode integration From 711b97cac8a1d1646bd0207336c7c5960e92dfca Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Thu, 4 Sep 2025 11:26:30 +0800 Subject: [PATCH 16/19] Add error handling for failing to fetch in fetchcode #596 Signed-off-by: Chin Yeung Li --- minecode/collectors/generic.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/minecode/collectors/generic.py b/minecode/collectors/generic.py index 6080e516..09cbf395 100644 --- a/minecode/collectors/generic.py +++ b/minecode/collectors/generic.py @@ -109,10 +109,18 @@ def map_fetchcode_supported_package(package_url, pipelines, priority=0, from_go_ from minecode.model_utils import add_package_to_scan_queue from minecode.model_utils import merge_or_create_package - packages = [p for p in info(str(package_url)) or []] + try: + packages = [] + packages = [p for p in info(str(package_url)) or []] + except Exception as e: + print(str(e)) if not packages: - error = f"Could not find package using fetchcode: {package_url}" + if from_go_lang: + purl = "pkg:golang/" + str(package_url).partition("pkg:")[2] + else: + purl = str(package_url) + error = f"Could not find package using fetchcode: {purl}" logger.error(error) return error From 27d08deb9aa7ee7ae1cb1c6cb18965a022d57daf Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 9 Sep 2025 15:18:00 +0800 Subject: [PATCH 17/19] Update code to use .get instead of using dict['item'] #596 Signed-off-by: Chin Yeung Li --- minecode/collectors/bitbucket.py | 14 +++++++------- minecode/collectors/github.py | 4 +++- minecode/collectors/gitlab.py | 7 ++++--- minecode/miners/bitbucket.py | 25 +++++++++++++++---------- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/minecode/collectors/bitbucket.py b/minecode/collectors/bitbucket.py index 81b60126..11c60f1b 100644 --- a/minecode/collectors/bitbucket.py +++ b/minecode/collectors/bitbucket.py @@ -36,13 +36,13 @@ def bitbucket_get_all_package_version_author(subset_path): if data["size"] > 0: # Get all available versions for item in data["values"]: - version = item["name"] - author = "" - if "target" in item and item["target"]: - if "author" in item["target"] and item["target"]["author"]: - if item["target"]["author"]["type"] == "author": - author = item["target"]["author"]["user"]["display_name"] - version_author_list.append((version, author)) + version = item.get("name") + target = item.get("target") or {} + author = target.get("author") or {} + if author.get("type") == "author": + user = author.get("user") or {} + author_display_name = user.get("display_name") + version_author_list.append((version, author_display_name)) # Handle pagination repo_tags = data.get("next", None) return version_author_list diff --git a/minecode/collectors/github.py b/minecode/collectors/github.py index 63ec3b63..3151faf0 100644 --- a/minecode/collectors/github.py +++ b/minecode/collectors/github.py @@ -37,7 +37,9 @@ def github_get_all_versions(subset_path): break for tag in data: - version_list.append(tag["name"]) + version = tag.get("name") or {} + if version: + version_list.append(version) page += 1 # Check if we've reached the last page diff --git a/minecode/collectors/gitlab.py b/minecode/collectors/gitlab.py index a57ca1bd..be32f13c 100644 --- a/minecode/collectors/gitlab.py +++ b/minecode/collectors/gitlab.py @@ -35,9 +35,10 @@ def gitlab_get_all_package_version_author(subset_path): version_author_list = [] # Get all available versions for item in data: - version = item["name"] - author = item["commit"]["author_name"] - author_email = item["commit"]["author_email"] + version = item.get("name") + commit = item.get("commit") or {} + author = commit.get("author_name") or {} + author_email = commit.get("author_email") or {} version_author_list.append((version, author, author_email)) return version_author_list except requests.exceptions.HTTPError as err: diff --git a/minecode/miners/bitbucket.py b/minecode/miners/bitbucket.py index 3c6f47aa..792e41ba 100644 --- a/minecode/miners/bitbucket.py +++ b/minecode/miners/bitbucket.py @@ -327,10 +327,13 @@ def get_bitbucket_license_info(repo_path): ] data = response.json() # Search for license files in the root directory - for item in data["values"]: - if item["path"].upper() in common_license_file_name: + for item in data.get("values"): + path = item.get("path") + if path.upper() in common_license_file_name: # Found a license file - fetch its content - license_url = f"https://api.bitbucket.org/2.0/repositories/{repo_path}/src/HEAD/{item['path']}" + license_url = ( + f"https://api.bitbucket.org/2.0/repositories/{repo_path}/src/HEAD/{path}" + ) license_response = requests.get(license_url) license_response.raise_for_status() return license_response.text @@ -348,14 +351,16 @@ def build_bitbucket_packages(metadata_dict, purl): The metadata_dict is a dictionary. purl: String value of the package url of the ResourceURI object """ - name = metadata_dict["name"] - description = metadata_dict["description"] - homepage_url = metadata_dict["links"]["html"]["href"] - size = metadata_dict["size"] - primary_language = metadata_dict["language"] + name = metadata_dict.get("name") or {} + description = metadata_dict.get("description") or {} + links = metadata_dict.get("links") or {} + html = links.get("html") or {} + homepage_url = html.get["href"] or {} + size = metadata_dict.get("size") or {} + primary_language = metadata_dict.get("language") or {} if "repo_workspace_name" in metadata_dict: - repo_path = metadata_dict["repo_workspace_name"] + repo_path = metadata_dict.get("repo_workspace_name") or {} else: repo_path = "" license_text = get_bitbucket_license_info(repo_path) @@ -372,7 +377,7 @@ def build_bitbucket_packages(metadata_dict, purl): download_data = dict( datasource_id="bitbucket_pkginfo", - download_url=metadata_dict["download_url"], + download_url=metadata_dict.get("download_url") or {}, ) common_data.update(download_data) From e196df30b4ca2fae2887541a3b7b1893b5e21ec6 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 9 Sep 2025 15:30:19 +0800 Subject: [PATCH 18/19] Revert code changes for build_bitbucket_packages() Signed-off-by: Chin Yeung Li --- minecode/miners/bitbucket.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/minecode/miners/bitbucket.py b/minecode/miners/bitbucket.py index 792e41ba..19955e82 100644 --- a/minecode/miners/bitbucket.py +++ b/minecode/miners/bitbucket.py @@ -351,16 +351,14 @@ def build_bitbucket_packages(metadata_dict, purl): The metadata_dict is a dictionary. purl: String value of the package url of the ResourceURI object """ - name = metadata_dict.get("name") or {} - description = metadata_dict.get("description") or {} - links = metadata_dict.get("links") or {} - html = links.get("html") or {} - homepage_url = html.get["href"] or {} - size = metadata_dict.get("size") or {} - primary_language = metadata_dict.get("language") or {} + name = metadata_dict["name"] + description = metadata_dict["description"] + homepage_url = metadata_dict["links"]["html"]["href"] + size = metadata_dict["size"] + primary_language = metadata_dict["language"] if "repo_workspace_name" in metadata_dict: - repo_path = metadata_dict.get("repo_workspace_name") or {} + repo_path = metadata_dict["repo_workspace_name"] else: repo_path = "" license_text = get_bitbucket_license_info(repo_path) From 2497a53a771f3745dc94b03d98974752d7d94166 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 9 Sep 2025 16:35:54 +0800 Subject: [PATCH 19/19] Properly to use .get instead of using dict['item'] #596 Signed-off-by: Chin Yeung Li --- minecode/collectors/gitlab.py | 4 ++-- minecode/miners/bitbucket.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/minecode/collectors/gitlab.py b/minecode/collectors/gitlab.py index be32f13c..3f98b918 100644 --- a/minecode/collectors/gitlab.py +++ b/minecode/collectors/gitlab.py @@ -37,8 +37,8 @@ def gitlab_get_all_package_version_author(subset_path): for item in data: version = item.get("name") commit = item.get("commit") or {} - author = commit.get("author_name") or {} - author_email = commit.get("author_email") or {} + author = commit.get("author_name") or "" + author_email = commit.get("author_email") or "" version_author_list.append((version, author, author_email)) return version_author_list except requests.exceptions.HTTPError as err: diff --git a/minecode/miners/bitbucket.py b/minecode/miners/bitbucket.py index 19955e82..f9afe48b 100644 --- a/minecode/miners/bitbucket.py +++ b/minecode/miners/bitbucket.py @@ -351,14 +351,16 @@ def build_bitbucket_packages(metadata_dict, purl): The metadata_dict is a dictionary. purl: String value of the package url of the ResourceURI object """ - name = metadata_dict["name"] - description = metadata_dict["description"] - homepage_url = metadata_dict["links"]["html"]["href"] - size = metadata_dict["size"] - primary_language = metadata_dict["language"] + name = metadata_dict.get("name") + description = metadata_dict.get("description") or "" + links = metadata_dict.get("links") or {} + html = links.get("html") or {} + homepage_url = html.get("href") or "" + size = metadata_dict.get("size") or "" + primary_language = metadata_dict.get("language") or "" if "repo_workspace_name" in metadata_dict: - repo_path = metadata_dict["repo_workspace_name"] + repo_path = metadata_dict.get("repo_workspace_name") or "" else: repo_path = "" license_text = get_bitbucket_license_info(repo_path) @@ -375,7 +377,7 @@ def build_bitbucket_packages(metadata_dict, purl): download_data = dict( datasource_id="bitbucket_pkginfo", - download_url=metadata_dict.get("download_url") or {}, + download_url=metadata_dict.get("download_url") or "", ) common_data.update(download_data)