Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0882289
#596 - Added code for golang collector and updated the miner code for…
chinyeungli Apr 9, 2025
1c1fbd7
#596 - Work in progress to handle bitbucket.org (It contains a lot…
chinyeungli Apr 11, 2025
b0f1a22
#596 - Added the following data collection for golang
chinyeungli Apr 14, 2025
82cb1fb
Correct typo
chinyeungli Apr 15, 2025
18f9413
#596 - Add on-demand package data collection for golang
chinyeungli Apr 15, 2025
905195a
Update minecode/collectors/golang.py
chinyeungli Apr 16, 2025
16e9fac
Merge branch 'main' into 596_add_on-demand_package_data_collection_fo…
chinyeungli Apr 16, 2025
cb18cb3
change from web scraping to fetch from deps.dev #596
chinyeungli Jul 28, 2025
1b5034e
Better error handling and remove test code #596
chinyeungli Jul 28, 2025
7adce31
Correct ci_code_style #596
chinyeungli Jul 28, 2025
e3d973a
Merge branch 'main' into 596_add_on-demand_package_data_collection_fo…
chinyeungli Jul 28, 2025
fae4ff0
Upgrade to use packageurl-python 0.17.2 #596
chinyeungli Jul 29, 2025
140de5b
Use "build_golang_download_url" to build the download url for go pack…
chinyeungli Jul 30, 2025
674691d
Use branch of scancode.io #596
JonoYang Jul 30, 2025
dbfac90
update to use scancode-toolkit 32.4.0 #596
chinyeungli Jul 31, 2025
fdd3b51
Update packageurl-python to 0.17.3 #596
chinyeungli Aug 1, 2025
1e204a6
Update packages dependencies
chinyeungli Sep 3, 2025
4a8b4cf
Merge branch 'main' into 596_add_on-demand_package_data_collection_fo…
chinyeungli Sep 3, 2025
711b97c
Add error handling for failing to fetch in fetchcode #596
chinyeungli Sep 4, 2025
27d08de
Update code to use .get instead of using dict['item'] #596
chinyeungli Sep 9, 2025
e196df3
Revert code changes for build_bitbucket_packages()
chinyeungli Sep 9, 2025
2497a53
Properly to use .get instead of using dict['item'] #596
chinyeungli Sep 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions minecode/collectors/bitbucket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging

import requests


"""
Collect bitbucket packages from bitbucket registries.
"""

# Module-level logger named after this module. A dedicated StreamHandler is
# attached and the level is set to INFO as soon as the module is imported.
# NOTE(review): if the application also configures the root logger, records
# from this module may be emitted twice (once here, once via propagation) -
# confirm this is intended.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
logger.addHandler(handler)
logger.setLevel(logging.INFO)


def bitbucket_get_all_package_version_author(subset_path):
    """
    Return a list of (version, author display name) tuples, one per tag of
    the Bitbucket repository whose path is ``subset_path``.

    ``subset_path`` is interpolated as-is into the Bitbucket 2.0
    ``repositories/<subset_path>/refs/tags`` API URL. Every result page is
    visited by following the "next" link of each response. Only tags whose
    target has an author record of type "author" are reported; the display
    name is None when the author record has no user.

    Return None, after logging the error, when the API answers with an HTTP
    error status. Other request failures (for example connection errors) are
    not caught here and propagate to the caller.
    """
    repo_tags = f"https://api.bitbucket.org/2.0/repositories/{subset_path}/refs/tags"
    version_author_list = []
    try:
        while repo_tags:
            response = requests.get(repo_tags)
            response.raise_for_status()
            data = response.json()
            # "size" is an optional field of Bitbucket paginated responses, so
            # do not index it: an absent or empty "values" simply yields no tag.
            for item in data.get("values") or []:
                version = item.get("name")
                target = item.get("target") or {}
                author = target.get("author") or {}
                if author.get("type") != "author":
                    continue
                user = author.get("user") or {}
                version_author_list.append((version, user.get("display_name")))
            # Handle pagination: "next" is missing on the last page.
            repo_tags = data.get("next")
        return version_author_list
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
        return None
17 changes: 14 additions & 3 deletions minecode/collectors/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def packagedata_from_dict(package_data):
return PackageData.from_data(cleaned_package_data)


def map_fetchcode_supported_package(package_url, pipelines, priority=0):
def map_fetchcode_supported_package(package_url, pipelines, priority=0, from_go_lang=False):
"""
Add a `package_url` supported by fetchcode to the PackageDB.

Expand All @@ -109,13 +109,24 @@ def map_fetchcode_supported_package(package_url, pipelines, priority=0):
from minecode.model_utils import add_package_to_scan_queue
from minecode.model_utils import merge_or_create_package

packages = [p for p in info(str(package_url)) or []]
try:
packages = []
packages = [p for p in info(str(package_url)) or []]
except Exception as e:
print(str(e))

if not packages:
error = f"Could not find package using fetchcode: {package_url}"
if from_go_lang:
purl = "pkg:golang/" + str(package_url).partition("pkg:")[2]
else:
purl = str(package_url)
error = f"Could not find package using fetchcode: {purl}"
logger.error(error)
return error

if from_go_lang:
packages[0].type = "golang"
packages[0].namespace = "github.com/" + packages[0].namespace
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chinyeungli could there be golang packages not from github?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Only golang packages from github use this map_fetchcode_supported_package function.
Others will use map_golang_package()

package_data = packages[0].to_dict()

# Remove obsolete Package fields see https://github.com/aboutcode-org/fetchcode/issues/108
Expand Down
36 changes: 36 additions & 0 deletions minecode/collectors/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,42 @@
from minecode.collectors.generic import map_fetchcode_supported_package


def github_get_all_versions(subset_path):
    """
    Return the list of tag names of the GitHub repository ``subset_path``.

    Tags are requested from the GitHub REST API 100 per page, starting at
    page 1. Paging stops at the first empty page or as soon as a response
    carries no "next" link. Tags without a name are left out. An HTTP error
    status raises through ``raise_for_status``.
    """
    import requests

    tags_url = f"https://api.github.com/repos/{subset_path}/tags"
    collected = []
    page_number = 1

    while True:
        response = requests.get(
            tags_url,
            params={"page": page_number, "per_page": 100},  # Max 100 per page
            headers={"Accept": "application/vnd.github.v3+json"},
        )
        response.raise_for_status()

        tags = response.json()
        if not tags:
            # An empty page means there is nothing left to read.
            return collected

        names = (tag.get("name") for tag in tags)
        collected.extend(name for name in names if name)
        page_number += 1

        # No "next" relation in the Link header: this was the last page.
        if "next" not in response.links:
            return collected


# Indexing GitHub PURLs requires a GitHub API token.
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route("pkg:github/.*")
Expand Down
45 changes: 45 additions & 0 deletions minecode/collectors/gitlab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging

import requests


"""
Collect gitlab packages from gitlab registries.
"""

# Module-level logger named after this module. A dedicated StreamHandler is
# attached and the level is set to INFO as soon as the module is imported.
# NOTE(review): if the application also configures the root logger, records
# from this module may be emitted twice (once here, once via propagation) -
# confirm this is intended.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
logger.addHandler(handler)
logger.setLevel(logging.INFO)


def gitlab_get_all_package_version_author(subset_path):
    """
    Return a list of (version, author name, author email) tuples, one per tag
    of the GitLab project identified by ``subset_path``.

    ``subset_path`` is interpolated as-is into the GitLab v4
    ``projects/<subset_path>/repository/tags`` API URL. Every result page is
    visited by following the "next" relation of the response Link header, so
    projects with more tags than fit on one page are reported in full. Author
    name and email default to an empty string when the tag has no commit
    data.

    Return None, after logging the error, when the API answers with an HTTP
    error status. Other request failures (for example connection errors) are
    not caught here and propagate to the caller.
    """
    # NOTE(review): the GitLab API expects a namespaced project path to be
    # URL-encoded ("group%2Fproject") - confirm the caller encodes subset_path.
    # per_page=100 is the largest page size GitLab accepts; the default is 20.
    repo_tags = (
        f"https://gitlab.com/api/v4/projects/{subset_path}/repository/tags?per_page=100"
    )
    version_author_list = []
    try:
        while repo_tags:
            response = requests.get(repo_tags)
            response.raise_for_status()
            # Get all available versions of this page
            for item in response.json():
                version = item.get("name")
                commit = item.get("commit") or {}
                author = commit.get("author_name") or ""
                author_email = commit.get("author_email") or ""
                version_author_list.append((version, author, author_email))
            # Handle pagination: no "next" link on the last page.
            repo_tags = (response.links.get("next") or {}).get("url")
        return version_author_list
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
        return None
Loading
Loading