Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,41 @@ Fetch some package metadata and get a ``fetchcode.packagedcode_models.Package``
>>> list(package.info('pkg:rubygems/files'))
[Package(type='rubygems', namespace=None, name='files', version=None)]

Fetch a purl and get a ``fetchcode.fetch.Response`` object back::

>>> from fetchcode import fetch
>>> f = fetch('pkg:swift/github.com/Alamofire/Alamofire@5.4.3')
>>> f.location
'/tmp/tmp_cm02xsg'
>>> f.content_type
'application/zip'
>>> f.url
'https://github.com/Alamofire/Alamofire/archive/5.4.3.zip'

Ecosystems supported for fetching a purl from fetchcode:

- alpm
- apk
- bitbucket
- cargo
- composer
- conda
- cpan
- cran
- deb
- gem
- generic
- github
- golang
- hackage
- hex
- luarocks
- maven
- npm
- nuget
- pub
- pypi
- swift

License
--------
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ MarkupSafe==2.0.1
more-itertools==8.13.0
normality==2.3.3
packagedcode-msitools==0.101.210706
packageurl-python==0.9.9
packaging==21.3
packageurl-python==0.17.4
packaging==24.0
parameter-expansion-patched==0.3.1
patch==1.16
pdfminer-six==20220506
Expand Down
57 changes: 53 additions & 4 deletions src/fetchcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
from urllib.parse import urlparse

import requests
from packageurl.contrib import purl2url

from fetchcode.utils import _http_exists


class Response:
Expand Down Expand Up @@ -89,24 +92,70 @@ def fetch_ftp(url, location):
return resp


def resolve_purl(purl):
    """
    Return a download URL for the ``purl`` Package URL string, or None.

    Two resolvers are tried in order: ``purl2url.get_download_url`` first, then
    fetchcode's own ``download_urls.download_url``. The first non-empty URL that
    passes the ``_http_exists`` check is returned. None is returned when no
    resolver produces such a URL.
    """
    # Function-scope import, kept local to this function as in the original code.
    from fetchcode.download_urls import download_url as fetchcode_download_url

    resolvers = (purl2url.get_download_url, fetchcode_download_url)
    for get_url in resolvers:
        candidate = get_url(purl)
        if not candidate:
            continue
        if _http_exists(candidate):
            return candidate
    return None


def get_resolved_url(url, scheme):
    """
    Return a ``(url, scheme)`` tuple obtained by passing ``url`` to the
    resolution handler registered for ``scheme``.

    Raise a ValueError when no handler is registered for ``scheme``.
    """
    # Dispatch table mapping a scheme to the function that resolves it.
    handlers = {
        "pkg": resolve_url_from_purl,
    }
    if scheme not in handlers:
        raise ValueError(f"Not a supported/known scheme: {scheme}")

    resolved_url, resolved_scheme = handlers[scheme](url)
    return resolved_url, resolved_scheme


def resolve_url_from_purl(url):
    """
    Return a ``(url, scheme)`` tuple for the ``url`` Package URL (PURL) string,
    where the returned URL comes from ``resolve_purl`` and the scheme is the
    scheme of that returned URL.

    Raise a ValueError if ``resolve_purl`` yields no URL.
    """
    resolved = resolve_purl(url)
    if resolved:
        return resolved, get_url_scheme(resolved)
    raise ValueError("Could not resolve PURL to a valid URL.")


def get_url_scheme(url):
    """
    Return the scheme component of the ``url`` string as parsed by
    ``urllib.parse.urlparse`` (an empty string when ``url`` has no scheme).
    """
    parsed = urlparse(url)
    return parsed.scheme


def fetch(url):
    """
    Return a `Response` object built from fetching the content at the `url` URL string and
    store content at a temporary file.

    ``url`` is either an ftp, http or https URL, or a ``pkg`` Package URL. A Package URL is
    first resolved to a download URL with ``get_resolved_url``; errors raised during that
    resolution propagate to the caller.

    Raise an Exception if the (resolved) URL scheme is not one of ftp, http or https.
    """
    scheme = get_url_scheme(url)

    if scheme == "pkg":
        # The resolver returns both the download URL and its scheme, so the URL does not
        # need to be parsed a second time.
        url, scheme = get_resolved_url(url, scheme)

    fetchers = {"ftp": fetch_ftp, "http": fetch_http, "https": fetch_http}
    fetcher = fetchers.get(scheme)
    if not fetcher:
        raise Exception(f"Not a supported/known scheme: {scheme}.")

    # Create the temporary file only after the scheme is known to be supported, so that a
    # rejected URL leaves no stray file behind. With ``delete=False`` the file stays on disk
    # once the handle is closed; its path is handed to the fetcher as ``location``.
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        location = temp.name

    return fetcher(url, location)


def fetch_json_response(url):
Expand Down
1 change: 0 additions & 1 deletion src/fetchcode/composer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ class Composer:

@classmethod
def get_download_url(cls, purl):

"""
Return the download URL for a Composer PURL.
"""
Expand Down
122 changes: 122 additions & 0 deletions tests/test_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import pytest

from fetchcode import fetch
from fetchcode import resolve_purl
from fetchcode import resolve_url_from_purl


@mock.patch("fetchcode.requests.get")
Expand Down Expand Up @@ -63,3 +65,123 @@ def test_fetch_with_scheme_not_present():
url = "abc://speedtest/1KB.zip"
response = fetch(url=url)
assert "Not a supported/known scheme." == e_info


@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.fetch_http")
@mock.patch("fetchcode.pypi.fetch_json_response")
def test_fetch_purl_with_fetchcode(mock_fetch_json_response, mock_fetch_http, mock_http_exists):
    # Fetch a PyPI purl with the JSON lookup, the URL existence check and the http fetcher
    # all mocked: fetch() must return what the http fetcher returns, and call the
    # existence check and the fetcher exactly once each.
    mock_fetch_http.return_value = "mocked_purl_response"
    mock_http_exists.return_value = True
    mock_fetch_json_response.return_value = {
        "urls": [{"url": "https://example.com/sample-1.0.0.zip"}]
    }

    # The purl literal was mangled by e-mail obfuscation; name and version are restored
    # from the mocked "sample-1.0.0.zip" download URL.
    response = fetch("pkg:pypi/sample@1.0.0")

    assert response == "mocked_purl_response"
    mock_http_exists.assert_called_once()
    mock_fetch_http.assert_called_once()


@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.fetch_http")
def test_fetch_purl_with_purl2url(mock_fetch_http, mock_http_exists):
    # Fetch an alpm purl with the URL existence check and the http fetcher mocked: fetch()
    # must return the fetcher's value, with each mock called exactly once.
    # NOTE(review): the purl literal below looks mangled by e-mail obfuscation
    # ("[email protected]"); restore the real "name@version" from the repository.
    mock_http_exists.return_value = True
    mock_fetch_http.return_value = "mocked_purl_response"

    fetched = fetch("pkg:alpm/[email protected]")

    assert fetched == "mocked_purl_response"
    mock_http_exists.assert_called_once()
    mock_fetch_http.assert_called_once()


@mock.patch("fetchcode.pypi.fetch_json_response")
def test_fetch_invalid_purl(mock_fetch_json_response):
    # The mocked JSON lookup returns an empty payload; fetch() is expected to fail with
    # the "No download URL found" message for that package and version.
    # This test was defined twice with identical bodies (the second definition shadowed
    # the first); a single definition is kept.
    mock_fetch_json_response.return_value = {}

    # The purl literal was mangled by e-mail obfuscation; name and version are restored
    # from the expected error message.
    with pytest.raises(Exception, match="No download URL found for invalid-package version 1.0.0"):
        fetch("pkg:pypi/invalid-package@1.0.0")


def test_fetch_unsupported_scheme():
    # An "s3" URL is neither a purl nor an ftp/http(s) URL, so fetch() must reject it.
    unsupported_url = "s3://bucket/object"
    with pytest.raises(Exception, match="Not a supported/known scheme"):
        fetch(unsupported_url)


def test_resolve_url_from_purl_invalid():
    # fetch() of a purl with an unknown package type must fail with the ValueError
    # raised when the purl cannot be resolved to a URL.
    # NOTE(review): the purl literal below looks mangled by e-mail obfuscation
    # ("[email protected]"); restore the real "name@version" from the repository.
    unresolvable_purl = "pkg:invalid/[email protected]"
    with pytest.raises(ValueError, match="Could not resolve PURL to a valid URL."):
        fetch(unresolvable_purl)


@mock.patch("fetchcode._http_exists")
def test_resolve_url_from_purl_using_purl2url(mock_http_exists):
    # With the URL existence check mocked to succeed, a swift purl must resolve to the
    # GitHub archive URL, and that exact URL must be the one that was checked.
    mock_http_exists.return_value = True
    expected_url = "https://github.com/Alamofire/Alamofire/archive/5.4.3.zip"

    # The purl literal was mangled by e-mail obfuscation; name and version are restored
    # from the expected archive URL.
    url, _ = resolve_url_from_purl("pkg:swift/github.com/Alamofire/Alamofire@5.4.3")

    assert url == expected_url
    mock_http_exists.assert_called_once_with(expected_url)


@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.pypi.fetch_json_response")
def test_resolve_url_from_purl_using_fetchcode(mock_fetch_json_response, mock_http_exists):
    # With the JSON lookup and the URL existence check mocked, a PyPI purl must resolve
    # to the URL listed in the mocked payload, and that URL must be the one checked.
    mock_http_exists.return_value = True
    mock_fetch_json_response.return_value = {
        "urls": [{"url": "https://example.com/sample-1.0.0.zip"}]
    }

    # The purl literal was mangled by e-mail obfuscation; name and version are restored
    # from the mocked "sample-1.0.0.zip" download URL.
    url, _ = resolve_url_from_purl("pkg:pypi/sample@1.0.0")

    assert url == "https://example.com/sample-1.0.0.zip"
    mock_http_exists.assert_called_once_with("https://example.com/sample-1.0.0.zip")


def test_resolve_purl_invalid():
    # A purl with an unknown package type must resolve to None rather than raise.
    # NOTE(review): the purl literal below looks mangled by e-mail obfuscation
    # ("[email protected]"); restore the real "name@version" from the repository.
    resolved = resolve_purl("pkg:invalid/[email protected]")
    assert resolved is None


def test_resolve_purl_using_purl2url():
    # A pub purl must resolve to the pub.dev archive URL for that package and version.
    # NOTE(review): unlike the sibling tests, `_http_exists` is not patched here, so this
    # presumably performs a live existence check - confirm that this is intended.
    # The purl literal was mangled by e-mail obfuscation; name and version are restored
    # from the expected archive URL.
    url = resolve_purl("pkg:pub/http@0.13.3")
    assert url == "https://pub.dev/api/archives/http-0.13.3.tar.gz"


@mock.patch("fetchcode._http_exists")
def test_resolve_purl_using_purl2url_url_does_not_exists(mock_http_exists):
    # When the URL existence check reports False, resolve_purl must return None.
    mock_http_exists.return_value = False

    # The purl literal was mangled by e-mail obfuscation; it is restored to the same
    # pub purl used by test_resolve_purl_using_purl2url (presumed - verify against the
    # repository).
    url = resolve_purl("pkg:pub/http@0.13.3")

    assert url is None


@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.pypi.fetch_json_response")
def test_resolve_purl_using_fetchcode(mock_fetch_json_response, mock_http_exists):
    # With the JSON lookup and the URL existence check mocked, a PyPI purl must resolve
    # to the URL listed in the mocked payload.
    mock_fetch_json_response.return_value = {
        "urls": [{"url": "https://example.com/sample-1.0.0.zip"}]
    }
    mock_http_exists.return_value = True

    # The purl literal was mangled by e-mail obfuscation; name and version are restored
    # from the mocked "sample-1.0.0.zip" download URL.
    url = resolve_purl("pkg:pypi/sample@1.0.0")

    assert url == "https://example.com/sample-1.0.0.zip"


@mock.patch("fetchcode._http_exists")
@mock.patch("fetchcode.pypi.fetch_json_response")
def test_resolve_purl_using_fetchcode_url_does_not_exists(
    mock_fetch_json_response, mock_http_exists
):
    # Even though the mocked payload lists a download URL, resolve_purl must return None
    # when the URL existence check reports False.
    mock_fetch_json_response.return_value = {
        "urls": [{"url": "https://example.com/sample-1.0.0.zip"}]
    }
    mock_http_exists.return_value = False

    # The purl literal was mangled by e-mail obfuscation; name and version are restored
    # from the mocked "sample-1.0.0.zip" download URL.
    url = resolve_purl("pkg:pypi/sample@1.0.0")

    assert url is None