From 430f14090a2a2d807d1031863b4335e8a0f37aa0 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Wed, 1 Oct 2025 16:15:12 +0300 Subject: [PATCH 1/2] Add support to mine CRAN Package-URLs Update mine Cran to use grouper function Move extract_cran_packages to miners/cran.py Signed-off-by: ziad hany --- minecode_pipelines/miners/cran.py | 66 +++ minecode_pipelines/pipelines/mine_cran.py | 86 ++++ minecode_pipelines/pipes/cran.py | 73 +++ minecode_pipelines/tests/pipes/test_cran.py | 41 ++ .../tests/test_data/cran/cran_db.json | 461 ++++++++++++++++++ .../test_data/cran/expected_abbreviate.yaml | 1 + .../test_data/cran/expected_abc.data.yaml | 13 + .../tests/test_data/cran/expected_abc.yaml | 2 + pyproject-minecode_pipelines.toml | 1 + 9 files changed, 744 insertions(+) create mode 100644 minecode_pipelines/miners/cran.py create mode 100644 minecode_pipelines/pipelines/mine_cran.py create mode 100644 minecode_pipelines/pipes/cran.py create mode 100644 minecode_pipelines/tests/pipes/test_cran.py create mode 100644 minecode_pipelines/tests/test_data/cran/cran_db.json create mode 100644 minecode_pipelines/tests/test_data/cran/expected_abbreviate.yaml create mode 100644 minecode_pipelines/tests/test_data/cran/expected_abc.data.yaml create mode 100644 minecode_pipelines/tests/test_data/cran/expected_abc.yaml diff --git a/minecode_pipelines/miners/cran.py b/minecode_pipelines/miners/cran.py new file mode 100644 index 00000000..0cfe9008 --- /dev/null +++ b/minecode_pipelines/miners/cran.py @@ -0,0 +1,66 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
import json
from pathlib import Path
from typing import Iterator
from typing import List
from typing import Union

import requests
from packageurl import PackageURL


def fetch_cran_db(output_file="cran_db.json", timeout=(30, 300)) -> Path:
    """
    Download the CRAN package database (~250MB JSON) to ``output_file`` and
    return the path of the written file.

    The response body is streamed to disk chunk by chunk, so the payload is
    never held in memory as a whole.

    ``timeout`` is forwarded to ``requests.get`` as a ``(connect, read)`` pair
    in seconds. Pass ``None`` to wait indefinitely, which was the behavior
    before this parameter existed.

    Raise ``requests.HTTPError`` when the server answers with an error status.
    """
    url = "https://crandb.r-pkg.org/-/all"
    output_path = Path(output_file)

    # Without a timeout a stalled connection would block the pipeline forever.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with output_path.open("wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

    return output_path


def extract_cran_packages(json_file_path: Union[str, Path]) -> Iterator[List[str]]:
    """
    Yield one list of purl strings per package found in a CRAN DB JSON file.

    Each yielded list holds one ``pkg:cran/<name>@<version>`` string for every
    key of the package "versions" mapping, in file order. A package without
    any version yields an empty list.

    This is a generator: the file is only opened, and ``FileNotFoundError``
    only raised, when iteration starts. The file is parsed with a single
    ``json.load`` call, so the decoded database is held in memory while
    iterating.

    Expected input shape:
    {
        "AATtools": {
            "_id": "AATtools",
            "_rev": "8-9ebb721d05b946f2b437b49e892c9e8c",
            "name": "AATtools",
            "versions": {
                "0.0.1": {...},
                "0.0.2": {...},
                "0.0.3": {...}
            }
        }
    }
    """
    db_path = Path(json_file_path)
    if not db_path.exists():
        raise FileNotFoundError(f"File not found: {db_path}")

    with open(db_path, encoding="utf-8") as f:
        data = json.load(f)

    for pkg_name, pkg_data in data.items():
        # Tolerate both a missing "versions" key and an explicit null value.
        versions = pkg_data.get("versions") or {}
        yield [
            PackageURL(type="cran", name=pkg_name, version=version).to_string()
            for version in versions
        ]
import os
from scanpipe.pipelines import Pipeline
from scanpipe.pipes import federatedcode

from minecode_pipelines import pipes
from minecode_pipelines.miners.cran import fetch_cran_db
from minecode_pipelines.pipes import cran


# URL of the data repository that receives the mined purls; it can be
# overridden through the MINECODE_DATA_CRAN_REPO environment variable.
MINECODE_DATA_CRAN_REPO = os.environ.get(
    "MINECODE_DATA_CRAN_REPO", "https://github.com/aboutcode-data/minecode-data-cran-test"
)


class MineCran(Pipeline):
    """
    Mine all packageURLs from a CRAN R index and publish them to a FederatedCode repo.
    """

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.check_federatedcode_eligibility,
            cls.setup_federatedcode_cran,
            cls.mine_and_publish_cran_packageurls,
            cls.cleanup_db_and_repo,
        )

    def check_federatedcode_eligibility(self):
        """
        Check if the project fulfills the following criteria for
        pushing the project result to FederatedCode.
        """
        federatedcode.check_federatedcode_configured_and_available(logger=self.log)

    def setup_federatedcode_cran(self):
        """
        Clone the FederatedCode CRAN repository and download the CRAN DB JSON file.

        Store the clone as ``self.cloned_data_repo`` and the downloaded file
        path as ``self.db_path`` for the following steps.
        """
        self.cloned_data_repo = federatedcode.clone_repository(MINECODE_DATA_CRAN_REPO)
        self.db_path = fetch_cran_db()

        if self.log:
            self.log(
                f"{MINECODE_DATA_CRAN_REPO} repo cloned at: {self.cloned_data_repo.working_dir}"
            )

    def mine_and_publish_cran_packageurls(self):
        """Get cran packageURLs for all mined cran package names."""
        cran.mine_and_publish_cran_packageurls(
            cloned_data_repo=self.cloned_data_repo, db_path=self.db_path, logger=self.log
        )

    def cleanup_db_and_repo(self):
        """
        Remove the downloaded CRAN DB file and delete the cloned data repository.

        The clone is deleted even when removing the DB file fails; a DB file
        that is already gone is logged and otherwise ignored.
        """
        try:
            self.log(f"Cleaning database file at: {self.db_path}")
            os.remove(self.db_path)
        except FileNotFoundError:
            self.log(f"Database file not found, nothing to clean at: {self.db_path}")
        finally:
            # Runs on every exit path so that a failed file removal never
            # leaves the cloned repository behind.
            self.log(
                f"Deleting cloned repo {MINECODE_DATA_CRAN_REPO} from: {self.cloned_data_repo.working_dir}"
            )
            pipes.delete_cloned_repos(
                repos=[self.cloned_data_repo],
                logger=self.log,
            )
from pathlib import Path

from aboutcode.hashid import get_package_purls_yml_file_path, get_core_purl
from scanpipe.pipes.federatedcode import commit_changes
from scanpipe.pipes.federatedcode import push_changes
from minecode_pipelines import VERSION
from minecode_pipelines.miners.cran import extract_cran_packages
from minecode_pipelines.pipes import write_data_to_yaml_file
from minecode_pipelines.utils import grouper

# Number of packages written and committed together in one commit/push cycle.
PACKAGE_BATCH_SIZE = 1000


def mine_and_publish_cran_packageurls(cloned_data_repo, db_path, logger):
    """
    Extract CRAN packages from the database, write their package URLs (purls) to YAML,
    and commit changes in batches to the given cloned repository.

    ``cloned_data_repo`` is the cloned data repository; its ``working_dir`` is
    the root under which the purl YAML files are written.
    ``db_path`` is the path of the CRAN DB JSON file to read.
    ``logger`` is a callable taking a message string, or a falsy value to
    disable logging.

    One YAML file is written per package that has at least one version. After
    each batch, the written files are committed and pushed; a batch that wrote
    nothing is neither committed nor pushed.
    """
    packages_to_sync = list(extract_cran_packages(db_path))

    # ``working_dir`` may be a plain string rather than a Path (presumably the
    # case for a GitPython repo; verify against the caller). Wrapping it keeps
    # the "/" join below valid for both.
    repo_dir = Path(cloned_data_repo.working_dir)

    # NOTE(review): the arguments are passed positionally as (iterable, size);
    # confirm this matches the parameter order of minecode_pipelines.utils.grouper.
    for package_batch in grouper(packages_to_sync, PACKAGE_BATCH_SIZE):
        purl_files = []
        base_purls = []

        if logger:
            logger(f"Starting package mining for a batch of {PACKAGE_BATCH_SIZE} packages")

        for updated_purls in package_batch:
            if not updated_purls:
                continue  # skip padded None values or empty

            first_purl = updated_purls[0]
            base_purl = get_core_purl(first_purl)
            purl_yaml_path = repo_dir / get_package_purls_yml_file_path(first_purl)
            write_data_to_yaml_file(path=purl_yaml_path, data=updated_purls)

            # Same guard as the batch-level message: logging is optional.
            if logger:
                logger(f"writing packageURLs for package: {str(base_purl)} at: {purl_yaml_path}")
            purl_files.append(purl_yaml_path)
            base_purls.append(str(base_purl))

        # After finishing the batch, commit & push if there is something to save
        if purl_files and base_purls:
            commit_changes(
                repo=cloned_data_repo,
                files_to_commit=purl_files,
                purls=base_purls,
                mine_type="packageURL",
                tool_name="pkg:pypi/minecode-pipelines",
                tool_version=VERSION,
            )
            push_changes(repo=cloned_data_repo)
+ """ + + db_file = DATA_DIR / "cran_db.json" + results = list(extract_cran_packages(db_file)) + + expected_files = [ + DATA_DIR / "expected_abbreviate.yaml", + DATA_DIR / "expected_abc.data.yaml", + DATA_DIR / "expected_abc.yaml", + ] + + assert len(results) == len(expected_files) + + for result, expected_file in zip(results, expected_files): + with open(expected_file, encoding="utf-8") as f: + expected = saneyaml.load(f) + + assert result == expected \ No newline at end of file diff --git a/minecode_pipelines/tests/test_data/cran/cran_db.json b/minecode_pipelines/tests/test_data/cran/cran_db.json new file mode 100644 index 00000000..4600e98f --- /dev/null +++ b/minecode_pipelines/tests/test_data/cran/cran_db.json @@ -0,0 +1,461 @@ +{ + "abbreviate": { + "_id": "abbreviate", + "_rev": "6-c214483426b263b02f8aff6d73c87402", + "name": "abbreviate", + "archived": false, + "versions": { + "0.1": { + "Package": "abbreviate", + "Type": "Package", + "Title": "Readable String Abbreviation", + "Version": "0.1", + "Date": "2021-12-12", + "Authors@R": "\nperson(\"Sigbert\", \"Klinke\", email=\"sigbert@hu-berlin.de\", role=c(\"aut\", \"cre\"))", + "Description": "Strings are abbreviated to at least \"minlength\" characters, such that they remain unique\n(if they were). 
The abbreviations should be recognisable.", + "URL": "https://github.com/sigbertklinke/abbreviate (development version)", + "RoxygenNote": "7.1.1", + "License": "GPL-3", + "Encoding": "UTF-8", + "Suggests": { + "testthat": ">= 3.0.0" + }, + "Config/testthat/edition": "3", + "NeedsCompilation": "no", + "Packaged": "2021-12-12 14:35:02 UTC; sk", + "Author": "Sigbert Klinke [aut, cre]", + "Maintainer": "Sigbert Klinke ", + "Repository": "CRAN", + "Date/Publication": "2021-12-14 08:40:04 UTC", + "crandb_file_date": "2021-12-14 10:02:23", + "MD5sum": "37285eddefb6b0fce95783bf21b32999", + "date": "2021-12-14T07:40:04+00:00", + "releases": [] + } + }, + "timeline": { + "0.1": "2021-12-14T07:40:04+00:00" + }, + "latest": "0.1", + "title": "Readable String Abbreviation" + }, + "abc": { + "_id": "abc", + "_rev": "13-5b00c8405507a05d9247b38313e3d2e3", + "name": "abc", + "versions": { + "1.0": { + "Package": "abc", + "Version": "1.0", + "Date": "2010-08-03", + "Title": "Functions to perform Approximate Bayesian Computation (ABC)using simulated data", + "Author": "Katalin Csillery, with contributions from Michael Blum andOlivier Francois", + "Maintainer": "Katalin Csillery ", + "Depends": { + "R": ">= 1.8.0", + "nnet": "*", + "quantreg": "*", + "locfit": "*", + "methods": "*" + }, + "Description": "The 'abc' package provides various functions for parameterestimation and model selection in an ABC framework. Three mainfunctions are available: (i) 'abc' implements several ABCinference algorithms, (ii) 'cv4abc' is a cross-validation toolto evaluate the quality of the estimation and help the choiceof tolerance rate, and (iii) 'postpr' implements modelselection in an ABC setting. 
All these functions areaccompanied by appropriate summary and plotting functions.", + "License": "Unlimited", + "Packaged": "2010-10-04 19:52:49 UTC; kcsillery", + "Repository": "CRAN", + "Date/Publication": "2010-10-05 08:45:21", + "crandb_file_date": "2010-10-05 04:45:28", + "date": "2010-10-05T08:45:21+00:00", + "releases": [] + }, + "1.1": { + "Package": "abc", + "Version": "1.1", + "Date": "2010-08-03", + "Title": "Functions to perform Approximate Bayesian Computation (ABC)using simulated data", + "Author": "Katalin Csillery, with contributions from Michael Blum andOlivier Francois", + "Maintainer": "Katalin Csillery ", + "Depends": { + "R": ">= 1.8.0", + "nnet": "*", + "quantreg": "*", + "locfit": "*", + "methods": "*" + }, + "Description": "The 'abc' package provides various functions for parameterestimation and model selection in an ABC framework. Three mainfunctions are available: (i) 'abc' implements several ABCinference algorithms, (ii) 'cv4abc' is a cross-validation toolto evaluate the quality of the estimation and help the choiceof tolerance rate, and (iii) 'postpr' implements modelselection in an ABC setting. 
All these functions areaccompanied by appropriate summary and plotting functions.", + "License": "Unlimited", + "Packaged": "2010-10-11 07:22:18 UTC; mblum", + "Repository": "CRAN", + "Date/Publication": "2010-10-11 07:24:31", + "crandb_file_date": "2010-10-11 01:24:31", + "date": "2010-10-11T07:24:31+00:00", + "releases": [ + "2.12.0", + "2.12.1" + ] + }, + "1.2": { + "Package": "abc", + "Version": "1.2", + "Date": "2011-01-10", + "Title": "Functions to perform Approximate Bayesian Computation (ABC)using simulated data", + "Author": "Katalin Csillery, Michael Blum and Olivier Francois", + "Maintainer": "Katalin Csillery and Michael Blum", + "Depends": { + "R": ">= 1.8.0", + "nnet": "*", + "quantreg": "*", + "locfit": "*" + }, + "Description": "The 'abc' package provides various functions for parameterestimation and model selection in an ABC framework. Three mainfunctions are available: (i) 'abc' implements several ABCinference algorithms, (ii) 'cv4abc' is a cross-validation toolto evaluate the quality of the estimation and help the choiceof tolerance rate, and (iii) 'postpr' implements modelselection in an ABC setting. 
All these functions areaccompanied by appropriate summary and plotting functions.", + "License": "GPL (>= 2)", + "Packaged": "2011-01-14 16:27:58 UTC; katalinc", + "Repository": "CRAN", + "Date/Publication": "2011-01-15 16:23:31", + "crandb_file_date": "2011-01-15 11:23:38", + "date": "2011-01-15T16:23:31+00:00", + "releases": [ + "2.12.2", + "2.13.0" + ] + }, + "1.3": { + "Package": "abc", + "Version": "1.3", + "Date": "2011-05-09", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Author": "Katalin Csillery, Michael Blum and Olivier Francois", + "Maintainer": "Katalin Csillery and Michael Blum", + "Depends": { + "R": ">= 2.10", + "nnet": "*", + "quantreg": "*", + "locfit": "*" + }, + "Description": "The package implements several ABC algorithms forperforming parameter estimation and model selection.Cross-validation tools are also available for measuring theaccuracy of ABC estimates, and to calculate themisclassification probabilities of different models.", + "License": "GPL (>= 3)", + "Packaged": "2011-05-10 07:57:44 UTC; kcsillery", + "Repository": "CRAN", + "Date/Publication": "2011-05-10 09:42:38", + "crandb_file_date": "2011-05-10 05:42:44", + "date": "2011-05-10T09:42:38+00:00", + "releases": "2.13.1" + }, + "1.4": { + "Package": "abc", + "Version": "1.4", + "Date": "2011-09-02", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Author": "Katalin Csillery, Michael Blum and Olivier Francois", + "Maintainer": "Katalin Csillery and Michael Blum", + "Depends": { + "R": ">= 2.10", + "MASS": "*", + "nnet": "*", + "quantreg": "*", + "locfit": "*" + }, + "Description": "The package implements several ABC algorithms forperforming parameter estimation and model selection.Cross-validation tools are also available for measuring theaccuracy of ABC estimates, and to calculate themisclassification probabilities of different models.", + "License": "GPL (>= 3)", + "Packaged": "2011-09-03 12:42:41 UTC; kcsillery", + "Repository": "CRAN", + 
"Date/Publication": "2011-09-04 05:18:45", + "crandb_file_date": "2011-09-04 01:18:53", + "date": "2011-09-04T05:18:45+00:00", + "releases": [ + "2.13.2", + "2.14.0", + "2.14.1", + "2.14.2", + "2.15.0", + "2.15.1" + ] + }, + "1.5": { + "Package": "abc", + "Version": "1.5", + "Date": "2012-16-02", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Author": "Katalin Csillery, Michael Blum and Olivier Francois", + "Maintainer": "Michael Blum ", + "Depends": { + "R": ">= 2.10", + "MASS": "*", + "nnet": "*", + "quantreg": "*", + "locfit": "*" + }, + "Description": "The package implements several ABC algorithms forperforming parameter estimation and model selection.Cross-validation tools are also available for measuring theaccuracy of ABC estimates, and to calculate themisclassification probabilities of different models.", + "License": "GPL (>= 3)", + "Packaged": "2012-08-08 09:30:30 UTC; ripley", + "Repository": "CRAN", + "Date/Publication": "2012-08-08 09:50:54", + "crandb_file_date": "2012-08-08 03:50:54", + "date": "2012-08-08T09:50:54+00:00", + "releases": [] + }, + "1.6": { + "Package": "abc", + "Version": "1.6", + "Date": "2012-16-02", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Author": "Katalin Csillery, Michael Blum and Olivier Francois", + "Maintainer": "Michael Blum ", + "Depends": { + "R": ">= 2.10", + "MASS": "*", + "nnet": "*", + "quantreg": "*", + "locfit": "*" + }, + "Description": "The package implements several ABC algorithms forperforming parameter estimation and model selection.Cross-validation tools are also available for measuring theaccuracy of ABC estimates, and to calculate themisclassification probabilities of different models.", + "License": "GPL (>= 3)", + "Packaged": "2012-08-14 15:10:43 UTC; mblum", + "Repository": "CRAN", + "Date/Publication": "2012-08-14 16:27:09", + "crandb_file_date": "2012-08-14 12:27:12", + "date": "2012-08-14T16:27:09+00:00", + "releases": [ + "2.15.2", + "2.15.3", + "3.0.0", + 
"3.0.1" + ] + }, + "1.7": { + "Package": "abc", + "Version": "1.7", + "Date": "2013-06-06", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Author": "Katalin Csillery, Michael Blum and Olivier Francois", + "Maintainer": "Michael Blum ", + "Depends": { + "R": ">= 2.10", + "MASS": "*", + "nnet": "*", + "quantreg": "*", + "locfit": "*" + }, + "Description": "The package implements several ABC algorithms forperforming parameter estimation and model selection.Cross-validation tools are also available for measuring theaccuracy of ABC estimates, and to calculate themisclassification probabilities of different models.", + "License": "GPL (>= 3)", + "Packaged": "2013-06-06 15:45:28 UTC; mblum", + "Repository": "CRAN", + "Date/Publication": "2013-06-06 19:53:42", + "crandb_file_date": "2013-06-06 13:53:45", + "NeedsCompilation": "no", + "date": "2013-06-06T19:53:42+00:00", + "releases": "3.0.2" + }, + "1.8": { + "Package": "abc", + "Version": "1.8", + "Date": "2013-10-14", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Author": "Katalin Csillery, Michael Blum and Olivier Francois", + "Maintainer": "Michael Blum ", + "Depends": { + "R": ">= 2.10", + "nnet": "*", + "quantreg": "*", + "MASS": "*" + }, + "Description": "The package implements several ABC algorithms forperforming parameter estimation and model selection.Cross-validation tools are also available for measuring theaccuracy of ABC estimates, and to calculate themisclassification probabilities of different models.", + "License": "GPL (>= 3)", + "Packaged": "2013-10-28 14:27:28 UTC; kcsillery", + "Repository": "CRAN", + "Date/Publication": "2013-10-29 14:33:22", + "crandb_file_date": "2013-10-29 09:33:25", + "NeedsCompilation": "no", + "date": "2013-10-29T14:33:22+00:00", + "releases": [ + "3.0.3", + "3.1.0", + "3.1.1" + ] + }, + "2.0": { + "Package": "abc", + "Version": "2.0", + "Date": "2014-7-10", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Author": "Katalin 
Csillery, Louisiane Lemaire, Michael Blum and Olivier Francois", + "Maintainer": "Michael Blum ", + "Depends": { + "R": ">= 2.10", + "nnet": "*", + "quantreg": "*", + "MASS": "*", + "locfit": "*" + }, + "Description": "The package implements several ABC algorithms forperforming parameter estimation, model selection, and goodness-of-fit.Cross-validation tools are also available for measuring theaccuracy of ABC estimates, and to calculate themisclassification probabilities of different models.", + "License": "GPL (>= 3)", + "Packaged": "2014-07-11 15:09:21 UTC; mblum", + "Repository": "CRAN", + "Date/Publication": "2014-07-11 23:50:41", + "crandb_file_date": "2014-07-11 17:50:44", + "NeedsCompilation": "no", + "date": "2014-07-11T23:50:41+00:00", + "releases": [] + }, + "2.1": { + "Package": "abc", + "Type": "Package", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Version": "2.1", + "Date": "2015-05-04", + "Authors@R": "c(\nperson(\"Csillery\", \"Katalin\", role = \"aut\", email=\"kati.csillery@gmail.com\"),\nperson(\"Lemaire\", \"Louisiane\", role = \"aut\"),\nperson(\"Francois\", \"Olivier\", role = \"aut\"),\nperson(\"Blum\", \"Michael\",\nemail = \"michael.blum@imag.fr\", role = c(\"aut\", \"cre\")))", + "Depends": { + "R": ">= 2.10", + "abc.data": "*", + "nnet": "*", + "quantreg": "*", + "MASS": "*", + "locfit": "*" + }, + "Description": "Implements several ABC algorithms for\nperforming parameter estimation, model selection, and goodness-of-fit.\nCross-validation tools are also available for measuring the\naccuracy of ABC estimates, and to calculate the\nmisclassification probabilities of different models.", + "Repository": "CRAN", + "License": "GPL (>= 3)", + "NeedsCompilation": "no", + "Packaged": "2015-05-05 08:35:25 UTC; mblum", + "Author": "Csillery Katalin [aut],\nLemaire Louisiane [aut],\nFrancois Olivier [aut],\nBlum Michael [aut, cre]", + "Maintainer": "Blum Michael ", + "Date/Publication": "2015-05-05 11:34:14", + 
"crandb_file_date": "2015-05-05 05:35:37", + "date": "2015-05-05T11:34:14+00:00", + "releases": [] + }, + "2.2.1": { + "Package": "abc", + "Type": "Package", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Version": "2.2.1", + "Date": "2022-05-17", + "Authors@R": "c(\nperson(\"Csillery\", \"Katalin\", role = \"aut\", email=\"kati.csillery@gmail.com\"),\nperson(\"Lemaire\", \"Louisiane\", role = \"aut\"),\nperson(\"Francois\", \"Olivier\", role = \"aut\"),\nperson(\"Blum\", \"Michael\",\nemail = \"michael.blum.temp@gmail.com\", role = c(\"aut\", \"cre\")))", + "Depends": { + "R": ">= 2.10", + "abc.data": "*", + "nnet": "*", + "quantreg": "*", + "MASS": "*", + "locfit": "*" + }, + "Description": "Implements several ABC algorithms for\nperforming parameter estimation, model selection, and goodness-of-fit.\nCross-validation tools are also available for measuring the\naccuracy of ABC estimates, and to calculate the\nmisclassification probabilities of different models.", + "Repository": "CRAN", + "License": "GPL (>= 3)", + "NeedsCompilation": "no", + "Packaged": "2022-05-18 18:46:30 UTC; mblum", + "Author": "Csillery Katalin [aut],\nLemaire Louisiane [aut],\nFrancois Olivier [aut],\nBlum Michael [aut, cre]", + "Maintainer": "Blum Michael ", + "Date/Publication": "2022-05-19 07:20:02 UTC", + "crandb_file_date": "2022-05-19 08:38:33", + "MD5sum": "21e4c928a8cdd4c6fe3c1c76c99913a9", + "date": "2022-05-19T06:20:02+00:00", + "releases": [] + }, + "2.2.2": { + "Package": "abc", + "Type": "Package", + "Title": "Tools for Approximate Bayesian Computation (ABC)", + "Version": "2.2.2", + "Date": "2024-12-3", + "Authors@R": "c(\nperson(\"Csillery\", \"Katalin\", role = \"aut\", email=\"kati.csillery@gmail.com\"),\nperson(\"Lemaire\", \"Louisiane\", role = \"aut\"),\nperson(\"Francois\", \"Olivier\", role = \"aut\"),\nperson(\"Blum\", \"Michael\",\nemail = \"michael.blum.temp@gmail.com\", role = c(\"aut\", \"cre\")))", + "Depends": { + "R": ">= 2.10", + "abc.data": 
"*", + "nnet": "*", + "quantreg": "*", + "MASS": "*", + "locfit": "*" + }, + "Description": "Implements several ABC algorithms for\nperforming parameter estimation, model selection, and goodness-of-fit.\nCross-validation tools are also available for measuring the\naccuracy of ABC estimates, and to calculate the\nmisclassification probabilities of different models.", + "Repository": "CRAN", + "License": "GPL (>= 3)", + "NeedsCompilation": "no", + "Packaged": "2024-12-03 17:06:09 UTC; michaelblum", + "Author": "Csillery Katalin [aut],\nLemaire Louisiane [aut],\nFrancois Olivier [aut],\nBlum Michael [aut, cre]", + "Maintainer": "Blum Michael ", + "Date/Publication": "2024-12-03 17:50:02 UTC", + "crandb_file_date": "2024-12-03 18:50:29", + "MD5sum": "aa7596bcec49d9bcab9ff8ecd46530c9", + "date": "2024-12-03T16:50:02+00:00", + "releases": [] + } + }, + "timeline": { + "1.0": "2010-10-05T08:45:21+00:00", + "1.1": "2010-10-11T07:24:31+00:00", + "1.2": "2011-01-15T16:23:31+00:00", + "1.3": "2011-05-10T09:42:38+00:00", + "1.4": "2011-09-04T05:18:45+00:00", + "1.5": "2012-08-08T09:50:54+00:00", + "1.6": "2012-08-14T16:27:09+00:00", + "1.7": "2013-06-06T19:53:42+00:00", + "1.8": "2013-10-29T14:33:22+00:00", + "2.0": "2014-07-11T23:50:41+00:00", + "2.1": "2015-05-05T11:34:14+00:00", + "2.2.1": "2022-05-19T06:20:02+00:00", + "2.2.2": "2024-12-03T16:50:02+00:00" + }, + "latest": "2.2.2", + "title": "Tools for Approximate Bayesian Computation (ABC)", + "archived": false, + "revdeps": 2 + }, + "abc.data": { + "_id": "abc.data", + "_rev": "7-41f5c78a9a69541f3d6afca5778185e5", + "name": "abc.data", + "archived": false, + "versions": { + "1.0": { + "Package": "abc.data", + "Type": "Package", + "Title": "Data Only: Tools for Approximate Bayesian Computation (ABC)", + "Version": "1.0", + "Date": "2015-05-04", + "Authors@R": "c(\nperson(\"Csillery\", \"Katalin\", role = \"aut\", email=\"kati.csillery@gmail.com\"),\nperson(\"Lemaire\", \"Louisiane\", role = \"aut\"),\nperson(\"Francois\", 
\"Olivier\", role = \"aut\"),\nperson(\"Blum\", \"Michael\",\nemail = \"michael.blum@imag.fr\", role = c(\"aut\", \"cre\")))", + "Depends": { + "R": ">= 2.10" + }, + "Description": "Contains data which are used by functions of the 'abc' package.", + "Repository": "CRAN", + "License": "GPL (>= 3)", + "NeedsCompilation": "no", + "Packaged": "2015-05-05 09:25:25 UTC; mblum", + "Author": "Csillery Katalin [aut],\nLemaire Louisiane [aut],\nFrancois Olivier [aut],\nBlum Michael [aut, cre]", + "Maintainer": "Blum Michael ", + "Date/Publication": "2015-05-05 11:34:13", + "crandb_file_date": "2015-05-05 05:35:35", + "date": "2015-05-05T11:34:13+00:00", + "releases": [] + }, + "1.1": { + "Package": "abc.data", + "Type": "Package", + "Title": "Data Only: Tools for Approximate Bayesian Computation (ABC)", + "Version": "1.1", + "Authors@R": "c(\nperson(\"Csillery\", \"Katalin\", role = \"aut\", email=\"kati.csillery@gmail.com\"),\nperson(\"Lemaire\", \"Louisiane\", role = \"aut\"),\nperson(\"Francois\", \"Olivier\", role = \"aut\"),\nperson(\"Blum\", \"Michael\",\nemail = \"michael.blum.temp@gmail.com\", role = c(\"aut\", \"cre\")))", + "Depends": { + "R": ">= 2.10" + }, + "Description": "Contains data which are used by functions of the 'abc' package.", + "Repository": "CRAN", + "License": "GPL (>= 3)", + "NeedsCompilation": "no", + "Packaged": "2024-03-24 10:04:54 UTC; hornik", + "Author": "Csillery Katalin [aut],\nLemaire Louisiane [aut],\nFrancois Olivier [aut],\nBlum Michael [aut, cre]", + "Maintainer": "Blum Michael ", + "Date/Publication": "2024-03-24 10:15:14 UTC", + "crandb_file_date": "2024-03-24 10:38:49", + "MD5sum": "1693d5a243a991f8cf290471972a54f8", + "date": "2024-03-24T09:15:14+00:00", + "releases": [] + } + }, + "timeline": { + "1.0": "2015-05-05T11:34:13+00:00", + "1.1": "2024-03-24T09:15:14+00:00" + }, + "latest": "1.1", + "title": "Data Only: Tools for Approximate Bayesian Computation (ABC)" + } +} \ No newline at end of file diff --git 
a/minecode_pipelines/tests/test_data/cran/expected_abbreviate.yaml b/minecode_pipelines/tests/test_data/cran/expected_abbreviate.yaml new file mode 100644 index 00000000..606f81eb --- /dev/null +++ b/minecode_pipelines/tests/test_data/cran/expected_abbreviate.yaml @@ -0,0 +1 @@ +- pkg:cran/abbreviate@0.1 \ No newline at end of file diff --git a/minecode_pipelines/tests/test_data/cran/expected_abc.data.yaml b/minecode_pipelines/tests/test_data/cran/expected_abc.data.yaml new file mode 100644 index 00000000..a2066baa --- /dev/null +++ b/minecode_pipelines/tests/test_data/cran/expected_abc.data.yaml @@ -0,0 +1,13 @@ +- pkg:cran/abc@1.0 +- pkg:cran/abc@1.1 +- pkg:cran/abc@1.2 +- pkg:cran/abc@1.3 +- pkg:cran/abc@1.4 +- pkg:cran/abc@1.5 +- pkg:cran/abc@1.6 +- pkg:cran/abc@1.7 +- pkg:cran/abc@1.8 +- pkg:cran/abc@2.0 +- pkg:cran/abc@2.1 +- pkg:cran/abc@2.2.1 +- pkg:cran/abc@2.2.2 \ No newline at end of file diff --git a/minecode_pipelines/tests/test_data/cran/expected_abc.yaml b/minecode_pipelines/tests/test_data/cran/expected_abc.yaml new file mode 100644 index 00000000..15ee719f --- /dev/null +++ b/minecode_pipelines/tests/test_data/cran/expected_abc.yaml @@ -0,0 +1,2 @@ +- pkg:cran/abc.data@1.0 +- pkg:cran/abc.data@1.1 \ No newline at end of file diff --git a/pyproject-minecode_pipelines.toml b/pyproject-minecode_pipelines.toml index cd41f297..a4f2a3f6 100644 --- a/pyproject-minecode_pipelines.toml +++ b/pyproject-minecode_pipelines.toml @@ -54,6 +54,7 @@ mine_cargo = "minecode_pipelines.pipelines.mine_cargo:MineCargo" mine_debian = "minecode_pipelines.pipelines.mine_debian:MineDebian" mine_alpine = "minecode_pipelines.pipelines.mine_alpine:MineAlpine" mine_conan = "minecode_pipelines.pipelines.mine_conan:MineConan" +mine_cran = "minecode_pipelines.pipelines.mine_cran:MineCran" [tool.bumpversion] current_version = "0.0.1b15" From c3e85ed72f8aaf223c80894ac85ae702551b9c62 Mon Sep 17 00:00:00 2001 From: ziad hany Date: Wed, 1 Oct 2025 16:19:26 +0300 Subject: [PATCH 2/2] 
Fix code style Signed-off-by: ziad hany --- minecode_pipelines/miners/cran.py | 2 +- minecode_pipelines/pipelines/mine_cran.py | 2 +- minecode_pipelines/pipes/cran.py | 2 +- minecode_pipelines/tests/pipes/test_cran.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/minecode_pipelines/miners/cran.py b/minecode_pipelines/miners/cran.py index 0cfe9008..09bac091 100644 --- a/minecode_pipelines/miners/cran.py +++ b/minecode_pipelines/miners/cran.py @@ -63,4 +63,4 @@ def extract_cran_packages(json_file_path: str) -> list: version=version, ) purls.append(purl.to_string()) - yield purls \ No newline at end of file + yield purls diff --git a/minecode_pipelines/pipelines/mine_cran.py b/minecode_pipelines/pipelines/mine_cran.py index c354395b..b1612005 100644 --- a/minecode_pipelines/pipelines/mine_cran.py +++ b/minecode_pipelines/pipelines/mine_cran.py @@ -83,4 +83,4 @@ def cleanup_db_and_repo(self): pipes.delete_cloned_repos( repos=[self.cloned_data_repo], logger=self.log, - ) \ No newline at end of file + ) diff --git a/minecode_pipelines/pipes/cran.py b/minecode_pipelines/pipes/cran.py index 4b266bee..0ec65a6f 100644 --- a/minecode_pipelines/pipes/cran.py +++ b/minecode_pipelines/pipes/cran.py @@ -70,4 +70,4 @@ def mine_and_publish_cran_packageurls(cloned_data_repo, db_path, logger): tool_name="pkg:pypi/minecode-pipelines", tool_version=VERSION, ) - push_changes(repo=cloned_data_repo) \ No newline at end of file + push_changes(repo=cloned_data_repo) diff --git a/minecode_pipelines/tests/pipes/test_cran.py b/minecode_pipelines/tests/pipes/test_cran.py index d937b5e8..ef194d71 100644 --- a/minecode_pipelines/tests/pipes/test_cran.py +++ b/minecode_pipelines/tests/pipes/test_cran.py @@ -10,7 +10,7 @@ import saneyaml from pathlib import Path from unittest import TestCase -from minecode_pipelines.pipes.cran import extract_cran_packages +from minecode_pipelines.miners.cran import extract_cran_packages DATA_DIR = Path(__file__).parent.parent / "test_data" 
/ "cran" @@ -38,4 +38,4 @@ def test_extract_cran_packages_from_testdata(self): with open(expected_file, encoding="utf-8") as f: expected = saneyaml.load(f) - assert result == expected \ No newline at end of file + assert result == expected