diff --git a/.github/workflows/analyze-docker-image.yml b/.github/workflows/analyze-docker-image.yml
index e88e1cc..36916be 100644
--- a/.github/workflows/analyze-docker-image.yml
+++ b/.github/workflows/analyze-docker-image.yml
@@ -5,10 +5,12 @@ jobs:
     runs-on: ubuntu-24.04
     name: Analyze a Docker image
     steps:
-      - name: Get the action.yml from the current branch
+      - name: Get the action.yml and scripts from the current branch
         uses: actions/checkout@v4
         with:
-          sparse-checkout: action.yml
+          sparse-checkout: |
+            action.yml
+            scripts/
           sparse-checkout-cone-mode: false
 
       - uses: ./
diff --git a/.github/workflows/find-vulnerabilities.yml b/.github/workflows/find-vulnerabilities.yml
index ec39fdc..acfa99c 100644
--- a/.github/workflows/find-vulnerabilities.yml
+++ b/.github/workflows/find-vulnerabilities.yml
@@ -5,10 +5,12 @@ jobs:
     runs-on: ubuntu-24.04
     name: Scan codebase and find vulnerabilities
     steps:
-      - name: Get the action.yml from the current branch
+      - name: Get the action.yml and scripts from the current branch
         uses: actions/checkout@v4
         with:
-          sparse-checkout: action.yml
+          sparse-checkout: |
+            action.yml
+            scripts/
           sparse-checkout-cone-mode: false
 
       - uses: actions/checkout@v4
diff --git a/.github/workflows/map-deploy-to-develop.yml b/.github/workflows/map-deploy-to-develop.yml
index 09ab26e..e4d871b 100644
--- a/.github/workflows/map-deploy-to-develop.yml
+++ b/.github/workflows/map-deploy-to-develop.yml
@@ -5,10 +5,12 @@ jobs:
     runs-on: ubuntu-24.04
     name: Map deploy to develop
     steps:
-      - name: Get the action.yml from the current branch
+      - name: Get the action.yml and scripts from the current branch
         uses: actions/checkout@v4
         with:
-          sparse-checkout: action.yml
+          sparse-checkout: |
+            action.yml
+            scripts/
           sparse-checkout-cone-mode: false
 
       - uses: ./
diff --git a/.github/workflows/scan-codebase.yml b/.github/workflows/scan-codebase.yml
index 0c02e8b..66b5e95 100644
--- a/.github/workflows/scan-codebase.yml
+++ b/.github/workflows/scan-codebase.yml
@@ -5,10 +5,12 @@ jobs:
     runs-on: ubuntu-24.04
     name: Scan codebase and check for compliance issues
     steps:
-      - name: Get the action.yml from the current branch
+      - name: Get the action.yml and scripts from the current branch
         uses: actions/checkout@v4
         with:
-          sparse-checkout: action.yml
+          sparse-checkout: |
+            action.yml
+            scripts/
           sparse-checkout-cone-mode: false
 
       - uses: actions/checkout@v4
diff --git a/.github/workflows/scan-single-package.yml b/.github/workflows/scan-single-package.yml
index a9d090b..989468c 100644
--- a/.github/workflows/scan-single-package.yml
+++ b/.github/workflows/scan-single-package.yml
@@ -5,10 +5,12 @@ jobs:
     runs-on: ubuntu-24.04
     name: Scan a package archive
     steps:
-      - name: Get the action.yml from the current branch
+      - name: Get the action.yml and scripts from the current branch
         uses: actions/checkout@v4
         with:
-          sparse-checkout: action.yml
+          sparse-checkout: |
+            action.yml
+            scripts/
           sparse-checkout-cone-mode: false
 
       - uses: ./
diff --git a/action.yml b/action.yml
index 5c0e93b..d5f5758 100644
--- a/action.yml
+++ b/action.yml
@@ -51,107 +51,41 @@ runs:
       with:
         python-version: ${{ inputs.python-version }}
 
+    # Kept from the previous version: values written to GITHUB_ENV reach every
+    # later step, including the `scanpipe status` call that runs after the script.
     - name: Set up environment
       shell: bash
       run: |
         echo "SECRET_KEY=$(openssl rand -base64 32)" >> $GITHUB_ENV
         echo "SCANCODEIO_DB_NAME=scancodeio" >> $GITHUB_ENV
         echo "SCANCODEIO_DB_USER=scancodeio" >> $GITHUB_ENV
         echo "SCANCODEIO_DB_PASSWORD=scancodeio" >> $GITHUB_ENV
 
-    - name: Start and setup the PostgreSQL service
+    # The script is addressed through github.action_path so it is found next to
+    # this action.yml even when the caller's workspace has no scripts/ directory.
+    - name: Run ScanCode Pipeline
       shell: bash
       run: |
-        sudo systemctl start postgresql.service
-        sudo -u postgres createuser --no-createrole --no-superuser --login --inherit --createdb ${{ env.SCANCODEIO_DB_USER }}
-        sudo -u postgres psql -c "ALTER USER ${{ env.SCANCODEIO_DB_USER }} WITH encrypted password '${{ env.SCANCODEIO_DB_PASSWORD }}'"
-        sudo -u postgres createdb --owner=scancodeio --encoding=UTF-8 ${{ env.SCANCODEIO_DB_NAME }}
-
-    - name: Install ScanCode.io
-      shell: bash
-      run: |
-        if [ -z "${{ inputs.scancodeio-repo-branch }}" ]; then
-          echo "Installing the latest ScanCode.io release from PyPI"
-          pip install --upgrade scancodeio
-        else
-          echo "Installing ScanCode.io from the GitHub branch: ${{ inputs.scancodeio-repo-branch }}"
-          pip install git+https://github.com/aboutcode-org/scancode.io.git@${{ inputs.scancodeio-repo-branch }}
-        fi
-
-    - name: Run migrations to prepare the database
-      shell: bash
-      run: scanpipe migrate --verbosity 0
-
-    - name: Generate `--pipeline` CLI arguments
-      shell: bash
-      run: |
-        IFS=',' read -ra PIPELINES <<< "${{ inputs.pipelines }}"
-        PIPELINE_CLI_ARGS=""
-        for pipeline in "${PIPELINES[@]}"; do
-          PIPELINE_CLI_ARGS+=" --pipeline $pipeline"
-        done
-        echo "PIPELINE_CLI_ARGS=${PIPELINE_CLI_ARGS}" >> $GITHUB_ENV
-
-    - name: Generate `--input-url` CLI arguments
-      shell: bash
-      run: |
-        INPUT_URL_CLI_ARGS=""
-        for url in ${{ inputs.input-urls }}; do
-          INPUT_URL_CLI_ARGS+=" --input-url $url"
-        done
-        echo "INPUT_URL_CLI_ARGS=${INPUT_URL_CLI_ARGS}" >> $GITHUB_ENV
-
-    - name: Create project
-      shell: bash
-      run: |
-        scanpipe create-project ${{ inputs.project-name }} \
-          ${{ env.PIPELINE_CLI_ARGS }} \
-          ${{ env.INPUT_URL_CLI_ARGS }}
-
-    - name: Set project work directory in the environment
+        python "${{ github.action_path }}/scripts/scancode_pipeline.py" \
+          --project-name "${{ inputs.project-name }}" \
+          --pipelines "${{ inputs.pipelines }}" \
+          --output-formats "${{ inputs.output-formats }}" \
+          --inputs-path "${{ inputs.inputs-path }}" \
+          --input-urls "${{ inputs.input-urls }}" \
+          ${{ inputs.check-compliance == 'true' && '--check-compliance' || '' }} \
+          --compliance-fail-level "${{ inputs.compliance-fail-level }}" \
+          ${{ inputs.compliance-fail-on-vulnerabilities == 'true' && '--compliance-fail-on-vulnerabilities' || '' }} \
+          ${{ inputs.scancodeio-repo-branch != '' && format('--scancodeio-repo-branch {0}', inputs.scancodeio-repo-branch) || '' }}
+
+    - name: Get project work directory for upload
       shell: bash
       run: |
         project_status=$(scanpipe status --project ${{ inputs.project-name }})
         work_directory=$(echo "$project_status" | grep -oP 'Work directory:\s*\K[^\n]+')
         echo "PROJECT_WORK_DIRECTORY=$work_directory" >> $GITHUB_ENV
 
-    - name: Copy input files to project work directory
-      shell: bash
-      run: |
-        SOURCE_PATH="${{ inputs.inputs-path }}"
-        DESTINATION_PATH="${{ env.PROJECT_WORK_DIRECTORY }}/input/"
-        if [ -d "$SOURCE_PATH" ]; then
-          cp -r "$SOURCE_PATH"/* "$DESTINATION_PATH"
-        fi
-
-    - name: Run the pipelines
-      shell: bash
-      run: scanpipe execute --project ${{ inputs.project-name }} --no-color
-
-    - name: Generate outputs
-      id: scanpipe
-      shell: bash
-      run: scanpipe output
-        --project ${{ inputs.project-name }}
-        --format ${{ inputs.output-formats }}
-
     - name: Upload outputs
       uses: actions/upload-artifact@v4
-      id: artifact-upload-step
       with:
         name: ${{ inputs.outputs-archive-name }}
         path: ${{ env.PROJECT_WORK_DIRECTORY }}/output/*
-
-    - name: Check compliance
-      if: inputs.check-compliance == 'true'
-      shell: bash
-      run: |
-        cmd="scanpipe check-compliance \
-          --project ${{ inputs.project-name }} \
-          --fail-level ${{ inputs.compliance-fail-level }}"
-
-        if [[ "${{ inputs.compliance-fail-on-vulnerabilities }}" == "true" ]]; then
-          cmd="$cmd --fail-on-vulnerabilities"
-        fi
-
-        eval "$cmd"
diff --git a/scripts/scancode_pipeline.py b/scripts/scancode_pipeline.py
new file mode 100644
index 0000000..7847137
--- /dev/null
+++ b/scripts/scancode_pipeline.py
@@ -0,0 +1,376 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/aboutcode-org/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.

"""
ScanCode Pipeline Runner - Reusable script for CI/CD implementations
"""
import argparse
import os
import subprocess
import sys
import secrets
import shutil
from pathlib import Path
from typing import List, Optional


class ScanCodePipelineRunner:
    """Run a ScanCode.io project end to end by shelling out to external tools.

    The runner is driven by a plain ``config`` dict (see ``build_config``) and
    executes, in order: environment setup, PostgreSQL setup, ScanCode.io
    installation, migrations, project creation, input copy, pipeline
    execution, output generation and the optional compliance check.
    Any failing step prints a message and terminates with exit status 1.
    """

    # Variables that later CI steps need in order to call ``scanpipe`` again.
    EXPORTED_ENV_VARS = (
        'SECRET_KEY',
        'SCANCODEIO_DB_NAME',
        'SCANCODEIO_DB_USER',
        'SCANCODEIO_DB_PASSWORD',
    )

    def __init__(self, config: dict):
        """Store the configuration; the work directory is resolved later."""
        self.config = config
        self.project_work_directory = None

    def _run(self, cmd: List[str], failure_label: str, **kwargs):
        """Run ``cmd`` and return the completed process.

        On a non-zero exit status, or when the executable cannot be started
        at all (``OSError``), print ``"<failure_label>: <error>"`` and exit
        with status 1. Extra keyword arguments go to ``subprocess.run``.
        """
        try:
            return subprocess.run(cmd, check=True, **kwargs)
        except (subprocess.CalledProcessError, OSError) as e:
            print(f"{failure_label}: {e}")
            sys.exit(1)

    def _export_to_github_env(self):
        """Append the runner's variables to the file named by ``GITHUB_ENV``.

        Variables set through ``os.environ`` only live in this process and its
        children. Writing ``NAME=value`` lines to the ``GITHUB_ENV`` file is
        what makes them visible to later workflow steps. Nothing happens when
        ``GITHUB_ENV`` is not set (for example outside GitHub Actions).
        """
        env_file = os.getenv('GITHUB_ENV')
        if not env_file:
            return

        with open(env_file, 'a', encoding='utf-8') as handle:
            for name in self.EXPORTED_ENV_VARS:
                value = os.environ.get(name, '')
                # The single-line NAME=value syntax cannot carry line breaks.
                if '\n' in value or '\r' in value:
                    print(f"Skipping export of {name}: value spans several lines")
                    continue
                handle.write(f"{name}={value}\n")

    def setup_environment(self):
        """Set up environment variables

        Generates ``SECRET_KEY`` when none is set, applies the database
        settings from the configuration, and exports all of them to
        ``GITHUB_ENV`` when that file is available.
        """
        print("Setting up environment...")

        if not os.getenv('SECRET_KEY'):
            os.environ['SECRET_KEY'] = secrets.token_urlsafe(32)

        os.environ['SCANCODEIO_DB_NAME'] = self.config.get('db_name', 'scancodeio')
        os.environ['SCANCODEIO_DB_USER'] = self.config.get('db_user', 'scancodeio')
        os.environ['SCANCODEIO_DB_PASSWORD'] = self.config.get('db_password', 'scancodeio')

        self._export_to_github_env()

        print("Environment setup completed")

    def setup_postgresql(self):
        """Start and configure PostgreSQL service

        Reads the database name, user and password from the environment, so
        ``setup_environment`` must have run first.
        """
        print("Setting up PostgreSQL...")

        db_user = os.environ['SCANCODEIO_DB_USER']
        db_password = os.environ['SCANCODEIO_DB_PASSWORD']
        db_name = os.environ['SCANCODEIO_DB_NAME']

        # A single quote inside an SQL string literal is written as two quotes.
        sql_password = db_password.replace("'", "''")
        failure = "PostgreSQL setup failed"

        self._run(['sudo', 'systemctl', 'start', 'postgresql.service'], failure)

        self._run([
            'sudo', '-u', 'postgres', 'createuser',
            '--no-createrole', '--no-superuser', '--login',
            '--inherit', '--createdb', db_user
        ], failure)

        self._run([
            'sudo', '-u', 'postgres', 'psql', '-c',
            f"ALTER USER {db_user} WITH encrypted password '{sql_password}'"
        ], failure)

        self._run([
            'sudo', '-u', 'postgres', 'createdb',
            f'--owner={db_user}', '--encoding=UTF-8', db_name
        ], failure)

        print("PostgreSQL setup completed")

    def install_scancodeio(self):
        """Install ScanCode.io

        Installs the latest PyPI release, or the configured Git branch when
        ``scancodeio_repo_branch`` is set.
        """
        print("Installing ScanCode.io...")

        repo_branch = self.config.get('scancodeio_repo_branch')
        # Use the interpreter running this script so the package lands in the
        # same environment, whatever ``pip`` happens to be first on PATH.
        pip_install = [sys.executable, '-m', 'pip', 'install']

        if not repo_branch:
            print("Installing the latest ScanCode.io release from PyPI")
            cmd = pip_install + ['--upgrade', 'scancodeio']
        else:
            print(f"Installing ScanCode.io from GitHub branch: {repo_branch}")
            repo_url = f"git+https://github.com/aboutcode-org/scancode.io.git@{repo_branch}"
            cmd = pip_install + [repo_url]

        self._run(cmd, "ScanCode.io installation failed")
        print("ScanCode.io installation completed")

    def run_migrations(self):
        """Run database migrations"""
        print("Running migrations...")
        self._run(['scanpipe', 'migrate', '--verbosity', '0'], "Migrations failed")
        print("Migrations completed")

    def generate_pipeline_args(self, pipelines: List[str]) -> List[str]:
        """Generate pipeline CLI arguments

        Returns ``['--pipeline', name, ...]`` for every non-blank entry.
        """
        args = []
        for pipeline in pipelines:
            name = pipeline.strip()
            if name:  # a stray separator must not become ``--pipeline ''``
                args.extend(['--pipeline', name])
        return args

    def generate_input_url_args(self, input_urls: List[str]) -> List[str]:
        """Generate input URL CLI arguments

        Returns ``['--input-url', url, ...]`` for every non-blank entry.
        """
        args = []
        for url in input_urls:
            cleaned = url.strip()
            if cleaned:
                args.extend(['--input-url', cleaned])
        return args

    def create_project(self):
        """Create ScanCode project"""
        print(f"Creating project: {self.config['project_name']}")

        pipelines = self.config.get('pipelines', ['scan_codebase'])
        input_urls = self.config.get('input_urls', [])

        cmd = ['scanpipe', 'create-project', self.config['project_name']]
        cmd.extend(self.generate_pipeline_args(pipelines))
        cmd.extend(self.generate_input_url_args(input_urls))

        self._run(cmd, "Project creation failed")
        print("Project created successfully")

    def get_project_work_directory(self):
        """Get project work directory

        Parses the ``Work directory:`` line from ``scanpipe status`` and
        stores it in ``self.project_work_directory``.
        """
        print("Getting project work directory...")

        failure = "Failed to get project work directory"
        result = self._run(
            ['scanpipe', 'status', '--project', self.config['project_name']],
            failure, capture_output=True, text=True,
        )

        marker = 'Work directory:'
        for line in result.stdout.splitlines():
            if marker in line:
                self.project_work_directory = line.split(marker, 1)[1].strip()
                break

        if not self.project_work_directory:
            print(f"{failure}: Could not find work directory in status output")
            sys.exit(1)

        print(f"Project work directory: {self.project_work_directory}")

    def copy_input_files(self):
        """Copy input files to project work directory

        ``inputs_path`` may be a directory (its contents are copied) or a
        single file. The ``input`` directory is created when missing. Does
        nothing when ``inputs_path`` is unset or does not exist.
        """
        inputs_path = self.config.get('inputs_path')

        if not inputs_path or not os.path.exists(inputs_path):
            print("No input files to copy")
            return

        if not self.project_work_directory:
            print("Failed to copy input files: project work directory is not set")
            sys.exit(1)

        destination_path = os.path.join(self.project_work_directory, 'input')

        print(f"Copying input files from {inputs_path} to {destination_path}")

        try:
            os.makedirs(destination_path, exist_ok=True)

            if os.path.isdir(inputs_path):
                for item in os.listdir(inputs_path):
                    source_item = os.path.join(inputs_path, item)
                    dest_item = os.path.join(destination_path, item)

                    if os.path.isdir(source_item):
                        shutil.copytree(source_item, dest_item, dirs_exist_ok=True)
                    else:
                        shutil.copy2(source_item, dest_item)
            else:
                # A single file was given instead of a directory.
                shutil.copy2(inputs_path, destination_path)

            print("Input files copied successfully")

        except OSError as e:  # shutil.Error is an OSError subclass
            print(f"Failed to copy input files: {e}")
            sys.exit(1)

    def run_pipelines(self):
        """Execute the pipelines"""
        print("Running pipelines...")

        self._run([
            'scanpipe', 'execute',
            '--project', self.config['project_name'],
            '--no-color'
        ], "Pipeline execution failed")

        print("Pipelines executed successfully")

    def generate_outputs(self):
        """Generate output files

        Every requested format is attempted so the log shows all failures,
        then the run exits with status 1 if any format failed.
        """
        print("Generating outputs...")

        output_formats = self.config.get('output_formats', ['json', 'xlsx', 'spdx', 'cyclonedx'])
        failed_formats = []

        for fmt in output_formats:
            fmt = fmt.strip()
            if not fmt:
                continue
            print(f"\n=== Generating {fmt.upper()} format ===")

            cmd = [
                'scanpipe', 'output',
                '--project', self.config['project_name'],
                '--format', fmt
            ]

            try:
                result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            except subprocess.CalledProcessError as e:
                print(f"✗ {fmt.upper()} format generation failed")
                print(f"  Exit code: {e.returncode}")
                if e.stdout:
                    print(f"  Stdout: {e.stdout.strip()}")
                if e.stderr:
                    print(f"  Stderr: {e.stderr.strip()}")
                failed_formats.append(fmt)
                continue
            except OSError as e:
                # The executable could not be started at all.
                print(f"✗ {fmt.upper()} format generation failed")
                print(f"  Error: {e}")
                failed_formats.append(fmt)
                continue

            print(f"✓ {fmt.upper()} format generated successfully")
            if result.stdout:
                print(f"  Output: {result.stdout.strip()}")

        output_path = self.get_output_path()
        if os.path.exists(output_path):
            print(f"\n=== Generated files in {output_path} ===")
            for file in sorted(os.listdir(output_path)):
                size = os.path.getsize(os.path.join(output_path, file))
                print(f"  {file} ({size} bytes)")
        else:
            print(f"\n⚠ Output directory not found: {output_path}")

        if failed_formats:
            print(f"Output generation failed for: {', '.join(failed_formats)}")
            sys.exit(1)

        print("Output generation completed")

    def check_compliance(self):
        """Check compliance if enabled"""
        if not self.config.get('check_compliance', False):
            return

        print("Checking compliance...")

        cmd = [
            'scanpipe', 'check-compliance',
            '--project', self.config['project_name'],
            '--fail-level', self.config.get('compliance_fail_level', 'ERROR')
        ]

        if self.config.get('compliance_fail_on_vulnerabilities', False):
            cmd.append('--fail-on-vulnerabilities')

        self._run(cmd, "Compliance check failed")
        print("Compliance check passed")

    def get_output_path(self) -> str:
        """Get the output directory path"""
        return os.path.join(self.project_work_directory, 'output')

    def run_full_pipeline(self):
        """Run the complete pipeline"""
        print("Starting ScanCode pipeline...")

        self.setup_environment()
        self.setup_postgresql()
        self.install_scancodeio()
        self.run_migrations()
        self.create_project()
        self.get_project_work_directory()
        self.copy_input_files()
        self.run_pipelines()
        self.generate_outputs()
        self.check_compliance()

        print("Pipeline completed successfully!")
        print(f"Output files available at: {self.get_output_path()}")


def _split_on_commas(value):
    """Split ``value`` on commas, trimming blanks and dropping empty items."""
    return [item.strip() for item in (value or '').split(',') if item.strip()]


def _split_on_commas_or_whitespace(value):
    """Split ``value`` on commas and/or whitespace, dropping empty items."""
    return (value or '').replace(',', ' ').split()


def parse_arguments(argv=None):
    """Parse command line arguments

    ``argv`` defaults to ``sys.argv[1:]``; pass a list to parse it instead.
    """
    parser = argparse.ArgumentParser(description='ScanCode Pipeline Runner')

    parser.add_argument('--project-name', default='scancode-project',
                        help='Name of the project')
    parser.add_argument('--pipelines', default='scan_codebase',
                        help='Comma-separated list of pipelines')
    parser.add_argument('--output-formats', default='json xlsx spdx cyclonedx',
                        help='Space- or comma-separated list of output formats')
    parser.add_argument('--inputs-path',
                        help='Path to input files directory')
    parser.add_argument('--input-urls',
                        help='Space- or comma-separated list of input URLs')
    parser.add_argument('--check-compliance', action='store_true',
                        help='Enable compliance checking')
    parser.add_argument('--compliance-fail-level', default='ERROR',
                        choices=['ERROR', 'WARNING', 'MISSING'],
                        help='Compliance failure level')
    parser.add_argument('--compliance-fail-on-vulnerabilities', action='store_true',
                        help='Fail on vulnerabilities')
    parser.add_argument('--scancodeio-repo-branch',
                        help='ScanCode.io repository branch to install from')
    parser.add_argument('--db-name', default='scancodeio',
                        help='Database name')
    parser.add_argument('--db-user', default='scancodeio',
                        help='Database user')
    parser.add_argument('--db-password', default='scancodeio',
                        help='Database password')

    return parser.parse_args(argv)


def build_config(args):
    """Turn parsed arguments into the runner's configuration dict.

    Pipelines are split on commas only. Output formats and input URLs accept
    commas and/or whitespace, so a space-separated URL list yields one entry
    per URL.
    """
    return {
        'project_name': args.project_name,
        'pipelines': _split_on_commas(args.pipelines),
        'output_formats': _split_on_commas_or_whitespace(args.output_formats),
        'inputs_path': args.inputs_path,
        'input_urls': _split_on_commas_or_whitespace(args.input_urls),
        'check_compliance': args.check_compliance,
        'compliance_fail_level': args.compliance_fail_level,
        'compliance_fail_on_vulnerabilities': args.compliance_fail_on_vulnerabilities,
        'scancodeio_repo_branch': args.scancodeio_repo_branch,
        'db_name': args.db_name,
        'db_user': args.db_user,
        'db_password': args.db_password,
    }


def main(argv=None):
    """Main entry point"""
    config = build_config(parse_arguments(argv))

    runner = ScanCodePipelineRunner(config)
    runner.run_full_pipeline()


if __name__ == '__main__':
    main()