# .github/workflows/validate-and-execute-notebooks.yml
#
# Smoke tests for the Jupyter notebooks under notebooks/:
#   1. validate_tests - every matched notebook must be schema-valid, be
#      committed without outputs or execution counts, and contain a code
#      cell tagged 'parameters'.
#   2. execute_tests  - runs the selected notebooks with papermill, passing
#      a sample input file through the 'files' parameter.
name: Smoke Tests for Notebooks

on:
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - "notebooks/**/*.ipynb"
      - ".github/workflows/validate-and-execute-notebooks.yml"
  push:
    branches: [main]
    paths:
      - "notebooks/**/*.ipynb"
      - ".github/workflows/validate-and-execute-notebooks.yml"

permissions:
  contents: read

jobs:
  validate_tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Set the notebooks to validate, wildcards are allowed
        notebooks_to_validate: ["notebooks/**/*.ipynb"]
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          # NOTE(review): pip caching keys on a dependency file such as
          # requirements.txt or pyproject.toml - confirm the repo has one.
          cache: pip

      - name: Install Testing Tools
        run: |
          pip install nbformat ipykernel
          ipython kernel install --name "python3" --user

      - name: Validate Notebooks
        env:
          # Passed through the environment instead of being interpolated
          # into the script text, so the value is never parsed as shell code.
          NOTEBOOK_GLOB: ${{ matrix.notebooks_to_validate }}
        run: |
          set -euxo pipefail

          shopt -s globstar nullglob

          # $NOTEBOOK_GLOB is deliberately unquoted: the shell has to expand
          # the wildcard pattern. Only regular files are kept.
          notebooks=()
          for candidate in $NOTEBOOK_GLOB; do
            if [ -f "$candidate" ]; then
              notebooks+=("$candidate")
            fi
          done

          if [ "${#notebooks[@]}" -eq 0 ]; then
            echo "::warning::No notebooks matched '$NOTEBOOK_GLOB'; nothing to validate"
            exit 0
          fi

          # A single Python run checks every notebook and reports all
          # problems before failing. The heredoc body and its PY terminator
          # sit at the script's base indentation because <<'PY' does not
          # strip leading whitespace.
          python - "${notebooks[@]}" <<'PY'
          import sys

          import nbformat


          def find_problems(path):
              """Return messages describing what is wrong with one notebook."""
              try:
                  notebook = nbformat.read(path, nbformat.NO_CONVERT)
                  nbformat.validate(notebook)
                  code_cells = [
                      cell for cell in notebook.cells
                      if cell.cell_type == "code"
                  ]
              except Exception as exc:
                  # Reported as a failure so the remaining notebooks are
                  # still checked.
                  return [
                      f"Notebook {path} could not be validated: "
                      f"{type(exc).__name__}: {exc}"
                  ]

              problems = []

              # Notebooks must be committed without outputs.
              if any(cell.get("outputs") for cell in code_cells):
                  problems.append(
                      f"Code cell in notebook {path} has outputs"
                  )

              # Notebooks must be committed without execution counts.
              if any(cell.get("execution_count") for cell in code_cells):
                  problems.append(
                      f"Code cell in notebook {path} has execution count"
                  )

              # At least one code cell must carry the 'parameters' tag.
              if not any(
                  "parameters" in cell.metadata.get("tags", [])
                  for cell in code_cells
              ):
                  problems.append(
                      f"Notebook {path} does not have any cell tagged "
                      "with 'parameters'"
                  )

              return problems


          failed = False
          for path in sys.argv[1:]:
              print(f"Validating notebook '{path}'...")
              for problem in find_problems(path):
                  failed = True
                  print(f"::error file={path}::{problem}")

          sys.exit(1 if failed else 0)
          PY

  execute_tests:
    needs: validate_tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Set the notebooks to execute
        notebook_to_execute:
          - "notebooks/use-cases/document-conversion-standard.ipynb"

        # Set the files used in each notebook execution
        file_to_use:
          - "https://raw.githubusercontent.com/py-pdf/sample-files/refs/heads/main/001-trivial/minimal-document.pdf"
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          # NOTE(review): pip caching keys on a dependency file such as
          # requirements.txt or pyproject.toml - confirm the repo has one.
          cache: pip

      - name: Install Testing Tools
        run: |
          pip install papermill ipykernel
          ipython kernel install --name "python3" --user

      - name: Execute Notebooks
        env:
          # Passed through the environment instead of being interpolated
          # into the script text, so the values are never parsed as shell
          # code.
          NOTEBOOK: ${{ matrix.notebook_to_execute }}
          FILE: ${{ matrix.file_to_use }}
        run: |
          set -euxo pipefail

          echo "Executing notebook '$NOTEBOOK' with file '$FILE'..."

          # papermill's -b option takes the parameters as base64-encoded
          # YAML. Built in a separate assignment so a failure in the
          # pipeline aborts the step instead of being ignored.
          params_b64="$(printf 'files: ["%s"]' "$FILE" | base64 -w 0)"

          papermill "$NOTEBOOK" "$NOTEBOOK.tmp.ipynb" -b "$params_b64"

          echo "✓ Notebook $NOTEBOOK executed successfully"