From 5195785da4cb007b479cd5915fbb31a1837b8fe7 Mon Sep 17 00:00:00 2001 From: alexioannides Date: Tue, 13 Feb 2024 23:03:29 +0000 Subject: [PATCH 1/4] Add stub files --- demos/kubeflow-pipelines/README.md | 13 +++++++++++++ demos/kubeflow-pipelines/docs/demo_requirements.md | 7 +++++++ demos/kubeflow-pipelines/requirements.txt | 1 + 3 files changed, 21 insertions(+) create mode 100644 demos/kubeflow-pipelines/README.md create mode 100644 demos/kubeflow-pipelines/docs/demo_requirements.md create mode 100644 demos/kubeflow-pipelines/requirements.txt diff --git a/demos/kubeflow-pipelines/README.md b/demos/kubeflow-pipelines/README.md new file mode 100644 index 0000000..bb49edc --- /dev/null +++ b/demos/kubeflow-pipelines/README.md @@ -0,0 +1,13 @@ +# ML Pipeline Orchestration with Kubeflow Pipelines + +[Kubeflow Pipelines (KFP)](https://kubeflow-pipelines.readthedocs.io) is a ... + +This demo shows .. + +## Demo Objectives + +* TODO... + +## Running the Demo + +This demo is contained... diff --git a/demos/kubeflow-pipelines/docs/demo_requirements.md b/demos/kubeflow-pipelines/docs/demo_requirements.md new file mode 100644 index 0000000..266bacd --- /dev/null +++ b/demos/kubeflow-pipelines/docs/demo_requirements.md @@ -0,0 +1,7 @@ +# Demo Requirements + +This demo depends on the following Python packages: + +```text title="demos/kubeflow-pipelines/requirements.txt" +--8<-- "demos/kubeflow-pipelines/requirements.txt" +``` diff --git a/demos/kubeflow-pipelines/requirements.txt b/demos/kubeflow-pipelines/requirements.txt new file mode 100644 index 0000000..aabcff6 --- /dev/null +++ b/demos/kubeflow-pipelines/requirements.txt @@ -0,0 +1 @@ +kfp==2.4.* From 9f19ed6f6f762357f84e02729770b6b6907d0be3 Mon Sep 17 00:00:00 2001 From: alexioannides Date: Wed, 14 Feb 2024 00:10:05 +0000 Subject: [PATCH 2/4] Add dummy component --- demos/kubeflow-pipelines/components.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 demos/kubeflow-pipelines/components.py diff 
--git a/demos/kubeflow-pipelines/components.py b/demos/kubeflow-pipelines/components.py new file mode 100644 index 0000000..9186ecf --- /dev/null +++ b/demos/kubeflow-pipelines/components.py @@ -0,0 +1,12 @@ +"""Reusable pipeline components.""" +from kfp import dsl + + +@dsl.component(base_image="python3.10", packages_to_install=["numpy==1.26.*"]) +def make_data(n_rows: int, n_cols: int, data: dsl.Output) -> None: + """Generate data using random number generation.""" + from numpy.random import default_rng + + rng = default_rng(42) + data_arr = rng.standard_normal((n_rows, n_cols)) + data_arr.tofile(data.path) From c95845729eb086deec4eab8e18a8ce3106f1aacc Mon Sep 17 00:00:00 2001 From: alexioannides Date: Wed, 14 Feb 2024 08:31:57 +0000 Subject: [PATCH 3/4] Add .ruff_cache to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 56ed1bb..3a30521 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # misc +.ruff_cache .DS_Store .vscode From 4be5f8ffa4caaacede96e5b128669c354710a248 Mon Sep 17 00:00:00 2001 From: alexioannides Date: Wed, 14 Feb 2024 15:21:23 +0000 Subject: [PATCH 4/4] Add core example --- demos/kubeflow-pipelines/components.py | 51 ++++++++++++++++++++--- demos/kubeflow-pipelines/requirements.txt | 4 +- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/demos/kubeflow-pipelines/components.py b/demos/kubeflow-pipelines/components.py index 9186ecf..b64aa60 100644 --- a/demos/kubeflow-pipelines/components.py +++ b/demos/kubeflow-pipelines/components.py @@ -1,12 +1,51 @@ -"""Reusable pipeline components.""" -from kfp import dsl +"""Patterns for developing reusable KFP pipeline components.""" +import shutil +from pathlib import Path +from unittest.mock import Mock +import numpy as np +from kfp import dsl, local -@dsl.component(base_image="python3.10", packages_to_install=["numpy==1.26.*"]) -def make_data(n_rows: int, n_cols: int, data: dsl.Output) -> None: - """Generate data using random
number generation.""" +_BASE_IMAGE = "python3.10" +_REQUIREMENTS = Path("requirements.txt").read_text().splitlines() + + +@dsl.component(base_image=_BASE_IMAGE, packages_to_install=_REQUIREMENTS) +def make_data(n_rows: int, n_cols: int, data: dsl.Output[dsl.Dataset]) -> None: + """Synthetic dataset generation pipeline component. """ + from numpy import save from numpy.random import default_rng rng = default_rng(42) data_arr = rng.standard_normal((n_rows, n_cols)) - data_arr.tofile(data.path) + save(data.path, data_arr) + + +def test_make_data_component(): + output_dataset_file = "foo.npy" + mock_dataset = Mock() + mock_dataset.path = output_dataset_file + try: + make_data.execute(n_rows=3, n_cols=2, data=mock_dataset) + output_dataset = np.load(output_dataset_file) + assert output_dataset.shape == (3, 2) + except Exception: + assert False + finally: + data_filepath = Path(output_dataset_file) + if data_filepath.exists(): + data_filepath.unlink() + + +def test_make_data_component_integration(): + kfp_root_dir = "./kfp_outputs" + local.init(runner=local.SubprocessRunner(use_venv=True), pipeline_root=kfp_root_dir) + try: + task = make_data(n_rows=3, n_cols=2) + output_dataset = np.load(f"{task.outputs['data'].path}.npy") + assert output_dataset.shape == (3, 2) + except Exception: + assert False + finally: + shutil.rmtree(kfp_root_dir, ignore_errors=True) + \ No newline at end of file diff --git a/demos/kubeflow-pipelines/requirements.txt b/demos/kubeflow-pipelines/requirements.txt index aabcff6..5c5222b 100644 --- a/demos/kubeflow-pipelines/requirements.txt +++ b/demos/kubeflow-pipelines/requirements.txt @@ -1 +1,3 @@ -kfp==2.4.* +kfp==2.5.* +numpy==1.26.* +pytest==8.0.*