From 7c60276576c7d68c7408a0af20afc33306fa65ca Mon Sep 17 00:00:00 2001 From: Shashank Srikanth Date: Thu, 25 Sep 2025 15:50:18 -0700 Subject: [PATCH] Add support for specifying pypi only base deps --- metaflow/metaflow_config.py | 26 ++++++++++++++++++++++ metaflow/plugins/pypi/conda_environment.py | 15 +++++++++++-- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/metaflow/metaflow_config.py b/metaflow/metaflow_config.py index 0a54486357c..6f62080a0cf 100644 --- a/metaflow/metaflow_config.py +++ b/metaflow/metaflow_config.py @@ -548,6 +548,7 @@ # PINNED_CONDA_LIBS are the libraries that metaflow depends on for execution # and are needed within a conda environment +# NOTE: These packages should be available on both conda and pypi def get_pinned_conda_libs(python_version, datastore_type): pins = { "requests": ">=2.21.0", @@ -573,6 +574,17 @@ def get_pinned_conda_libs(python_version, datastore_type): return pins +# PINNED_PYPI_ONLY_LIBS are libraries that can only be installed via PyPI +# (not conda). These are typically internal/proprietary packages. +# Only used in metaflow extensions for now. +def get_pinned_pypi_only_libs(python_version, datastore_type): + """ + Return libraries that should only be installed via PyPI (not conda). + These are typically internal/proprietary packages. 
+ """ + return {} + + # Check if there are extensions to Metaflow to load and override everything try: from metaflow.extension_support import get_modules @@ -602,6 +614,19 @@ def _new_get_pinned_conda_libs( return d1 globals()[n] = _new_get_pinned_conda_libs + elif n == "get_pinned_pypi_only_libs": + + def _new_get_pinned_pypi_only_libs( + python_version, datastore_type, f1=globals()[n], f2=o + ): + d1 = f1(python_version, datastore_type) + d2 = f2(python_version, datastore_type) + for k, v in d2.items(): + d1[k] = v if k not in d1 else ",".join([d1[k], v]) + return d1 + + globals()[n] = _new_get_pinned_pypi_only_libs + elif n == "TOGGLE_DECOSPECS": if any([x.startswith("-") for x in o]): raise ValueError("Removing decospecs is not currently supported") @@ -624,6 +649,7 @@ def _new_get_pinned_conda_libs( "ext_modules", "get_modules", "_new_get_pinned_conda_libs", + "_new_get_pinned_pypi_only_libs", "d1", "d2", "k", diff --git a/metaflow/plugins/pypi/conda_environment.py b/metaflow/plugins/pypi/conda_environment.py index 3477a21f051..eba2fe74350 100644 --- a/metaflow/plugins/pypi/conda_environment.py +++ b/metaflow/plugins/pypi/conda_environment.py @@ -15,7 +15,7 @@ from metaflow.debug import debug from metaflow.exception import MetaflowException -from metaflow.metaflow_config import get_pinned_conda_libs +from metaflow.metaflow_config import get_pinned_conda_libs, get_pinned_pypi_only_libs from metaflow.metaflow_environment import MetaflowEnvironment from metaflow.packaging_sys import ContentType @@ -351,7 +351,7 @@ def get_environment(self, step): # TODO: Introduce support for `--telemetry` as a follow up. # Certain packages are required for metaflow runtime to function correctly. # Ensure these packages are available both in Conda channels and PyPI - # repostories. + # repositories. pinned_packages = get_pinned_conda_libs(env_python, self.datastore_type) # PyPI dependencies are prioritized over Conda dependencies. 
@@ -359,6 +359,17 @@ def get_environment(self, step): **pinned_packages, **environment.get("pypi", environment["conda"])["packages"], } + + # Add PyPI-only packages if we're using PyPI + if "pypi" in environment: + pypi_only_packages = get_pinned_pypi_only_libs( + env_python, self.datastore_type + ) + environment["pypi"]["packages"] = { + **pypi_only_packages, + **environment["pypi"]["packages"], # User packages override + } + # Disallow specifying both @conda and @pypi together for now. Mixing Conda # and PyPI packages comes with a lot of operational pain that we can handle # as follow-up work in the future.