diff --git a/sdk/python/featurestore_sample/featurestore/featuresets/accounts/spec/FeatureSetSpec.yaml b/sdk/python/featurestore_sample/featurestore/featuresets/accounts/spec/FeatureSetSpec.yaml index c345b45794..89a373f3b0 100644 --- a/sdk/python/featurestore_sample/featurestore/featuresets/accounts/spec/FeatureSetSpec.yaml +++ b/sdk/python/featurestore_sample/featurestore/featuresets/accounts/spec/FeatureSetSpec.yaml @@ -11,7 +11,7 @@ index_columns: - name: accountID type: string source: - path: wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet + path: abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet timestamp_column: name: timestamp type: parquet diff --git a/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_offline_enabled.yaml b/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_offline_enabled.yaml index 56a4eb1dee..9e00724826 100644 --- a/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_offline_enabled.yaml +++ b/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_offline_enabled.yaml @@ -19,3 +19,14 @@ materialization_settings: spark.executor.instances: 2 spark.executor.memory: 36g spark.sql.shuffle.partitions: 1 + spark.synapse.library.python.env: + dependencies: + - python=3.10 + - pip: + - mlflow-skinny==2.8.1 + - azureml-mlflow + - protobuf==3.19.6 + - azureml-featurestore==1.2.1 + - azure-mgmt-msi + - aiohttp==3.8.4 + name: envdef diff --git a/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_offline_enabled_with_schedule.yaml b/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_offline_enabled_with_schedule.yaml index 49de8f280c..a00aa41a68 100644 --- 
a/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_offline_enabled_with_schedule.yaml +++ b/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_offline_enabled_with_schedule.yaml @@ -29,3 +29,14 @@ materialization_settings: spark.executor.instances: 2 spark.executor.memory: 36g spark.sql.shuffle.partitions: 1 + spark.synapse.library.python.env: + dependencies: + - python=3.10 + - pip: + - mlflow-skinny==2.8.1 + - azureml-mlflow + - protobuf==3.19.6 + - azureml-featurestore==1.2.1 + - azure-mgmt-msi + - aiohttp==3.8.4 + name: envdef diff --git a/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_online_enabled.yaml b/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_online_enabled.yaml index b38f6ab783..e333be15e7 100644 --- a/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_online_enabled.yaml +++ b/sdk/python/featurestore_sample/featurestore/featuresets/transactions/featureset_asset_online_enabled.yaml @@ -17,4 +17,15 @@ materialization_settings: spark.driver.memory: 36g spark.executor.cores: 4 spark.executor.instances: 2 - spark.executor.memory: 36g \ No newline at end of file + spark.executor.memory: 36g + spark.synapse.library.python.env: + dependencies: + - python=3.10 + - pip: + - mlflow-skinny==2.8.1 + - azureml-mlflow + - protobuf==3.19.6 + - azureml-featurestore==1.2.1 + - azure-mgmt-msi + - aiohttp==3.8.4 + name: envdef \ No newline at end of file diff --git a/sdk/python/featurestore_sample/featurestore/featuresets/transactions/spec/FeatureSetSpec.yaml b/sdk/python/featurestore_sample/featurestore/featuresets/transactions/spec/FeatureSetSpec.yaml index fb8041346c..4de86af070 100644 --- a/sdk/python/featurestore_sample/featurestore/featuresets/transactions/spec/FeatureSetSpec.yaml +++ b/sdk/python/featurestore_sample/featurestore/featuresets/transactions/spec/FeatureSetSpec.yaml 
@@ -19,7 +19,7 @@ index_columns: - name: accountID type: string source: - path: wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet + path: abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet source_delay: days: 0 hours: 0 diff --git a/sdk/python/featurestore_sample/notebooks/sdk_and_cli/1.Develop-feature-set-and-register.ipynb b/sdk/python/featurestore_sample/notebooks/sdk_and_cli/1.Develop-feature-set-and-register.ipynb index e749e166f4..b60ef7eee8 100644 --- a/sdk/python/featurestore_sample/notebooks/sdk_and_cli/1.Develop-feature-set-and-register.ipynb +++ b/sdk/python/featurestore_sample/notebooks/sdk_and_cli/1.Develop-feature-set-and-register.ipynb @@ -565,7 +565,8 @@ "outputs": [], "source": [ "# Remove the \".\" in the roor directory path as we need to generate absolute path to read from Spark.\n", - "transactions_source_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "transactions_source_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n", "transactions_src_df = spark.read.parquet(transactions_source_data_path)\n", "\n", "display(transactions_src_df.head(5))\n", @@ -637,10 +638,11 @@ " root_dir + \"/featurestore/featuresets/transactions/transformation_code\"\n", ")\n", "\n", + "# remember to update the path below if you use a new storage account\n", "transactions_featureset_spec = create_feature_set_spec(\n", " source=FeatureSource(\n", " type=SourceType.parquet,\n", - " 
path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", + " path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", " source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n", " ),\n", @@ -930,7 +932,8 @@ }, "outputs": [], "source": [ - "observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", "observation_data_df = spark.read.parquet(observation_data_path)\n", "obs_data_timestamp_column = \"timestamp\"\n", "\n", diff --git a/sdk/python/featurestore_sample/notebooks/sdk_and_cli/2.Experiment-train-models-using-features.ipynb b/sdk/python/featurestore_sample/notebooks/sdk_and_cli/2.Experiment-train-models-using-features.ipynb index 997e412b6f..677226a143 100644 --- a/sdk/python/featurestore_sample/notebooks/sdk_and_cli/2.Experiment-train-models-using-features.ipynb +++ b/sdk/python/featurestore_sample/notebooks/sdk_and_cli/2.Experiment-train-models-using-features.ipynb @@ -471,7 +471,8 @@ }, "outputs": [], "source": [ - "accounts_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "accounts_data_path = 
\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\"\n", "accounts_df = spark.read.parquet(accounts_data_path)\n", "\n", "display(accounts_df.head(5))" @@ -526,7 +527,7 @@ "accounts_featureset_spec = create_feature_set_spec(\n", " source=FeatureSource(\n", " type=SourceType.parquet,\n", - " path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\",\n", + " path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\",\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", " ),\n", " index_columns=[Column(name=\"accountID\", type=ColumnType.string)],\n", @@ -703,7 +704,8 @@ "from azureml.featurestore import get_offline_features\n", "\n", "# Load the observation data. To understand observatio ndata, refer to part 1 of this tutorial\n", - "observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", "observation_data_df = spark.read.parquet(observation_data_path)\n", "obs_data_timestamp_column = \"timestamp\"" ] diff --git a/sdk/python/featurestore_sample/notebooks/sdk_only/1.Develop-feature-set-and-register.ipynb b/sdk/python/featurestore_sample/notebooks/sdk_only/1.Develop-feature-set-and-register.ipynb index 75996e86d9..a0cd536a76 100644 --- a/sdk/python/featurestore_sample/notebooks/sdk_only/1.Develop-feature-set-and-register.ipynb +++ b/sdk/python/featurestore_sample/notebooks/sdk_only/1.Develop-feature-set-and-register.ipynb @@ 
-432,7 +432,8 @@ "outputs": [], "source": [ "# remove the \".\" in the roor directory path as we need to generate absolute path to read from spark\n", - "transactions_source_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "transactions_source_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n", "transactions_src_df = spark.read.parquet(transactions_source_data_path)\n", "\n", "display(transactions_src_df.head(5))\n", @@ -505,7 +506,7 @@ "\n", "transactions_featureset_spec = create_feature_set_spec(\n", " source=ParquetFeatureSource(\n", - " path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", + " path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", " source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n", " ),\n", @@ -880,7 +881,8 @@ }, "outputs": [], "source": [ - "observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", "observation_data_df = spark.read.parquet(observation_data_path)\n", "obs_data_timestamp_column = \"timestamp\"\n", "\n", @@ 
-1077,6 +1079,18 @@ "\n", "transactions_fset_config = fs_client._featuresets.get(name=\"transactions\", version=\"1\")\n", "\n", + "spark_python_env = \"\"\"\n", + "dependencies:\n", + " - python=3.10\n", + " - pip:\n", + " - mlflow-skinny==2.8.1\n", + " - azureml-mlflow\n", + " - protobuf==3.19.6\n", + " - azureml-featurestore==1.2.1\n", + " - azure-mgmt-msi\n", + " - aiohttp==3.8.4\n", + "name: envdef\n", + "\"\"\"\n", "transactions_fset_config.materialization_settings = MaterializationSettings(\n", " offline_enabled=True,\n", " resource=MaterializationComputeResource(instance_type=\"standard_e8s_v3\"),\n", @@ -1087,6 +1101,7 @@ " \"spark.executor.memory\": \"36g\",\n", " \"spark.executor.instances\": 2,\n", " \"spark.sql.shuffle.partitions\": 1,\n", + " \"spark.synapse.library.python.env\": spark_python_env,\n", " },\n", " schedule=None,\n", ")\n", diff --git a/sdk/python/featurestore_sample/notebooks/sdk_only/2.Experiment-train-models-using-features.ipynb b/sdk/python/featurestore_sample/notebooks/sdk_only/2.Experiment-train-models-using-features.ipynb index cbeff4055b..0a0dd0ef23 100644 --- a/sdk/python/featurestore_sample/notebooks/sdk_only/2.Experiment-train-models-using-features.ipynb +++ b/sdk/python/featurestore_sample/notebooks/sdk_only/2.Experiment-train-models-using-features.ipynb @@ -420,7 +420,8 @@ }, "outputs": [], "source": [ - "accounts_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "accounts_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\"\n", "accounts_df = spark.read.parquet(accounts_data_path)\n", "\n", "display(accounts_df.head(5))" @@ -474,7 +475,7 @@ "\n", 
"accounts_featureset_spec = create_feature_set_spec(\n", " source=ParquetFeatureSource(\n", - " path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\",\n", + " path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\",\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", " ),\n", " index_columns=[Column(name=\"accountID\", type=ColumnType.string)],\n", @@ -657,7 +658,8 @@ "from azureml.featurestore import get_offline_features\n", "\n", "# Load the observation data. To understand observatio ndata, refer to part 1 of this tutorial\n", - "observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", "observation_data_df = spark.read.parquet(observation_data_path)\n", "obs_data_timestamp_column = \"timestamp\"" ] diff --git a/sdk/python/featurestore_sample/notebooks/sdk_only/4.Enable-online-store-run-inference.ipynb b/sdk/python/featurestore_sample/notebooks/sdk_only/4.Enable-online-store-run-inference.ipynb index b1b07611d8..9e478a624e 100644 --- a/sdk/python/featurestore_sample/notebooks/sdk_only/4.Enable-online-store-run-inference.ipynb +++ b/sdk/python/featurestore_sample/notebooks/sdk_only/4.Enable-online-store-run-inference.ipynb @@ -468,6 +468,18 @@ "\n", "accounts_fset_config = fs_client._featuresets.get(name=\"accounts\", version=\"1\")\n", "\n", + "spark_python_env = \"\"\"\n", + "dependencies:\n", + " - python=3.10\n", + " - pip:\n", + " - mlflow-skinny==2.8.1\n", + " - 
azureml-mlflow\n", + " - protobuf==3.19.6\n", + " - azureml-featurestore==1.2.1\n", + " - azure-mgmt-msi\n", + " - aiohttp==3.8.4\n", + "name: envdef\n", + "\"\"\"\n", "accounts_fset_config.materialization_settings = MaterializationSettings(\n", " offline_enabled=True,\n", " online_enabled=True,\n", @@ -478,6 +490,7 @@ " \"spark.executor.cores\": 4,\n", " \"spark.executor.memory\": \"36g\",\n", " \"spark.executor.instances\": 2,\n", + " \"spark.synapse.library.python.env\": spark_python_env,\n", " },\n", " schedule=None,\n", ")\n", diff --git a/sdk/python/featurestore_sample/notebooks/sdk_only/5.Develop-feature-set-custom-source.ipynb b/sdk/python/featurestore_sample/notebooks/sdk_only/5.Develop-feature-set-custom-source.ipynb index 9cac0d4d66..0b09a82f4d 100644 --- a/sdk/python/featurestore_sample/notebooks/sdk_only/5.Develop-feature-set-custom-source.ipynb +++ b/sdk/python/featurestore_sample/notebooks/sdk_only/5.Develop-feature-set-custom-source.ipynb @@ -239,10 +239,11 @@ " + \"/featurestore/featuresets/transactions_custom_source/feature_process_code\"\n", ")\n", "\n", + "# update the source_path to abfss if you copied data to new storage account\n", "udf_featureset_spec = create_feature_set_spec(\n", " source=CustomFeatureSource(\n", " kwargs={\n", - " \"source_path\": \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source-json/*.json\",\n", + " \"source_path\": \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source-json/*.json\",\n", " \"timestamp_column_name\": \"timestamp\",\n", " },\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", diff --git a/sdk/python/featurestore_sample/notebooks/sdk_only/7.Develop-feature-set-domain-specific-language-dsl.ipynb b/sdk/python/featurestore_sample/notebooks/sdk_only/7.Develop-feature-set-domain-specific-language-dsl.ipynb index 55f4aeef1f..2b54ddab8c 100644 --- 
a/sdk/python/featurestore_sample/notebooks/sdk_only/7.Develop-feature-set-domain-specific-language-dsl.ipynb +++ b/sdk/python/featurestore_sample/notebooks/sdk_only/7.Develop-feature-set-domain-specific-language-dsl.ipynb @@ -320,9 +320,10 @@ ")\n", "from azureml.featurestore.feature_source import ParquetFeatureSource\n", "\n", + "# update the source_path to abfss if you copied data to new storage account\n", "dsl_feature_set_spec = create_feature_set_spec(\n", " source=ParquetFeatureSource(\n", - " path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", + " path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", " source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n", " ),\n", @@ -533,10 +534,11 @@ " + \"/featurestore/featuresets/transactions_custom_source/source_process_code\"\n", ")\n", "\n", + "# update the source_path to abfss if you copied data to new storage account\n", "dsl_custom_feature_set_spec = create_feature_set_spec(\n", " source=CustomFeatureSource(\n", " kwargs={\n", - " \"source_path\": \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source-json/*.json\",\n", + " \"source_path\": \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source-json/*.json\",\n", " \"timestamp_column_name\": \"timestamp\",\n", " },\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", @@ -712,9 +714,10 @@ " root_dir + \"/featurestore/featuresets/transactions/transformation_code\"\n", ")\n", "\n", + "# update the source_path to abfss if you copied data to new storage account\n", "udf_feature_set_spec = create_feature_set_spec(\n", " source=ParquetFeatureSource(\n", - " 
path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", + " path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", " source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n", " ),\n", @@ -1162,6 +1165,18 @@ " MaterializationComputeResource,\n", ")\n", "\n", + "spark_python_env = \"\"\"\n", + "dependencies:\n", + " - python=3.10\n", + " - pip:\n", + " - mlflow-skinny==2.8.1\n", + " - azureml-mlflow\n", + " - protobuf==3.19.6\n", + " - azureml-featurestore==1.2.1\n", + " - azure-mgmt-msi\n", + " - aiohttp==3.8.4\n", + "name: envdef\n", + "\"\"\"\n", "materialization_settings = MaterializationSettings(\n", " offline_enabled=True,\n", " resource=MaterializationComputeResource(instance_type=\"standard_e8s_v3\"),\n", @@ -1171,6 +1186,7 @@ " \"spark.executor.cores\": 4,\n", " \"spark.executor.memory\": \"36g\",\n", " \"spark.executor.instances\": 2,\n", + " \"spark.synapse.library.python.env\": spark_python_env,\n", " },\n", " schedule=None,\n", ")\n", @@ -1354,7 +1370,8 @@ }, "outputs": [], "source": [ - "observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", "observation_data_df = spark.read.parquet(observation_data_path)\n", "obs_data_timestamp_column = \"timestamp\"\n", "\n", diff --git a/sdk/python/featurestore_sample/notebooks/sdk_only/ADB Example - Develop a feature set and register with managed feature store.ipynb 
b/sdk/python/featurestore_sample/notebooks/sdk_only/ADB Example - Develop a feature set and register with managed feature store.ipynb index 06a051cca5..48955c2c5c 100644 --- a/sdk/python/featurestore_sample/notebooks/sdk_only/ADB Example - Develop a feature set and register with managed feature store.ipynb +++ b/sdk/python/featurestore_sample/notebooks/sdk_only/ADB Example - Develop a feature set and register with managed feature store.ipynb @@ -456,7 +456,8 @@ "outputs": [], "source": [ "# remove the \".\" in the roor directory path as we need to generate absolute path to read from spark\n", - "transactions_source_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "transactions_source_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n", "transactions_src_df = spark.read.parquet(transactions_source_data_path)\n", "\n", "transactions_src_df.head(5)" @@ -540,7 +541,7 @@ "\n", "transactions_featureset_spec = create_feature_set_spec(\n", " source=ParquetFeatureSource(\n", - " path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", + " path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", " source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n", " ),\n", @@ -1019,7 +1020,8 @@ }, "outputs": [], "source": [ - "observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", + "# If you get errors when accessing the 
data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", "observation_data_df = spark.read.parquet(observation_data_path)\n", "obs_data_timestamp_column = \"timestamp\"\n", "\n", @@ -1259,6 +1261,18 @@ "\n", "transactions_fset_config = fs_client._featuresets.get(name=\"transactions\", version=\"1\")\n", "\n", + "spark_python_env = \"\"\"\n", + "dependencies:\n", + " - python=3.10\n", + " - pip:\n", + " - mlflow-skinny==2.8.1\n", + " - azureml-mlflow\n", + " - protobuf==3.19.6\n", + " - azureml-featurestore==1.2.1\n", + " - azure-mgmt-msi\n", + " - aiohttp==3.8.4\n", + "name: envdef\n", + "\"\"\"\n", "transactions_fset_config.materialization_settings = MaterializationSettings(\n", " offline_enabled=True,\n", " resource=MaterializationComputeResource(instance_type=\"standard_e8s_v3\"),\n", @@ -1268,6 +1282,7 @@ " \"spark.executor.cores\": 4,\n", " \"spark.executor.memory\": \"36g\",\n", " \"spark.executor.instances\": 2,\n", + " \"spark.synapse.library.python.env\": spark_python_env,\n", " },\n", " schedule=None,\n", ")\n", diff --git a/sdk/python/featurestore_sample/notebooks/sdk_only/Synapse example - Develop feature set and register.ipynb b/sdk/python/featurestore_sample/notebooks/sdk_only/Synapse example - Develop feature set and register.ipynb index 3afbba9226..78105b1076 100644 --- a/sdk/python/featurestore_sample/notebooks/sdk_only/Synapse example - Develop feature set and register.ipynb +++ b/sdk/python/featurestore_sample/notebooks/sdk_only/Synapse example - Develop feature set and register.ipynb @@ -464,7 +464,8 @@ }, "outputs": [], "source": [ - "transactions_source_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n", + "# If 
you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "transactions_source_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n", "transactions_src_df = spark.read.parquet(transactions_source_data_path)\n", "\n", "transactions_src_df.head(5)" @@ -548,7 +549,7 @@ "\n", "transactions_featureset_spec = create_feature_set_spec(\n", " source=ParquetFeatureSource(\n", - " path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", + " path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n", " timestamp_column=TimestampColumn(name=\"timestamp\"),\n", " source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n", " ),\n", @@ -1026,7 +1027,8 @@ }, "outputs": [], "source": [ - "observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", + "# If you get errors when accessing the data below, try the Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n", + "observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n", "observation_data_df = spark.read.parquet(observation_data_path)\n", "obs_data_timestamp_column = \"timestamp\"\n", "\n", @@ -1266,6 +1268,18 @@ "\n", "transactions_fset_config = fs_client._featuresets.get(name=\"transactions\", version=\"1\")\n", "\n", + "spark_python_env = \"\"\"\n", + "dependencies:\n", + " - python=3.10\n", + " - pip:\n", + " - mlflow-skinny==2.8.1\n", + " - azureml-mlflow\n", + " - protobuf==3.19.6\n", + " - 
azureml-featurestore==1.2.1\n", + " - azure-mgmt-msi\n", + " - aiohttp==3.8.4\n", + "name: envdef\n", + "\"\"\"\n", "transactions_fset_config.materialization_settings = MaterializationSettings(\n", " offline_enabled=True,\n", " resource=MaterializationComputeResource(instance_type=\"standard_e8s_v3\"),\n", @@ -1275,6 +1289,7 @@ " \"spark.executor.cores\": 4,\n", " \"spark.executor.memory\": \"36g\",\n", " \"spark.executor.instances\": 2,\n", + " \"spark.synapse.library.python.env\": spark_python_env,\n", " },\n", " schedule=None,\n", ")\n", diff --git a/sdk/python/featurestore_sample/project/env/conda.yml b/sdk/python/featurestore_sample/project/env/conda.yml index 93bb3be5ca..123330faa1 100644 --- a/sdk/python/featurestore_sample/project/env/conda.yml +++ b/sdk/python/featurestore_sample/project/env/conda.yml @@ -1,10 +1,10 @@ dependencies: - - python=3.8 + - python=3.10 - pip: # Protobuf is needed to avoid conflict with managed spark - protobuf==3.19.6 # Feature store core SDK - - azureml-featurestore==1.1.1 + - azureml-featurestore==1.2.1 # Azure management libraries - azure-mgmt-authorization==3.0.0 # This is needed if you want to execute the Part 2 of the "SDK" track or execute "SDK+CLI" track in the docs tutorial diff --git a/sdk/python/featurestore_sample/project/env/online.yml b/sdk/python/featurestore_sample/project/env/online.yml index 08fab8d755..60485dd404 100644 --- a/sdk/python/featurestore_sample/project/env/online.yml +++ b/sdk/python/featurestore_sample/project/env/online.yml @@ -1,12 +1,12 @@ dependencies: - - python=3.8 + - python=3.10 - pip: # Protobuf is needed to avoid conflict with managed spark - protobuf==3.19.6 # For asynchronous HTTP requests - aiohttp==3.8.4 # Online feature store core SDK - - azureml-featurestore[online]==1.1.1 + - azureml-featurestore[online]==1.2.1 # Azure management libraries - azure-mgmt-msi - azure-mgmt-redis diff --git a/sdk/python/featurestore_sample/project/fraud_model/online_inference/conda.yml 
b/sdk/python/featurestore_sample/project/fraud_model/online_inference/conda.yml index e8da1be9df..fc417d1d45 100644 --- a/sdk/python/featurestore_sample/project/fraud_model/online_inference/conda.yml +++ b/sdk/python/featurestore_sample/project/fraud_model/online_inference/conda.yml @@ -1,8 +1,8 @@ dependencies: - - python=3.8 + - python=3.10 - pip: - protobuf==3.19.6 - - azureml-featurestore[online]==1.1.0 + - azureml-featurestore[online]==1.2.1 - aiohttp==3.8.4 - azureml-inference-server-http - scikit-learn diff --git a/sdk/python/featurestore_sample/project/fraud_model/pipelines/batch_inference_pipeline.yaml b/sdk/python/featurestore_sample/project/fraud_model/pipelines/batch_inference_pipeline.yaml index 4269667a2a..625fd631a7 100644 --- a/sdk/python/featurestore_sample/project/fraud_model/pipelines/batch_inference_pipeline.yaml +++ b/sdk/python/featurestore_sample/project/fraud_model/pipelines/batch_inference_pipeline.yaml @@ -11,7 +11,7 @@ inputs: type: custom_model observation_data: mode: ro_mount - path: wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/batch_inference/ + path: abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/batch_inference/ type: uri_folder timestamp_column: timestamp diff --git a/sdk/python/featurestore_sample/project/fraud_model/pipelines/training_pipeline.yaml b/sdk/python/featurestore_sample/project/fraud_model/pipelines/training_pipeline.yaml index 62e1632f6d..b26e33c36d 100644 --- a/sdk/python/featurestore_sample/project/fraud_model/pipelines/training_pipeline.yaml +++ b/sdk/python/featurestore_sample/project/fraud_model/pipelines/training_pipeline.yaml @@ -11,7 +11,7 @@ inputs: type: uri_folder observation_data: # this will be used by the feature_retrieval_step mode: ro_mount - path: wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet + path: 
abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet type: uri_folder timestamp_column: timestamp # this will be used by the feature_retrieval_step