Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ index_columns:
- name: accountID
type: string
source:
path: wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet
path: abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet
timestamp_column:
name: timestamp
type: parquet
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,14 @@ materialization_settings:
spark.executor.instances: 2
spark.executor.memory: 36g
spark.sql.shuffle.partitions: 1
spark.synapse.library.python.env:
dependencies:
- python=3.10
- pip:
- mlflow-skinny==2.8.1
- azureml-mlflow
- protobuf==3.19.6
- azureml-featurestore==1.2.1
- azure-mgmt-msi
- aiohttp==3.8.4
name: envdef
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,14 @@ materialization_settings:
spark.executor.instances: 2
spark.executor.memory: 36g
spark.sql.shuffle.partitions: 1
spark.synapse.library.python.env:
dependencies:
- python=3.10
- pip:
- mlflow-skinny==2.8.1
- azureml-mlflow
- protobuf==3.19.6
- azureml-featurestore==1.2.1
- azure-mgmt-msi
- aiohttp==3.8.4
name: envdef
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,15 @@ materialization_settings:
spark.driver.memory: 36g
spark.executor.cores: 4
spark.executor.instances: 2
spark.executor.memory: 36g
spark.executor.memory: 36g
spark.synapse.library.python.env:
dependencies:
- python=3.10
- pip:
- mlflow-skinny==2.8.1
- azureml-mlflow
- protobuf==3.19.6
- azureml-featurestore==1.2.1
- azure-mgmt-msi
- aiohttp==3.8.4
name: envdef
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ index_columns:
- name: accountID
type: string
source:
path: wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet
path: abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet
source_delay:
days: 0
hours: 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,8 @@
"outputs": [],
"source": [
"# Remove the \".\" in the root directory path as we need to generate absolute path to read from Spark.\n",
"transactions_source_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n",
"# If you get errors when accessing the data below, try Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n",
"transactions_source_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n",
"transactions_src_df = spark.read.parquet(transactions_source_data_path)\n",
"\n",
"display(transactions_src_df.head(5))\n",
Expand Down Expand Up @@ -637,10 +638,11 @@
" root_dir + \"/featurestore/featuresets/transactions/transformation_code\"\n",
")\n",
"\n",
"# remember to update the path below if you use a new storage account\n",
"transactions_featureset_spec = create_feature_set_spec(\n",
" source=FeatureSource(\n",
" type=SourceType.parquet,\n",
" path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n",
" path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n",
" timestamp_column=TimestampColumn(name=\"timestamp\"),\n",
" source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n",
" ),\n",
Expand Down Expand Up @@ -930,7 +932,8 @@
},
"outputs": [],
"source": [
"observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"# If you get errors when accessing the data below, try Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n",
"observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"observation_data_df = spark.read.parquet(observation_data_path)\n",
"obs_data_timestamp_column = \"timestamp\"\n",
"\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,8 @@
},
"outputs": [],
"source": [
"accounts_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\"\n",
"# If you get errors when accessing the data below, try Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n",
"accounts_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\"\n",
"accounts_df = spark.read.parquet(accounts_data_path)\n",
"\n",
"display(accounts_df.head(5))"
Expand Down Expand Up @@ -526,7 +527,7 @@
"accounts_featureset_spec = create_feature_set_spec(\n",
" source=FeatureSource(\n",
" type=SourceType.parquet,\n",
" path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\",\n",
" path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\",\n",
" timestamp_column=TimestampColumn(name=\"timestamp\"),\n",
" ),\n",
" index_columns=[Column(name=\"accountID\", type=ColumnType.string)],\n",
Expand Down Expand Up @@ -703,7 +704,8 @@
"from azureml.featurestore import get_offline_features\n",
"\n",
"# Load the observation data. To understand observation data, refer to part 1 of this tutorial\n",
"observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"# If you get errors when accessing the data below, try Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n",
"observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"observation_data_df = spark.read.parquet(observation_data_path)\n",
"obs_data_timestamp_column = \"timestamp\""
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,8 @@
"outputs": [],
"source": [
"# remove the \".\" in the root directory path as we need to generate absolute path to read from spark\n",
"transactions_source_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n",
"# If you get errors when accessing the data below, try Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n",
"transactions_source_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\"\n",
"transactions_src_df = spark.read.parquet(transactions_source_data_path)\n",
"\n",
"display(transactions_src_df.head(5))\n",
Expand Down Expand Up @@ -505,7 +506,7 @@
"\n",
"transactions_featureset_spec = create_feature_set_spec(\n",
" source=ParquetFeatureSource(\n",
" path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n",
" path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n",
" timestamp_column=TimestampColumn(name=\"timestamp\"),\n",
" source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n",
" ),\n",
Expand Down Expand Up @@ -880,7 +881,8 @@
},
"outputs": [],
"source": [
"observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"# If you get errors when accessing the data below, try Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n",
"observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"observation_data_df = spark.read.parquet(observation_data_path)\n",
"obs_data_timestamp_column = \"timestamp\"\n",
"\n",
Expand Down Expand Up @@ -1077,6 +1079,18 @@
"\n",
"transactions_fset_config = fs_client._featuresets.get(name=\"transactions\", version=\"1\")\n",
"\n",
"spark_python_env = \"\"\"\n",
"dependencies:\n",
" - python=3.10\n",
" - pip:\n",
" - mlflow-skinny==2.8.1\n",
" - azureml-mlflow\n",
" - protobuf==3.19.6\n",
" - azureml-featurestore==1.2.1\n",
" - azure-mgmt-msi\n",
" - aiohttp==3.8.4\n",
"name: envdef\n",
"\"\"\"\n",
"transactions_fset_config.materialization_settings = MaterializationSettings(\n",
" offline_enabled=True,\n",
" resource=MaterializationComputeResource(instance_type=\"standard_e8s_v3\"),\n",
Expand All @@ -1087,6 +1101,7 @@
" \"spark.executor.memory\": \"36g\",\n",
" \"spark.executor.instances\": 2,\n",
" \"spark.sql.shuffle.partitions\": 1,\n",
" \"spark.synapse.library.python.env\": spark_python_env,\n",
" },\n",
" schedule=None,\n",
")\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,8 @@
},
"outputs": [],
"source": [
"accounts_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\"\n",
"# If you get errors when accessing the data below, try Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n",
"accounts_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\"\n",
"accounts_df = spark.read.parquet(accounts_data_path)\n",
"\n",
"display(accounts_df.head(5))"
Expand Down Expand Up @@ -474,7 +475,7 @@
"\n",
"accounts_featureset_spec = create_feature_set_spec(\n",
" source=ParquetFeatureSource(\n",
" path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\",\n",
" path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/accounts-precalculated/*.parquet\",\n",
" timestamp_column=TimestampColumn(name=\"timestamp\"),\n",
" ),\n",
" index_columns=[Column(name=\"accountID\", type=ColumnType.string)],\n",
Expand Down Expand Up @@ -657,7 +658,8 @@
"from azureml.featurestore import get_offline_features\n",
"\n",
"# Load the observation data. To understand observation data, refer to part 1 of this tutorial\n",
"observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"# If you get errors when accessing the data below, try Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n",
"observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"observation_data_df = spark.read.parquet(observation_data_path)\n",
"obs_data_timestamp_column = \"timestamp\""
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,18 @@
"\n",
"accounts_fset_config = fs_client._featuresets.get(name=\"accounts\", version=\"1\")\n",
"\n",
"spark_python_env = \"\"\"\n",
"dependencies:\n",
" - python=3.10\n",
" - pip:\n",
" - mlflow-skinny==2.8.1\n",
" - azureml-mlflow\n",
" - protobuf==3.19.6\n",
" - azureml-featurestore==1.2.1\n",
" - azure-mgmt-msi\n",
" - aiohttp==3.8.4\n",
"name: envdef\n",
"\"\"\"\n",
"accounts_fset_config.materialization_settings = MaterializationSettings(\n",
" offline_enabled=True,\n",
" online_enabled=True,\n",
Expand All @@ -478,6 +490,7 @@
" \"spark.executor.cores\": 4,\n",
" \"spark.executor.memory\": \"36g\",\n",
" \"spark.executor.instances\": 2,\n",
" \"spark.synapse.library.python.env\": spark_python_env,\n",
" },\n",
" schedule=None,\n",
")\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,11 @@
" + \"/featurestore/featuresets/transactions_custom_source/feature_process_code\"\n",
")\n",
"\n",
"# update the source_path to abfss if you copied data to new storage account\n",
"udf_featureset_spec = create_feature_set_spec(\n",
" source=CustomFeatureSource(\n",
" kwargs={\n",
" \"source_path\": \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source-json/*.json\",\n",
" \"source_path\": \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source-json/*.json\",\n",
" \"timestamp_column_name\": \"timestamp\",\n",
" },\n",
" timestamp_column=TimestampColumn(name=\"timestamp\"),\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,10 @@
")\n",
"from azureml.featurestore.feature_source import ParquetFeatureSource\n",
"\n",
"# update the source_path to abfss if you copied data to new storage account\n",
"dsl_feature_set_spec = create_feature_set_spec(\n",
" source=ParquetFeatureSource(\n",
" path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n",
" path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n",
" timestamp_column=TimestampColumn(name=\"timestamp\"),\n",
" source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n",
" ),\n",
Expand Down Expand Up @@ -533,10 +534,11 @@
" + \"/featurestore/featuresets/transactions_custom_source/source_process_code\"\n",
")\n",
"\n",
"# update the source_path to abfss if you copied data to new storage account\n",
"dsl_custom_feature_set_spec = create_feature_set_spec(\n",
" source=CustomFeatureSource(\n",
" kwargs={\n",
" \"source_path\": \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source-json/*.json\",\n",
" \"source_path\": \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source-json/*.json\",\n",
" \"timestamp_column_name\": \"timestamp\",\n",
" },\n",
" timestamp_column=TimestampColumn(name=\"timestamp\"),\n",
Expand Down Expand Up @@ -712,9 +714,10 @@
" root_dir + \"/featurestore/featuresets/transactions/transformation_code\"\n",
")\n",
"\n",
"# update the source_path to abfss if you copied data to new storage account\n",
"udf_feature_set_spec = create_feature_set_spec(\n",
" source=ParquetFeatureSource(\n",
" path=\"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n",
" path=\"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/datasources/transactions-source/*.parquet\",\n",
" timestamp_column=TimestampColumn(name=\"timestamp\"),\n",
" source_delay=DateTimeOffset(days=0, hours=0, minutes=20),\n",
" ),\n",
Expand Down Expand Up @@ -1162,6 +1165,18 @@
" MaterializationComputeResource,\n",
")\n",
"\n",
"spark_python_env = \"\"\"\n",
"dependencies:\n",
" - python=3.10\n",
" - pip:\n",
" - mlflow-skinny==2.8.1\n",
" - azureml-mlflow\n",
" - protobuf==3.19.6\n",
" - azureml-featurestore==1.2.1\n",
" - azure-mgmt-msi\n",
" - aiohttp==3.8.4\n",
"name: envdef\n",
"\"\"\"\n",
"materialization_settings = MaterializationSettings(\n",
" offline_enabled=True,\n",
" resource=MaterializationComputeResource(instance_type=\"standard_e8s_v3\"),\n",
Expand All @@ -1171,6 +1186,7 @@
" \"spark.executor.cores\": 4,\n",
" \"spark.executor.memory\": \"36g\",\n",
" \"spark.executor.instances\": 2,\n",
" \"spark.synapse.library.python.env\": spark_python_env,\n",
" },\n",
" schedule=None,\n",
")\n",
Expand Down Expand Up @@ -1354,7 +1370,8 @@
},
"outputs": [],
"source": [
"observation_data_path = \"wasbs://data@azuremlexampledata.blob.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"# If you get errors when accessing the data below, try Step 1b in Network-isolation-feature-store.ipynb to copy the sample data to the newly created storage container with anonymous access enabled.\n",
"observation_data_path = \"abfss://data@azuremlfsexampledatatest.dfs.core.windows.net/feature-store-prp/observation_data/train/*.parquet\"\n",
"observation_data_df = spark.read.parquet(observation_data_path)\n",
"obs_data_timestamp_column = \"timestamp\"\n",
"\n",
Expand Down
Loading
Loading