diff --git a/sagemaker-mlflow/sagemaker_deployment_mlflow.ipynb b/sagemaker-mlflow/sagemaker_deployment_mlflow.ipynb index cd866fa3ff..296ab997ea 100644 --- a/sagemaker-mlflow/sagemaker_deployment_mlflow.ipynb +++ b/sagemaker-mlflow/sagemaker_deployment_mlflow.ipynb @@ -23,6 +23,22 @@ "## Setup environment" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Upgrade SageMaker Python SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade --quiet \"sagemaker>=2.215.0\"" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -86,10 +102,10 @@ "region = sagemaker_session.boto_region_name\n", "\n", "# S3 prefix for the training dataset to be uploaded to\n", - "prefix = 'DEMO-scikit-iris'\n", + "prefix = \"DEMO-scikit-iris\"\n", "\n", "# Provide the ARN of the Tracking Server that you want to track your training job with\n", - "tracking_server_arn = 'your tracking server arn here'" + "tracking_server_arn = \"your tracking server arn here\"" ] }, { @@ -125,13 +141,13 @@ "\n", "s3_client = boto3.client(\"s3\")\n", "s3_client.download_file(\n", - " f\"sagemaker-example-files-prod-{region}\", 'datasets/tabular/iris/iris.data', './data/iris.csv'\n", + " f\"sagemaker-example-files-prod-{region}\", \"datasets/tabular/iris/iris.data\", \"./data/iris.csv\"\n", ")\n", "\n", - "df_iris = pd.read_csv('./data/iris.csv', header=None)\n", - "df_iris[4] = df_iris[4].map({\"Iris-setosa\": 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})\n", + "df_iris = pd.read_csv(\"./data/iris.csv\", header=None)\n", + "df_iris[4] = df_iris[4].map({\"Iris-setosa\": 0, \"Iris-versicolor\": 1, \"Iris-virginica\": 2})\n", "iris = df_iris[[4, 0, 1, 2, 3]].to_numpy()\n", - "np.savetxt('./data/iris.csv', iris, delimiter=',', fmt='%1.1f, %1.3f, %1.3f, %1.3f, %1.3f')" + "np.savetxt(\"./data/iris.csv\", iris, delimiter=\",\", fmt=\"%1.1f, %1.3f, %1.3f, %1.3f, %1.3f\")" ] }, { @@ -147,10 +163,10 @@ "metadata": 
{}, "outputs": [], "source": [ - "WORK_DIRECTORY = 'data'\n", + "WORK_DIRECTORY = \"data\"\n", "\n", "train_input = sagemaker_session.upload_data(\n", - " WORK_DIRECTORY, key_prefix='{}/{}'.format(prefix, WORK_DIRECTORY)\n", + " WORK_DIRECTORY, key_prefix=\"{}/{}\".format(prefix, WORK_DIRECTORY)\n", ")" ] }, @@ -278,17 +294,15 @@ "outputs": [], "source": [ "sklearn = SKLearn(\n", - " entry_point='train.py',\n", - " source_dir='training_code',\n", - " framework_version='1.2-1',\n", - " instance_type='ml.c4.xlarge',\n", + " entry_point=\"train.py\",\n", + " source_dir=\"training_code\",\n", + " framework_version=\"1.2-1\",\n", + " instance_type=\"ml.c4.xlarge\",\n", " role=role,\n", " sagemaker_session=sagemaker_session,\n", - " hyperparameters={'max_leaf_nodes': 30},\n", + " hyperparameters={\"max_leaf_nodes\": 30},\n", " keep_alive_period_in_seconds=3600,\n", - " environment={\n", - " 'MLFLOW_TRACKING_ARN': tracking_server_arn\n", - " }\n", + " environment={\"MLFLOW_TRACKING_ARN\": tracking_server_arn},\n", ")" ] }, @@ -394,9 +408,7 @@ " mode=Mode.SAGEMAKER_ENDPOINT,\n", " schema_builder=sklearn_schema_builder,\n", " role_arn=role,\n", - " model_metadata={\n", - " \"MLFLOW_MODEL_PATH\": source_path\n", - " }\n", + " model_metadata={\"MLFLOW_MODEL_PATH\": source_path},\n", ")" ] }, @@ -415,10 +427,7 @@ "metadata": {}, "outputs": [], "source": [ - "predictor = built_model.deploy(\n", - " initial_instance_count=1,\n", - " instance_type=\"ml.m5.large\"\n", - ")" + "predictor = built_model.deploy(initial_instance_count=1, instance_type=\"ml.m5.large\")" ] }, { diff --git a/sagemaker-mlflow/sagemaker_hpo_mlflow.ipynb b/sagemaker-mlflow/sagemaker_hpo_mlflow.ipynb index d5d1f03a1f..4b6853403a 100644 --- a/sagemaker-mlflow/sagemaker_hpo_mlflow.ipynb +++ b/sagemaker-mlflow/sagemaker_hpo_mlflow.ipynb @@ -109,11 +109,11 @@ "bucket = sagemaker_session.default_bucket()\n", "\n", "# S3 prefix for the training dataset to be uploaded to\n", - "prefix = 'DEMO-pytorch-mnist'\n", + 
"prefix = \"DEMO-pytorch-mnist\"\n", "\n", "# MLflow (replace these values with your own)\n", - "tracking_server_arn = 'your tracking server arn'\n", - "experiment_name = 'MNIST'" + "tracking_server_arn = \"your tracking server arn\"\n", + "experiment_name = \"MNIST\"" ] }, { @@ -149,9 +149,9 @@ "metadata": {}, "outputs": [], "source": [ - "local_dir = 'data'\n", + "local_dir = \"data\"\n", "MNIST.mirrors = [\n", - " f'https://sagemaker-example-files-prod-{region}.s3.amazonaws.com/datasets/image/MNIST/'\n", + " f\"https://sagemaker-example-files-prod-{region}.s3.amazonaws.com/datasets/image/MNIST/\"\n", "]\n", "MNIST(\n", " local_dir,\n", @@ -177,7 +177,7 @@ "metadata": {}, "outputs": [], "source": [ - "train_input = sagemaker_session.upload_data(path='data', bucket=bucket, key_prefix=prefix)" + "train_input = sagemaker_session.upload_data(path=\"data\", bucket=bucket, key_prefix=prefix)" ] }, { @@ -577,10 +577,7 @@ "\n", "objective_metric_name = \"average test loss\"\n", "objective_type = \"Minimize\"\n", - "metric_definitions = [\n", - " {\"Name\": \"average test loss\",\n", - " \"Regex\": \"Test set: Average loss: ([0-9\\\\.]+)\"}\n", - "]" + "metric_definitions = [{\"Name\": \"average test loss\", \"Regex\": \"Test set: Average loss: ([0-9\\\\.]+)\"}]" ] }, { @@ -612,17 +609,14 @@ " framework_version=\"1.13\",\n", " instance_count=1,\n", " instance_type=\"ml.c5.2xlarge\",\n", - " hyperparameters={\n", - " \"epochs\": 5,\n", - " \"backend\": \"gloo\"\n", - " },\n", + " hyperparameters={\"epochs\": 5, \"backend\": \"gloo\"},\n", " environment={\n", - " 'MLFLOW_TRACKING_URI':tracking_server_arn,\n", - " 'MLFLOW_EXPERIMENT_NAME':experiment.name,\n", - " 'MLFLOW_PARENT_RUN_ID':run.info.run_id\n", + " \"MLFLOW_TRACKING_URI\": tracking_server_arn,\n", + " \"MLFLOW_EXPERIMENT_NAME\": experiment.name,\n", + " \"MLFLOW_PARENT_RUN_ID\": run.info.run_id,\n", " },\n", " )\n", - " \n", + "\n", " tuner = HyperparameterTuner(\n", " estimator,\n", " objective_metric_name,\n", 
diff --git a/sagemaker-mlflow/sagemaker_mlflow_setup.ipynb b/sagemaker-mlflow/sagemaker_mlflow_setup.ipynb new file mode 100644 index 0000000000..3ee5907980 --- /dev/null +++ b/sagemaker-mlflow/sagemaker_mlflow_setup.ipynb @@ -0,0 +1,415 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "02127090-ee33-4005-b5af-5f4e386ed1a6", + "metadata": {}, + "source": [ + "# How to Set Up Amazon SageMaker with MLflow" + ] + }, + { + "cell_type": "markdown", + "id": "557f10ee-714a-4378-9493-abe2cd010754", + "metadata": {}, + "source": [ + "This notebook's CI test result for us-west-2 is as follows. CI test results in other regions can be found at the end of the notebook.\n", + "\n", + "![This us-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-2/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)" + ] + }, + { + "cell_type": "markdown", + "id": "9f09f362-71a7-409c-a4c7-0ee5e59c1581", + "metadata": {}, + "source": [ + "## Updates and Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd83cc42-fc1e-49cd-88e3-7a685add2404", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -U --quiet boto3" + ] + }, + { + "cell_type": "markdown", + "id": "f932a722-a2cd-4aca-bdc0-d00553439966", + "metadata": {}, + "source": [ + "Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "329cf85a-352c-4f55-8e2a-4771a26fbe70", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import sagemaker\n", + "import boto3" + ] + }, + { + "cell_type": "markdown", + "id": "95c2869e-1845-4534-bf97-d530b5c27c48", + "metadata": {}, + "source": [ + "Session variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ece22344-f747-4fb2-9051-3640dd95dd6b", + "metadata": {}, + "outputs": [], + "source": [ + "sess = sagemaker.Session()\n", + "bucket_name = 
sess.default_bucket()\n", + "role = sagemaker.get_execution_role()\n", + "region = sess.boto_region_name\n", + "\n", + "iam_client = boto3.client(\"iam\")\n", + "sts_client = boto3.client(\"sts\")\n", + "sm_client = boto3.client(\"sagemaker\")\n", + "account_id = sts_client.get_caller_identity()[\"Account\"]\n", + "tracking_server_name = \"my-setup-test3\"\n", + "mlflow_role_name = \"mlflow-test3\"" + ] + }, + { + "cell_type": "markdown", + "id": "6c292837-353c-4c3c-91b9-3088e8d5a02b", + "metadata": {}, + "source": [ + "## MLflow Permissions" + ] + }, + { + "cell_type": "markdown", + "id": "e6bae350-030f-4ecf-8380-5b11b73b5806", + "metadata": {}, + "source": [ + "### IAM Role for the MLflow Tracking Server\n", + "\n", + "To run the next cell, make sure the IAM role used while running this notebook has permission to create an IAM Role. \n", + "The `iam:CreateRole`, `iam:CreatePolicy`, `iam:ListPolicies`, and `iam:AttachRolePolicy` actions must be allowed by the notebook execution role's policy.\n", + "\n", + "If you are running this notebook from SageMaker Studio, you can update your notebook execution role through the following steps: \n", + "\n", + "1. Navigate to the AWS Console and select the Domain you are using\n", + "2. Under the Domain, select the User Profile you are using. You will see the Execution Role listed there.\n", + "3. Navigate to the IAM Console, search for the Execution Role under \"Roles\", and update your role with a policy that allows the `iam:CreateRole`, `iam:CreatePolicy`, `iam:ListPolicies`, and `iam:AttachRolePolicy` actions. \n", + "\n", + "If you are not using a SageMaker Studio Notebook, confirm that the role you have used to configure your AWS CLI has appropriate permissions to create an IAM role and attach a policy to it. 
\n", + "\n", + "Here is an example of an inline policy you can add to your role - \n", + "\n", + "```json\n", + "{\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Sid\": \"Statement1\",\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"iam:ListPolicies\",\n", + " \"iam:CreatePolicy\",\n", + " \"iam:CreateRole\",\n", + " \"iam:AttachRolePolicy\"\n", + " ],\n", + " \"Resource\": [\n", + " \"*\"\n", + " ]\n", + " }\n", + " ]\n", + "}\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96c0ad98-f237-4bfd-b134-40b46ebfa81d", + "metadata": {}, + "outputs": [], + "source": [ + "mlflow_trust_policy = {\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Principal\": {\"Service\": [\"sagemaker.amazonaws.com\"]},\n", + " \"Action\": \"sts:AssumeRole\",\n", + " }\n", + " ],\n", + "}\n", + "\n", + "# Create role for MLflow\n", + "mlflow_role = iam_client.create_role(\n", + " RoleName=mlflow_role_name, AssumeRolePolicyDocument=json.dumps(mlflow_trust_policy)\n", + ")\n", + "mlflow_role_arn = mlflow_role[\"Role\"][\"Arn\"]\n", + "\n", + "# Create policy for S3 and SageMaker Model Registry\n", + "sm_s3_model_registry_policy = {\n", + " \"Version\": \"2012-10-17\",\n", + " \"Statement\": [\n", + " {\n", + " \"Effect\": \"Allow\",\n", + " \"Action\": [\n", + " \"s3:Get*\",\n", + " \"s3:Put*\",\n", + " \"s3:List*\",\n", + " \"sagemaker:AddTags\",\n", + " \"sagemaker:CreateModelPackageGroup\",\n", + " \"sagemaker:CreateModelPackage\",\n", + " \"sagemaker:UpdateModelPackage\",\n", + " \"sagemaker:DescribeModelPackageGroup\",\n", + " ],\n", + " \"Resource\": \"*\",\n", + " }\n", + " ],\n", + "}\n", + "\n", + "mlflow_s3_sm_model_registry_iam_policy = iam_client.create_policy(\n", + " PolicyName=\"mlflow-s3-sm-model-registry\", PolicyDocument=json.dumps(sm_s3_model_registry_policy)\n", + ")\n", + "mlflow_s3_sm_model_registry_iam_policy_arn = 
mlflow_s3_sm_model_registry_iam_policy[\"Policy\"][\"Arn\"]\n", + "\n", + "# Attach the policy to the MLflow role\n", + "iam_client.attach_role_policy(\n", + " RoleName=mlflow_role_name, PolicyArn=mlflow_s3_sm_model_registry_iam_policy_arn\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "65e2744c-c1b4-4444-9e8f-fbf1315a71a4", + "metadata": {}, + "source": [ + "Note that your SageMaker execution role should have the following permissions to call MLflow REST APIs:\n", + "\n", + "```json\n", + "{\n", + " \"Version\": \"2012-10-17\", \n", + " \"Statement\": [ \n", + " { \n", + " \"Effect\": \"Allow\", \n", + " \"Action\": [\n", + " \"sagemaker-mlflow:*\",\n", + " \"sagemaker:CreateMlflowTrackingServer\",\n", + " \"sagemaker:UpdateMlflowTrackingServer\",\n", + " \"sagemaker:DeleteMlflowTrackingServer\",\n", + " \"sagemaker:StartMlflowTrackingServer\",\n", + " \"sagemaker:StopMlflowTrackingServer\",\n", + " \"sagemaker:CreatePresignedMlflowTrackingServerUrl\"\n", + " ], \n", + " \"Resource\": \"*\" \n", + " } \n", + " ]\n", + "}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "ade88b9a-961a-4ced-9320-e56d7e9cf3eb", + "metadata": {}, + "source": [ + "## Create MLflow Tracking Server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d496f9b-4493-4ab2-9d35-8d4ec0f79620", + "metadata": {}, + "outputs": [], + "source": [ + "sm_client.create_mlflow_tracking_server(\n", + " TrackingServerName=tracking_server_name,\n", + " ArtifactStoreUri=f\"s3://{bucket_name}/{tracking_server_name}\",\n", + " TrackingServerSize=\"Small\",\n", + " MlflowVersion=\"2.13.2\",\n", + " RoleArn=mlflow_role_arn,\n", + " AutomaticModelRegistration=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20d535f6-6dd2-4c5c-99e3-8b428c052c70", + "metadata": {}, + "outputs": [], + "source": [ + "tracking_server_arn = (\n", + " f\"arn:aws:sagemaker:{region}:{account_id}:mlflow-tracking-server/{tracking_server_name}\"\n", + ")" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddff09d7-73aa-4f77-b437-1e8c05c59ea2", + "metadata": {}, + "outputs": [], + "source": [ + "sm_client.describe_mlflow_tracking_server(TrackingServerName=tracking_server_name)" + ] + }, + { + "cell_type": "markdown", + "id": "e6c50a30-89e4-4ea9-8fe8-df15a2f7726e", + "metadata": {}, + "source": [ + "Install the MLflow SDK and our MLflow AWS Plugin" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2232f516-f23c-4c0d-ada2-933a45fea6e9", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --quiet mlflow==2.13.2 sagemaker-mlflow==0.1.0" + ] + }, + { + "cell_type": "markdown", + "id": "073d12e9-b91e-4c0c-93d1-8cae66648e49", + "metadata": {}, + "source": [ + "## MLflow tracking test" + ] + }, + { + "cell_type": "markdown", + "id": "ad90cde7-9de2-4df7-80a5-010165edafce", + "metadata": {}, + "source": [ + "Connect to tracking server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7a43ce7-3e9a-4b47-b051-9f59522ee43f", + "metadata": {}, + "outputs": [], + "source": [ + "import mlflow\n", + "\n", + "mlflow.set_tracking_uri(tracking_server_arn)" + ] + }, + { + "cell_type": "markdown", + "id": "c9197fca-6370-4f91-a52f-440ef5b22484", + "metadata": {}, + "source": [ + "Log a metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bab5d5df-c1a8-4a2b-89e1-52d36d630f3d", + "metadata": {}, + "outputs": [], + "source": [ + "with mlflow.start_run():\n", + " mlflow.log_metric(\"foo\", 1)" + ] + }, + { + "cell_type": "markdown", + "id": "d603ef2f-9c42-4ef2-896e-73ab1eaa6ace", + "metadata": {}, + "source": [ + "See results in MLflow UI. 
You can either launch the MLflow UI from within SageMaker Studio, or generate a pre-signed URL like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0690708f-976c-472e-8e4d-281aa163e9aa", + "metadata": {}, + "outputs": [], + "source": [ + "sm_client.create_presigned_mlflow_tracking_server_url(TrackingServerName=tracking_server_name)" + ] + }, + { + "cell_type": "markdown", + "id": "0f739f1a-2a97-4cc5-bb6b-bc59e4111d0f", + "metadata": {}, + "source": [ + "## Notebook CI Test Results\n", + "\n", + "This notebook was tested in multiple regions. The test results are as follows, except for us-west-2 which is shown at the top of the notebook.\n", + "\n", + "\n", + "![This us-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This us-east-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-2/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This us-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This ca-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ca-central-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This sa-east-1 badge failed to load. 
Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/sa-east-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This eu-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This eu-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-2/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This eu-west-3 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-3/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This eu-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-central-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This eu-north-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-north-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This ap-southeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This ap-southeast-2 badge failed to load. 
Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-2/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This ap-northeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This ap-northeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-2/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)\n", + "\n", + "![This ap-south-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-south-1/sagemaker-mlflow|sagemaker_mlflow_setup.ipynb)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sagemaker-mlflow/sagemaker_pipelines_mlflow.ipynb b/sagemaker-mlflow/sagemaker_pipelines_mlflow.ipynb index 9b145f3257..4ad94a49f3 100644 --- a/sagemaker-mlflow/sagemaker_pipelines_mlflow.ipynb +++ b/sagemaker-mlflow/sagemaker_pipelines_mlflow.ipynb @@ -76,12 +76,10 @@ "region = sagemaker_session.boto_region_name\n", "\n", "pipeline_name = \"breast-cancer-xgb\"\n", - "instance_type = ParameterString(\n", - " name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\"\n", - ")\n", + "instance_type = 
ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", "\n", "# Mlflow (replace these values with your own)\n", - "tracking_server_arn = 'your tracking server arn'\n", + "tracking_server_arn = \"your tracking server arn\"\n", "experiment_name = \"sm-pipelines-experiment\"" ] }, @@ -129,7 +127,7 @@ "outputs": [], "source": [ "# Set path to config file\n", - "os.environ['SAGEMAKER_USER_CONFIG_OVERRIDE'] = os.getcwd()" + "os.environ[\"SAGEMAKER_USER_CONFIG_OVERRIDE\"] = os.getcwd()" ] }, { @@ -205,45 +203,46 @@ "outputs": [], "source": [ "random_state = 2023\n", - "label_column = 'diagnosis'\n", + "label_column = \"diagnosis\"\n", "\n", "feature_names = [\n", - " 'id',\n", - " 'diagnosis',\n", - " 'radius_mean',\n", - " 'texture_mean',\n", - " 'perimeter_mean',\n", - " 'area_mean',\n", - " 'smoothness_mean',\n", - " 'compactness_mean',\n", - " 'concavity_mean',\n", - " 'concave points_mean',\n", - " 'symmetry_mean',\n", - " 'fractal_dimension_mean',\n", - " 'radius_se',\n", - " 'texture_se',\n", - " 'perimeter_se',\n", - " 'area_se',\n", - " 'smoothness_se',\n", - " 'compactness_se',\n", - " 'concavity_se',\n", - " 'concave points_se',\n", - " 'symmetry_se',\n", - " 'fractal_dimension_se',\n", - " 'radius_worst',\n", - " 'texture_worst',\n", - " 'perimeter_worst',\n", - " 'area_worst',\n", - " 'smoothness_worst',\n", - " 'compactness_worst',\n", - " 'concavity_worst',\n", - " 'concave points_worst',\n", - " 'symmetry_worst',\n", - " 'fractal_dimension_worst',\n", + " \"id\",\n", + " \"diagnosis\",\n", + " \"radius_mean\",\n", + " \"texture_mean\",\n", + " \"perimeter_mean\",\n", + " \"area_mean\",\n", + " \"smoothness_mean\",\n", + " \"compactness_mean\",\n", + " \"concavity_mean\",\n", + " \"concave points_mean\",\n", + " \"symmetry_mean\",\n", + " \"fractal_dimension_mean\",\n", + " \"radius_se\",\n", + " \"texture_se\",\n", + " \"perimeter_se\",\n", + " \"area_se\",\n", + " \"smoothness_se\",\n", + " \"compactness_se\",\n", + " 
\"concavity_se\",\n", + " \"concave points_se\",\n", + " \"symmetry_se\",\n", + " \"fractal_dimension_se\",\n", + " \"radius_worst\",\n", + " \"texture_worst\",\n", + " \"perimeter_worst\",\n", + " \"area_worst\",\n", + " \"smoothness_worst\",\n", + " \"compactness_worst\",\n", + " \"concavity_worst\",\n", + " \"concave points_worst\",\n", + " \"symmetry_worst\",\n", + " \"fractal_dimension_worst\",\n", "]\n", "\n", + "\n", "@step(\n", - " name='DataPreprocessing',\n", + " name=\"DataPreprocessing\",\n", " instance_type=instance_type,\n", ")\n", "def preprocess(\n", @@ -261,28 +260,26 @@ " mlflow.set_experiment(experiment_name)\n", " with mlflow.start_run(run_name=run_name) as run:\n", " run_id = run.info.run_id\n", - " with mlflow.start_run(run_name='DataPreprocessing', nested=True):\n", + " with mlflow.start_run(run_name=\"DataPreprocessing\", nested=True):\n", " df = pd.read_csv(raw_data_s3_path, header=None, names=feature_names)\n", - " df.drop(columns='id', inplace=True)\n", + " df.drop(columns=\"id\", inplace=True)\n", " mlflow.log_input(\n", " mlflow.data.from_pandas(df, raw_data_s3_path, targets=label_column),\n", - " context='DataPreprocessing',\n", - " )\n", - " \n", - " train_df, test_df = train_test_split(\n", - " df, test_size=0.2, stratify=df[label_column]\n", + " context=\"DataPreprocessing\",\n", " )\n", + "\n", + " train_df, test_df = train_test_split(df, test_size=0.2, stratify=df[label_column])\n", " validation_df, test_df = train_test_split(\n", " test_df, test_size=0.5, stratify=test_df[label_column]\n", " )\n", " train_df.reset_index(inplace=True, drop=True)\n", " validation_df.reset_index(inplace=True, drop=True)\n", " test_df.reset_index(inplace=True, drop=True)\n", - " \n", - " train_s3_path = f's3://{bucket}/{output_prefix}/train.csv'\n", - " val_s3_path = f's3://{bucket}/{output_prefix}/val.csv'\n", - " test_s3_path = f's3://{bucket}/{output_prefix}/test.csv'\n", - " \n", + "\n", + " train_s3_path = 
f\"s3://{bucket}/{output_prefix}/train.csv\"\n", + " val_s3_path = f\"s3://{bucket}/{output_prefix}/val.csv\"\n", + " test_s3_path = f\"s3://{bucket}/{output_prefix}/test.csv\"\n", + "\n", " train_df.to_csv(train_s3_path, index=False)\n", " validation_df.to_csv(val_s3_path, index=False)\n", " test_df.to_csv(test_s3_path, index=False)\n", @@ -317,19 +314,19 @@ "source": [ "use_gpu = False\n", "param = dict(\n", - " objective='binary:logistic',\n", + " objective=\"binary:logistic\",\n", " max_depth=5,\n", " eta=0.2,\n", " gamma=4,\n", " min_child_weight=6,\n", " subsample=0.7,\n", - " tree_method='gpu_hist' if use_gpu else 'hist', # Use GPU accelerated algorithm\n", + " tree_method=\"gpu_hist\" if use_gpu else \"hist\", # Use GPU accelerated algorithm\n", ")\n", "num_round = 50\n", "\n", "\n", "@step(\n", - " name='ModelTraining',\n", + " name=\"ModelTraining\",\n", " instance_type=instance_type,\n", ")\n", "def train(\n", @@ -348,24 +345,24 @@ " mlflow.set_experiment(experiment_name)\n", "\n", " with mlflow.start_run(run_id=run_id):\n", - " with mlflow.start_run(run_name='ModelTraining', nested=True) as training_run:\n", + " with mlflow.start_run(run_name=\"ModelTraining\", nested=True) as training_run:\n", " training_run_id = training_run.info.run_id\n", " mlflow.xgboost.autolog(\n", " log_input_examples=True,\n", " log_model_signatures=True,\n", " log_models=True,\n", " log_datasets=True,\n", - " model_format='xgb',\n", + " model_format=\"xgb\",\n", " )\n", - " \n", + "\n", " # read data files from S3\n", " train_df = pd.read_csv(train_s3_path)\n", " validation_df = pd.read_csv(validation_s3_path)\n", - " \n", + "\n", " # create dataframe and label series\n", - " y_train = (train_df.pop(label_column) == 'M').astype('int')\n", - " y_validation = (validation_df.pop(label_column) == 'M').astype('int')\n", - " \n", + " y_train = (train_df.pop(label_column) == \"M\").astype(\"int\")\n", + " y_validation = (validation_df.pop(label_column) == \"M\").astype(\"int\")\n", + 
"\n", " xgb = XGBClassifier(n_estimators=num_round, **param)\n", " xgb.fit(\n", " train_df,\n", @@ -404,7 +401,7 @@ "outputs": [], "source": [ "@step(\n", - " name='ModelEvaluation',\n", + " name=\"ModelEvaluation\",\n", " instance_type=instance_type,\n", ")\n", "def evaluate(\n", @@ -420,19 +417,19 @@ " mlflow.set_experiment(experiment_name)\n", "\n", " with mlflow.start_run(run_id=run_id):\n", - " with mlflow.start_run(run_name='ModelEvaluation', nested=True):\n", + " with mlflow.start_run(run_name=\"ModelEvaluation\", nested=True):\n", " test_df = pd.read_csv(test_s3_path)\n", - " test_df[label_column] = (test_df[label_column] == 'M').astype('int')\n", - " model = mlflow.pyfunc.load_model(f'runs:/{training_run_id}/model')\n", - " \n", + " test_df[label_column] = (test_df[label_column] == \"M\").astype(\"int\")\n", + " model = mlflow.pyfunc.load_model(f\"runs:/{training_run_id}/model\")\n", + "\n", " results = mlflow.evaluate(\n", " model=model,\n", " data=test_df,\n", " targets=label_column,\n", - " model_type='classifier',\n", - " evaluators=['default'],\n", + " model_type=\"classifier\",\n", + " evaluators=[\"default\"],\n", " )\n", - " return {'f1_score': results.metrics['f1_score']}" + " return {\"f1_score\": results.metrics[\"f1_score\"]}" ] }, { @@ -459,7 +456,7 @@ "outputs": [], "source": [ "@step(\n", - " name='ModelRegistration',\n", + " name=\"ModelRegistration\",\n", " instance_type=instance_type,\n", ")\n", "def register(\n", @@ -474,8 +471,8 @@ " mlflow.set_experiment(experiment_name)\n", "\n", " with mlflow.start_run(run_id=run_id):\n", - " with mlflow.start_run(run_name='ModelRegistration', nested=True):\n", - " mlflow.register_model(f'runs:/{training_run_id}/model', pipeline_name)" + " with mlflow.start_run(run_name=\"ModelRegistration\", nested=True):\n", + " mlflow.register_model(f\"runs:/{training_run_id}/model\", pipeline_name)" ] }, { @@ -499,7 +496,7 @@ "source": [ "preprocessing_step = preprocess(\n", " raw_data_s3_path=input_path,\n", - " 
output_prefix=f'{pipeline_name}/dataset',\n", + " output_prefix=f\"{pipeline_name}/dataset\",\n", " experiment_name=experiment_name,\n", " run_name=ExecutionVariables.PIPELINE_EXECUTION_ID,\n", ")\n", @@ -512,7 +509,7 @@ ")\n", "\n", "conditional_register_step = ConditionStep(\n", - " name='ConditionalRegister',\n", + " name=\"ConditionalRegister\",\n", " conditions=[\n", " ConditionGreaterThanOrEqualTo(\n", " left=evaluate(\n", @@ -520,16 +517,17 @@ " experiment_name=preprocessing_step[3],\n", " run_id=preprocessing_step[4],\n", " training_run_id=training_step[2],\n", - " )['f1_score'],\n", + " )[\"f1_score\"],\n", " right=0.8,\n", " )\n", " ],\n", - " if_steps=[register(\n", - " pipeline_name=pipeline_name,\n", - " experiment_name=preprocessing_step[3],\n", - " run_id=preprocessing_step[4],\n", - " training_run_id=training_step[2],\n", - " )\n", + " if_steps=[\n", + " register(\n", + " pipeline_name=pipeline_name,\n", + " experiment_name=preprocessing_step[3],\n", + " run_id=preprocessing_step[4],\n", + " training_run_id=training_step[2],\n", + " )\n", " ],\n", " else_steps=[FailStep(name=\"Fail\", error_message=\"Model performance is not good enough\")],\n", ")\n", @@ -539,11 +537,7 @@ " parameters=[\n", " instance_type,\n", " ],\n", - " steps=[\n", - " preprocessing_step,\n", - " training_step,\n", - " conditional_register_step\n", - " ],\n", + " steps=[preprocessing_step, training_step, conditional_register_step],\n", ")" ] }, diff --git a/sagemaker-mlflow/sagemaker_training_mlflow.ipynb b/sagemaker-mlflow/sagemaker_training_mlflow.ipynb index a178ffec67..21bdcc7d7a 100644 --- a/sagemaker-mlflow/sagemaker_training_mlflow.ipynb +++ b/sagemaker-mlflow/sagemaker_training_mlflow.ipynb @@ -66,10 +66,10 @@ "region = sagemaker_session.boto_region_name\n", "\n", "# S3 prefix for the training dataset to be uploaded to\n", - "prefix = 'DEMO-scikit-iris'\n", + "prefix = \"DEMO-scikit-iris\"\n", "\n", "# MLflow (replace these values with your own)\n", - 
"tracking_server_arn = 'your tracking server arn'" + "tracking_server_arn = \"your tracking server arn\"" ] }, { @@ -105,13 +105,13 @@ "\n", "s3_client = boto3.client(\"s3\")\n", "s3_client.download_file(\n", - " f\"sagemaker-example-files-prod-{region}\", 'datasets/tabular/iris/iris.data', './data/iris.csv'\n", + " f\"sagemaker-example-files-prod-{region}\", \"datasets/tabular/iris/iris.data\", \"./data/iris.csv\"\n", ")\n", "\n", - "df_iris = pd.read_csv('./data/iris.csv', header=None)\n", - "df_iris[4] = df_iris[4].map({\"Iris-setosa\": 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})\n", + "df_iris = pd.read_csv(\"./data/iris.csv\", header=None)\n", + "df_iris[4] = df_iris[4].map({\"Iris-setosa\": 0, \"Iris-versicolor\": 1, \"Iris-virginica\": 2})\n", "iris = df_iris[[4, 0, 1, 2, 3]].to_numpy()\n", - "np.savetxt('./data/iris.csv', iris, delimiter=',', fmt='%1.1f, %1.3f, %1.3f, %1.3f, %1.3f')" + "np.savetxt(\"./data/iris.csv\", iris, delimiter=\",\", fmt=\"%1.1f, %1.3f, %1.3f, %1.3f, %1.3f\")" ] }, { @@ -127,10 +127,10 @@ "metadata": {}, "outputs": [], "source": [ - "WORK_DIRECTORY = 'data'\n", + "WORK_DIRECTORY = \"data\"\n", "\n", "train_input = sagemaker_session.upload_data(\n", - " WORK_DIRECTORY, key_prefix='{}/{}'.format(prefix, WORK_DIRECTORY)\n", + " WORK_DIRECTORY, key_prefix=\"{}/{}\".format(prefix, WORK_DIRECTORY)\n", ")" ] }, @@ -251,17 +251,15 @@ "outputs": [], "source": [ "sklearn = SKLearn(\n", - " entry_point='train.py',\n", - " source_dir='training_code',\n", - " framework_version='1.2-1',\n", - " instance_type='ml.c4.xlarge',\n", + " entry_point=\"train.py\",\n", + " source_dir=\"training_code\",\n", + " framework_version=\"1.2-1\",\n", + " instance_type=\"ml.c4.xlarge\",\n", " role=role,\n", " sagemaker_session=sagemaker_session,\n", - " hyperparameters={'max_leaf_nodes': 30},\n", + " hyperparameters={\"max_leaf_nodes\": 30},\n", " keep_alive_period_in_seconds=3600,\n", - " environment={\n", - " 'MLFLOW_TRACKING_ARN': tracking_server_arn\n", - " 
}\n", + " environment={\"MLFLOW_TRACKING_ARN\": tracking_server_arn},\n", ")" ] },