Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
710 changes: 0 additions & 710 deletions use-cases/computer_vision/1-metastases-detection-train-model.ipynb

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Computer Vision for Medical Imaging: Part 4. SageMaker Pipelines\n",
"This notebook is the final part of a 4-part series of techniques and services offer by SageMaker to build a model which predicts if an image of cells contains cancer. This notebook describes how to automate the ML workflow using SageMaker Pipelines."
"# Computer Vision for Medical Imaging - Pipeline Mode\n",
"This notebook showcases techniques and services offered by SageMaker to build a model which predicts if an image of cells contains cancer. This notebook describes how to automate the ML workflow using SageMaker Pipelines."
]
},
{
Expand Down Expand Up @@ -37,8 +37,7 @@
"metadata": {},
"outputs": [],
"source": [
"%store -r\n",
"%store"
"! pip install --upgrade sagemaker boto3"
]
},
{
Expand All @@ -48,6 +47,28 @@
"## Import Libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install any missing dependencies into the running kernel.\n",
"# NOTE: a pip package name can differ from its import name\n",
"# (pip's opencv-python is imported as cv2), so the two are mapped\n",
"# explicitly -- probing __import__(\"opencv-python\") would fail on\n",
"# every run and trigger a redundant install each time.\n",
"def import_or_install(package, import_name=None):\n",
"    \"\"\"Import import_name (default: package); pip install package on failure.\"\"\"\n",
"    try:\n",
"        __import__(import_name or package)\n",
"    except ImportError:\n",
"        # %pip targets the current kernel's environment (unlike ! pip)\n",
"        %pip install $package\n",
"\n",
"\n",
"required_packages = [\n",
"    (\"sagemaker\", None),\n",
"    (\"boto3\", None),\n",
"    (\"h5py\", None),\n",
"    (\"tqdm\", None),\n",
"    (\"matplotlib\", None),\n",
"    (\"opencv-python\", \"cv2\"),\n",
"]\n",
"\n",
"for package, import_name in required_packages:\n",
"    import_or_install(package, import_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -59,6 +80,12 @@
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import cv2\n",
"import os\n",
"import zipfile\n",
"import h5py\n",
"import mxnet as mx\n",
"from datetime import datetime\n",
"from tqdm import tqdm\n",
"\n",
"from sagemaker.workflow.pipeline import Pipeline\n",
"from sagemaker.workflow.steps import CreateModelStep\n",
Expand Down Expand Up @@ -96,6 +123,208 @@
"bucket = sagemaker.Session().default_bucket()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Ensure the local data directory exists before downloading into it;\n",
"# exist_ok makes this a no-op when the directory is already present.\n",
"os.makedirs(\"data\", exist_ok=True)\n",
"\n",
"# download zip file from public s3 bucket\n",
"!wget -P data https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/pcam/medical_images.zip"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Unpack the archive, then load the tile images (x) and labels (y).\n",
"with zipfile.ZipFile(\"data/medical_images.zip\") as zf:\n",
"    zf.extractall()  # assumes archive members carry the data/ prefix -- TODO confirm\n",
"\n",
"# Open the HDF5 file by path inside a context manager so the handle is\n",
"# closed on exit (the original left the h5py.File object open).\n",
"with h5py.File(\"data/camelyon16_tiles.h5\", \"r\") as f:\n",
"    X = f[\"x\"][()]  # [()] reads the whole dataset into an in-memory array\n",
"    y = f[\"y\"][()]\n",
"\n",
"print(\"Shape of X:\", X.shape)\n",
"print(\"Shape of y:\", y.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# write to session s3 bucket\n",
"s3_client.upload_file(\"data/medical_images.zip\", bucket, f\"data/medical_images.zip\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Remove the local archive copy (it has already been uploaded to S3).\n",
"import os\n",
"\n",
"zip_path = \"data/medical_images.zip\"\n",
"if not os.path.exists(zip_path):\n",
"    print(\"The file does not exist\")\n",
"else:\n",
"    os.remove(zip_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View Sample Images from Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def preview_images(X, y, n, cols):\n",
"    \"\"\"Show the first n images of X in a grid of `cols` columns, titled by label.\n",
"\n",
"    X: array of images; y: array of labels aligned with X;\n",
"    n: number of images to display; cols: grid columns.\n",
"    \"\"\"\n",
"    sample_images = X[:n]\n",
"    sample_labels = y[:n]\n",
"\n",
"    rows = int(np.ceil(n / cols))\n",
"    fig, axs = plt.subplots(rows, cols, figsize=(11.5, 7))\n",
"\n",
"    for i, ax in enumerate(axs.flatten()):\n",
"        # The grid may contain more slots than images (rows * cols >= n);\n",
"        # the original indexed past the sample and raised IndexError when\n",
"        # n was not a multiple of cols. Blank out the spare axes instead.\n",
"        ax.axis(\"off\")\n",
"        if i >= len(sample_images):\n",
"            continue\n",
"        ax.imshow(sample_images[i])\n",
"        ax.set_title(f\"Label: {sample_labels[i]}\")\n",
"\n",
"    plt.tight_layout()\n",
"\n",
"\n",
"preview_images(X, y, 15, 5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Shuffle and Split Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): import is mid-notebook and sklearn is not in the\n",
"# required_packages install list -- consider moving/adding it.\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Take plain array copies of the loaded data before splitting.\n",
"X_numpy = X[:]\n",
"y_numpy = y[:]\n",
"\n",
"# Hold out 1000 samples for test, then 2000 of the remainder for\n",
"# validation; fixed random_state values keep the splits reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X_numpy, y_numpy, test_size=1000, random_state=0\n",
")\n",
"X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=2000, random_state=1)\n",
"\n",
"print(X_train.shape)\n",
"print(X_val.shape)\n",
"print(X_test.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Convert Splits to RecordIO Format"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def write_to_recordio(X: np.ndarray, y: np.ndarray, prefix: str):\n",
"    \"\"\"Write images X with labels y to {prefix}.rec / {prefix}.idx in\n",
"    MXNet indexed RecordIO format, one JPEG-encoded record per image.\"\"\"\n",
"    record = mx.recordio.MXIndexedRecordIO(idx_path=f\"{prefix}.idx\", uri=f\"{prefix}.rec\", flag=\"w\")\n",
"    for idx, arr in enumerate(tqdm(X)):\n",
"        # IRHeader(flag, label, id, id2): label from y, record id = index.\n",
"        header = mx.recordio.IRHeader(0, y[idx], idx, 0)\n",
"        # Encode the image as JPEG (quality 95) together with its header.\n",
"        s = mx.recordio.pack_img(\n",
"            header,\n",
"            arr,\n",
"            quality=95,\n",
"            img_fmt=\".jpg\",\n",
"        )\n",
"        record.write_idx(idx, s)\n",
"    record.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"write_to_recordio(X_train, y_train, prefix=\"data/train\")\n",
"write_to_recordio(X_val, y_val, prefix=\"data/val\")\n",
"write_to_recordio(X_test, y_test, prefix=\"data/test\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Upload Data Splits to S3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"prefix = \"cv-metastasis\"\n",
"\n",
"try:\n",
"    # us-east-1 rejects an explicit LocationConstraint, so only pass a\n",
"    # CreateBucketConfiguration for other regions.\n",
"    config = {} if region == \"us-east-1\" else {\"CreateBucketConfiguration\": {\"LocationConstraint\": region}}\n",
"    s3_client.create_bucket(Bucket=bucket, ACL=\"private\", **config)\n",
"    print(f\"Created S3 bucket: {bucket}\")\n",
"\n",
"except Exception as e:\n",
"    # Only botocore ClientError carries .response; getattr keeps other\n",
"    # exception types from being masked by an AttributeError here.\n",
"    error_code = getattr(e, \"response\", {}).get(\"Error\", {}).get(\"Code\")\n",
"    if error_code == \"BucketAlreadyOwnedByYou\":\n",
"        print(f\"Using existing bucket: {bucket}\")\n",
"    else:\n",
"        raise  # re-raise without wrapping to preserve the traceback\n",
"]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s3_client.upload_file(\"data/train.rec\", bucket, f\"{prefix}/data/train/train.rec\")\n",
"s3_client.upload_file(\"data/val.rec\", bucket, f\"{prefix}/data/val/val.rec\")\n",
"s3_client.upload_file(\"data/test.rec\", bucket, f\"{prefix}/data/test/test.rec\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -110,13 +339,15 @@
"outputs": [],
"source": [
"training_image = sagemaker.image_uris.retrieve(\"image-classification\", region)\n",
"num_training_samples = X_train.shape[0]\n",
"num_classes = len(np.unique(y_train))\n",
"\n",
"hyperparameters = {\n",
" \"num_layers\": 18,\n",
" \"use_pretrained_model\": 1,\n",
" \"augmentation_type\": \"crop_color_transform\",\n",
" \"image_shape\": \"3,96,96\",\n",
" \"num_classes\": 2,\n",
" \"num_classes\": num_classes,\n",
" \"num_training_samples\": num_training_samples,\n",
" \"mini_batch_size\": 64,\n",
" \"epochs\": 5,\n",
Expand Down Expand Up @@ -255,6 +486,8 @@
"metadata": {},
"outputs": [],
"source": [
"mpg_name = \"cv-metastasis-{}\".format(datetime.now().strftime(\"%Y-%m-%d-%H-%M-%S\"))\n",
"\n",
"model_approval_status = ParameterString(\n",
" name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n",
")\n",
Expand Down Expand Up @@ -287,7 +520,7 @@
"source": [
"model = sagemaker.model.Model(\n",
" name=f\"{mpg_name}-pipline\",\n",
" image_uri=train_step.properties.AlgorithmSpecification.TrainingImage,\n",
" image_uri=training_image,\n",
" model_data=train_step.properties.ModelArtifacts.S3ModelArtifacts,\n",
" sagemaker_session=sagemaker_session,\n",
" role=sagemaker_role,\n",
Expand Down Expand Up @@ -315,6 +548,7 @@
" Filename=\"deploy_model.py\", Bucket=bucket, Key=f\"{prefix}/code/deploy_model.py\"\n",
")\n",
"deploy_model_script_uri = f\"s3://{bucket}/{prefix}/code/deploy_model.py\"\n",
"deploy_instance_type = \"ml.m4.xlarge\"\n",
"\n",
"deploy_model_processor = SKLearnProcessor(\n",
" framework_version=\"0.23-1\",\n",
Expand Down Expand Up @@ -355,7 +589,7 @@
"metadata": {},
"outputs": [],
"source": [
"pipeline_name = f\"{prefix}-pipeline\"\n",
"pipeline_name = \"{}-pipeline-{}\".format(prefix, datetime.now().strftime(\"%Y-%m-%d-%H-%M-%S\"))\n",
"\n",
"pipeline = Pipeline(\n",
" name=pipeline_name,\n",
Expand Down Expand Up @@ -419,7 +653,49 @@
"metadata": {},
"outputs": [],
"source": [
"import time  # used by time.sleep below; absent from the notebook's import cell\n",
"\n",
"\n",
"def delete_model_package_group(sm_client, package_group_name):\n",
"    \"\"\"Delete every model package version in a group, then the group itself.\n",
"\n",
"    Failures are printed and swallowed so notebook cleanup keeps going.\n",
"    \"\"\"\n",
"    try:\n",
"        model_versions = sm_client.list_model_packages(ModelPackageGroupName=package_group_name)\n",
"\n",
"    except Exception as e:\n",
"        print(\"{} \\n\".format(e))\n",
"        return\n",
"\n",
"    # A group cannot be deleted while it still contains package versions.\n",
"    for model_version in model_versions[\"ModelPackageSummaryList\"]:\n",
"        try:\n",
"            sm_client.delete_model_package(ModelPackageName=model_version[\"ModelPackageArn\"])\n",
"        except Exception as e:\n",
"            print(\"{} \\n\".format(e))\n",
"        time.sleep(0.5)  # Ensure requests aren't throttled\n",
"\n",
"    try:\n",
"        sm_client.delete_model_package_group(ModelPackageGroupName=package_group_name)\n",
"        print(\"{} model package group deleted\".format(package_group_name))\n",
"    except Exception as e:\n",
"        print(\"{} \\n\".format(e))\n",
"        return\n",
"\n",
"\n",
"def delete_sagemaker_pipeline(sm_client, pipeline_name):\n",
"    \"\"\"Delete the named SageMaker pipeline; print and swallow any failure.\"\"\"\n",
"    try:\n",
"        sm_client.delete_pipeline(\n",
"            PipelineName=pipeline_name,\n",
"        )\n",
"        print(\"{} pipeline deleted\".format(pipeline_name))\n",
"    except Exception as e:\n",
"        print(\"{} \\n\".format(e))\n",
"        return"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Clean up: delete the model package group and pipeline created above.\n",
"client = sagemaker.Session().sagemaker_client\n",
"delete_model_package_group(client, mpg_name)\n",
"delete_sagemaker_pipeline(client, pipeline_name)"
]
},
{
Expand All @@ -433,9 +709,9 @@
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "conda_python3",
"display_name": "conda_mxnet_p36",
"language": "python",
"name": "conda_python3"
"name": "conda_mxnet_p36"
},
"language_info": {
"codemirror_mode": {
Expand Down
Loading