Skip to content

Commit 058a7f2

Browse files
committed
refactor sequential notebooks into independent notebooks
1 parent f2112be commit 058a7f2

File tree

3 files changed

+1237
-720
lines changed

3 files changed

+1237
-720
lines changed

use-cases/computer_vision/1-metastases-detection-train-model.ipynb

Lines changed: 0 additions & 710 deletions
This file was deleted.

use-cases/computer_vision/4-metastases-detection-pipeline.ipynb renamed to use-cases/computer_vision/metastases-detection-pipeline.ipynb

Lines changed: 286 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7-
"# Computer Vision for Medical Imaging: Part 4. SageMaker Pipelines\n",
8-
"This notebook is the final part of a 4-part series of techniques and services offer by SageMaker to build a model which predicts if an image of cells contains cancer. This notebook describes how to automate the ML workflow using SageMaker Pipelines."
7+
"# Computer Vision for Medical Imaging - Pipeline Mode\n",
8+
"This notebook showcases techniques and services offered by SageMaker to build a model that predicts whether an image of cells contains cancer. It describes how to automate the ML workflow using SageMaker Pipelines."
99
]
1010
},
1111
{
@@ -37,8 +37,7 @@
3737
"metadata": {},
3838
"outputs": [],
3939
"source": [
40-
"%store -r\n",
41-
"%store"
40+
"! pip install --upgrade sagemaker boto3"
4241
]
4342
},
4443
{
@@ -48,6 +47,28 @@
4847
"## Import Libraries"
4948
]
5049
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": null,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": [
56+
"import pip\n",
57+
"\n",
58+
"\n",
59+
"def import_or_install(package):\n",
60+
" try:\n",
61+
" __import__(package)\n",
62+
" except ImportError:\n",
63+
" pip.main([\"install\", package])\n",
64+
"\n",
65+
"\n",
66+
"required_packages = [\"sagemaker\", \"boto3\", \"h5py\", \"tqdm\", \"matplotlib\"]\n",
67+
"\n",
68+
"for package in required_packages:\n",
69+
" import_or_install(package)"
70+
]
71+
},
5172
{
5273
"cell_type": "code",
5374
"execution_count": null,
@@ -59,6 +80,12 @@
5980
"import numpy as np\n",
6081
"import matplotlib.pyplot as plt\n",
6182
"import cv2\n",
83+
"import os\n",
84+
"import zipfile\n",
85+
"import h5py\n",
86+
"import mxnet as mx\n",
87+
"from datetime import datetime\n",
88+
"from tqdm import tqdm\n",
6289
"\n",
6390
"from sagemaker.workflow.pipeline import Pipeline\n",
6491
"from sagemaker.workflow.steps import CreateModelStep\n",
@@ -96,6 +123,208 @@
96123
"bucket = sagemaker.Session().default_bucket()"
97124
]
98125
},
126+
{
127+
"cell_type": "markdown",
128+
"metadata": {},
129+
"source": [
130+
"## Load Dataset"
131+
]
132+
},
133+
{
134+
"cell_type": "code",
135+
"execution_count": null,
136+
"metadata": {},
137+
"outputs": [],
138+
"source": [
139+
"# create the local data directory if it does not already exist\n",
140+
"if not os.path.isdir(\"data\"):\n",
141+
" os.mkdir(\"data\")\n",
142+
"\n",
143+
"# download zip file from public s3 bucket\n",
144+
"!wget -P data https://sagemaker-sample-files.s3.amazonaws.com/datasets/image/pcam/medical_images.zip"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": null,
150+
"metadata": {},
151+
"outputs": [],
152+
"source": [
153+
"with zipfile.ZipFile(\"data/medical_images.zip\") as zf:\n",
154+
" zf.extractall()\n",
155+
"with open(\"data/camelyon16_tiles.h5\", \"rb\") as hf:\n",
156+
" f = h5py.File(hf, \"r\")\n",
157+
"\n",
158+
" X = f[\"x\"][()]\n",
159+
" y = f[\"y\"][()]\n",
160+
"\n",
161+
"print(\"Shape of X:\", X.shape)\n",
162+
"print(\"Shape of y:\", y.shape)"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": null,
168+
"metadata": {},
169+
"outputs": [],
170+
"source": [
171+
"# upload the downloaded dataset archive to the session's default S3 bucket\n",
172+
"s3_client.upload_file(\"data/medical_images.zip\", bucket, f\"data/medical_images.zip\")"
173+
]
174+
},
175+
{
176+
"cell_type": "code",
177+
"execution_count": null,
178+
"metadata": {},
179+
"outputs": [],
180+
"source": [
181+
"# delete local copy\n",
182+
"import os\n",
183+
"\n",
184+
"if os.path.exists(\"data/medical_images.zip\"):\n",
185+
" os.remove(\"data/medical_images.zip\")\n",
186+
"else:\n",
187+
" print(\"The file does not exist\")"
188+
]
189+
},
190+
{
191+
"cell_type": "markdown",
192+
"metadata": {},
193+
"source": [
194+
"## View Sample Images from Dataset"
195+
]
196+
},
197+
{
198+
"cell_type": "code",
199+
"execution_count": null,
200+
"metadata": {},
201+
"outputs": [],
202+
"source": [
203+
"def preview_images(X, y, n, cols):\n",
204+
" sample_images = X[:n]\n",
205+
" sample_labels = y[:n]\n",
206+
"\n",
207+
" rows = int(np.ceil(n / cols))\n",
208+
" fig, axs = plt.subplots(rows, cols, figsize=(11.5, 7))\n",
209+
"\n",
210+
" for i, ax in enumerate(axs.flatten()):\n",
211+
" image = sample_images[i]\n",
212+
" label = sample_labels[i]\n",
213+
" ax.imshow(image)\n",
214+
" ax.axis(\"off\")\n",
215+
" ax.set_title(f\"Label: {label}\")\n",
216+
"\n",
217+
" plt.tight_layout()\n",
218+
"\n",
219+
"\n",
220+
"preview_images(X, y, 15, 5)"
221+
]
222+
},
223+
{
224+
"cell_type": "markdown",
225+
"metadata": {},
226+
"source": [
227+
"## Shuffle and Split Dataset"
228+
]
229+
},
230+
{
231+
"cell_type": "code",
232+
"execution_count": null,
233+
"metadata": {},
234+
"outputs": [],
235+
"source": [
236+
"from sklearn.model_selection import train_test_split\n",
237+
"\n",
238+
"X_numpy = X[:]\n",
239+
"y_numpy = y[:]\n",
240+
"\n",
241+
"X_train, X_test, y_train, y_test = train_test_split(\n",
242+
" X_numpy, y_numpy, test_size=1000, random_state=0\n",
243+
")\n",
244+
"X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=2000, random_state=1)\n",
245+
"\n",
246+
"print(X_train.shape)\n",
247+
"print(X_val.shape)\n",
248+
"print(X_test.shape)"
249+
]
250+
},
251+
{
252+
"cell_type": "markdown",
253+
"metadata": {},
254+
"source": [
255+
"## Convert Splits to RecordIO Format"
256+
]
257+
},
258+
{
259+
"cell_type": "code",
260+
"execution_count": null,
261+
"metadata": {},
262+
"outputs": [],
263+
"source": [
264+
"def write_to_recordio(X: np.ndarray, y: np.ndarray, prefix: str):\n",
265+
" record = mx.recordio.MXIndexedRecordIO(idx_path=f\"{prefix}.idx\", uri=f\"{prefix}.rec\", flag=\"w\")\n",
266+
" for idx, arr in enumerate(tqdm(X)):\n",
267+
" header = mx.recordio.IRHeader(0, y[idx], idx, 0)\n",
268+
" s = mx.recordio.pack_img(\n",
269+
" header,\n",
270+
" arr,\n",
271+
" quality=95,\n",
272+
" img_fmt=\".jpg\",\n",
273+
" )\n",
274+
" record.write_idx(idx, s)\n",
275+
" record.close()"
276+
]
277+
},
278+
{
279+
"cell_type": "code",
280+
"execution_count": null,
281+
"metadata": {},
282+
"outputs": [],
283+
"source": [
284+
"write_to_recordio(X_train, y_train, prefix=\"data/train\")\n",
285+
"write_to_recordio(X_val, y_val, prefix=\"data/val\")\n",
286+
"write_to_recordio(X_test, y_test, prefix=\"data/test\")"
287+
]
288+
},
289+
{
290+
"cell_type": "markdown",
291+
"metadata": {},
292+
"source": [
293+
"## Upload Data Splits to S3"
294+
]
295+
},
296+
{
297+
"cell_type": "code",
298+
"execution_count": null,
299+
"metadata": {},
300+
"outputs": [],
301+
"source": [
302+
"prefix = \"cv-metastasis\"\n",
303+
"\n",
304+
"try:\n",
305+
" s3_client.create_bucket(\n",
306+
" Bucket=bucket, ACL=\"private\", CreateBucketConfiguration={\"LocationConstraint\": region}\n",
307+
" )\n",
308+
" print(f\"Created S3 bucket: {bucket}\")\n",
309+
"\n",
310+
"except Exception as e:\n",
311+
" if e.response[\"Error\"][\"Code\"] == \"BucketAlreadyOwnedByYou\":\n",
312+
" print(f\"Using existing bucket: {bucket}\")\n",
313+
" else:\n",
314+
" raise (e)"
315+
]
316+
},
317+
{
318+
"cell_type": "code",
319+
"execution_count": null,
320+
"metadata": {},
321+
"outputs": [],
322+
"source": [
323+
"s3_client.upload_file(\"data/train.rec\", bucket, f\"{prefix}/data/train/train.rec\")\n",
324+
"s3_client.upload_file(\"data/val.rec\", bucket, f\"{prefix}/data/val/val.rec\")\n",
325+
"s3_client.upload_file(\"data/test.rec\", bucket, f\"{prefix}/data/test/test.rec\")"
326+
]
327+
},
99328
{
100329
"cell_type": "markdown",
101330
"metadata": {},
@@ -110,13 +339,15 @@
110339
"outputs": [],
111340
"source": [
112341
"training_image = sagemaker.image_uris.retrieve(\"image-classification\", region)\n",
342+
"num_training_samples = X_train.shape[0]\n",
343+
"num_classes = len(np.unique(y_train))\n",
113344
"\n",
114345
"hyperparameters = {\n",
115346
" \"num_layers\": 18,\n",
116347
" \"use_pretrained_model\": 1,\n",
117348
" \"augmentation_type\": \"crop_color_transform\",\n",
118349
" \"image_shape\": \"3,96,96\",\n",
119-
" \"num_classes\": 2,\n",
350+
" \"num_classes\": num_classes,\n",
120351
" \"num_training_samples\": num_training_samples,\n",
121352
" \"mini_batch_size\": 64,\n",
122353
" \"epochs\": 5,\n",
@@ -255,6 +486,8 @@
255486
"metadata": {},
256487
"outputs": [],
257488
"source": [
489+
"mpg_name = \"cv-metastasis-{}\".format(datetime.now().strftime(\"%Y-%m-%d-%H-%M-%S\"))\n",
490+
"\n",
258491
"model_approval_status = ParameterString(\n",
259492
" name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\"\n",
260493
")\n",
@@ -287,7 +520,7 @@
287520
"source": [
288521
"model = sagemaker.model.Model(\n",
289522
" name=f\"{mpg_name}-pipline\",\n",
290-
" image_uri=train_step.properties.AlgorithmSpecification.TrainingImage,\n",
523+
" image_uri=training_image,\n",
291524
" model_data=train_step.properties.ModelArtifacts.S3ModelArtifacts,\n",
292525
" sagemaker_session=sagemaker_session,\n",
293526
" role=sagemaker_role,\n",
@@ -315,6 +548,7 @@
315548
" Filename=\"deploy_model.py\", Bucket=bucket, Key=f\"{prefix}/code/deploy_model.py\"\n",
316549
")\n",
317550
"deploy_model_script_uri = f\"s3://{bucket}/{prefix}/code/deploy_model.py\"\n",
551+
"deploy_instance_type = \"ml.m4.xlarge\"\n",
318552
"\n",
319553
"deploy_model_processor = SKLearnProcessor(\n",
320554
" framework_version=\"0.23-1\",\n",
@@ -355,7 +589,7 @@
355589
"metadata": {},
356590
"outputs": [],
357591
"source": [
358-
"pipeline_name = f\"{prefix}-pipeline\"\n",
592+
"pipeline_name = \"{}-pipeline-{}\".format(prefix, datetime.now().strftime(\"%Y-%m-%d-%H-%M-%S\"))\n",
359593
"\n",
360594
"pipeline = Pipeline(\n",
361595
" name=pipeline_name,\n",
@@ -419,7 +653,49 @@
419653
"metadata": {},
420654
"outputs": [],
421655
"source": [
422-
"best_model.sagemaker_session.delete_endpoint(mpg_name)"
656+
"def delete_model_package_group(sm_client, package_group_name):\n",
657+
" try:\n",
658+
" model_versions = sm_client.list_model_packages(ModelPackageGroupName=package_group_name)\n",
659+
"\n",
660+
" except Exception as e:\n",
661+
" print(\"{} \\n\".format(e))\n",
662+
" return\n",
663+
"\n",
664+
" for model_version in model_versions[\"ModelPackageSummaryList\"]:\n",
665+
" try:\n",
666+
" sm_client.delete_model_package(ModelPackageName=model_version[\"ModelPackageArn\"])\n",
667+
" except Exception as e:\n",
668+
" print(\"{} \\n\".format(e))\n",
669+
" time.sleep(0.5) # Ensure requests aren't throttled\n",
670+
"\n",
671+
" try:\n",
672+
" sm_client.delete_model_package_group(ModelPackageGroupName=package_group_name)\n",
673+
" print(\"{} model package group deleted\".format(package_group_name))\n",
674+
" except Exception as e:\n",
675+
" print(\"{} \\n\".format(e))\n",
676+
" return\n",
677+
"\n",
678+
"\n",
679+
"def delete_sagemaker_pipeline(sm_client, pipeline_name):\n",
680+
" try:\n",
681+
" sm_client.delete_pipeline(\n",
682+
" PipelineName=pipeline_name,\n",
683+
" )\n",
684+
" print(\"{} pipeline deleted\".format(pipeline_name))\n",
685+
" except Exception as e:\n",
686+
" print(\"{} \\n\".format(e))\n",
687+
" return"
688+
]
689+
},
690+
{
691+
"cell_type": "code",
692+
"execution_count": null,
693+
"metadata": {},
694+
"outputs": [],
695+
"source": [
696+
"client = sagemaker.Session().sagemaker_client\n",
697+
"delete_model_package_group(client, mpg_name)\n",
698+
"delete_sagemaker_pipeline(client, pipeline_name)"
423699
]
424700
},
425701
{
@@ -433,9 +709,9 @@
433709
"metadata": {
434710
"instance_type": "ml.t3.medium",
435711
"kernelspec": {
436-
"display_name": "conda_python3",
712+
"display_name": "conda_mxnet_p36",
437713
"language": "python",
438-
"name": "conda_python3"
714+
"name": "conda_mxnet_p36"
439715
},
440716
"language_info": {
441717
"codemirror_mode": {

0 commit comments

Comments
 (0)