diff --git a/hyper-parameter-optimization/environment.yml b/hyper-parameter-optimization/environment.yml
new file mode 100644
index 0000000..b908644
--- /dev/null
+++ b/hyper-parameter-optimization/environment.yml
@@ -0,0 +1,17 @@
+name: pytorch
+channels:
+  - conda-forge
+  - pytorch
+  - defaults
+dependencies:
+  - python=3.7
+  - dask
+  - numpy
+  - pandas
+  - coiled
+  - dask-ml
+  - skorch
+  - scipy
+  - matplotlib
+  - pytorch>1.1.0
+  - s3fs
\ No newline at end of file
diff --git a/hyper-parameter-optimization/hyper-parameter-optimization.ipynb b/hyper-parameter-optimization/hyper-parameter-optimization.ipynb
new file mode 100644
index 0000000..ffeff57
--- /dev/null
+++ b/hyper-parameter-optimization/hyper-parameter-optimization.ipynb
@@ -0,0 +1,528 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Hyperparameter Optimization with Dask and Coiled\n",
+    "\n",
+    "This example will walk through the following:\n",
+    "\n",
+    "* **Getting and processing the data.**\n",
+    "* **Defining a model and its hyperparameters.**\n",
+    "* **Finding the best hyperparameters,** with some details on why we're using the chosen search algorithm.\n",
+    "* **Scoring** and deploying the best model.\n",
+    "\n",
+    "All of these tasks will be performed on the New York City Taxi Cab dataset."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set up the cluster"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Creating Cluster. This takes about a minute ...\r"
+     ]
+    }
+   ],
+   "source": [
+    "# Create a cluster with Coiled\n",
+    "import coiled\n",
+    "\n",
+    "cluster = coiled.Cluster(\n",
+    "    n_workers=20,\n",
+    "    configuration=\"coiled-examples/pytorch\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 20
  • \n", + "
  • Cores: 80
  • \n", + "
  • Memory: 343.60 GB
  • \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Connect Dask to the cluster\n", + "import dask.distributed\n", + "\n", + "client = dask.distributed.Client(cluster)\n", + "client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ☝️ Don’t forget to click the \"Dashboard\" link above to view the cluster dashboard!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get and pre-process data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This example will mirror the Kaggle \"[NYC Taxi Trip Duration][1]\" example with different data.\n", + "\n", + "These data have records on 84 million taxi rides.\n", + "\n", + "[1]:https://www.kaggle.com/c/nyc-taxi-trip-duration/" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import dask.dataframe as dd\n", + "\n", + "features = [\"passenger_count\", \"trip_distance\", \"fare_amount\"]\n", + "categorical_features = [\"RatecodeID\", \"payment_type\"]\n", + "output = [\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"]\n", + "\n", + "df = dd.read_csv(\n", + " \"s3://nyc-tlc/trip data/yellow_tripdata_2019-*.csv\", \n", + " parse_dates=output,\n", + " usecols=features + categorical_features + output,\n", + " dtype={\n", + " \"passenger_count\": \"UInt8\",\n", + " \"RatecodeID\": \"category\",\n", + " \"payment_type\": \"category\",\n", + " },\n", + " blocksize=\"16 MiB\",\n", + ")\n", + "\n", + "df = df.repartition(partition_size=\"10 MiB\").persist()\n", + "\n", + "# one hot encode the categorical columns\n", + "df = df.categorize(categorical_features)\n", + "df = dd.get_dummies(df, columns=categorical_features)\n", + "\n", + "# persist so only download once\n", + "df = df.persist()\n", + "\n", + "data = df[[c for c in df.columns if c not in output]]\n", + "data = data.fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "durations = (df[\"tpep_dropoff_datetime\"] - df[\"tpep_pickup_datetime\"]).dt.total_seconds() / 60 # minutes" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from dask_ml.model_selection import train_test_split\n", + "import dask\n", + "\n", + "X = data.to_dask_array(lengths=True).astype(\"float32\")\n", + "y = durations.to_dask_array(lengths=True).astype(\"float32\")\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2, shuffle=True)\n", + "\n", + "# persist the data so it's not re-computed\n", + "X_train, X_test, y_train, y_test = dask.persist(X_train, X_test, y_train, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define model and hyperparameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's use a simple neural network from [PyTorch] using [Skorch], a simple wrapper that provides a Scikit-Learn API for PyTorch.\n", + "\n", + "This network is only small for demonstration. 
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.optim as optim\n",
+    "import torch.nn as nn\n",
+    "from skorch import NeuralNetRegressor\n",
+    "\n",
+    "niceties = {\n",
+    "    \"callbacks\": False,\n",
+    "    \"warm_start\": True,\n",
+    "    \"train_split\": None,\n",
+    "    \"max_epochs\": 1,\n",
+    "}\n",
+    "\n",
+    "class NonNanLossRegressor(NeuralNetRegressor):\n",
+    "    def get_loss(self, y_pred, y_true, X=None, training=False):\n",
+    "        # Guard against diverging models: return a zero loss (with a\n",
+    "        # gradient) when predictions are wildly off, instead of NaNs\n",
+    "        if (y_true - y_pred).abs().mean() > 1e6:\n",
+    "            return torch.tensor([0.0], requires_grad=True)\n",
+    "        return super().get_loss(y_pred, y_true, X=X, training=training)\n",
+    "\n",
+    "model = NonNanLossRegressor(\n",
+    "    module=HiddenLayerNet,\n",
+    "    module__n_features=X_train.shape[1],\n",
+    "    optimizer=optim.SGD,\n",
+    "    criterion=nn.MSELoss,\n",
+    "    lr=0.0001,\n",
+    "    **niceties,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.stats import loguniform, uniform\n",
+    "\n",
+    "params = {\n",
+    "    \"module__activation\": [\"relu\", \"elu\", \"softsign\", \"leaky_relu\", \"rrelu\"],\n",
+    "    \"batch_size\": [32, 64, 128, 256],\n",
+    "    \"optimizer__lr\": loguniform(1e-4, 1e-3),\n",
+    "    \"optimizer__weight_decay\": loguniform(1e-6, 1e-3),\n",
+    "    \"optimizer__momentum\": uniform(0, 1),\n",
+    "    \"optimizer__nesterov\": [True],\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`module__activation` controls the model architecture; the remaining parameters, like `batch_size` and `optimizer__lr`, control the optimization procedure that finds the best model of a particular architecture."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Find the best hyperparameters"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Our search is \"computationally constrained\" because (hypothetically) it requires GPUs and has a fairly complicated search space (in reality it has neither of those features). It is also \"memory constrained\" because the dataset doesn't fit in memory.\n",
+    "\n",
+    "[Dask-ML's documentation on hyperparameter searches][2] indicates that we should use `HyperbandSearchCV` in this situation.\n",
+    "\n",
+    "[2]:https://ml.dask.org/hyper-parameter-search.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dask_ml.model_selection import HyperbandSearchCV\n",
+    "\n",
+    "search = HyperbandSearchCV(model, params, random_state=2, verbose=True, max_iter=9)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "By default, `HyperbandSearchCV` calls `partial_fit` on each chunk of the Dask array. Its rule of thumb specifies how to trade off training each model for longer against sampling more parameter combinations."
+   ]
+  },
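+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a rough sketch of that rule of thumb (the numbers below are illustrative assumptions, not values computed by Dask-ML): choose how many examples the best model should see and how many parameter combinations to sample, and those two choices determine `max_iter` and the chunk size. With these assumed values, `max_iter` works out to the `max_iter=9` used above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch of the Hyperband rule of thumb (assumed values)\n",
+    "n_examples = 4 * len(X_train)  # let the best model see the data ~4 times\n",
+    "n_params = 9                   # number of parameter combinations to sample\n",
+    "\n",
+    "# max_iter bounds the number of partial_fit calls per model, and each\n",
+    "# call sees one chunk with roughly this many examples\n",
+    "max_iter = n_params\n",
+    "chunk_size = n_examples // n_params"
+   ]
+  },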
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[CV, bracket=2] creating 9 models\n",
+      "[CV, bracket=1] creating 5 models\n",
+      "[CV, bracket=0] creating 3 models\n",
+      "[CV, bracket=0] For training there are between 119153 and 249047 examples in each chunk\n",
+      "[CV, bracket=1] For training there are between 119153 and 249047 examples in each chunk\n",
+      "[CV, bracket=2] For training there are between 119153 and 249047 examples in each chunk\n",
+      "[CV, bracket=1] validation score of 0.0202 received after 1 partial_fit calls\n",
+      "[CV, bracket=0] validation score of -3.3790 received after 1 partial_fit calls\n",
+      "[CV, bracket=1] validation score of 0.0210 received after 3 partial_fit calls\n",
+      "[CV, bracket=2] validation score of 0.0229 received after 1 partial_fit calls\n",
+      "[CV, bracket=1] validation score of -299404463816680.2500 received after 9 partial_fit calls\n",
+      "[CV, bracket=0] validation score of -11.9127 received after 9 partial_fit calls\n",
+      "[CV, bracket=2] validation score of 0.0232 received after 3 partial_fit calls\n",
+      "[CV, bracket=2] validation score of 0.0280 received after 9 partial_fit calls\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "HyperbandSearchCV(estimator=<class '__main__.NonNanLossRegressor'>[uninitialized](\n",
+       "  module=<class 'torch_model.HiddenLayerNet'>,\n",
+       "  module__n_features=15,\n",
+       "),\n",
+       "                  max_iter=9,\n",
+       "                  parameters={'batch_size': [32, 64, 128, 256],\n",
+       "                              'module__activation': ['relu', 'elu', 'softsign',\n",
+       "                                                     'leaky_relu', 'rrelu'],\n",
+       "                              'optimizer__lr': <scipy.stats._distn_infrastructure.rv_frozen object at 0x...>,\n",
+       "                              'optimizer__momentum': <scipy.stats._distn_infrastructure.rv_frozen object at 0x...>,\n",
+       "                              'optimizer__nesterov': [True],\n",
+       "                              'optimizer__weight_decay': <scipy.stats._distn_infrastructure.rv_frozen object at 0x...>},\n",
+       "                  random_state=2, verbose=True)"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# skorch's NeuralNetRegressor expects a 2-D target array\n",
+    "y_train2 = y_train.reshape(-1, 1).persist()\n",
+    "search.fit(X_train, y_train2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Score"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`HyperbandSearchCV` and the other Dask-ML searches mirror the Scikit-Learn model selection interface, so all attributes of Scikit-Learn's [RandomizedSearchCV][rscv] are available:\n",
+    "\n",
+    "[rscv]:https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.028028356182226544"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "search.best_score_"
+   ]
+  },
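+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For example, `cv_results_` records the parameters and final score of every model the search tried. A minimal sketch of inspecting it with Pandas (the exact column names can vary with the Dask-ML version):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# One row per sampled parameter combination, with its score\n",
+    "pd.DataFrame(search.cv_results_).head()"
+   ]
+  },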
"outputs": [ + { + "data": { + "text/plain": [ + "{'batch_size': 256,\n", + " 'module__activation': 'softsign',\n", + " 'optimizer__lr': 0.00015404537696021744,\n", + " 'optimizer__momentum': 0.15141540401838427,\n", + " 'optimizer__nesterov': True,\n", + " 'optimizer__weight_decay': 0.000576470051148445}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[initialized](\n", + " module_=HiddenLayerNet(\n", + " (fc1): Linear(in_features=15, out_features=100, bias=True)\n", + " (fc2): Linear(in_features=100, out_features=1, bias=True)\n", + " ),\n", + ")" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search.best_estimator_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This means we can deploy the best model and score on the testing dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.028248285332490686" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dask_ml.wrappers import ParallelPostFit\n", + "deployed_model = ParallelPostFit(search.best_estimator_)\n", + "deployed_model.score(X_test, y_test)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/hyper-parameter-optimmization/torch_model.py b/hyper-parameter-optimmization/torch_model.py new file mode 100644 index 0000000..94d9a90 --- /dev/null +++ b/hyper-parameter-optimmization/torch_model.py @@ -0,0 +1,14 @@ +import torch +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F + +class HiddenLayerNet(nn.Module): + def __init__(self, n_features=10, n_outputs=1, n_hidden=100, activation="relu"): + super().__init__() + self.fc1 = nn.Linear(n_features, n_hidden) + self.fc2 = nn.Linear(n_hidden, n_outputs) + self.activation = getattr(F, activation) + + def forward(self, x, **kwargs): + return self.fc2(self.activation(self.fc1(x))) \ No newline at end of file