diff --git a/hyper-parameter-optimization/environment.yml b/hyper-parameter-optimization/environment.yml
new file mode 100644
index 0000000..b908644
--- /dev/null
+++ b/hyper-parameter-optimization/environment.yml
@@ -0,0 +1,17 @@
+name: pytorch
+channels:
+ - conda-forge
+ - pytorch
+ - defaults
+dependencies:
+ - python=3.7
+ - dask
+ - numpy
+ - pandas
+ - coiled
+ - dask-ml
+ - skorch
+ - scipy
+ - matplotlib
+ - pytorch>1.1.0
+ - s3fs
\ No newline at end of file
diff --git a/hyper-parameter-optimization/hyper-parameter-optimization.ipynb b/hyper-parameter-optimization/hyper-parameter-optimization.ipynb
new file mode 100644
index 0000000..ffeff57
--- /dev/null
+++ b/hyper-parameter-optimization/hyper-parameter-optimization.ipynb
@@ -0,0 +1,528 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Hyperparameter Optimization with Dask and Coiled\n",
+ "\n",
+ "This example will walk through the following:\n",
+ "\n",
+ "* **Getting and processing the data.**\n",
+ "* **Defining a model and parameters.**\n",
+ "* **Finding the best parameters,** and some details on why we're using the chosen search algorithm.\n",
+ "* **Scoring** and deploying.\n",
+ "\n",
+ "All of these tasks will be performed on the New York City Taxi Cab dataset."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Setup cluster"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating Cluster. This takes about a minute ...\r"
+ ]
+ }
+ ],
+ "source": [
+ "# Create cluster with Coiled\n",
+ "import coiled\n",
+ "\n",
+ "cluster = coiled.Cluster(\n",
+ " n_workers=20,\n",
+ " configuration=\"coiled-examples/pytorch\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "\n",
+ "Client\n",
+ "\n",
+ " | \n",
+ "\n",
+ "Cluster\n",
+ "\n",
+ " - Workers: 20
\n",
+ " - Cores: 80
\n",
+ " - Memory: 343.60 GB
\n",
+ " \n",
+ " | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Connect Dask to the cluster\n",
+ "import dask.distributed\n",
+ "\n",
+ "client = dask.distributed.Client(cluster)\n",
+ "client"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### ☝️ Don’t forget to click the \"Dashboard\" link above to view the cluster dashboard!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Get and pre-process data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This example will mirror the Kaggle \"[NYC Taxi Trip Duration][1]\" example with different data.\n",
+ "\n",
+ "These data have records on 84 million taxi rides.\n",
+ "\n",
+ "[1]:https://www.kaggle.com/c/nyc-taxi-trip-duration/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import dask.dataframe as dd\n",
+ "\n",
+ "features = [\"passenger_count\", \"trip_distance\", \"fare_amount\"]\n",
+ "categorical_features = [\"RatecodeID\", \"payment_type\"]\n",
+ "output = [\"tpep_pickup_datetime\", \"tpep_dropoff_datetime\"]\n",
+ "\n",
+ "df = dd.read_csv(\n",
+ " \"s3://nyc-tlc/trip data/yellow_tripdata_2019-*.csv\", \n",
+ " parse_dates=output,\n",
+ " usecols=features + categorical_features + output,\n",
+ " dtype={\n",
+ " \"passenger_count\": \"UInt8\",\n",
+ " \"RatecodeID\": \"category\",\n",
+ " \"payment_type\": \"category\",\n",
+ " },\n",
+ " blocksize=\"16 MiB\",\n",
+ ")\n",
+ "\n",
+ "df = df.repartition(partition_size=\"10 MiB\").persist()\n",
+ "\n",
+ "# one hot encode the categorical columns\n",
+ "df = df.categorize(categorical_features)\n",
+ "df = dd.get_dummies(df, columns=categorical_features)\n",
+ "\n",
+ "# persist so only download once\n",
+ "df = df.persist()\n",
+ "\n",
+ "data = df[[c for c in df.columns if c not in output]]\n",
+ "data = data.fillna(0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "durations = (df[\"tpep_dropoff_datetime\"] - df[\"tpep_pickup_datetime\"]).dt.total_seconds() / 60 # minutes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from dask_ml.model_selection import train_test_split\n",
+ "import dask\n",
+ "\n",
+ "X = data.to_dask_array(lengths=True).astype(\"float32\")\n",
+ "y = durations.to_dask_array(lengths=True).astype(\"float32\")\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2, shuffle=True)\n",
+ "\n",
+ "# persist the data so it's not re-computed\n",
+ "X_train, X_test, y_train, y_test = dask.persist(X_train, X_test, y_train, y_test)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Define model and hyperparameters"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's use a simple neural network from [PyTorch] using [Skorch], a simple wrapper that provides a Scikit-Learn API for PyTorch.\n",
+ "\n",
+ "This network is only small for demonstration. If desired, we could use much larger networks on GPUs.\n",
+ "\n",
+ "[PyTorch]:https://pytorch.org/\n",
+ "[skorch]:https://skorch.readthedocs.io/en/stable/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import our HiddenLayerNet pytorch model from a local torch_model.py module\n",
+ "from torch_model import HiddenLayerNet\n",
+ "# Send module with HiddenLayerNet to workers on cluster\n",
+ "client.upload_file(\"torch_model.py\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "import torch\n",
+ "import torch.optim as optim\n",
+ "import torch.nn as nn\n",
+ "import torch.nn.functional as F\n",
+ "\n",
+ "class HiddenLayerNet(nn.Module):\n",
+ " def __init__(self, n_features=10, n_outputs=1, n_hidden=100, activation=\"relu\"):\n",
+ " super().__init__()\n",
+ " self.fc1 = nn.Linear(n_features, n_hidden)\n",
+ " self.fc2 = nn.Linear(n_hidden, n_outputs)\n",
+ " self.activation = getattr(F, activation)\n",
+ "\n",
+ " def forward(self, x, **kwargs):\n",
+ " return self.fc2(self.activation(self.fc1(x)))"
+ ]
+ }
+ ],
+ "source": [
+ "# Print contents of torch_model.py module\n",
+ "!cat torch_model.py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "import torch.optim as optim\n",
+ "import torch.nn as nn\n",
+ "from skorch import NeuralNetRegressor\n",
+ "\n",
+ "niceties = {\n",
+ " \"callbacks\": False,\n",
+ " \"warm_start\": True,\n",
+ " \"train_split\": None,\n",
+ " \"max_epochs\": 1,\n",
+ "}\n",
+ "\n",
+ "class NonNanLossRegressor(NeuralNetRegressor):\n",
+ " def get_loss(self, y_pred, y_true, X=None, training=False):\n",
+ " if torch.abs(y_true - y_pred).abs().mean() > 1e6:\n",
+ " return torch.tensor([0.0], requires_grad=True)\n",
+ " return super().get_loss(y_pred, y_true, X=X, training=training)\n",
+ "\n",
+ "model = NonNanLossRegressor(\n",
+ " module=HiddenLayerNet,\n",
+ " module__n_features=X_train.shape[1],\n",
+ " optimizer=optim.SGD,\n",
+ " criterion=nn.MSELoss,\n",
+ " lr=0.0001,\n",
+ " **niceties,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from scipy.stats import loguniform, uniform\n",
+ "\n",
+ "params = {\n",
+ " \"module__activation\": [\"relu\", \"elu\", \"softsign\", \"leaky_relu\", \"rrelu\"],\n",
+ " \"batch_size\": [32, 64, 128, 256],\n",
+ " \"optimizer__lr\": loguniform(1e-4, 1e-3),\n",
+ " \"optimizer__weight_decay\": loguniform(1e-6, 1e-3),\n",
+ " \"optimizer__momentum\": uniform(0, 1),\n",
+ " \"optimizer__nesterov\": [True],\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "All of these parameters control model architecture, execpt for two basic optimizatino parameters, `batch_size` and `learning_rate_init`. They control finding the best model of a particular architecture."
+ ]
+ },
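+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sketch of what this search space looks like, we can draw a couple of parameter combinations with Scikit-Learn's `ParameterSampler` (scikit-learn is already available as a dask-ml dependency):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import ParameterSampler\n",
+ "\n",
+ "# Preview two sampled parameter combinations from the space above\n",
+ "list(ParameterSampler(params, n_iter=2, random_state=2))"
+ ]
+ },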
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Find the best hyperparameters"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Our search is \"computationally-constrained\" because (hypothetically) it requires GPUs and has a pretty complicated search space (in reality it has neither of those features). And obviously it's \"memory-constrained\" because the dataset doesn't fit in memory.\n",
+ "\n",
+ "[Dask-ML's documentation on hyperparameter searches][2] indicates that we should use `HyperbandSearchCV`.\n",
+ "\n",
+ "[2]:https://ml.dask.org/hyper-parameter-search.html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from dask_ml.model_selection import HyperbandSearchCV\n",
+ "search = HyperbandSearchCV(model, params, random_state=2, verbose=True, max_iter=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "By default, `HyperbandSearchCV` will call `partial_fit` on each chunk of the Dask Array. `HyperbandSearchCV`'s rule of thumb specifies how to train for longer or sample more parameters."
+ ]
+ },
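+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a sketch of that rule of thumb (following the Dask-ML documentation; the specific numbers here are illustrative assumptions): decide how many parameters to sample and how much data the longest-trained model should see, then derive `max_iter` and the chunk size."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Assumption: sample ~9 parameter combinations, and let the\n",
+ "# longest-trained model see the training data ~9 times over.\n",
+ "n_params = 9\n",
+ "n_examples = 9 * len(X_train)\n",
+ "\n",
+ "max_iter = n_params  # what we passed to HyperbandSearchCV above\n",
+ "chunk_size = n_examples // n_params  # target chunk size for X_train\n",
+ "max_iter, chunk_size"
+ ]
+ },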
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[CV, bracket=2] creating 9 models\n",
+ "[CV, bracket=1] creating 5 models\n",
+ "[CV, bracket=0] creating 3 models\n",
+ "[CV, bracket=0] For training there are between 119153 and 249047 examples in each chunk\n",
+ "[CV, bracket=1] For training there are between 119153 and 249047 examples in each chunk\n",
+ "[CV, bracket=2] For training there are between 119153 and 249047 examples in each chunk\n",
+ "[CV, bracket=1] validation score of 0.0202 received after 1 partial_fit calls\n",
+ "[CV, bracket=0] validation score of -3.3790 received after 1 partial_fit calls\n",
+ "[CV, bracket=1] validation score of 0.0210 received after 3 partial_fit calls\n",
+ "[CV, bracket=2] validation score of 0.0229 received after 1 partial_fit calls\n",
+ "[CV, bracket=1] validation score of -299404463816680.2500 received after 9 partial_fit calls\n",
+ "[CV, bracket=0] validation score of -11.9127 received after 9 partial_fit calls\n",
+ "[CV, bracket=2] validation score of 0.0232 received after 3 partial_fit calls\n",
+ "[CV, bracket=2] validation score of 0.0280 received after 9 partial_fit calls\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "HyperbandSearchCV(estimator=[uninitialized](\n",
+ " module=,\n",
+ " module__n_features=15,\n",
+ "),\n",
+ " max_iter=9,\n",
+ " parameters={'batch_size': [32, 64, 128, 256],\n",
+ " 'module__activation': ['relu', 'elu', 'softsign',\n",
+ " 'leaky_relu', 'rrelu'],\n",
+ " 'optimizer__lr': ,\n",
+ " 'optimizer__momentum': ,\n",
+ " 'optimizer__nesterov': [True],\n",
+ " 'optimizer__weight_decay': },\n",
+ " random_state=2, verbose=True)"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_train2 = y_train.reshape(-1, 1).persist()\n",
+ "search.fit(X_train, y_train2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Score"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`HyperbandSearchCV` and the like mirror the Scikit-Learn model selection interface, so all attributes of Scikit-Learn's [RandomizedSearchCV][rscv] are available:\n",
+ "\n",
+ "[rscv]:https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.028028356182226544"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.best_score_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'batch_size': 256,\n",
+ " 'module__activation': 'softsign',\n",
+ " 'optimizer__lr': 0.00015404537696021744,\n",
+ " 'optimizer__momentum': 0.15141540401838427,\n",
+ " 'optimizer__nesterov': True,\n",
+ " 'optimizer__weight_decay': 0.000576470051148445}"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.best_params_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[initialized](\n",
+ " module_=HiddenLayerNet(\n",
+ " (fc1): Linear(in_features=15, out_features=100, bias=True)\n",
+ " (fc2): Linear(in_features=100, out_features=1, bias=True)\n",
+ " ),\n",
+ ")"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "search.best_estimator_"
+ ]
+ },
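+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The full search history is available as well; for instance, `cv_results_` loads neatly into a Pandas DataFrame (a quick sketch):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# One row per sampled parameter combination\n",
+ "pd.DataFrame(search.cv_results_).head()"
+ ]
+ },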
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This means we can deploy the best model and score on the testing dataset:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.028248285332490686"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from dask_ml.wrappers import ParallelPostFit\n",
+ "deployed_model = ParallelPostFit(search.best_estimator_)\n",
+ "deployed_model.score(X_test, y_test)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/hyper-parameter-optimization/torch_model.py b/hyper-parameter-optimization/torch_model.py
new file mode 100644
index 0000000..94d9a90
--- /dev/null
+++ b/hyper-parameter-optimization/torch_model.py
@@ -0,0 +1,13 @@
+import torch.nn as nn
+import torch.nn.functional as F
+
+class HiddenLayerNet(nn.Module):
+    def __init__(self, n_features=10, n_outputs=1, n_hidden=100, activation="relu"):
+        super().__init__()
+        self.fc1 = nn.Linear(n_features, n_hidden)
+        self.fc2 = nn.Linear(n_hidden, n_outputs)
+        # Look up the activation function (e.g. F.relu) by name
+        self.activation = getattr(F, activation)
+
+    def forward(self, x, **kwargs):
+        return self.fc2(self.activation(self.fc1(x)))
\ No newline at end of file