diff --git a/examples/vision/captcha_ocr.py b/examples/vision/captcha_ocr.py
index fc4430b1f3..740fdebc6b 100644
--- a/examples/vision/captcha_ocr.py
+++ b/examples/vision/captcha_ocr.py
@@ -22,6 +22,10 @@
 ## Setup
 """
 
+import os
+
+os.environ["KERAS_BACKEND"] = "tensorflow"
+
 import os
 import numpy as np
 import matplotlib.pyplot as plt
@@ -30,9 +34,8 @@ from collections import Counter
 
 import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers
-
+import keras
+from keras import layers
 
 """
 ## Load the data: [Captcha Images](https://www.kaggle.com/fournierp/captcha-version-2-images)
@@ -180,10 +183,64 @@ def encode_single_sample(img_path, label):
 """
 
 
+def ctc_batch_cost(y_true, y_pred, input_length, label_length):
+    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
+    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
+    sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32)
+
+    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())
+
+    return tf.expand_dims(
+        tf.compat.v1.nn.ctc_loss(
+            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
+        ),
+        1,
+    )
+
+
+def ctc_label_dense_to_sparse(labels, label_lengths):
+    label_shape = tf.shape(labels)
+    num_batches_tns = tf.stack([label_shape[0]])
+    max_num_labels_tns = tf.stack([label_shape[1]])
+
+    def range_less_than(old_input, current_input):
+        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
+            max_num_labels_tns, current_input
+        )
+
+    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
+    dense_mask = tf.compat.v1.scan(
+        range_less_than, label_lengths, initializer=init, parallel_iterations=1
+    )
+    dense_mask = dense_mask[:, 0, :]
+
+    label_array = tf.reshape(
+        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
+    )
+    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)
+
+    batch_array = tf.transpose(
+        tf.reshape(
+            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
+            tf.reverse(label_shape, [0]),
+        )
+    )
+    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)
+    indices = tf.transpose(
+        tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])
+    )
+
+    vals_sparse = tf.compat.v1.gather_nd(labels, indices)
+
+    return tf.SparseTensor(
+        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
+    )
+
+
 class CTCLayer(layers.Layer):
     def __init__(self, name=None):
         super().__init__(name=name)
-        self.loss_fn = keras.backend.ctc_batch_cost
+        self.loss_fn = ctc_batch_cost
 
     def call(self, y_true, y_pred):
         # Compute the training-time loss value and add it
@@ -272,7 +329,8 @@ def build_model():
 """
 
-epochs = 100
+# TODO restore epoch count.
+epochs = 2
 early_stopping_patience = 10
 # Add early stopping
 early_stopping = keras.callbacks.EarlyStopping(
@@ -296,9 +354,33 @@ def build_model():
 """
 
 
+def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
+    input_shape = tf.shape(y_pred)
+    num_samples, num_steps = input_shape[0], input_shape[1]
+    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())
+    input_length = tf.cast(input_length, tf.int32)
+
+    if greedy:
+        (decoded, log_prob) = tf.nn.ctc_greedy_decoder(
+            inputs=y_pred, sequence_length=input_length
+        )
+    else:
+        (decoded, log_prob) = tf.compat.v1.nn.ctc_beam_search_decoder(
+            inputs=y_pred,
+            sequence_length=input_length,
+            beam_width=beam_width,
+            top_paths=top_paths,
+        )
+    decoded_dense = []
+    for st in decoded:
+        st = tf.SparseTensor(st.indices, st.values, (num_samples, num_steps))
+        decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1))
+    return (decoded_dense, log_prob)
+
+
 # Get the prediction model by extracting layers till the output layer
 prediction_model = keras.models.Model(
-    model.get_layer(name="image").input, model.get_layer(name="dense2").output
+    model.input[0], model.get_layer(name="dense2").output
 )
 prediction_model.summary()
 
@@ -307,7 +389,7 @@
 def decode_batch_predictions(pred):
     input_len = np.ones(pred.shape[0]) * pred.shape[1]
     # Use greedy search. For complex tasks, you can use beam search
-    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
+    results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
         :, :max_length
     ]
     # Iterate over the results and get back the text
diff --git a/examples/vision/img/captcha_ocr/captcha_ocr_13_0.png b/examples/vision/img/captcha_ocr/captcha_ocr_13_0.png
index d8f4b5e135..946d8b72bd 100644
Binary files a/examples/vision/img/captcha_ocr/captcha_ocr_13_0.png and b/examples/vision/img/captcha_ocr/captcha_ocr_13_0.png differ
diff --git a/examples/vision/img/captcha_ocr/captcha_ocr_19_6.png b/examples/vision/img/captcha_ocr/captcha_ocr_19_6.png
new file mode 100644
index 0000000000..ea2588e453
Binary files /dev/null and b/examples/vision/img/captcha_ocr/captcha_ocr_19_6.png differ
diff --git a/examples/vision/ipynb/captcha_ocr.ipynb b/examples/vision/ipynb/captcha_ocr.ipynb
index b34e3d9f37..5cbb2c5a92 100644
--- a/examples/vision/ipynb/captcha_ocr.ipynb
+++ b/examples/vision/ipynb/captcha_ocr.ipynb
@@ -41,12 +41,16 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 0,
  "metadata": {
   "colab_type": "code"
  },
  "outputs": [],
  "source": [
+  "import os\n",
+  "\n",
+  "os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
+  "\n",
   "import os\n",
   "import numpy as np\n",
   "import matplotlib.pyplot as plt\n",
@@ -55,8 +59,8 @@
   "from collections import Counter\n",
   "\n",
   "import tensorflow as tf\n",
-  "from tensorflow import keras\n",
-  "from tensorflow.keras import layers\n"
+  "import keras\n",
+  "from keras import layers"
  ]
 },
 {
@@ -71,7 +75,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 0,
  "metadata": {
   "colab_type": "code"
  },
@@ -97,7 +101,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 0,
  "metadata": {
   "colab_type": "code"
  },
@@ -133,7 +137,8 @@
   "downsample_factor = 4\n",
   "\n",
   "# Maximum length of any captcha in the dataset\n",
-  "max_length = max([len(label) for label in labels])\n"
+  "max_length = max([len(label) for label in labels])\n",
+  ""
  ]
 },
 {
@@ -147,7 +152,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 0,
  "metadata": {
   "colab_type": "code"
  },
@@ -155,9 +160,7 @@
  "source": [
   "\n",
   "# Mapping characters to integers\n",
-  "char_to_num = layers.StringLookup(\n",
-  "    vocabulary=list(characters), mask_token=None\n",
-  ")\n",
+  "char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)\n",
   "\n",
   "# Mapping integers back to original characters\n",
   "num_to_char = layers.StringLookup(\n",
@@ -199,7 +202,8 @@
   "    # 6. Map the characters in label to numbers\n",
   "    label = char_to_num(tf.strings.unicode_split(label, input_encoding=\"UTF-8\"))\n",
   "    # 7. Return a dict as our model is expecting two inputs\n",
-  "    return {\"image\": img, \"label\": label}\n"
+  "    return {\"image\": img, \"label\": label}\n",
+  ""
  ]
 },
 {
@@ -213,7 +217,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 0,
  "metadata": {
   "colab_type": "code"
  },
@@ -222,18 +226,14 @@
   "\n",
   "train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n",
   "train_dataset = (\n",
-  "    train_dataset.map(\n",
-  "        encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE\n",
-  "    )\n",
+  "    train_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)\n",
   "    .batch(batch_size)\n",
   "    .prefetch(buffer_size=tf.data.AUTOTUNE)\n",
   ")\n",
   "\n",
   "validation_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))\n",
   "validation_dataset = (\n",
-  "    validation_dataset.map(\n",
-  "        encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE\n",
-  "    )\n",
+  "    validation_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)\n",
   "    .batch(batch_size)\n",
   "    .prefetch(buffer_size=tf.data.AUTOTUNE)\n",
   ")"
  ]
@@ -250,7 +250,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 0,
  "metadata": {
   "colab_type": "code"
  },
@@ -281,17 +281,71 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 0,
  "metadata": {
   "colab_type": "code"
  },
  "outputs": [],
  "source": [
+  "\n",
+  "def ctc_batch_cost(y_true, y_pred, input_length, label_length):\n",
+  "    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)\n",
+  "    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)\n",
+  "    sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32)\n",
+  "\n",
+  "    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())\n",
+  "\n",
+  "    return tf.expand_dims(\n",
+  "        tf.compat.v1.nn.ctc_loss(\n",
+  "            inputs=y_pred, labels=sparse_labels, sequence_length=input_length\n",
+  "        ),\n",
+  "        1,\n",
+  "    )\n",
+  "\n",
+  "\n",
+  "def ctc_label_dense_to_sparse(labels, label_lengths):\n",
+  "    label_shape = tf.shape(labels)\n",
+  "    num_batches_tns = tf.stack([label_shape[0]])\n",
+  "    max_num_labels_tns = tf.stack([label_shape[1]])\n",
+  "\n",
+  "    def range_less_than(old_input, current_input):\n",
+  "        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(\n",
+  "            max_num_labels_tns, current_input\n",
+  "        )\n",
+  "\n",
+  "    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)\n",
+  "    dense_mask = tf.compat.v1.scan(\n",
+  "        range_less_than, label_lengths, initializer=init, parallel_iterations=1\n",
+  "    )\n",
+  "    dense_mask = dense_mask[:, 0, :]\n",
+  "\n",
+  "    label_array = tf.reshape(\n",
+  "        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape\n",
+  "    )\n",
+  "    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)\n",
+  "\n",
+  "    batch_array = tf.transpose(\n",
+  "        tf.reshape(\n",
+  "            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),\n",
+  "            tf.reverse(label_shape, [0]),\n",
+  "        )\n",
+  "    )\n",
+  "    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)\n",
+  "    indices = tf.transpose(\n",
+  "        tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])\n",
+  "    )\n",
+  "\n",
+  "    vals_sparse = tf.compat.v1.gather_nd(labels, indices)\n",
+  "\n",
+  "    return tf.SparseTensor(\n",
+  "        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)\n",
+  "    )\n",
+  "\n",
   "\n",
   "class CTCLayer(layers.Layer):\n",
   "    def __init__(self, name=None):\n",
   "        super().__init__(name=name)\n",
-  "        self.loss_fn = keras.backend.ctc_batch_cost\n",
+  "        self.loss_fn = ctc_batch_cost\n",
   "\n",
   "    def call(self, y_true, y_pred):\n",
   "        # Compute the training-time loss value and add it\n",
@@ -387,14 +441,15 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 0,
  "metadata": {
   "colab_type": "code"
  },
  "outputs": [],
  "source": [
   "\n",
-  "epochs = 100\n",
+  "# TODO restore epoch count.\n",
+  "epochs = 2\n",
   "early_stopping_patience = 10\n",
   "# Add early stopping\n",
   "early_stopping = keras.callbacks.EarlyStopping(\n",
@@ -407,7 +462,8 @@
   "    validation_data=validation_dataset,\n",
   "    epochs=epochs,\n",
   "    callbacks=[early_stopping],\n",
-  ")\n"
+  ")\n",
+  ""
  ]
 },
 {
@@ -418,30 +474,55 @@
  "source": [
   "## Inference\n",
   "\n",
-  "You can use the trained model hosted on [Hugging Face Hub](https://huggingface.co/keras-io/ocr-for-captcha) \n",
+  "You can use the trained model hosted on [Hugging Face Hub](https://huggingface.co/keras-io/ocr-for-captcha)\n",
   "and try the demo on [Hugging Face Spaces](https://huggingface.co/spaces/keras-io/ocr-for-captcha)."
  ]
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 0,
  "metadata": {
   "colab_type": "code"
  },
  "outputs": [],
  "source": [
+  "\n",
+  "def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):\n",
+  "    input_shape = tf.shape(y_pred)\n",
+  "    num_samples, num_steps = input_shape[0], input_shape[1]\n",
+  "    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())\n",
+  "    input_length = tf.cast(input_length, tf.int32)\n",
+  "\n",
+  "    if greedy:\n",
+  "        (decoded, log_prob) = tf.nn.ctc_greedy_decoder(\n",
+  "            inputs=y_pred, sequence_length=input_length\n",
+  "        )\n",
+  "    else:\n",
+  "        (decoded, log_prob) = tf.compat.v1.nn.ctc_beam_search_decoder(\n",
+  "            inputs=y_pred,\n",
+  "            sequence_length=input_length,\n",
+  "            beam_width=beam_width,\n",
+  "            top_paths=top_paths,\n",
+  "        )\n",
+  "    decoded_dense = []\n",
+  "    for st in decoded:\n",
+  "        st = tf.SparseTensor(st.indices, st.values, (num_samples, num_steps))\n",
+  "        decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1))\n",
+  "    return (decoded_dense, log_prob)\n",
+  "\n",
   "\n",
   "# Get the prediction model by extracting layers till the output layer\n",
   "prediction_model = keras.models.Model(\n",
-  "    model.get_layer(name=\"image\").input, model.get_layer(name=\"dense2\").output\n",
+  "    model.input[0], model.get_layer(name=\"dense2\").output\n",
   ")\n",
   "prediction_model.summary()\n",
   "\n",
+  "\n",
   "# A utility function to decode the output of the network\n",
   "def decode_batch_predictions(pred):\n",
   "    input_len = np.ones(pred.shape[0]) * pred.shape[1]\n",
   "    # Use greedy search. For complex tasks, you can use beam search\n",
-  "    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][\n",
+  "    results = ctc_decode(pred, input_length=input_len, greedy=True)[0][0][\n",
   "        :, :max_length\n",
   "    ]\n",
   "    # Iterate over the results and get back the text\n",
@@ -506,4 +587,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 0
-}
+}
\ No newline at end of file
diff --git a/examples/vision/md/captcha_ocr.md b/examples/vision/md/captcha_ocr.md
index 75329fe5f2..b9e41ecf5b 100644
--- a/examples/vision/md/captcha_ocr.md
+++ b/examples/vision/md/captcha_ocr.md
@@ -25,6 +25,10 @@ in the developer guides.
 
 ```python
+import os
+
+os.environ["KERAS_BACKEND"] = "tensorflow"
+
 import os
 import numpy as np
 import matplotlib.pyplot as plt
@@ -33,9 +37,8 @@ from pathlib import Path
 from collections import Counter
 
 import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras import layers
-
+import keras
+from keras import layers
 ```
 
 ---
@@ -52,8 +55,8 @@ Let's download the data.
 ```
   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                  Dload  Upload   Total   Spent    Left  Speed
-100   159  100   159    0     0    164      0 --:--:-- --:--:-- --:--:--   164
-100 8863k  100 8863k    0     0  4882k      0  0:00:01  0:00:01 --:--:-- 33.0M
+  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
+100 8863k  100 8863k    0     0  19.6M      0 --:--:-- --:--:-- --:--:-- 19.6M
 ```
@@ -105,7 +108,7 @@ max_length = max([len(label) for label in labels])
 Number of images found:  1040
 Number of labels found:  1040
 Number of unique characters:  19
-Characters present: {'d', 'w', 'y', '4', 'f', '6', 'g', 'e', '3', '5', 'p', 'x', '2', 'c', '7', 'n', 'b', '8', 'm'}
+Characters present: ['2', '3', '4', '5', '6', '7', '8', 'b', 'c', 'd', 'e', 'f', 'g', 'm', 'n', 'p', 'w', 'x', 'y']
@@ -116,9 +119,7 @@
 
 ```python
 # Mapping characters to integers
-char_to_num = layers.StringLookup(
-    vocabulary=list(characters), mask_token=None
-)
+char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)
 
 # Mapping integers back to original characters
 num_to_char = layers.StringLookup(
@@ -172,18 +173,14 @@ def encode_single_sample(img_path, label):
 
 train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
 train_dataset = (
-    train_dataset.map(
-        encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE
-    )
+    train_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
     .batch(batch_size)
     .prefetch(buffer_size=tf.data.AUTOTUNE)
 )
 
 validation_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
 validation_dataset = (
-    validation_dataset.map(
-        encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE
-    )
+    validation_dataset.map(encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
     .batch(batch_size)
     .prefetch(buffer_size=tf.data.AUTOTUNE)
 )
@@ -209,7 +206,9 @@ plt.show()
 ```
 
+
 ![png](/img/examples/vision/captcha_ocr/captcha_ocr_13_0.png)
+
 
 ---
@@ -218,10 +217,64 @@
 
 ```python
+def ctc_batch_cost(y_true, y_pred, input_length, label_length):
+    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
+    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
+    sparse_labels = tf.cast(ctc_label_dense_to_sparse(y_true, label_length), tf.int32)
+
+    y_pred = tf.math.log(tf.transpose(y_pred, perm=[1, 0, 2]) + keras.backend.epsilon())
+
+    return tf.expand_dims(
+        tf.compat.v1.nn.ctc_loss(
+            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
+        ),
+        1,
+    )
+
+
+def ctc_label_dense_to_sparse(labels, label_lengths):
+    label_shape = tf.shape(labels)
+    num_batches_tns = tf.stack([label_shape[0]])
+    max_num_labels_tns = tf.stack([label_shape[1]])
+
+    def range_less_than(old_input, current_input):
+        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
+            max_num_labels_tns, current_input
+        )
+
+    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
+    dense_mask = tf.compat.v1.scan(
+        range_less_than, label_lengths, initializer=init, parallel_iterations=1
+    )
+    dense_mask = dense_mask[:, 0, :]
+
+    label_array = tf.reshape(
+        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
+    )
+    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)
+
+    batch_array = tf.transpose(
+        tf.reshape(
+            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
+            tf.reverse(label_shape, [0]),
+        )
+    )
+    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)
+    indices = tf.transpose(
+        tf.reshape(tf.concat([batch_ind, label_ind], axis=0), [2, -1])
+    )
+
+    vals_sparse = tf.compat.v1.gather_nd(labels, indices)
+
+    return tf.SparseTensor(
+        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
+    )
+
+
 class CTCLayer(layers.Layer):
     def __init__(self, name=None):
         super().__init__(name=name)
-        self.loss_fn = keras.backend.ctc_batch_cost
+        self.loss_fn = ctc_batch_cost
 
     def call(self, y_true, y_pred):
         # Compute the training-time loss value and add it
@@ -306,53 +359,80 @@ model = build_model()
 model.summary()
 ```
 
Model: "ocr_model_v1"
+
+
+
+
+
+┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┓ +┃ Layer (type) ┃ Output Shape ┃ Param # ┃ Connected to ┃ +┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━┩ +│ image (InputLayer) │ (None, 200, 50, │ 0 │ - │ +│ │ 1) │ │ │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ Conv1 (Conv2D) │ (None, 200, 50, │ 320 │ image[0][0] │ +│ │ 32) │ │ │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ pool1 │ (None, 100, 25, │ 0 │ Conv1[0][0] │ +│ (MaxPooling2D) │ 32) │ │ │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ Conv2 (Conv2D) │ (None, 100, 25, │ 18,496 │ pool1[0][0] │ +│ │ 64) │ │ │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ pool2 │ (None, 50, 12, │ 0 │ Conv2[0][0] │ +│ (MaxPooling2D) │ 64) │ │ │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ reshape (Reshape) │ (None, 50, 768) │ 0 │ pool2[0][0] │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ dense1 (Dense) │ (None, 50, 64) │ 49,216 │ reshape[0][0] │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ dropout (Dropout) │ (None, 50, 64) │ 0 │ dense1[0][0] │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ bidirectional │ (None, 50, 256) │ 197,632 │ dropout[0][0] │ +│ (Bidirectional) │ │ │ │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ bidirectional_1 │ (None, 50, 128) │ 164,352 │ bidirectional[0][0] │ +│ (Bidirectional) │ │ │ │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ label (InputLayer) │ (None, None) │ 0 │ - │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ dense2 (Dense) │ (None, 50, 21) │ 2,709 │ bidirectional_1[0][… │ +├─────────────────────┼───────────────────┼─────────┼──────────────────────┤ +│ ctc_loss (CTCLayer) │ (None, 50, 21) │ 0 │ label[0][0], │ +│ │ │ │ dense2[0][0] │ +└─────────────────────┴───────────────────┴─────────┴──────────────────────┘ ++ + + + +
Total params: 432,725 (1.65 MB) ++ + + + +
Trainable params: 432,725 (1.65 MB) ++ + + + +
Non-trainable params: 0 (0.00 B) ++ + + --- ## Training ```python -epochs = 100 +# TODO restore epoch count. +epochs = 2 early_stopping_patience = 10 # Add early stopping early_stopping = keras.callbacks.EarlyStopping( @@ -371,227 +451,58 @@ history = model.fit(
Model: "functional_1"
+
+
+
+
+
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓ +┃ Layer (type) ┃ Output Shape ┃ Param # ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩ +│ image (InputLayer) │ (None, 200, 50, 1) │ 0 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ Conv1 (Conv2D) │ (None, 200, 50, 32) │ 320 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ pool1 (MaxPooling2D) │ (None, 100, 25, 32) │ 0 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ Conv2 (Conv2D) │ (None, 100, 25, 64) │ 18,496 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ pool2 (MaxPooling2D) │ (None, 50, 12, 64) │ 0 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ reshape (Reshape) │ (None, 50, 768) │ 0 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ dense1 (Dense) │ (None, 50, 64) │ 49,216 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ dropout (Dropout) │ (None, 50, 64) │ 0 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ bidirectional (Bidirectional) │ (None, 50, 256) │ 197,632 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ bidirectional_1 (Bidirectional) │ (None, 50, 128) │ 164,352 │ +├─────────────────────────────────┼───────────────────────────┼────────────┤ +│ dense2 (Dense) │ (None, 50, 21) │ 2,709 │ +└─────────────────────────────────┴───────────────────────────┴────────────┘ ++ + + + +
Total params: 432,725 (1.65 MB) ++ + + + +
Trainable params: 432,725 (1.65 MB) ++ + + + +
Non-trainable params: 0 (0.00 B) ++ + +
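
Note on the inlined CTC utilities: Keras 3 no longer ships `keras.backend.ctc_batch_cost` and `keras.backend.ctc_decode`, so this patch carries its own TF-backed copies. A quick shape check for the inlined `ctc_batch_cost` follows; it is a minimal sketch, assuming `ctc_batch_cost` and `ctc_label_dense_to_sparse` from the patch are in scope, and every tensor in it is made up for illustration. The sizes mirror the example: 50 timesteps and 21 output classes (19 characters + OOV token + CTC blank in the last slot).

```python
import tensorflow as tf

# Hypothetical toy batch, sized like the example's model output.
batch, timesteps, num_classes = 2, 50, 21
y_pred = tf.nn.softmax(tf.random.normal((batch, timesteps, num_classes)))

# Two 5-character labels as class indices; they must stay below the
# blank index (num_classes - 1 = 20) for tf.compat.v1.nn.ctc_loss.
y_true = tf.constant([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]], dtype=tf.int32)

# Per-sample lengths, shaped (batch, 1) exactly as CTCLayer.call builds them.
input_length = tf.fill((batch, 1), timesteps)
label_length = tf.fill((batch, 1), 5)

loss = ctc_batch_cost(y_true, y_pred, input_length, label_length)
print(loss.shape)  # (2, 1): one CTC loss value per sample, as CTCLayer expects
```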
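A greedy-decode smoke test for the inlined `ctc_decode`, under the same assumption that the helper from the patch is in scope. The toy distribution is fabricated so the result can be verified by hand: with 3 classes, index 2 is the CTC blank, and the per-step argmaxes `[0, 0, 1, 2]` must merge the repeated `0` and drop the blank, giving `[0, 1]`.

```python
import numpy as np

# One sample, 4 timesteps, 3 classes; index 2 is the CTC blank.
probs = np.array(
    [
        [
            [0.9, 0.05, 0.05],
            [0.9, 0.05, 0.05],
            [0.05, 0.9, 0.05],
            [0.05, 0.05, 0.9],
        ]
    ],
    dtype="float32",
)

decoded, log_prob = ctc_decode(probs, input_length=np.array([4]), greedy=True)
print(decoded[0].numpy())  # [[ 0  1 -1 -1]]: padded to num_steps with -1
```

The trailing `-1` padding is why `decode_batch_predictions` slices the result to `[:, :max_length]` before mapping indices back to characters with `num_to_char`.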