ikkez
diff --git a/‎anki_vector/util.py
Lines changed: 2 additions & 2 deletions b/‎anki_vector/util.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎examples/experimental/sign_language_recognition_system/README.md
Lines changed: 93 additions & 0 deletions b/‎examples/experimental/sign_language_recognition_system/README.md
Lines changed: 93 additions & 0 deletions
diff --git a/‎examples/experimental/sign_language_recognition_system/data_gen.py
Lines changed: 157 additions & 0 deletions b/‎examples/experimental/sign_language_recognition_system/data_gen.py
Lines changed: 157 additions & 0 deletions
diff --git a/‎examples/experimental/sign_language_recognition_system/dataset.zip
10.9 MB b/‎examples/experimental/sign_language_recognition_system/dataset.zip
10.9 MB
diff --git a/‎examples/experimental/sign_language_recognition_system/example.png
168 KB b/‎examples/experimental/sign_language_recognition_system/example.png
168 KB
@@ -1046,8 +1046,8 @@ def apply_overlay(self, image: Image.Image) -> None:
         image_width, image_height = image.size
         remaining_width = image_width - self.width
         remaining_height = image_height - self.height
-        x1, y1 = remaining_height // 2, remaining_width // 2
-        x2, y2 = (image_height - (remaining_height // 2)), (image_width - (remaining_width // 2))
+        x1, y1 = remaining_width // 2, remaining_height // 2
+        x2, y2 = (image_width - (remaining_width // 2)), (image_height - (remaining_height // 2))
 
         for i in range(0, self.line_thickness):
             d.rectangle([x1 + i, y1 + i, x2 - i, y2 - i], outline=self.line_color)
 
@@ -0,0 +1,93 @@
+# Overview:
+
+This project passes the robot's camera feed through a [Convolutional Neural Network](https://en.wikipedia.org/wiki/Convolutional_neural_network) (CNN) built to recognize American Sign Language hand signs. The network is built using [Keras](https://keras.io/) with a [TensorFlow](https://www.tensorflow.org/guide/keras) backend. It classifies each frame independently, predicting the sign visible (if any) in that frame, and therefore cannot recognize more complex sign language gestures that span multiple frames.
+
+>Network Architecture:
+>ConvLayer -> MaxPoolLayer -> ConvLayer -> MaxPoolLayer -> ConvLayer -> Dropout -> Flatten -> Dense -> Dropout -> Dense
+
+The network is built as a Keras [Sequential](https://keras.io/getting-started/sequential-model-guide/) model which consists of a linear stack of layers of the following types:
+
+ConvLayer (Convolutional layer): https://keras.io/layers/convolutional/
+
+MaxPoolLayer (Max Pooling layer): https://keras.io/layers/pooling/
+
+Flatten, Dense, Dropout layers: https://keras.io/layers/core/
+
+The `data_gen.py` script can be used to build/expand the dataset used to train and test the model. Each captured image is used as a seed to generate multiple additional images (randomly rotated copies), which expands the dataset. The images are converted to 200x200 black-and-white images to reduce the complexity of the network. While capturing images from the feed, ensure that your hand is positioned within the red square, which represents the cropped image dimensions.
+
+>Note: This project has not been tested on Windows or Linux environments. Currently, the `data_gen.py` script will not run on Windows due to its dependency on the `curses` library.
+
+
+# Additional Dependencies:
+
+Install the additional dependencies required for the project:
+```
+pip3 install keras
+pip3 install numpy
+pip3 install scipy
+pip3 install scikit-learn
+pip3 install tensorflow
+```
+
+# Dataset:
+
+### Import Dataset:
+
+This project includes a sample dataset that can be used as a starting point to train and test the neural network.
+
+Unpack the dataset:
+```
+unzip dataset.zip
+```
+
+>Note: This dataset contains 200x200 black-and-white images of two hand signs ("a", "b"). Additionally, the `stats.json` file records the number of images for each letter.
+
+### Generate Dataset:
+
+![hand-sign](./example.png)
+
+Use the command below to capture more images and expand the dataset. Expanding the dataset to include more images can help improve the network's performance because:
+
+1. The network may not have seen any reference images that help it distinguish your background, especially if you are testing against a noisy background.
+
+2. The dataset only contains images of the same hand, so the network might not recognize hands that look significantly different.
+
+
+```
+python3 data_gen.py --dataset_root_folder <path_to_folder>
+```
+
+>Note: To capture an image, hold the hand sign within the red frame shown on the camera feed and press the key corresponding to the letter that the hand sign represents. Images in the dataset are 200x200 pixels.
+
+>Note: Along with capturing images of hand signs, it is also important to capture images of the background without any hand sign in the frame. This ensures that a frame without any hand signs is correctly classified during prediction. Use the space bar on your keyboard to record a background image.
+
+
+# Run Project:
+
+### Project Phases:
+
+There are two main phases: `train` and `predict`. In the training phase, the neural network is given a set of labeled hand sign images. Once the network has been trained to classify the images, we move to the prediction phase. The `predict` option launches the robot's camera feed and attempts to classify any visible hand signs. If a frame is classified, the recognized letter is spoken out loud.
+
+### Training:
+
+Train the neural network using an image dataset to classify different hand signs.
+
+```
+python3 recognizer.py --train --dataset_root_folder <path_to_folder> [--model_config <path_to_config_file>] [--model_weights <path_to_weights_file>]
+```
+
+>Note: Use the `--model_config` and `--model_weights` flags to save the model's configuration and weights after it has been trained. This way, the model does not need to be re-trained before predicting.
+
+### Prediction:
+
+Use a trained neural network to predict any visible hand signs in the robot's field of view (within the region of interest marked with a red square).
+
+```
+python3 recognizer.py --predict --model_config <path_to_config_file> --model_weights <path_to_weights_file>
+```
+
+>Note: Use the `--model_config` and `--model_weights` flags to load an existing model's configuration and weights. If you do not have an existing model's configuration, train the model first.
+
+### Train and Predict:
+
+```
+python3 recognizer.py --train --predict --dataset_root_folder <path_to_folder>
+```
@@ -0,0 +1,157 @@
+# Copyright (c) 2019 Anki, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License in the file LICENSE.txt or at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Data generation script to build a training and test dataset.
+
+A sample dataset is included in the project ("dataset.zip"). Unzip the folder and use the
+--dataset_root_folder option to specify the file path and expand this dataset.
+
+Use this script to build/expand the data needed to train the sign language recognition system.
+"""
+
+from concurrent.futures import CancelledError
+import curses
+import json
+import os
+import platform
+from pathlib import Path
+import random
+import sys
+import tempfile
+import time
+
+try:
+    import numpy as np
+except ImportError as exc:
+    sys.exit("Cannot import numpy: Do `pip3 install numpy` to install")
+
+try:
+    from PIL import Image
+except ImportError:
+    sys.exit("Cannot import from PIL: Do `pip3 install --user Pillow` to install")
+
+try:
+    from scipy import ndimage
+except ImportError as exc:
+    sys.exit("Cannot import scipy: Do `pip3 install scipy` to install")
+
+import anki_vector
+import util
+
+
+def data_capture(camera: anki_vector.camera.CameraComponent, stats: dict, root_folder: str) -> None:
+    """Build an image dataset using the camera feed from Vector.
+
+    This method uses an image from the camera and generates a multiplier number of images by
+    rotating the original image. The keystroke used to initiate the image capture and processing
+    is used to label the image.
+    """
+
+    try:
+        # TODO: curses works well with Mac OS and Linux, explore msvcrt for Windows
+        terminal = curses.initscr()
+        curses.cbreak()
+        curses.noecho()
+        terminal.nodelay(True)
+
+        # The number of images to generate using the image captured as a seed
+        image_multiplier = 10
+        # The maximum amount of rotation by which to rotate the original image to generate more images
+        min_rotation = -10
+        max_rotation = 10
+
+        print("------ capturing hand signs dataset, press ctrl+c to exit ------")
+        while True:
+            key = terminal.getch()
+            if (ord("a") <= key <= ord("z")) or (key == ord(" ")):
+
+                # Represents background images, filenames are switched to be prefixed with "background" instead of " "
+                if key == ord(" "):
+                    key = "background"
+                else:
+                    key = chr(key)
+
+                # Pull image from camera
+                original_image = camera.latest_image.raw_image
+                if original_image:
+                    # Convert image to black and white
+                    black_white_image = original_image.convert("L")
+                    rotation_axes = [1, 1, 0]
+
+                    # Generate more images with random rotation
+                    for rotation in random.sample(range(min_rotation, max_rotation), image_multiplier):
+                        # Randomly define which axis to rotate the image by
+                        random.shuffle(rotation_axes)
+                        x_axis_rotation_enabled, y_axis_rotation_enabled = rotation_axes[:2]
+                        rotated_image_array = ndimage.rotate(black_white_image,
+                                                             rotation,
+                                                             axes=(x_axis_rotation_enabled, y_axis_rotation_enabled),
+                                                             reshape=False)
+
+                        # Convert to a 200*200 image
+                        rotated_image = Image.fromarray(rotated_image_array)
+                        cropped_image = util.crop_image(rotated_image, util.NetworkConstants.IMAGE_WIDTH, util.NetworkConstants.IMAGE_HEIGHT)
+
+                        # Save the image
+                        image_filename = key + "_" + str(stats.get(key, 0)) + ".png"
+                        stats[key] = stats.get(key, 0) + 1
+                        cropped_image.save(os.path.join(root_folder, image_filename))
+
+                    # Character
+                    print(f"Recorded images for {key}\n\r")
+    except (CancelledError, KeyboardInterrupt):
+        pass
+    finally:
+        curses.nocbreak()
+        curses.echo()
+        curses.endwin()
+
+
+def main():
+    stats = {}
+
+    args = util.parse_command_args()
+    if not args.dataset_root_folder:
+        args.dataset_root_folder = str(Path(tempfile.gettempdir(), "dataset"))
+        print(f"No data folder defined, saving to {args.dataset_root_folder}")
+        os.makedirs(args.dataset_root_folder, exist_ok=True)
+        time.sleep(2)
+
+    # Read existing stats or set new stats up
+    if os.path.isfile(os.path.join(args.dataset_root_folder, "stats.json")):
+        with open(os.path.join(args.dataset_root_folder, "stats.json"), "r") as stats_file:
+            stats = json.load(stats_file)
+    else:
+        stats = {}
+
+    with anki_vector.Robot(args.serial) as robot:
+        try:
+            # Add a rectangular overlay describing the portion of image that is used after cropping.
+            # TODO: The rectangle overlay should feed in a full rect, not just a size
+            frame_of_interest = anki_vector.util.RectangleOverlay(util.NetworkConstants.IMAGE_WIDTH, util.NetworkConstants.IMAGE_HEIGHT)
+            robot.viewer.overlays.append(frame_of_interest)
+            robot.camera.init_camera_feed()
+            robot.viewer.show()
+            data_capture(robot.camera, stats, args.dataset_root_folder)
+        finally:
+            with open(os.path.join(args.dataset_root_folder, "stats.json"), "w") as stats_file:
+                # Save the stats of expanded dataset
+                json.dump(stats, stats_file)
+
+            # Reset the terminal
+            print(f"Data collection done!\nData stored in {args.dataset_root_folder}")
+
+
+# Start the data collection only when this file is executed as a script.
+if __name__ == '__main__':
+    main()