tue-robotics · GustavoDCC · Jul 5, 2023 · Jul 5, 2023 · Jul 5, 2023 · Jul 5, 2023
diff --git a/image_recognition_footwear/scripts/get_footwear b/image_recognition_footwear/scripts/get_footwear
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+"""Command-line tool that runs the footwear classifier on a single image file."""
+
+from __future__ import print_function
+import argparse
+from image_recognition_footwear.model import Model
+from image_recognition_footwear.process_data import hero_preprocess, detection_rgb
+from PIL import Image
+import os
+import torch
+
+
+def parse_arguments():
+    """Define the command-line interface and return the parsed arguments."""
+    # Assign description to the help doc
+    parser = argparse.ArgumentParser(description='Get footwear detected using PyTorch')
+
+    # Add arguments
+    parser.add_argument('image', type=str, help='Image')
+    parser.add_argument('--weights-path', type=str, help='Path to the weights of the VGG model',
+                        default=os.path.expanduser('~/data/pytorch_models/footwearModel.pth'))
+
+    parser.add_argument('--input-channel', type=int, help='Size of the input model channel', default=3)
+    parser.add_argument('--channel1-size', type=int, help='Size channel 1', default=128)
+    parser.add_argument('--channel2-size', type=int, help='Size channel 2', default=256)
+    parser.add_argument('--channel3-size', type=int, help='Size channel 3', default=512)
+    parser.add_argument('--nodes-fclayer1-size', type=int, help='Size fully connected layer 1 neurons', default=1024)
+    parser.add_argument('--nodes-fclayer2-size', type=int, help='Size fully connected layer 2 neurons', default=1024)
+    parser.add_argument('--class-size', type=int, help='Classes of the network', default=2)
+
+    return parser.parse_args()
+
+
+def main():
+    """Load the image and the model weights, run the detection and print the predicted class index."""
+    args = parse_arguments()
+
+    # Use the GPU when one is available, otherwise run on the CPU instead of crashing
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    # Read the image and preprocess; the file handle is released once the tensor has been built
+    with Image.open(args.image) as img:
+        preprocessed_img = hero_preprocess(img)
+
+    # Load the model
+    model = Model(
+        in_channels=args.input_channel,
+        channel_1=args.channel1_size,
+        channel_2=args.channel2_size,
+        channel_3=args.channel3_size,
+        node_1=args.nodes_fclayer1_size,
+        node_2=args.nodes_fclayer2_size,
+        num_classes=args.class_size,
+    )
+    # map_location lets weights that were saved on a GPU be restored on a CPU-only machine
+    model.load_state_dict(torch.load(args.weights_path, map_location=device))
+    model.to(device=device)
+
+    # Detection
+    detector = detection_rgb(preprocessed_img, model)
+
+    print(detector)
+
+
+if __name__ == '__main__':
+    main()
+
diff --git a/image_recognition_footwear/setup.py b/image_recognition_footwear/setup.py
@@ -1,9 +1,6 @@
 from setuptools import setup
 from catkin_pkg.python_setup import generate_distutils_setup
 
-d = generate_distutils_setup(
-    packages=['image_recognition_footwear'],
-    package_dir={'': 'src'}
-)
+# Keyword arguments for setup(): the image_recognition_footwear package is looked up under src/.
+d = generate_distutils_setup(packages=["image_recognition_footwear"], package_dir={"": "src"})
 
 setup(**d)
diff --git a/image_recognition_footwear/src/image_recognition_footwear/__init__.py b/image_recognition_footwear/src/image_recognition_footwear/__init__.py
@@ -0,0 +1,2 @@
+from . import model
+from . import process_data
diff --git a/image_recognition_footwear/src/image_recognition_footwear/model.py b/image_recognition_footwear/src/image_recognition_footwear/model.py
@@ -0,0 +1,54 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    """
+    Convolutional classifier made of three convolutional stages and a fully connected head.
+
+    Every stage is conv -> batch norm -> LeakyReLU, twice, followed by max pooling. The head
+    flattens the features and maps them through two hidden linear layers to ``num_classes`` scores.
+    The first linear layer takes ``channel_3`` inputs, so the last stage has to reduce the spatial
+    size to 1x1 (which is what a 32x32 input gives: 32 -> 16 -> 8 -> 1).
+    """
+
+    def __init__(self, in_channels, channel_1, channel_2, channel_3, node_1, node_2, num_classes):
+        super().__init__()
+        # Feature extractor: the first two stages halve the resolution, the last one pools with a 7x7 window
+        self.conv1 = self._conv_stage(in_channels, channel_1, pool_kernel=2)
+        self.conv2 = self._conv_stage(channel_1, channel_2, pool_kernel=2)
+        self.conv3 = self._conv_stage(channel_2, channel_3, pool_kernel=7)
+
+        # Classification head; the layer order fixes the state_dict indices, so keep it as is
+        head_layers = [
+            nn.Flatten(),
+            nn.Linear(channel_3, node_1),
+            nn.BatchNorm1d(node_1),
+            nn.Dropout(p=0.5),
+            nn.Linear(node_1, node_2),
+            nn.BatchNorm1d(node_2),
+            nn.Linear(node_2, num_classes),
+        ]
+        self.fc = nn.Sequential(*head_layers)
+
+    @staticmethod
+    def _conv_stage(n_in, n_out, pool_kernel):
+        """Build one stage: two 3x3 conv/batch-norm/LeakyReLU groups and a stride-2 max pool."""
+        stage_layers = [
+            nn.Conv2d(n_in, n_out, kernel_size=3, padding=1, stride=1),
+            nn.BatchNorm2d(n_out),
+            nn.LeakyReLU(),
+            nn.Conv2d(n_out, n_out, kernel_size=3, padding=1, stride=1),
+            nn.BatchNorm2d(n_out),
+            nn.LeakyReLU(),
+            nn.MaxPool2d(kernel_size=pool_kernel, stride=2),
+        ]
+        return nn.Sequential(*stage_layers)
+
+    def forward(self, x):
+        """Return the raw class scores for the batch ``x``."""
+        features = self.conv3(self.conv2(self.conv1(x)))
+        return self.fc(features)
diff --git a/image_recognition_footwear/src/image_recognition_footwear/process_data.py b/image_recognition_footwear/src/image_recognition_footwear/process_data.py
@@ -0,0 +1,58 @@
+from torchvision import transforms as T
+import torch
+from PIL import Image
+
+
+def preprocess_rgb(img):
+    """
+    Convert a PIL image into a model-ready tensor.
+    The image is resized to 32x32 with nearest-neighbour sampling, turned into a tensor,
+    converted to grayscale while keeping three channels, normalised per channel and finally
+    given a leading dimension of size one.
+    Input is a PIL image.
+    Output is a pytorch tensor."""
+    resized = T.functional.resize(img, size=(32, 32), interpolation=Image.NEAREST)
+    steps = (
+        T.ToTensor(),
+        T.Grayscale(num_output_channels=3),
+        T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+    )
+    tensor = resized
+    for step in steps:  # apply the transforms one after another, in order
+        tensor = step(tensor)
+
+    return tensor.unsqueeze(0)
+
+
+def hero_preprocess(img):
+    """
+    Pre-process a Hero camera image.
+    A fixed region is cropped out of the image: the 100 pixel wide strip directly left of the
+    horizontal centre, from 140 pixels below the vertical centre down to the bottom edge. The
+    crop is resized to 32x32, converted to a tensor, normalised per channel and given a leading
+    dimension of size one.
+    Expected input is a PIL image (the crop offsets were chosen for 640x480 Hero images).
+    Output is a pytorch tensor.
+    Raises ValueError when the image is not tall enough for the crop region to exist."""
+    if img.mode != "RGB":
+        # The normalisation below is defined for exactly three channels (e.g. not RGBA or grayscale)
+        img = img.convert("RGB")
+
+    width, height = img.size  # Hero image size (640x480)
+    left = width / 2 - 100
+    top = height / 2 + 140
+    right = width / 2
+    bottom = height
+    if top >= bottom:
+        # Happens for heights of 280 pixels or less; fail clearly instead of inside crop/resize
+        raise ValueError("Image of size {}x{} is too small for the footwear crop".format(width, height))
+
+    cropped = img.crop((left, top, right, bottom))
+    resized = T.functional.resize(cropped, size=(32, 32), interpolation=Image.NEAREST)
+    trans = T.Compose([T.ToTensor(), T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
+    img_trans = trans(resized)
+    img_trans = img_trans.unsqueeze(0)
+
+    return img_trans
+
+
+def detection_rgb(img, model):
+    """
+    Detection of footwear.
+    The model is switched to evaluation mode and the image is moved to the device and dtype of
+    the model's first parameter before the forward pass.
+    Input is a preprocessed image tensor holding exactly one image, plus the model to run.
+    Output is the index of the highest-scoring class as a Python int (0 or 1 for the two-class
+    model; presumably 1 means footwear was detected - confirm against the training labels).
+    Raises ValueError when the model returns scores for more than one image."""
+    model.eval()
+    reference = next(model.parameters())  # The first parameter tells where the model lives
+    with torch.no_grad():
+        scores = model(img.to(device=reference.device, dtype=reference.dtype))
+        preds = torch.argmax(scores, dim=1)
+
+    if preds.numel() != 1:
+        raise ValueError("detection_rgb expects a single image, got {} predictions".format(preds.numel()))
+
+    # item() copies the single value to the host, no numpy round trip needed
+    return int(preds.item())
diff --git a/image_recognition_footwear/test/assests/no_shoe.jpg b/image_recognition_footwear/test/assests/no_shoe.jpg
diff --git a/image_recognition_footwear/test/assests/yes_shoe.jpg b/image_recognition_footwear/test/assests/yes_shoe.jpg
diff --git a/image_recognition_footwear/test/run_tests.bash b/image_recognition_footwear/test/run_tests.bash
@@ -0,0 +1,2 @@
+#!/bin/bash
+# Run nosetests verbosely on the directory that contains this script, independent of the caller's cwd.
+test_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+nosetests -vv "${test_dir}"
diff --git a/image_recognition_footwear/test/test_footwear.py b/image_recognition_footwear/test/test_footwear.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+import os
+import re
+from future.moves.urllib.request import urlretrieve
+import unittest
+
+from PIL import Image
+import rospkg
+from image_recognition_footwear.model import Model
+from image_recognition_footwear.process_data import hero_preprocess, detection_rgb
+import torch
+
+
+def test_footwear():
+    """
+    Run the footwear model on every test asset and compare the prediction with the file name.
+    Assets are named ``yes_shoe.*`` / ``no_shoe.*``; the prefix is the expected outcome.
+    The test is skipped when the model weights are not available on this machine."""
+    local_path = os.path.expanduser("~/data/pytorch_models/footwearModel.pth")
+
+    if not os.path.exists(local_path):
+        # Without weights there is nothing to test; report a skip instead of failing in torch.load
+        raise unittest.SkipTest("File does not exist {}".format(local_path))
+
+    def is_there_footwear_from_asset_name(asset_name):
+        """Return True when the asset name starts with ``yes`` in front of ``_shoe``."""
+        match = re.search(r"(\w+)_shoe", asset_name)
+        if match is None:
+            raise ValueError("Unexpected asset name {}".format(asset_name))
+        return match.group(1) == "yes"
+
+    # NOTE: the asset directory is spelled "assests" in the package, the path has to match it
+    assets_path = os.path.join(rospkg.RosPack().get_path("image_recognition_footwear"), "test/assests")
+
+    # Use the GPU when available, otherwise fall back to the CPU
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = Model(in_channels=3, channel_1=128, channel_2=256, channel_3=512, node_1=1024, node_2=1024, num_classes=2)
+    model.load_state_dict(torch.load(local_path, map_location=device))
+    model.to(device=device)
+
+    for asset in sorted(os.listdir(assets_path)):
+        is_footwear_gt = is_there_footwear_from_asset_name(asset)
+        # detection_rgb takes one preprocessed tensor at a time
+        # NOTE(review): assumes the assets are Hero camera frames suited for hero_preprocess - confirm
+        with Image.open(os.path.join(assets_path, asset)) as image:
+            binary_detection = detection_rgb(hero_preprocess(image), model)
+        # NOTE(review): assumes class index 1 means "footwear detected" - confirm against the training labels
+        assert is_footwear_gt == bool(binary_detection), f"{asset}: {binary_detection=}, {is_footwear_gt=}"
+
+
+if __name__ == "__main__":
+    test_footwear()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from . import model
		from . import process_data
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		#!/bin/bash
		nosetests -vv "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"