tue-robotics
diff --git a/‎image_recognition_footwear/scripts/get_footwear b/‎image_recognition_footwear/scripts/get_footwear
diff --git a/‎image_recognition_footwear/src/image_recognition_footwear/model.py
Lines changed: 55 additions & 0 deletions b/‎image_recognition_footwear/src/image_recognition_footwear/model.py
Lines changed: 55 additions & 0 deletions
diff --git a/‎image_recognition_footwear/src/image_recognition_footwear/process_data.py
Lines changed: 46 additions & 0 deletions b/‎image_recognition_footwear/src/image_recognition_footwear/process_data.py
Lines changed: 46 additions & 0 deletions
diff --git a/‎image_recognition_footwear/test/test_footwear.py b/‎image_recognition_footwear/test/test_footwear.py
@@ -0,0 +1,55 @@
+import torch
+import torch.nn as nn
+class Model(nn.Module):
+    """Convolutional classifier: three conv stages followed by a fully connected head.
+
+    Every conv stage is two (3x3 Conv2d -> BatchNorm2d -> LeakyReLU) groups
+    followed by a max-pool. The first two stages pool with a 2x2 window, the
+    third with a 7x7 window; all pools use stride 2. The head flattens the
+    feature map and applies three Linear layers (with BatchNorm1d and one
+    Dropout in between) to produce one score per class.
+
+    The first Linear layer takes ``channel_3`` input features, so the output
+    of the third stage must flatten to exactly ``channel_3`` values per
+    sample, i.e. be 1x1 spatially. That is the case for e.g. 32x32 inputs
+    (32 -> 16 -> 8 -> 1).
+
+    :param in_channels: number of channels of the input image
+    :param channel_1: number of feature maps in the first conv stage
+    :param channel_2: number of feature maps in the second conv stage
+    :param channel_3: number of feature maps in the third conv stage
+    :param node_1: width of the first fully connected layer
+    :param node_2: width of the second fully connected layer
+    :param num_classes: number of output scores
+    """
+
+    def __init__(self, in_channels, channel_1, channel_2, channel_3,
+                 node_1, node_2, num_classes):
+        super().__init__()
+
+        # Stages are built in registration order so that parameter names
+        # ("conv1.0.weight", ..., "fc.6.bias") and initialisation order match
+        # what previously saved weights expect.
+        self.conv1 = self._conv_stage(in_channels, channel_1, pool_kernel=2)
+        self.conv2 = self._conv_stage(channel_1, channel_2, pool_kernel=2)
+        self.conv3 = self._conv_stage(channel_2, channel_3, pool_kernel=7)
+
+        head_layers = [
+            nn.Flatten(),
+            nn.Linear(channel_3, node_1),
+            nn.BatchNorm1d(node_1),
+            nn.Dropout(p=0.5),
+            nn.Linear(node_1, node_2),
+            nn.BatchNorm1d(node_2),
+            nn.Linear(node_2, num_classes),
+        ]
+        self.fc = nn.Sequential(*head_layers)
+
+    @staticmethod
+    def _conv_stage(n_in, n_out, pool_kernel):
+        """Build one conv stage: two conv/batch-norm/LeakyReLU groups and a max-pool."""
+        layers = []
+        # The first convolution maps n_in -> n_out, the second n_out -> n_out.
+        for n_source in (n_in, n_out):
+            layers.append(nn.Conv2d(n_source, n_out, kernel_size=3, padding=1, stride=1))
+            layers.append(nn.BatchNorm2d(n_out))
+            layers.append(nn.LeakyReLU())
+        layers.append(nn.MaxPool2d(kernel_size=pool_kernel, stride=2))
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Return the class scores for the image batch ``x``."""
+        for stage in (self.conv1, self.conv2, self.conv3):
+            x = stage(x)
+        return self.fc(x)
@@ -0,0 +1,46 @@
+import numpy as np
+from torchvision import transforms as T
+import torch
+from PIL import Image, ImageOps
+
+def preprocess_RGB(img):
+    """Preprocess an image for the model.
+
+    The image is resized to 32x32 with nearest-neighbour interpolation,
+    converted to a tensor, turned into a 3-channel grayscale image and
+    normalized per channel. A batch dimension is added in front, so the
+    result has shape (1, 3, 32, 32).
+
+    :param img: PIL image
+    :return: PyTorch tensor that can be fed to the model
+    """
+    resized = T.functional.resize(img, size=(32, 32), interpolation=Image.NEAREST)
+
+    # Per-channel normalization statistics.
+    # NOTE(review): these look like the usual CIFAR-10 values - confirm they
+    # match the data the model was trained on.
+    channel_mean = (0.4914, 0.4822, 0.4465)
+    channel_std = (0.2023, 0.1994, 0.2010)
+
+    pipeline = T.Compose([
+        T.ToTensor(),
+        T.Grayscale(num_output_channels=3),
+        T.Normalize(channel_mean, channel_std),
+    ])
+
+    # (C, H, W) -> (1, C, H, W): the model works on batches.
+    return pipeline(resized).unsqueeze(0)
+def heroPreprocess(img):
+    """Preprocess an image coming from Hero for the model.
+
+    A fixed region is cut out of the image: the 100 pixels to the left of
+    the horizontal centre, from 140 pixels below the vertical centre down to
+    the bottom edge. The crop is resized to 32x32 with nearest-neighbour
+    interpolation, converted to a tensor, normalized per channel and given a
+    leading batch dimension of size 1.
+
+    NOTE(review): unlike preprocess_RGB, no grayscale conversion is applied
+    here - confirm this is intended.
+
+    :param img: PIL image from Hero (expected size 640x480)
+    :return: PyTorch tensor that can be fed to the model
+    """
+    width, height = img.size  # Hero image size (640x480)
+    half_width = width / 2
+
+    # Crop box as (left, top, right, bottom).
+    crop_box = (half_width - 100, height / 2 + 140, half_width, height)
+    region = img.crop(crop_box)
+    region = T.functional.resize(region, size=(32, 32), interpolation=Image.NEAREST)
+
+    pipeline = T.Compose([
+        T.ToTensor(),
+        T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+    ])
+
+    # (C, H, W) -> (1, C, H, W): the model works on batches.
+    return pipeline(region).unsqueeze(0)
+
+def detection_RGB(img, model):
+    """Detection of footwear.
+
+    Runs the model on a single preprocessed image and returns the index of
+    the class with the highest score.
+
+    The model is switched to evaluation mode as a side effect, and the image
+    is moved to the device and dtype of the model's first parameter before
+    inference.
+
+    :param img: preprocessed image tensor holding exactly one sample
+    :param model: torch module that maps the image to per-class scores; it
+        must have at least one parameter
+    :return: predicted class index as a Python int
+        (presumably 1 means footwear detected - confirm against the
+        labels used for training)
+    """
+    model.eval()
+    # Use the first parameter as reference for where and how the model lives.
+    reference = next(model.parameters())
+    with torch.no_grad():
+        img = img.to(device=reference.device, dtype=reference.dtype)
+        scores = model(img)
+        preds = torch.argmax(scores, dim=1)
+    # Tensor.item() extracts the single prediction directly; converting a
+    # 1-D NumPy array with int() is deprecated since NumPy 1.25. It fails
+    # when the batch holds more than one sample, as only one int is returned.
+    return int(preds.item())