Commit bcc6f5d

Add footwear recognition script
1 parent dc73e72 commit bcc6f5d

File tree

3 files changed: +302 −0 lines changed


image_recognition_footwear/src/image_recognition_footwear/Footwear_detection.ipynb

Lines changed: 178 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, in_channels, channel_1, channel_2, channel_3,
                 node_1, node_2, num_classes):
        super().__init__()
        ####### Convolutional layers ######
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, channel_1, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(channel_1),
            nn.LeakyReLU(),
            nn.Conv2d(channel_1, channel_1, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(channel_1),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # halves the spatial resolution
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(channel_1, channel_2, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(channel_2),
            nn.LeakyReLU(),
            nn.Conv2d(channel_2, channel_2, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(channel_2),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # halves the spatial resolution again
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(channel_2, channel_3, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(channel_3),
            nn.LeakyReLU(),
            nn.Conv2d(channel_3, channel_3, kernel_size=3, padding=1, stride=1),
            nn.BatchNorm2d(channel_3),
            nn.LeakyReLU(),
            # the large pool collapses the remaining feature map to 1x1,
            # so Flatten below yields exactly channel_3 features
            nn.MaxPool2d(kernel_size=7, stride=2),
        )

        ######## Affine layers ########
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(channel_3, node_1),
            nn.BatchNorm1d(node_1),
            nn.Dropout(p=0.5),

            nn.Linear(node_1, node_2),
            nn.BatchNorm1d(node_2),

            nn.Linear(node_2, num_classes)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)

        scores = self.fc(x)
        return scores
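A quick sanity check of the architecture: each of the first two blocks halves the spatial resolution, and the final MaxPool2d(kernel_size=7, stride=2) collapses what is left to 1x1, so the flattened feature vector has exactly channel_3 entries for both 32x32 RGB and 28x28 grayscale inputs. Below is a minimal instantiation sketch; the hyperparameter values and the grayscale input shape are illustrative assumptions, not values taken from this commit.

    # Illustrative only: hyperparameters are assumed, not part of the commit
    model = Model(in_channels=1, channel_1=32, channel_2=64, channel_3=128,
                  node_1=256, node_2=64, num_classes=10)
    dummy = torch.randn(8, 1, 28, 28)   # batch of 8 grayscale 28x28 images
    scores = model(dummy)               # shape (8, 10): raw, unnormalized class scores
    print(scores.shape)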
Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
import numpy as np
from torchvision import transforms as T
import torch
from PIL import Image, ImageOps

def preprocess_RGB(img):
    """Preprocess an RGB image:
    input is a PIL image.
    Output is a PyTorch tensor compatible with the model (1x3x32x32, normalized)."""
    img = T.functional.resize(img, size=(32, 32), interpolation=Image.NEAREST)
    trans = T.Compose([T.ToTensor(), T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
    img = trans(img)
    img = img.unsqueeze(0)  # add batch dimension

    return img

def detection_RGB(img, model):
    """Detection of footwear:
    Input is a preprocessed image to provide to the model.
    Output is the predicted class index (int)."""
    model.eval()
    info = next(model.parameters())  # read device/dtype from the first parameter tensor
    device = info.device
    dtype = info.dtype
    with torch.no_grad():
        img = img.to(device=device, dtype=dtype)
        scores = model(img)
        preds = torch.argmax(scores, axis=1)
        score_max_numpy = int(preds.cpu().detach().numpy())
    return score_max_numpy

def preprocess_grayscale(img):
    """Preprocess a grayscale image:
    input is a PIL image.
    Output is a PyTorch tensor compatible with the model (1x1x28x28, normalized)."""
    img = ImageOps.grayscale(img)
    img = T.functional.resize(img, size=(28, 28), interpolation=Image.NEAREST)
    trans = T.Compose([T.ToTensor(), T.Normalize((0.5,), (0.5,))])
    img = trans(img)
    img = img.unsqueeze(0)  # add batch dimension

    return img

def detection_grayscale(img, model):
    """Detection of footwear:
    Input is a preprocessed image to provide to the model.
    Output is a binary classification [True, False], where True means footwear was detected."""
    model.eval()
    info = next(model.parameters())  # read device/dtype from the first parameter tensor
    device = info.device
    dtype = info.dtype
    with torch.no_grad():
        img = img.to(device=device, dtype=dtype)
        scores = model(img)
        preds = torch.argmax(scores, axis=1)
        score_max_numpy = int(preds.cpu().detach().numpy())
    if score_max_numpy == 5 or score_max_numpy == 7 or score_max_numpy == 9:
        # {'Sandal': 5, 'Sneaker': 7, 'Ankle boot': 9}
        detected = True
    else:
        detected = False
    return detected
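The 5/7/9 indices checked in detection_grayscale correspond to the Fashion-MNIST footwear classes (Sandal, Sneaker, Ankle boot). A minimal end-to-end sketch of how these helpers fit together is shown below; the import path, checkpoint file, image file, and hyperparameters are all hypothetical placeholders, not names from this repository.

    # Illustrative only: module path, file names and hyperparameters are assumed
    from image_recognition_footwear.model import Model  # hypothetical import path

    model = Model(in_channels=1, channel_1=32, channel_2=64, channel_3=128,
                  node_1=256, node_2=64, num_classes=10)
    model.load_state_dict(torch.load('footwear_model.pth', map_location='cpu'))  # hypothetical checkpoint

    img = Image.open('shoe.jpg')                # hypothetical test image
    tensor = preprocess_grayscale(img)          # 1x1x28x28 normalized tensor
    print(detection_grayscale(tensor, model))   # True for Sandal/Sneaker/Ankle boot predictions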
