diff --git a/image_recognition_footwear/scripts/get_footwear b/image_recognition_footwear/scripts/get_footwear
new file mode 100644
index 00000000..8ea754e6
--- /dev/null
+++ b/image_recognition_footwear/scripts/get_footwear
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import torch
+from PIL import Image
+
+from image_recognition_footwear.model import Model
+from image_recognition_footwear.process_data import hero_preprocess, detection_rgb
+
+# Assign description to the help doc
+parser = argparse.ArgumentParser(description='Get footwear detected using PyTorch')
+
+# Add arguments
+parser.add_argument('image', type=str, help='Image')
+parser.add_argument('--weights-path', type=str, help='Path to the weights of the VGG model',
+                    default=os.path.expanduser('~/data/pytorch_models/footwearModel.pth'))
+parser.add_argument('--input-channel', type=int, help='Number of input channels of the model', default=3)
+parser.add_argument('--channel1-size', type=int, help='Size of channel 1', default=128)
+parser.add_argument('--channel2-size', type=int, help='Size of channel 2', default=256)
+parser.add_argument('--channel3-size', type=int, help='Size of channel 3', default=512)
+parser.add_argument('--nodes-fclayer1-size', type=int, help='Number of fully connected layer 1 neurons', default=1024)
+parser.add_argument('--nodes-fclayer2-size', type=int, help='Number of fully connected layer 2 neurons', default=1024)
+parser.add_argument('--class-size', type=int, help='Number of classes of the network', default=2)
+
+args = parser.parse_args()
+
+# Use the GPU when available, fall back to the CPU otherwise
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Read the image and preprocess
+img = Image.open(args.image)
+preprocessed_img = hero_preprocess(img)
+
+# Load the model
+model = Model(in_channels=args.input_channel, channel_1=args.channel1_size, channel_2=args.channel2_size,
+              channel_3=args.channel3_size, node_1=args.nodes_fclayer1_size, node_2=args.nodes_fclayer2_size,
+              num_classes=args.class_size)
+model.load_state_dict(torch.load(args.weights_path, map_location=device))
+model.to(device=device)
+
+# Detection
+detection = detection_rgb(preprocessed_img, model)
+
+print(detection)
diff --git a/image_recognition_footwear/setup.py b/image_recognition_footwear/setup.py
index ee31247d..7bd2b912 100644
--- a/image_recognition_footwear/setup.py
+++ b/image_recognition_footwear/setup.py
@@ -1,9 +1,6 @@
 from setuptools import setup
 from catkin_pkg.python_setup import generate_distutils_setup
 
-d = generate_distutils_setup(
-    packages=['image_recognition_footwear'],
-    package_dir={'': 'src'}
-)
+d = generate_distutils_setup(packages=["image_recognition_footwear"], package_dir={"": "src"})
 
 setup(**d)
diff --git a/image_recognition_footwear/src/image_recognition_footwear/__init__.py b/image_recognition_footwear/src/image_recognition_footwear/__init__.py
index e69de29b..0e6ee3cc 100644
--- a/image_recognition_footwear/src/image_recognition_footwear/__init__.py
+++ b/image_recognition_footwear/src/image_recognition_footwear/__init__.py
@@ -0,0 +1,2 @@
+from . import model
+from . import process_data
diff --git a/image_recognition_footwear/src/image_recognition_footwear/model.py b/image_recognition_footwear/src/image_recognition_footwear/model.py
new file mode 100644
index 00000000..e4cef071
--- /dev/null
+++ b/image_recognition_footwear/src/image_recognition_footwear/model.py
@@ -0,0 +1,59 @@
+import torch
+import torch.nn as nn
+
+
+class Model(nn.Module):
+    def __init__(self, in_channels, channel_1, channel_2, channel_3, node_1, node_2, num_classes):
+        super().__init__()
+        ####### Convolutional layers ######
+        self.conv1 = nn.Sequential(
+            nn.Conv2d(in_channels, channel_1, kernel_size=3, padding=1, stride=1),
+            nn.BatchNorm2d(channel_1),
+            nn.LeakyReLU(),
+            nn.Conv2d(channel_1, channel_1, kernel_size=3, padding=1, stride=1),
+            nn.BatchNorm2d(channel_1),
+            nn.LeakyReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+        )
+        self.conv2 = nn.Sequential(
+            nn.Conv2d(channel_1, channel_2, kernel_size=3, padding=1, stride=1),
+            nn.BatchNorm2d(channel_2),
+            nn.LeakyReLU(),
+            nn.Conv2d(channel_2, channel_2, kernel_size=3, padding=1, stride=1),
+            nn.BatchNorm2d(channel_2),
+            nn.LeakyReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+        )
+        self.conv3 = nn.Sequential(
+            nn.Conv2d(channel_2, channel_3, kernel_size=3, padding=1, stride=1),
+            nn.BatchNorm2d(channel_3),
+            nn.LeakyReLU(),
+            nn.Conv2d(channel_3, channel_3, kernel_size=3, padding=1, stride=1),
+            nn.BatchNorm2d(channel_3),
+            nn.LeakyReLU(),
+            nn.MaxPool2d(kernel_size=7, stride=2),
+        )
+
+        ######## Affine layers ########
+        self.fc = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(channel_3, node_1),
+            nn.BatchNorm1d(node_1),
+            nn.Dropout(p=0.5),
+            nn.Linear(node_1, node_2),
+            nn.BatchNorm1d(node_2),
+            nn.Linear(node_2, num_classes),
+        )
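+
+        # Note on shapes: with the default 32x32 input, the 2x2 max pools in
+        # conv1 and conv2 halve the spatial size (32 -> 16 -> 8), and the final
+        # MaxPool2d(kernel_size=7, stride=2) reduces 8x8 to 1x1, so nn.Flatten()
+        # passes exactly channel_3 features to the first nn.Linear layer.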
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.conv3(x)
+
+        scores = self.fc(x)
+        return scores
diff --git a/image_recognition_footwear/src/image_recognition_footwear/process_data.py b/image_recognition_footwear/src/image_recognition_footwear/process_data.py
new file mode 100644
index 00000000..34c9fea3
--- /dev/null
+++ b/image_recognition_footwear/src/image_recognition_footwear/process_data.py
@@ -0,0 +1,62 @@
+from torchvision import transforms as T
+import torch
+from PIL import Image
+
+
+def preprocess_rgb(img):
+    """
+    Preprocess an RGB image.
+    Input is a PIL image.
+    Output is a PyTorch tensor that is compatible with the model."""
+    img = T.functional.resize(img, size=(32, 32), interpolation=Image.NEAREST)
+    trans = T.Compose(
+        [
+            T.ToTensor(),
+            T.Grayscale(num_output_channels=3),
+            # Standard CIFAR-10 channel means and standard deviations
+            T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+        ]
+    )
+    img = trans(img)
+    img = img.unsqueeze(0)
+
+    return img
+
+
+def hero_preprocess(img):
+    """
+    Preprocess an image from the Hero robot's camera.
+    Expected input is a PIL image.
+    Output is a PyTorch tensor that is compatible with the model."""
+    width, height = img.size  # Hero image size (640x480)
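+    # Crop a fixed 100x100 px patch at the bottom of the frame, just left of
+    # center (x: 220-320, y: 380-480 for a 640x480 image), where footwear is
+    # expected to be visible from the robot's viewpoint.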
+    left = width / 2 - 100
+    top = height / 2 + 140
+    right = width / 2
+    bottom = height
+    im1 = img.crop((left, top, right, bottom))
+    img2 = T.functional.resize(im1, size=(32, 32), interpolation=Image.NEAREST)
+    trans = T.Compose([T.ToTensor(), T.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
+    img_trans = trans(img2)
+    img_trans = img_trans.unsqueeze(0)
+
+    return img_trans
+
+
+def detection_rgb(img, model):
+    """
+    Detection of footwear.
+    Input is a preprocessed image to provide to the model.
+    Output is the predicted class index: 1 when footwear is detected, 0 otherwise."""
+    model.eval()
+    info = next(model.parameters())  # Retrieve the first parameter tensor to find the model's device and dtype
+    device = info.device
+    dtype = info.dtype
+    with torch.no_grad():
+        img = img.to(device=device, dtype=dtype)
+        scores = model(img)
+        preds = torch.argmax(scores, dim=1)
+        score_max_numpy = int(preds.cpu().detach().numpy())
+        return score_max_numpy
diff --git a/image_recognition_footwear/test/assets/no_shoe.jpg b/image_recognition_footwear/test/assets/no_shoe.jpg
new file mode 100644
index 00000000..b33d707f
Binary files /dev/null and b/image_recognition_footwear/test/assets/no_shoe.jpg differ
diff --git a/image_recognition_footwear/test/assets/yes_shoe.jpg b/image_recognition_footwear/test/assets/yes_shoe.jpg
new file mode 100644
index 00000000..21ada6c8
Binary files /dev/null and b/image_recognition_footwear/test/assets/yes_shoe.jpg differ
diff --git a/image_recognition_footwear/test/run_tests.bash b/image_recognition_footwear/test/run_tests.bash
new file mode 100644
index 00000000..d5b561c3
--- /dev/null
+++ b/image_recognition_footwear/test/run_tests.bash
@@ -0,0 +1,2 @@
+#!/bin/bash
+nosetests -vv "$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
\ No newline at end of file
diff --git a/image_recognition_footwear/test/test_footwear.py b/image_recognition_footwear/test/test_footwear.py
new file mode 100644
index 00000000..6fda4342
--- /dev/null
+++ b/image_recognition_footwear/test/test_footwear.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+
+import os
+import re
+import unittest
+
+from PIL import Image
+import rospkg
+from image_recognition_footwear.model import Model
+from image_recognition_footwear.process_data import hero_preprocess, detection_rgb
+import torch
+
+
+@unittest.skip("requires the model weights at ~/data/pytorch_models/footwearModel.pth")
+def test_footwear():
+    local_path = os.path.expanduser("~/data/pytorch_models/footwearModel.pth")
+
+    if not os.path.exists(local_path):
+        print("File does not exist: {}".format(local_path))
+
+    def is_there_footwear_from_asset_name(asset_name):
+        binary_str = re.search(r"(\w+)_shoe", asset_name).group(1)
+        return binary_str == "yes"
+
+    assets_path = os.path.join(rospkg.RosPack().get_path("image_recognition_footwear"), "test/assets")
+    images_gt = [
+        (Image.open(os.path.join(assets_path, asset)), is_there_footwear_from_asset_name(asset))
+        for asset in os.listdir(assets_path)
+    ]
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = Model(in_channels=3, channel_1=128, channel_2=256, channel_3=512, node_1=1024, node_2=1024, num_classes=2)
+    model.load_state_dict(torch.load(local_path, map_location=device))
+    model.to(device=device)
+
+    for image, is_footwear_gt in images_gt:
+        binary_detection = bool(detection_rgb(hero_preprocess(image), model))
+        assert is_footwear_gt == binary_detection, f"{binary_detection=}, {is_footwear_gt=}"
+
+
+if __name__ == "__main__":
+    test_footwear()