
Commit 9fc2ecd

Migrate to PyTorch 0.4.1
1 parent 7a7ffc2 commit 9fc2ecd

File tree: 11 files changed, with 119 additions and 130 deletions
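The hunks below follow a handful of recurring PyTorch 0.3 → 0.4 idioms. A minimal illustrative sketch of those idioms (not part of the commit; the values are made up):

```python
import torch

# 0.3.x style: type-specific constructors
bboxes_old = torch.FloatTensor([[0., 0., 10., 10.]])
labels_old = torch.LongTensor([1])

# 0.4.x style: torch.tensor with an explicit dtype
bboxes = torch.tensor([[0., 0., 10., 10.]], dtype=torch.float)
labels = torch.tensor([1], dtype=torch.long)

# 0.4 merged Variable into Tensor, so inference code disables autograd
# with a context manager instead of volatile=True:
with torch.no_grad():
    total = bboxes.sum()

# Indexing a 1-D tensor now yields a 0-dim tensor; .item() extracts the
# Python scalar (this is why evaluator.py below calls scale_batch[0].item()):
scale = torch.tensor([0.6])[0].item()
assert isinstance(scale, float)
```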

README.md

Lines changed: 9 additions & 7 deletions
@@ -26,11 +26,11 @@ An easy implementation of Faster R-CNN in PyTorch.
 
     * **25 minutes** every 10000 steps
 
-    * **3 hours** for 70000 steps (which leads to mAP=70.29%)
+    * **3 hours** for 70000 steps (which leads to mAP=xx.xx%)
 
 * Inference
 
-    * **~9 examples** per second
+    * **~13 examples** per second
 
 ### Trained Model
 
@@ -39,8 +39,8 @@ An easy implementation of Faster R-CNN in PyTorch.
 ## Requirements
 
 * Python 3.6
-* torch 0.3.1
-* torchvision 0.2.0
+* torch 0.4.1
+* torchvision 0.2.1
 * tqdm
 
 ```
@@ -50,8 +50,8 @@ An easy implementation of Faster R-CNN in PyTorch.
 ## Setup
 
 1. Download VOC 2007 Dataset
-    - [Training / Validation](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar)
-    - [Test](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar)
+    - [Training / Validation](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar) (5011 images)
+    - [Test](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar) (4952 images)
 
 1. Extract to data folder, now your folder structure should be like:
 ```
@@ -83,7 +83,9 @@ An easy implementation of Faster R-CNN in PyTorch.
 $ python test_nms.py
 ```
 > sm_61 is for GTX-1080-Ti, to see others, visit [here](http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/)
-
+
+> Try to rebuild module if unit test fails
+
 * result after running `test_nms.py`
 
 ![](https://github.com/potterhsu/easy-faster-rcnn.pytorch/blob/master/images/test_nms.png?raw=true)
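After the version bump above, a quick way to confirm the environment matches the new requirements (a hedged sketch; exact version strings may carry build suffixes such as `+cu90`):

```python
import torch
import torchvision

# The commit targets torch 0.4.1 and torchvision 0.2.1
assert torch.__version__.startswith('0.4'), torch.__version__
assert torchvision.__version__.startswith('0.2'), torchvision.__version__
```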

bbox.py

Lines changed: 12 additions & 11 deletions
@@ -1,10 +1,11 @@
-import torch
 import numpy as np
+import torch
+from torch import Tensor
 
 
 class BBox(object):
 
-    def __init__(self, left: float, top: float, right: float, bottom: float) -> None:
+    def __init__(self, left: float, top: float, right: float, bottom: float):
         super().__init__()
         self.left = left
         self.top = top
@@ -19,7 +20,7 @@ def tolist(self):
         return [self.left, self.top, self.right, self.bottom]
 
     @staticmethod
-    def to_center_base(bboxes):
+    def to_center_base(bboxes: Tensor):
         return torch.stack([
             (bboxes[:, 0] + bboxes[:, 2]) / 2,
             (bboxes[:, 1] + bboxes[:, 3]) / 2,
@@ -28,7 +29,7 @@ def to_center_base(bboxes):
         ], dim=1)
 
     @staticmethod
-    def from_center_base(center_based_bboxes):
+    def from_center_base(center_based_bboxes: Tensor) -> Tensor:
         return torch.stack([
             center_based_bboxes[:, 0] - center_based_bboxes[:, 2] / 2,
             center_based_bboxes[:, 1] - center_based_bboxes[:, 3] / 2,
@@ -37,7 +38,7 @@ def from_center_base(center_based_bboxes):
         ], dim=1)
 
     @staticmethod
-    def calc_transformer(src_bboxes, dst_bboxes):
+    def calc_transformer(src_bboxes: Tensor, dst_bboxes: Tensor) -> Tensor:
         center_based_src_bboxes = BBox.to_center_base(src_bboxes)
         center_based_dst_bboxes = BBox.to_center_base(dst_bboxes)
         transformers = torch.stack([
@@ -49,7 +50,7 @@ def calc_transformer(src_bboxes, dst_bboxes):
         return transformers
 
     @staticmethod
-    def apply_transformer(src_bboxes, transformers):
+    def apply_transformer(src_bboxes: Tensor, transformers: Tensor) -> Tensor:
         center_based_src_bboxes = BBox.to_center_base(src_bboxes)
         center_based_dst_bboxes = torch.stack([
             transformers[:, 0] * center_based_src_bboxes[:, 2] + center_based_src_bboxes[:, 0],
@@ -61,7 +62,7 @@ def apply_transformer(src_bboxes, transformers):
         return dst_bboxes
 
     @staticmethod
-    def iou(source, other):
+    def iou(source: Tensor, other: Tensor) -> Tensor:
         source = source.repeat(other.shape[0], 1, 1).permute(1, 0, 2)
         other = other.repeat(source.shape[0], 1, 1)
 
@@ -79,14 +80,14 @@ def iou(source, other):
         return intersection_area / (source_area + other_area - intersection_area)
 
     @staticmethod
-    def inside(source, other) -> bool:
+    def inside(source: Tensor, other: Tensor) -> bool:
         source = source.repeat(other.shape[0], 1, 1).permute(1, 0, 2)
         other = other.repeat(source.shape[0], 1, 1)
         return ((source[:, :, 0] >= other[:, :, 0]) * (source[:, :, 1] >= other[:, :, 1]) *
                 (source[:, :, 2] <= other[:, :, 2]) * (source[:, :, 3] <= other[:, :, 3]))
 
     @staticmethod
-    def clip(bboxes, left: float, top: float, right: float, bottom: float):
+    def clip(bboxes: Tensor, left: float, top: float, right: float, bottom: float) -> Tensor:
         return torch.stack([
             torch.clamp(bboxes[:, 0], min=left, max=right),
             torch.clamp(bboxes[:, 1], min=top, max=bottom),
@@ -95,7 +96,7 @@ def clip(bboxes, left: float, top: float, right: float, bottom: float):
         ], dim=1)
 
     @staticmethod
-    def generate_anchors(max_x: int, max_y: int, stride: int):
+    def generate_anchors(max_x: int, max_y: int, stride: int) -> Tensor:
         center_based_anchor_bboxes = []
 
         # NOTE: it's important to let `anchor_y` be the major index of list (i.e., move horizontally and then vertically) for consistency with 2D convolution
@@ -110,7 +111,7 @@ def generate_anchors(max_x: int, max_y: int, stride: int):
                 width = size * np.sqrt(1 / r)
                 center_based_anchor_bboxes.append([center_x, center_y, width, height])
 
-        center_based_anchor_bboxes = torch.FloatTensor(center_based_anchor_bboxes)
+        center_based_anchor_bboxes = torch.tensor(center_based_anchor_bboxes, dtype=torch.float)
         anchor_bboxes = BBox.from_center_base(center_based_anchor_bboxes)
 
         return anchor_bboxes
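For orientation, a hypothetical usage sketch of the annotated `bbox.py` API above (assumes `bbox.py` is on the import path; the coordinates are made up):

```python
import torch

from bbox import BBox

# Two boxes in the (left, top, right, bottom) layout BBox expects, shape (N, 4)
boxes = torch.tensor([[0., 0., 10., 10.],
                      [5., 5., 15., 15.]], dtype=torch.float)

# to_center_base / from_center_base are inverses:
# (left, top, right, bottom) <-> (center_x, center_y, width, height)
center_based = BBox.to_center_base(boxes)
assert torch.allclose(BBox.from_center_base(center_based), boxes)

# iou pairs an (N, 4) tensor with an (M, 4) tensor into an (N, M) matrix;
# the diagonal is 1.0 since each box fully overlaps itself
overlap = BBox.iou(boxes, boxes)
```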

dataset.py

Lines changed: 8 additions & 8 deletions
@@ -7,8 +7,8 @@
 import PIL
 import torch.utils.data
 from PIL import Image, ImageOps
+from torch import Tensor
 from torchvision import transforms
-from torch import FloatTensor, LongTensor
 
 from bbox import BBox
 
@@ -21,7 +21,7 @@ class Mode(Enum):
 
 class Annotation(object):
     class Object(object):
-        def __init__(self, name: str, difficult: bool, bbox: BBox) -> None:
+        def __init__(self, name: str, difficult: bool, bbox: BBox):
             super().__init__()
             self.name = name
             self.difficult = difficult
@@ -31,7 +31,7 @@ def __repr__(self) -> str:
             return 'Object[name={:s}, difficult={!s}, bbox={!s}]'.format(
                 self.name, self.difficult, self.bbox)
 
-    def __init__(self, filename: str, objects: List[Object]) -> None:
+    def __init__(self, filename: str, objects: List[Object]):
         super().__init__()
         self.filename = filename
         self.objects = objects
@@ -46,7 +46,7 @@ def __init__(self, filename: str, objects: List[Object]) -> None:
 
     LABEL_TO_CATEGORY_DICT = {v: k for k, v in CATEGORY_TO_LABEL_DICT.items()}
 
-    def __init__(self, path_to_data_dir: str, mode: Mode) -> None:
+    def __init__(self, path_to_data_dir: str, mode: Mode):
         super().__init__()
 
         self._mode = mode
@@ -89,15 +89,15 @@ def __init__(self, path_to_data_dir: str, mode: Mode) -> None:
     def __len__(self) -> int:
         return len(self._image_id_to_annotation_dict)
 
-    def __getitem__(self, index: int) -> Tuple[str, FloatTensor, float, FloatTensor, LongTensor]:
+    def __getitem__(self, index: int) -> Tuple[str, Tensor, float, Tensor, Tensor]:
         image_id = self._image_ids[index]
         annotation = self._image_id_to_annotation_dict[image_id]
 
         bboxes = [obj.bbox.tolist() for obj in annotation.objects if not obj.difficult]
         labels = [Dataset.CATEGORY_TO_LABEL_DICT[obj.name] for obj in annotation.objects if not obj.difficult]
 
-        bboxes = torch.FloatTensor(bboxes)
-        labels = torch.LongTensor(labels)
+        bboxes = torch.tensor(bboxes, dtype=torch.float)
+        labels = torch.tensor(labels, dtype=torch.long)
 
         image = Image.open(os.path.join(self._path_to_jpeg_images_dir, annotation.filename))
 
@@ -112,7 +112,7 @@ def __getitem__(self, index: int) -> Tuple[str, FloatTensor, float, FloatTensor,
         return image_id, image, scale, bboxes, labels
 
     @staticmethod
-    def preprocess(image: PIL.Image.Image):
+    def preprocess(image: PIL.Image.Image) -> Tuple[Tensor, float]:
         # resize according to the rules:
         # 1. scale shorter edge to 600
         # 2. after scaling, if longer edge > 1000, scale longer edge to 1000
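A hedged sketch of what the typed `__getitem__` now returns; the `'data'` path and `Mode.TRAIN` member are assumptions for illustration, and VOC 2007 must already be extracted as in the README:

```python
import torch

from dataset import Dataset

# Hypothetical: assumes the VOC 2007 data folder from the README setup
dataset = Dataset(path_to_data_dir='data', mode=Dataset.Mode.TRAIN)
image_id, image, scale, bboxes, labels = dataset[0]

# After this commit, annotation tensors are built with torch.tensor and an
# explicit dtype, so their types are predictable:
assert bboxes.dtype == torch.float  # shape (num_objects, 4)
assert labels.dtype == torch.long   # shape (num_objects,)
```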

eval.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 from model import Model
 
 
-def _eval(path_to_checkpoint, path_to_data_dir, path_to_results_dir):
+def _eval(path_to_checkpoint: str, path_to_data_dir: str, path_to_results_dir: str):
     dataset = Dataset(path_to_data_dir, Dataset.Mode.TEST)
     evaluator = Evaluator(dataset, path_to_data_dir, path_to_results_dir)
 
evaluator.py

Lines changed: 18 additions & 15 deletions
@@ -1,5 +1,7 @@
 import os
+from typing import Dict, List
 
+import torch
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 
@@ -9,29 +11,30 @@
 
 
 class Evaluator(object):
-    def __init__(self, dataset, path_to_data_dir, path_to_results_dir):
+    def __init__(self, dataset: Dataset, path_to_data_dir: str, path_to_results_dir: str):
         super().__init__()
         self.dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
         self._path_to_data_dir = path_to_data_dir
         self._path_to_results_dir = path_to_results_dir
         os.makedirs(self._path_to_results_dir, exist_ok=True)
 
-    def evaluate(self, model):
+    def evaluate(self, model: Model) -> Dict[int, float]:
         all_image_ids, all_pred_bboxes, all_pred_labels, all_pred_probs = [], [], [], []
 
-        for batch_index, (image_id_batch, image_batch, scale_batch, _, _) in enumerate(tqdm(self.dataloader)):
-            image_id = image_id_batch[0]
-            image = image_batch[0].cuda()
-            scale = scale_batch[0]
+        with torch.no_grad():
+            for batch_index, (image_id_batch, image_batch, scale_batch, _, _) in enumerate(tqdm(self.dataloader)):
+                image_id = image_id_batch[0]
+                image = image_batch[0].cuda()
+                scale = scale_batch[0].item()
 
-            pred_bboxes, pred_labels, pred_probs = model.detect(image)
+                pred_bboxes, pred_labels, pred_probs = model.detect(image)
 
-            pred_bboxes = [[it / scale for it in bbox] for bbox in pred_bboxes]
+                pred_bboxes = [[it / scale for it in bbox] for bbox in pred_bboxes]
 
-            all_pred_bboxes.extend(pred_bboxes)
-            all_pred_labels.extend(pred_labels)
-            all_pred_probs.extend(pred_probs)
-            all_image_ids.extend([image_id] * len(pred_labels))
+                all_pred_bboxes.extend(pred_bboxes)
+                all_pred_labels.extend(pred_labels)
+                all_pred_probs.extend(pred_probs)
+                all_image_ids.extend([image_id] * len(pred_labels))
 
         self._write_results(all_image_ids, all_pred_bboxes, all_pred_labels, all_pred_probs)
 
@@ -57,13 +60,13 @@ def evaluate(self, model):
 
         return label_to_ap_dict
 
-    def _write_results(self, image_ids, bboxes, labels, preds):
+    def _write_results(self, image_ids: List[str], bboxes: List[List[float]], labels: List[int], probs: List[float]):
         label_to_txt_files_dict = {}
         for c in range(1, Model.NUM_CLASSES):
             label_to_txt_files_dict[c] = open(os.path.join(self._path_to_results_dir, 'comp3_det_test_{:s}.txt'.format(Dataset.LABEL_TO_CATEGORY_DICT[c])), 'w')
 
-        for image_id, bbox, label, pred in zip(image_ids, bboxes, labels, preds):
-            label_to_txt_files_dict[label].write('{:s} {:f} {:f} {:f} {:f} {:f}\n'.format(image_id, pred,
+        for image_id, bbox, label, prob in zip(image_ids, bboxes, labels, probs):
+            label_to_txt_files_dict[label].write('{:s} {:f} {:f} {:f} {:f} {:f}\n'.format(image_id, prob,
                                                                                           bbox[0], bbox[1], bbox[2], bbox[3]))
 
         for _, f in label_to_txt_files_dict.items():
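The two behavioral changes in `evaluate` reflect PyTorch 0.4's migration guide; a minimal self-contained sketch of why they are needed (no model or dataloader required):

```python
import torch

# In 0.4, indexing a 1-D tensor returns a 0-dim Tensor rather than a Python
# float, so dividing bbox coordinates by scale_batch[0] would silently yield
# tensors; .item() recovers the plain float used in the list comprehension.
scale_batch = torch.tensor([0.6])
scale = scale_batch[0].item()
assert isinstance(scale, float)

# torch.no_grad() replaces the removed volatile=True flag: operations inside
# the context build no autograd graph, which saves memory during inference.
with torch.no_grad():
    y = torch.ones(3, requires_grad=True) * 2
assert not y.requires_grad
```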

infer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from model import Model
1010

1111

12-
def _infer(path_to_input_image, path_to_output_image, path_to_checkpoint):
12+
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str):
1313
image = transforms.Image.open(path_to_input_image)
1414
image_tensor, scale = Dataset.preprocess(image)
1515
