diff --git a/lib/datasets/pascal_voc.py b/lib/datasets/pascal_voc.py index 7a29bd9d2..a389f05a6 100644 --- a/lib/datasets/pascal_voc.py +++ b/lib/datasets/pascal_voc.py @@ -46,11 +46,7 @@ def __init__(self, image_set, year, devkit_path=None): else devkit_path self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year) self._classes = ('__background__', # always index 0 - 'aeroplane', 'bicycle', 'bird', 'boat', - 'bottle', 'bus', 'car', 'cat', 'chair', - 'cow', 'diningtable', 'dog', 'horse', - 'motorbike', 'person', 'pottedplant', - 'sheep', 'sofa', 'train', 'tvmonitor') + 'person','car','van','truck','misc','dontcare','cyclist','tram','person_sitting') self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes))) self._image_ext = '.jpg' self._image_index = self._load_image_set_index() diff --git a/lib/model/faster_rcnn/faster_rcnn.py b/lib/model/faster_rcnn/faster_rcnn.py index c60c493ed..12a5bd7eb 100644 --- a/lib/model/faster_rcnn/faster_rcnn.py +++ b/lib/model/faster_rcnn/faster_rcnn.py @@ -19,7 +19,7 @@ class _fasterRCNN(nn.Module): """ faster RCNN """ def __init__(self, classes, class_agnostic): - super(_fasterRCNN, self).__init__() + super(_fasterRCNN, self).__init__() #run the inherited nn.Module initialization self.classes = classes self.n_classes = len(classes) self.class_agnostic = class_agnostic @@ -33,7 +33,7 @@ def __init__(self, classes, class_agnostic): self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) - self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE + self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE #conditional expression, like: def larger(num1, num2): return num1 if num1 > num2 else num2; this variable may be declared elsewhere self.RCNN_roi_crop = _RoICrop() def forward(self, im_data, im_info, gt_boxes, num_boxes): @@ -77,13 +77,13 @@ def forward(self, im_data, im_info, gt_boxes, 
num_boxes): pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat = F.max_pool2d(pooled_feat, 2, 2) - elif cfg.POOLING_MODE == 'align': + elif cfg.POOLING_MODE == 'align': #this is the most common ROI Pooling method. pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5)) # feed pooled features to top model - pooled_feat = self._head_to_tail(pooled_feat) + pooled_feat = self._head_to_tail(pooled_feat) #problem1 # compute bbox offset bbox_pred = self.RCNN_bbox_pred(pooled_feat) diff --git a/lib/model/faster_rcnn/vgg16.py b/lib/model/faster_rcnn/vgg16.py index 90fe0d7b9..aa7c78828 100644 --- a/lib/model/faster_rcnn/vgg16.py +++ b/lib/model/faster_rcnn/vgg16.py @@ -35,7 +35,7 @@ def _init_modules(self): vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1]) # not using the last maxpool layer - self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1]) + self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1]) #[:-1] drops the last element of the list # Fix the layers before conv3: for layer in range(10): @@ -47,9 +47,10 @@ def _init_modules(self): # not using the last maxpool layer self.RCNN_cls_score = nn.Linear(4096, self.n_classes) - + if self.class_agnostic: - self.RCNN_bbox_pred = nn.Linear(4096, 4) + #important!!!!!!!!! + self.RCNN_bbox_pred = nn.Linear(4096, 4) #nn.Linear is a fully connected layer: 4096 is the input size, 4 is the output size; comparable to dropping the 1000-output part of the final fully connected layer. else: self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes) diff --git a/lib/roi_data_layer/roibatchLoader.py b/lib/roi_data_layer/roibatchLoader.py index 9b6e54ecf..8adcbcdb3 100644 --- a/lib/roi_data_layer/roibatchLoader.py +++ b/lib/roi_data_layer/roibatchLoader.py @@ -50,8 +50,8 @@ def __init__(self, roidb, ratio_list, ratio_index, batch_size, num_classes, trai else: # for ratio cross 1, we make it to be 1. 
target_ratio = 1 - - self.ratio_list_batch[left_idx:(right_idx+1)] = target_ratio + # Assign a plain float: it broadcasts over the slice whatever its length, whereas a batch_size-long tensor fails when the slice is shorter (presumably the final partial batch). + self.ratio_list_batch[left_idx:(right_idx+1)] = float(target_ratio) def __getitem__(self, index):