diff --git a/README.md b/README.md
index f74b0ae8..8c705ac3 100644
--- a/README.md
+++ b/README.md
@@ -1,170 +1,20 @@
-# PointPillars
+For point_pillars installation, refer to https://github.com/nutonomy/second.pytorch.
-Welcome to PointPillars.
+The code has been modified to run on the Argoverse 3D dataset. The changes are mainly in the data-loading code, in point_pillars/second/data/preprocess.py.
-This repo demonstrates how to reproduce the results from
-[_PointPillars: Fast Encoders for Object Detection from Point Clouds_](https://arxiv.org/abs/1812.05784) (to be published at CVPR 2019) on the
-[KITTI dataset](http://www.cvlibs.net/datasets/kitti/) by making the minimum required changes from the preexisting
-open source codebase [SECOND](https://github.com/traveller59/second.pytorch).
+Command to train the final model (run from point_pillars/second/) -
-This is not an official nuTonomy codebase, but it can be used to match the published PointPillars results.
+python ./pytorch/train.py train --config_path=./configs/pointpillars/car/xyres_20_argo_upper.proto --model_dir=./models --device=0 --include_roi=True --dr_area=False --include_road_points=False
-**WARNING: This code is not being actively maintained. This code can be used to reproduce the results in the first version of the paper, https://arxiv.org/abs/1812.05784v1. For an actively maintained repository that can also reproduce PointPillars results on nuScenes, we recommend using [SECOND](https://github.com/traveller59/second.pytorch). We are not the owners of the repository, but we have worked with the author and endorse his code.**
+A pre-trained model can be downloaded from -
+https://drive.google.com/file/d/1ebtmoQSJdfIKVVJ93GSCdeGy-ZGqGXyN/view?usp=sharing
-![Example Results](https://raw.githubusercontent.com/nutonomy/second.pytorch/master/images/pointpillars_kitti_results.png)
+For inference on the test set (run from point_pillars/second) -
-## Getting Started
+python pp_inference.py --config_path=./configs/pointpillars/car/xyres_20_argo_upper.proto --model_dir=./models --device=0 --model_path="path_to_model/voxelnet-xxx.tckpt" --save_path="path_to_save/xxx.pkl" --include_roi=1 --include_road_points=0 --dr_area=0
-This is a fork of [SECOND for KITTI object detection](https://github.com/traveller59/second.pytorch) and the relevant
-subset of the original README is reproduced here.
+Command to get results in AB3DMOT format (run from point_pillars/second) -
-### Code Support
+python get_tracking_result.py --model_path=path_to_model --sv_dir=path_to_AB3DMOT/data/argo/car_3d_det_val/ --set=val (or test)
-ONLY supports python 3.6+, pytorch 0.4.1+. Code has only been tested on Ubuntu 16.04/18.04.
-
-### Install
-
-#### 1. Clone code
-
-```bash
-git clone https://github.com/nutonomy/second.pytorch.git
-```
-
-#### 2. Install Python packages
-
-It is recommend to use the Anaconda package manager.
-
-First, use Anaconda to configure as many packages as possible.
-```bash
-conda create -n pointpillars python=3.7 anaconda
-source activate pointpillars
-conda install shapely pybind11 protobuf scikit-image numba pillow
-conda install pytorch torchvision -c pytorch
-conda install google-sparsehash -c bioconda
-```
-
-Then use pip for the packages missing from Anaconda.
-```bash
-pip install --upgrade pip
-pip install fire tensorboardX
-```
-
-Finally, install SparseConvNet. This is not required for PointPillars, but the general SECOND code base expects this
-to be correctly configured.
-```bash -git clone git@github.com:facebookresearch/SparseConvNet.git -cd SparseConvNet/ -bash build.sh -# NOTE: if bash build.sh fails, try bash develop.sh instead -``` - -Additionally, you may need to install Boost geometry: - -```bash -sudo apt-get install libboost-all-dev -``` - - -#### 3. Setup cuda for numba - -You need to add following environment variables for numba to ~/.bashrc: - -```bash -export NUMBAPRO_CUDA_DRIVER=/usr/lib/x86_64-linux-gnu/libcuda.so -export NUMBAPRO_NVVM=/usr/local/cuda/nvvm/lib64/libnvvm.so -export NUMBAPRO_LIBDEVICE=/usr/local/cuda/nvvm/libdevice -``` - -#### 4. PYTHONPATH - -Add second.pytorch/ to your PYTHONPATH. - -### Prepare dataset - -#### 1. Dataset preparation - -Download KITTI dataset and create some directories first: - -```plain -└── KITTI_DATASET_ROOT - ├── training <-- 7481 train data - | ├── image_2 <-- for visualization - | ├── calib - | ├── label_2 - | ├── velodyne - | └── velodyne_reduced <-- empty directory - └── testing <-- 7580 test data - ├── image_2 <-- for visualization - ├── calib - ├── velodyne - └── velodyne_reduced <-- empty directory -``` - -Note: PointPillar's protos use ```KITTI_DATASET_ROOT=/data/sets/kitti_second/```. - -#### 2. Create kitti infos: - -```bash -python create_data.py create_kitti_info_file --data_path=KITTI_DATASET_ROOT -``` - -#### 3. Create reduced point cloud: - -```bash -python create_data.py create_reduced_point_cloud --data_path=KITTI_DATASET_ROOT -``` - -#### 4. Create groundtruth-database infos: - -```bash -python create_data.py create_groundtruth_database --data_path=KITTI_DATASET_ROOT -``` - -#### 5. Modify config file - -The config file needs to be edited to point to the above datasets: - -```bash -train_input_reader: { - ... - database_sampler { - database_info_path: "/path/to/kitti_dbinfos_train.pkl" - ... - } - kitti_info_path: "/path/to/kitti_infos_train.pkl" - kitti_root_path: "KITTI_DATASET_ROOT" -} -... -eval_input_reader: { - ... - kitti_info_path: "/path/to/kitti_infos_val.pkl" - kitti_root_path: "KITTI_DATASET_ROOT" -} -``` - - -### Train - -```bash -cd ~/second.pytorch/second -python ./pytorch/train.py train --config_path=./configs/pointpillars/car/xyres_16.proto --model_dir=/path/to/model_dir -``` - -* If you want to train a new model, make sure "/path/to/model_dir" doesn't exist. -* If "/path/to/model_dir" does exist, training will be resumed from the last checkpoint. -* Training only supports a single GPU. -* Training uses a batchsize=2 which should fit in memory on most standard GPUs. -* On a single 1080Ti, training xyres_16 requires approximately 20 hours for 160 epochs. - - -### Evaluate - - -```bash -cd ~/second.pytorch/second/ -python pytorch/train.py evaluate --config_path= configs/pointpillars/car/xyres_16.proto --model_dir=/path/to/model_dir -``` - -* Detection result will saved in model_dir/eval_results/step_xxx. -* By default, results are stored as a result.pkl file. To save as official KITTI label format use --pickle_result=False. 
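The inference command above writes detections to the pickle file given by `--save_path`; `second/calc_map.py` (added below) is the consumer of that file. As a minimal sketch of reading it, the key names and the `<seq>_<frame>` format of `image_idx` are inferred from how calc_map.py indexes the loaded object; the path is a placeholder:

```python
import pickle

# Placeholder path: the file written by pp_inference.py via --save_path.
with open("path_to_save/xxx.pkl", "rb") as f:
    preds = pickle.load(f)  # one dict of arrays per frame, as iterated by calc_map.py

frame = preds[0]
# calc_map.py reads these keys: 'image_idx' entries look like "<seq>_<frame>",
# and 'location'/'dimensions'/'rotation_y' are KITTI-style (camera-frame) boxes.
file_id = frame["image_idx"][0]
seq_no, frame_no = file_id.split("_", 1)

keep = frame["score"] > 0.5        # same idea as the --det_thresh filter in calc_map.py
locs = frame["location"][keep]     # (N, 3) box centers
dims = frame["dimensions"][keep]   # (N, 3) l, h, w (see center_to_bbox_2d/3d below)
yaws = frame["rotation_y"][keep]   # (N,) yaw angles
names = frame["name"][keep]        # class labels, e.g. "VEHICLE"
print(seq_no, frame_no, int(keep.sum()), "detections kept")
```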
diff --git a/second/.gitignore b/second/.gitignore
new file mode 100644
index 00000000..30ad8a62
--- /dev/null
+++ b/second/.gitignore
@@ -0,0 +1,23 @@
+*.sh
+report*
+*.ipynb
+*.tckpt
+analysis/
+*.pkl
+models/
+network_input_examples/
+pcd_output/
+ped_models_56/
+road_train_info.trch
+sample_result_ab3dmot_format/
+sample_road_info.trch
+sample_road_val_info.trch
+sample_test/
+sample_test_wroad/
+sample_test_wroi/
+sample_train_roadmap/
+sample_val_roadmap/
+test_roadmap/
+track_analysis/
+sample_val_roadmap/
+test/
diff --git a/second/builder/dataset_builder.py b/second/builder/dataset_builder.py
index bc6d2d4c..8985c174 100644
--- a/second/builder/dataset_builder.py
+++ b/second/builder/dataset_builder.py
@@ -28,10 +28,13 @@ import numpy as np
 from second.builder import dbsampler_builder
 from functools import partial
+import torch
+
 
 def build(input_reader_config,
           model_config,
+          info,
           training,
           voxel_generator,
           target_assigner=None):
@@ -58,8 +61,8 @@ def build(input_reader_config,
     cfg = input_reader_config
     db_sampler_cfg = input_reader_config.database_sampler
     db_sampler = None
-    if len(db_sampler_cfg.sample_groups) > 0:  # enable sample
-        db_sampler = dbsampler_builder.build(db_sampler_cfg)
+    #if len(db_sampler_cfg.sample_groups) > 0:  # enable sample
+    #    db_sampler = dbsampler_builder.build(db_sampler_cfg)
     u_db_sampler_cfg = input_reader_config.unlabeled_database_sampler
     u_db_sampler = None
     if len(u_db_sampler_cfg.sample_groups) > 0:  # enable sample
@@ -68,10 +71,17 @@ def build(input_reader_config,
     # [352, 400]
     feature_map_size = grid_size[:2] // out_size_factor
     feature_map_size = [*feature_map_size, 1][::-1]
-
+    inform = info.copy()
+    inform["road_map"] = None
+
+    root_path = input_reader_config.kitti_root_path
+    index_list = torch.load(input_reader_config.kitti_info_path)
+
+    inform["index_list"] = index_list
+
     prep_func = partial(
         prep_pointcloud,
-        root_path=cfg.kitti_root_path,
+        root_path=root_path,
         class_names=list(cfg.class_names),
         voxel_generator=voxel_generator,
         target_assigner=target_assigner,
@@ -100,9 +110,11 @@ def build(input_reader_config,
         remove_environment=cfg.remove_environment,
         use_group_id=cfg.use_group_id,
         out_size_factor=out_size_factor)
+
     dataset = KittiDataset(
-        info_path=cfg.kitti_info_path,
-        root_path=cfg.kitti_root_path,
+        info_path=inform,
+        root_path=root_path,
+        class_names=list(cfg.class_names),
         num_point_features=num_point_features,
         target_assigner=target_assigner,
         feature_map_size=feature_map_size,
diff --git a/second/calc_map.py b/second/calc_map.py
new file mode 100644
index 00000000..90784d9e
--- /dev/null
+++ b/second/calc_map.py
@@ -0,0 +1,470 @@
+import pickle
+import math  # used by quaternion_to_euler below
+import argoverse
+from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
+from argoverse.map_representation.map_api import ArgoverseMap
+from shapely.geometry import Polygon
+import argparse
+import numpy as np
+import operator
+import torch
+import time
+
+# result path, range
+
+
+parser = argparse.ArgumentParser(description="arg parser")
+
+
+
+#parser.add_argument('--device', type=int, default=1, help='specify the gpu device for training')
+parser.add_argument('--res_path', type=str, help='specify path where result file is stored.')
+parser.add_argument('--range', type=float, help='specify range of the model for mAP evaluation')
+parser.add_argument('--thresh', type=float, help='specify threshold for mAP evaluation')
+parser.add_argument('--det_thresh', type=float, help='specify detection threshold for filtering detections')
+
+
+args, unknown = 
parser.parse_known_args()
+
+#https://github.com/rafaelpadilla/Object-Detection-Metrics/blob/master/lib/Evaluator.py
+def ElevenPointInterpolatedAP(rec, prec):
+    # NOTE: despite the name, this uses 41 interpolation points (np.linspace(0, 1, 41)).
+    # def CalculateAveragePrecision2(rec, prec):
+    mrec = []
+    # mrec.append(0)
+    [mrec.append(e) for e in rec]
+    # mrec.append(1)
+    mpre = []
+    # mpre.append(0)
+    [mpre.append(e) for e in prec]
+    # mpre.append(0)
+    recallValues = np.linspace(0, 1, 41)
+    recallValues = list(recallValues[::-1])
+    rhoInterp = []
+    recallValid = []
+    # For each of the 41 recall values (1.0, 0.975, ..., 0)
+    for r in recallValues:
+        # Obtain all recall values higher or equal than r
+        argGreaterRecalls = np.argwhere(mrec[:] >= r)
+        pmax = 0
+        # If there are recalls above r
+        if argGreaterRecalls.size != 0:
+            pmax = max(mpre[argGreaterRecalls.min():])
+        recallValid.append(r)
+        rhoInterp.append(pmax)
+    # By definition AP = sum(max(precision whose recall is above r)) / 41
+    ap = sum(rhoInterp) / 41
+    # Generating values for the plot
+    rvals = []
+    rvals.append(recallValid[0])
+    [rvals.append(e) for e in recallValid]
+    rvals.append(0)
+    pvals = []
+    pvals.append(0)
+    [pvals.append(e) for e in rhoInterp]
+    pvals.append(0)
+    # rhoInterp = rhoInterp[::-1]
+    cc = []
+    for i in range(len(rvals)):
+        p = (rvals[i], pvals[i - 1])
+        if p not in cc:
+            cc.append(p)
+        p = (rvals[i], pvals[i])
+        if p not in cc:
+            cc.append(p)
+    recallValues = [i[0] for i in cc]
+    rhoInterp = [i[1] for i in cc]
+    return [ap, rhoInterp, recallValues, None]
+
+def CalculateAveragePrecision(rec, prec):
+    mrec = []
+    mrec.append(0)
+    [mrec.append(e) for e in rec]
+    mrec.append(1)
+    mpre = []
+    mpre.append(0)
+    [mpre.append(e) for e in prec]
+    mpre.append(0)
+    for i in range(len(mpre) - 1, 0, -1):
+        mpre[i - 1] = max(mpre[i - 1], mpre[i])
+    ii = []
+    for i in range(len(mrec) - 1):
+        if mrec[1:][i] != mrec[0:-1][i]:
+            ii.append(i + 1)
+    ap = 0
+    for i in ii:
+        ap = ap + np.sum((mrec[i] - mrec[i - 1]) * mpre[i])
+    # return [ap, mpre[1:len(mpre)-1], mrec[1:len(mpre)-1], ii]
+    return [ap, mpre[0:len(mpre) - 1], mrec[0:len(mpre) - 1], ii]
+
+
+
+#https://github.com/udacity/didi-competition/blob/master/tracklets/python/evaluate_tracklets.py
+# 3d iou
+
+
+def iou_3d(box_a, box_b, vol_a, vol_b):
+    """
+    A simplified calculation of 3d bounding box intersection.
+    It is assumed that the bounding box is only rotated
+    around Z axis (yaw) from an axis-aligned box.
+    :param box_a, box_b: obstacle bounding boxes for comparison
+    :return: 3d intersection-over-union (float)
+    """
+    # height (Z) overlap
+    box_a = np.array([box_a[:,0], box_a[:,2], box_a[:,1]])
+    box_b = np.array([box_b[:,0], box_b[:,2], box_b[:,1]])
+
+    min_h_a = np.min(box_a[2])
+    max_h_a = np.max(box_a[2])
+    min_h_b = np.min(box_b[2])
+    max_h_b = np.max(box_b[2])
+    max_of_min = np.max([min_h_a, min_h_b])
+    min_of_max = np.min([max_h_a, max_h_b])
+    z_intersection = np.max([0, min_of_max - max_of_min])
+    if z_intersection == 0:
+        return 0.
+
+    # oriented XY overlap
+    xy_poly_a = Polygon(zip(*box_a[0:2, 0:4]))
+    #print(xy_poly_a)
+    xy_poly_b = Polygon(zip(*box_b[0:2, 0:4]))
+    try:
+        xy_intersection = xy_poly_a.intersection(xy_poly_b).area
+    except:
+        print("ERROR", xy_poly_a, xy_poly_b)
+        xy_intersection = 0.  # degenerate polygons: treat as no overlap instead of leaving xy_intersection undefined
+    if xy_intersection == 0:
+        return 0.
+ + + + vol_intersect = z_intersection * xy_intersection + union = vol_a + vol_b - vol_intersect + iou_val = vol_intersect/union + + return iou_val + + +def quaternion_to_euler(quat): + w,x,y,z = quat + + t0 = +2.0 * (w * x + y * z) + t1 = +1.0 - 2.0 * (x * x + y * y) + roll = math.atan2(t0, t1) + t2 = +2.0 * (w * y - z * x) + t2 = +1.0 if t2 > +1.0 else t2 + t2 = -1.0 if t2 < -1.0 else t2 + pitch = math.asin(t2) + t3 = +2.0 * (w * z + x * y) + t4 = +1.0 - 2.0 * (y * y + z * z) + yaw = math.atan2(t3, t4) + return yaw, pitch, roll + + +def corners_in_roi_kitti(corners): + limit = args.range + ''' + for i in range(4): + if not (-limit<= corners[i][0] <= limit and -limit <= corners[i][1] <= limit) : + p1 = Polygon( [ (-limit,limit), (limit,limit), (limit,-limit), (-limit,-limit) ] ) + p2 = Polygon( [ (corners[0][0],corners[0][1]), (corners[1][0],corners[1][1]), (corners[2][0],corners[2][1]), (corners[3][0],corners[3][1]) ] ) + + if p1.intersection(p2).area >= 0.7*p2.area: + return True + else: + return False + ''' + for i in range(4): + if -limit<=corners[i][0] <= limit and -limit <= corners[i][1] <= limit: + return True + + return False + +def center_to_bbox_2d(location,dimensions,yaw): + crnrs = [] + for i in range(len(location)): + x,y,z = location[i] + l,h,w = dimensions[i] + corners = np.array([ [l/2,w/2],[l/2,-w/2],[-l/2,-w/2],[-l/2,w/2] ]) + + rot_mat = np.array( [[ np.cos(yaw[i]) ,np.sin(yaw[i]) ], + [-np.sin(yaw[i]), np.cos(yaw[i])] ] ) + rot_corners = np.dot(rot_mat,corners.T).T + rot_corners = rot_corners + [x,z] + crnrs.append(rot_corners) + return crnrs + +def center_to_bbox_3d(location,dimensions,yaw): + crnrs = [] + for i in range(len(location)): + x,y,z = location[i] + l,h,w = dimensions[i] + corners = np.array([ [-l/2,0,w/2],[-l/2,0,-w/2],[l/2,0,-w/2],[l/2,0,w/2], + [-l/2,-h,w/2],[-l/2,-h,-w/2],[l/2,-h,-w/2],[l/2,-h,w/2], + ]) + + rot_mat = np.array( [[ np.cos(yaw[i]) , 0, np.sin(yaw[i]) ], + [ 0 , 1, 0 ], + [-np.sin(yaw[i]) ,0, np.cos(yaw[i])] ] + ) + rot_corners = np.dot(rot_mat,corners.T).T + rot_corners = rot_corners + [x,y,z] + crnrs.append(rot_corners) + return crnrs + + +def mAP(): + # comment corners_in_roi_kitti for normal evaluation + # score threshold, corners_in_kitti_roi, gt_roi + + f = open(args.res_path,"rb") + result = pickle.load(f) + + root_dir = "./../../argodataset/argoverse-tracking/val/" + argoverse_loader = ArgoverseTrackingLoader(root_dir) + am = ArgoverseMap() + + + preds = result + threshold = 0.5 + out,out_3d = [],[] + cnt = 0 + + file_cnt = 0 + lcc = 0 + print(len(preds)) + for i in range(len(preds)): + if file_cnt%50 == 0: + print(file_cnt) + file_cnt += 1 + if len(preds[i]['image_idx']) == 0: + continue + + file = preds[i]['image_idx'][0] + first_del = file.find('_') + seq_no = int(file[:first_del]) + frame_no = int( file[ first_del + 1: ] ) + argoverse_data = argoverse_loader[seq_no] + objects = argoverse_data.get_label_object(frame_no) + good_result = (preds[i]['score'] > args.det_thresh) + ps,pind,pdims = preds[i]['score'][good_result], preds[i]['name'][good_result],preds[i]['dimensions'][good_result] + + city_to_egovehicle_se3 = argoverse_data.get_pose(frame_no) + locs,rots = preds[i]['location'][good_result], preds[i]['rotation_y'][good_result] + city_name = argoverse_data.city_name + ''' + for lci in range(len(locs)): + kitti_loc = locs[lci] + argo_loc = [kitti_loc[2],-kitti_loc[0],1.78-kitti_loc[1]] + pts = city_to_egovehicle_se3.transform_point_cloud(np.array([argo_loc]))[0][:2] + cl_obj,cl_conf,clines = 
am.get_nearest_centerline(pts,city_name)
+            if not cl_obj.is_intersection:
+                vec,conf = am.get_lane_direction(pts,city_name)
+
+                lidar_unit_vec = city_to_egovehicle_se3.inverse_transform_point_cloud(np.array([[10*vec[0],10*vec[1],15]]))[0][:2]
+                lidar_orig = city_to_egovehicle_se3.inverse_transform_point_cloud( np.array([[0.,0.,15.]]) )[0][:2]
+                lidar_vec = lidar_unit_vec-lidar_orig
+                #print(lidar_vec)
+                lidar_angle = np.arctan2(lidar_vec[1],lidar_vec[0])
+
+                lidar_angle = -np.pi/2 - lidar_angle
+                if lidar_angle > np.pi:
+                    lidar_angle -= 2*np.pi
+                elif lidar_angle < -np.pi:
+                    lidar_angle += 2*np.pi
+
+                if rots[lci] > np.pi:
+                    rots[lci] -= 2*np.pi
+                elif rots[lci] < -np.pi:
+                    rots[lci] += 2*np.pi
+
+
+                if conf>0.5 and np.abs(lidar_angle-rots[lci]) > 0.2 and not (np.pi-0.2 <= np.abs(lidar_angle-rots[lci]) <= np.pi+0.2 ) :
+                    lcc += 1
+                    #print(conf, lidar_angle, rots[lci])
+                    rots[lci] = lidar_angle
+
+
+
+        '''
+
+        pc = center_to_bbox_2d(preds[i]['location'][good_result],preds[i]['dimensions'][good_result],rots)
+        pc_3d = center_to_bbox_3d(preds[i]['location'][good_result],preds[i]['dimensions'][good_result],rots)
+        #print(preds[i]['location'])
+        mark,mark_3d = {},{}
+        categ = "VEHICLE"
+
+
+        '''
+        gt_roi = []
+        gt_locss = []
+        for obj in objects:
+            if obj.label_class == categ:
+                gt_roi.append(obj)
+                corners= obj.as_3d_bbox()
+                centers = np.mean(corners,axis=0)
+                gt_locss.append(centers)
+
+        roi_locs = city_to_egovehicle_se3.transform_point_cloud(np.array(gt_locss)) # put into city coords
+        roi_locs_flag = am.remove_non_roi_points(roi_locs, city_name) # remove non-driveable region
+        gt_roi = np.array(gt_roi)
+        gt_roi = gt_roi[roi_locs_flag]
+        '''
+
+        for obj in objects:
+            if obj.label_class == categ:
+                corners = obj.as_2d_bbox()
+                corners = corners[:,[1,0]]
+                corners[:,[0]] = -corners[:,[0]]
+                corners[[2,3],:] = corners[[3,2],:]
+
+                corners_3d = obj.as_3d_bbox()
+                corners_3d = corners_3d[:,[1,2,0]]
+                corners_3d[:,0] = -corners_3d[:,0]
+                corners_3d[:,1] = 1.73 - corners_3d[:,1]
+                corners_3d = corners_3d[[2,3,7,6,1,0,4,5],:]
+                vol_gt = obj.length*obj.height*obj.width
+                if corners_in_roi_kitti(corners):
+                    cnt += 1
+                    p1 = Polygon( [ (corners[0][0],corners[0][1]), (corners[1][0],corners[1][1]), (corners[2][0],corners[2][1]), (corners[3][0],corners[3][1]) ] )
+                    mx,mx_3d = 0,0
+                    temp_crnr,temp_crnr_3d = np.zeros((4,2)), np.zeros((8,3))
+                    temp_scr,temp_scr_3d = 0,0
+                    ind,ind_3d = -1,-1
+
+                    for num,qnt in enumerate(zip(pc,pc_3d,ps,pind,pdims)):
+                        crnr,crnr_3d,score,c_ind,dims = qnt
+                        pvol = dims[0]*dims[1]*dims[2]
+                        if c_ind == categ:
+                            p2 = Polygon( [ (crnr[0][0],crnr[0][1]), (crnr[1][0],crnr[1][1]), (crnr[2][0],crnr[2][1]), (crnr[3][0],crnr[3][1]) ] )
+                            iou = p1.intersection(p2).area / p1.union(p2).area
+                            iou_td = iou_3d(corners_3d,crnr_3d,vol_gt,pvol)
+
+                            if iou > mx:
+                                mx = iou
+                                temp_crnr[:] = crnr
+                                temp_scr = score
+                                ind = num
+                                #print(iou,p1,p2,crnr_3d,corners_3d)
+
+                            if iou_td > mx_3d:
+                                mx_3d = iou_td
+                                temp_crnr_3d[:] = crnr_3d
+                                temp_scr_3d = score
+                                ind_3d = num
+
+                    if mx >= threshold and ind not in mark.keys():
+                        out.append((temp_scr,True))
+                        mark[ind] = 1
+
+
+                    if mx_3d >= threshold and ind_3d not in mark_3d.keys():
+                        out_3d.append((temp_scr_3d,True))
+                        mark_3d[ind_3d] = 1
+                        #print(mx_3d)
+
+        for sn in range(len(pind)):
+            if pind[sn] == categ and sn not in mark.keys():
+                out.append((ps[sn],False))
+            if pind[sn] == categ and sn not in mark_3d.keys():
+                out_3d.append((ps[sn],False))
+    prec,prec_3d = 0,0
+    for elems in out:
+        prec += elems[1]
+    for elems in out_3d:
+        prec_3d += elems[1]
+
+
+    
print("preds - ",len(out),"true predictions - ", prec , " true 3d prediction - ", prec_3d, "ground truth labels - ",cnt) + print(lcc) + + out.sort(key = operator.itemgetter(0),reverse = True) + out_3d.sort(key=operator.itemgetter(0),reverse=True) + print(len(out),len(out_3d)) + + precision, precision_3d = [], [] + recall, recall_3d = [], [] + + tp = 0 + for i in range(len(out)): + if out[i][1]: + tp += 1 + + precision.append(tp/(i+1)) + recall.append(tp/cnt) + + tp3d = 0 + + for j in range(len(out_3d)): + if out_3d[j][1]: + tp3d += 1 + + precision_3d.append(tp3d/(j+1)) + recall_3d.append(tp3d/cnt) + + ''' + plt.plot(recall,precision) + plt.show() + plt.plot(recall_3d,precision_3d) + plt.show() + ''' + + t0 = time.time() + area = np.max(precision) + print(recall[0]) + #print(out[:10]) + find = 0.025 + for i in range(len(recall)): + if recall[i] >= find: + #print(recall[i],precision[i]) + area += np.max(precision[i:]) + find += 0.025 + + + t1 = time.time() + + #auth_ap_every = CalculateAveragePrecision(recall,precision) + + t2 = time.time() + + auth_ap_eleven = ElevenPointInterpolatedAP(recall,precision) + + t3 = time.time() + print(t1-t0,t2-t1,t3-t2) + + print("Average Precision bev- ", area/41) + #print("every point interpolation - ",auth_ap_every[0]) + print("41 point interpolation - ", auth_ap_eleven[0]) + + find3d = 0.025 + area3d = np.max(precision_3d) + + for i in range(len(recall_3d)): + if recall_3d[i] >= find3d: + area3d += np.max(precision_3d[i:]) + find3d += 0.025 + + #auth_ap_every_3d = CalculateAveragePrecision(recall_3d,precision_3d) + auth_ap_eleven_3d = ElevenPointInterpolatedAP(recall_3d,precision_3d) + + print("Average Precision 3d- ", area3d/41) + #print("every point interpolation - ",auth_ap_every_3d[0]) + print("41 point interpolation - ", auth_ap_eleven_3d[0]) + + name = args.res_path.replace("pkl","txt") + + + with open(name,"a+") as f: + f.write("preds - " + str(len(out)) + " true predictions - " + str(prec) + " true 3d prediction - " + str(prec_3d) + " ground truth labels - " + str(cnt) + "\n") + f.write("threshold - " + str(args.thresh) + "\n") + + f.write("bev AP - " + str(area/41) + "\n") + #f.write("every point interpolation - " + str(auth_ap_every[0]) + "\n") + f.write("41 point interpolation - " + str(auth_ap_eleven[0]) + "\n" ) + + f.write("3d AP - " + str(area3d/41) + "\n") + #f.write("every point interpolation - " + str(auth_ap_every_3d[0]) + "\n" ) + f.write("41 point interpolation - " + str(auth_ap_eleven_3d[0]) + "\n" ) + + +if __name__ == '__main__': + mAP() + diff --git a/second/configs/pointpillars/car/xyres_12_argo_48.proto b/second/configs/pointpillars/car/xyres_12_argo_48.proto new file mode 100644 index 00000000..ce8fb262 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_12_argo_48.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-48, -48, -3, 48, 48, 1] + voxel_size : [0.12, 0.12, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: 
{ + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-48, -48, -3, 48, 48, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.24, 0.24, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-47.88, -47.88, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 16000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 16000 + shuffle_points: false + 
num_workers: 3
+  anchor_area_threshold: 1
+  remove_environment: false
+  kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/val.trch"
+  kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/"
+}
diff --git a/second/configs/pointpillars/car/xyres_16.proto b/second/configs/pointpillars/car/xyres_16.proto
index d2b7bdc1..74be6574 100644
--- a/second/configs/pointpillars/car/xyres_16.proto
+++ b/second/configs/pointpillars/car/xyres_16.proto
@@ -1,7 +1,7 @@
 model: {
   second: {
     voxel_generator {
-      point_cloud_range : [0, -39.68, -3, 69.12, 39.68, 1]
+      point_cloud_range : [-96, -96, -3, 96, 96, 1]
       voxel_size : [0.16, 0.16, 4]
       max_number_of_points_per_voxel : 100
     }
@@ -55,7 +55,7 @@ model: {
     loss_norm_type: NormByNumPositives
     # Postprocess
-    post_center_limit_range: [0, -39.68, -5, 69.12, 39.68, 5]
+    post_center_limit_range: [-96, -96, -3, 96, 96, 1]
     use_rotate_nms: false
     use_multi_class_nms: false
     nms_pre_max_size: 1000
@@ -64,8 +64,8 @@ model: {
     nms_iou_threshold: 0.5
     use_bev: false
-    num_point_features: 4
-    without_reflectivity: false
+    num_point_features: 3
+    without_reflectivity: true
     box_coder: {
       ground_box3d_coder: {
         linear_dim: false
@@ -75,9 +75,9 @@ model: {
     target_assigner: {
       anchor_generators: {
         anchor_generator_stride: {
-          sizes: [1.6, 3.9, 1.56] # wlh
+          sizes: [1.67, 3.63, 1.77] # wlh
           strides: [0.32, 0.32, 0.0] # if generate only 1 z_center, z_stride will be ignored
-          offsets: [0.16, -39.52, -1.78] # origin_offset + strides / 2
+          offsets: [-95.84, -95.84, -1.78] # origin_offset + strides / 2
           rotations: [0, 1.57] # 0, pi/2
           matched_threshold : 0.6
           unmatched_threshold : 0.45
@@ -97,9 +97,9 @@ model: {
 train_input_reader: {
   record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord"
-  class_names: ["Car"]
+  class_names: ["VEHICLE"]
   max_num_epochs : 160
-  batch_size: 2
+  batch_size: 1
   prefetch_size : 25
   max_number_of_voxels: 12000
   shuffle_points: true
@@ -117,14 +117,14 @@ train_input_reader: {
     database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl"
     sample_groups {
       name_to_max_num {
-        key: "Car"
+        key: "VEHICLE"
         value: 15
       }
     }
     database_prep_steps {
       filter_by_min_num_points {
        min_num_point_pairs {
-          key: "Car"
+          key: "VEHICLE"
          value: 5
        }
      }
@@ -150,7 +150,7 @@ train_config: {
      learning_rate: {
        exponential_decay_learning_rate: {
          initial_learning_rate: 0.0002
-          decay_steps: 27840 # 1856 steps per epoch * 15 epochs
+          decay_steps: 2000 # was 27840 (1856 steps per epoch * 15 epochs)
          decay_factor: 0.8
          staircase: true
        }
@@ -162,8 +162,8 @@ train_config: {
  }
  inter_op_parallelism_threads: 4
  intra_op_parallelism_threads: 4
-  steps: 296960 # 1856 steps per epoch * 160 epochs
-  steps_per_eval: 9280 # 1856 steps per epoch * 5 epochs
+  steps: 5000 # was 296960 (1856 steps per epoch * 160 epochs)
+  steps_per_eval: 2500 # was 9280 (1856 steps per epoch * 5 epochs)
  save_checkpoints_secs : 1800 # half hour
  save_summary_steps : 10
  enable_mixed_precision: false
@@ -173,8 +173,8 @@ train_config: {
 eval_input_reader: {
  record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord"
-  class_names: ["Car"]
-  batch_size: 2
+  class_names: ["VEHICLE"]
+  batch_size: 1
  max_num_epochs : 160
  prefetch_size : 25
  max_number_of_voxels: 12000
diff --git a/second/configs/pointpillars/car/xyres_16_argo_30.proto b/second/configs/pointpillars/car/xyres_16_argo_30.proto
new file mode 100644
index 00000000..7fd6b19d
--- /dev/null
+++ b/second/configs/pointpillars/car/xyres_16_argo_30.proto
@@ -0,0 +1,189 @@
+model: {
+  second: {
+    voxel_generator {
+      point_cloud_range : [-32, -32, -3, 32, 32, 1]
+      voxel_size : 
[0.16, 0.16, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-32, -32, -3, 32, 32, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.32, 0.32, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-31.84, -31.84, -1.78] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 10 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/sample.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/sample/" + +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 2500 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + 
weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 10000 # 1856 steps per epoch * 160 epochs + steps_per_eval:10000 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 10 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/sample_val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/sample/" + +} diff --git a/second/configs/pointpillars/car/xyres_16_argo_48.proto b/second/configs/pointpillars/car/xyres_16_argo_48.proto new file mode 100644 index 00000000..a2c7fd48 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_16_argo_48.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-48, -48, -3, 48, 48, 1] + voxel_size : [0.16, 0.16, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-48, -48, -3, 48, 48, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.32, 0.32, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-47.84, -47.84, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: 
[-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" +} diff --git a/second/configs/pointpillars/car/xyres_16_argo_64.proto b/second/configs/pointpillars/car/xyres_16_argo_64.proto new file mode 100644 index 00000000..3670cb32 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_16_argo_64.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-64, -64, -3, 64, 64, 1] + voxel_size : [0.16, 0.16, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # 
Postprocess + post_center_limit_range: [-64, -64, -3, 64, 64, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.32, 0.32, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-63.84, -63.84, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path:"/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" +} diff --git a/second/configs/pointpillars/car/xyres_16_argo_80.proto b/second/configs/pointpillars/car/xyres_16_argo_80.proto new file mode 100644 index 00000000..c61b1da3 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_16_argo_80.proto @@ 
-0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-80, -80, -3, 80, 80, 1] + voxel_size : [0.16, 0.16, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-80, -80, -3, 80, 80, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.32, 0.32, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-79.84, -79.84, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + 
initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path:"/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" +} diff --git a/second/configs/pointpillars/car/xyres_16_argo_88.proto b/second/configs/pointpillars/car/xyres_16_argo_88.proto new file mode 100644 index 00000000..85bbded4 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_16_argo_88.proto @@ -0,0 +1,190 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-89.6, -89.6, -3, 89.6, 89.6, 1] + voxel_size : [0.16, 0.16, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-89.6, -89.6, -3, 89.6, 89.6, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.32, 0.32, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-89.44, -89.44, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 160 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 
12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" + +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 160 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" + + +} diff --git a/second/configs/pointpillars/car/xyres_16_argo_96.proto b/second/configs/pointpillars/car/xyres_16_argo_96.proto new file mode 100644 index 00000000..6e89c20c --- /dev/null +++ b/second/configs/pointpillars/car/xyres_16_argo_96.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-96, -96, -3, 96, 96, 1] + voxel_size : [0.16, 0.16, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + 
direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-96, -96, -3, 96, 96, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.32, 0.32, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-95.84, -95.84, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path:"/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" +} diff --git a/second/configs/pointpillars/car/xyres_16_argo_upper.proto 
b/second/configs/pointpillars/car/xyres_16_argo_upper.proto new file mode 100644 index 00000000..7733f04d --- /dev/null +++ b/second/configs/pointpillars/car/xyres_16_argo_upper.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-112, -112, -3, 112, 112, 1] + voxel_size : [0.16, 0.16, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-112, -112, -3, 112, 112, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.32, 0.32, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-111.84, -111.84, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + 
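+  # The kitti_* field names are inherited from the original reader; on this
+  # branch they point at Argoverse info files (*.trch) and the
+  # argoverse-tracking split roots.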
kitti_root_path:"/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" +} diff --git a/second/configs/pointpillars/car/xyres_20_argo_48.proto b/second/configs/pointpillars/car/xyres_20_argo_48.proto new file mode 100644 index 00000000..ed0c41d9 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_20_argo_48.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-48, -48, -3, 48, 48, 1] + voxel_size : [0.2, 0.2, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-48, -48, -3, 48, 48, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.4, 0.4, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-47.80, -47.80, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + 
record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" +} diff --git a/second/configs/pointpillars/car/xyres_20_argo_upper.proto b/second/configs/pointpillars/car/xyres_20_argo_upper.proto new file mode 100644 index 00000000..c4daf813 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_20_argo_upper.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-112, -112, -3, 112, 112, 1] + voxel_size : [0.2, 0.2, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + 
localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-112, -112, -3, 112, 112, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.4, 0.4, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-111.80, -111.80, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: 
"/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" +} diff --git a/second/configs/pointpillars/car/xyres_24_argo_48.proto b/second/configs/pointpillars/car/xyres_24_argo_48.proto new file mode 100644 index 00000000..6da69357 --- /dev/null +++ b/second/configs/pointpillars/car/xyres_24_argo_48.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-48, -48, -3, 48, 48, 1] + voxel_size : [0.24, 0.24, 4] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-48, -48, -3, 48, 48, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.48, 0.48, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-47.76, -47.76, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + 
global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" +} diff --git a/second/configs/pointpillars/car/xyres_28_argo_48.proto b/second/configs/pointpillars/car/xyres_28_argo_48.proto new file mode 100644 index 00000000..6b76364d --- /dev/null +++ b/second/configs/pointpillars/car/xyres_28_argo_48.proto @@ -0,0 +1,187 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-48.16, -48.16, -3, 48.16, 48.16, 1] + voxel_size : [0.28, 0.28, 4] + max_number_of_points_per_voxel : 200 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [2, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-48.16, -48.16, -3, 48.16, 48.16, 1] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + anchor_generators: { + anchor_generator_stride: { + sizes: [1.67, 3.63, 1.77] # wlh + strides: [0.56, 0.56, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-47.88, -47.88, -1.73] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + 
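+          # Anchor grid convention: stride = 2 * voxel_size (one anchor per
+          # 2x2 pillar block after the stride-2 RPN stage) and
+          # offset = range_min + stride / 2, e.g. -48.16 + 0.56 / 2 = -47.88.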
matched_threshold : 0.6 + unmatched_threshold : 0.45 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["VEHICLE"] + max_num_epochs : 200 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "VEHICLE" + value: 15 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "VEHICLE" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 40605 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 433120 # 1856 steps per epoch * 160 epochs + steps_per_eval: 13535 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: ["VEHICLE"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" +} diff --git a/second/configs/pointpillars/pedestrian/xyres_16_argo_56.proto b/second/configs/pointpillars/pedestrian/xyres_16_argo_56.proto new file mode 100644 index 00000000..a778a0a6 --- /dev/null +++ b/second/configs/pointpillars/pedestrian/xyres_16_argo_56.proto @@ -0,0 +1,190 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-56, -56, -2.5, 56,56, 0.5] + voxel_size : [0.16, 0.16, 3] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [1, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + 
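+      # Identical loss setup to the vehicle configs; the pedestrian model
+      # instead differs in its first RPN stride (1 rather than 2), anchor size
+      # (0.71 x 0.71 x 1.81 wlh) and match thresholds (0.5 / 0.35).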
weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-56, -56, -2.5, 56, 56, 0.5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + + anchor_generators: { + anchor_generator_stride: { + sizes: [0.71, 0.71, 1.81] # wlh + strides: [0.16, 0.16, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-55.92, -55.92, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["PEDESTRIAN"] + max_num_epochs : 160 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: "PEDESTRIAN" + value: 8 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "PEDESTRIAN" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/ped_train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" + +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 30570 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 326080 # 1856 steps per epoch * 160 epochs + steps_per_eval: 10190 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: [ 
"PEDESTRIAN"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/ped_val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" + +} diff --git a/second/configs/pointpillars/pedestrian/xyres_16_argo_64.proto b/second/configs/pointpillars/pedestrian/xyres_16_argo_64.proto new file mode 100644 index 00000000..4ccd5374 --- /dev/null +++ b/second/configs/pointpillars/pedestrian/xyres_16_argo_64.proto @@ -0,0 +1,190 @@ +model: { + second: { + voxel_generator { + point_cloud_range : [-48, -48, -2.5, 48,48, 0.5] + voxel_size : [0.16, 0.16, 3] + max_number_of_points_per_voxel : 100 + } + num_class: 1 + voxel_feature_extractor: { + module_class_name: "PillarFeatureNet" + num_filters: [64] + with_distance: false + } + middle_feature_extractor: { + module_class_name: "PointPillarsScatter" + } + rpn: { + module_class_name: "RPN" + layer_nums: [3, 5, 5] + layer_strides: [1, 2, 2] + num_filters: [64, 128, 256] + upsample_strides: [1, 2, 4] + num_upsample_filters: [128, 128, 128] + use_groupnorm: false + num_groups: 32 + } + loss: { + classification_loss: { + weighted_sigmoid_focal: { + alpha: 0.25 + gamma: 2.0 + anchorwise_output: true + } + } + localization_loss: { + weighted_smooth_l1: { + sigma: 3.0 + code_weight: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + } + } + classification_weight: 1.0 + localization_weight: 2.0 + } + # Outputs + use_sigmoid_score: true + encode_background_as_zeros: true + encode_rad_error_by_sin: true + + use_direction_classifier: true + direction_loss_weight: 0.2 + use_aux_classifier: false + # Loss + pos_class_weight: 1.0 + neg_class_weight: 1.0 + + loss_norm_type: NormByNumPositives + # Postprocess + post_center_limit_range: [-48, -48, -2.5, 48, 48, 0.5] + use_rotate_nms: false + use_multi_class_nms: false + nms_pre_max_size: 1000 + nms_post_max_size: 300 + nms_score_threshold: 0.05 + nms_iou_threshold: 0.5 + + use_bev: false + num_point_features: 3 + without_reflectivity: true + box_coder: { + ground_box3d_coder: { + linear_dim: false + encode_angle_vector: false + } + } + target_assigner: { + + anchor_generators: { + anchor_generator_stride: { + sizes: [0.71, 0.71, 1.81] # wlh + strides: [0.16, 0.16, 0.0] # if generate only 1 z_center, z_stride will be ignored + offsets: [-47.92, -47.92, -1.465] # origin_offset + strides / 2 + rotations: [0, 1.57] # 0, pi/2 + matched_threshold : 0.5 + unmatched_threshold : 0.35 + } + } + + sample_positive_fraction : -1 + sample_size : 512 + region_similarity_calculator: { + nearest_iou_similarity: { + } + } + } + } +} + + +train_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_train.tfrecord" + class_names: ["PEDESTRIAN"] + max_num_epochs : 160 + batch_size: 1 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: true + num_workers: 2 + groundtruth_localization_noise_std: [0.25, 0.25, 0.25] + groundtruth_rotation_uniform_noise: [-0.15707963267, 0.15707963267] + global_rotation_uniform_noise: [-0.78539816, 0.78539816] + global_scaling_uniform_noise: [0.95, 1.05] + global_random_rotation_range_per_object: [0, 0] + anchor_area_threshold: 1 + remove_points_after_sample: false + groundtruth_points_drop_percentage: 0.0 + groundtruth_drop_max_keep_points: 15 + database_sampler { + database_info_path: "/data/sets/kitti_second/kitti_dbinfos_train.pkl" + sample_groups { + name_to_max_num { + key: 
"PEDESTRIAN" + value: 8 + } + } + database_prep_steps { + filter_by_min_num_points { + min_num_point_pairs { + key: "PEDESTRIAN" + value: 5 + } + } + } + database_prep_steps { + filter_by_difficulty { + removed_difficulties: [-1] + } + } + global_random_rotation_range_per_object: [0, 0] + rate: 1.0 + } + + remove_unknown_examples: false + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/ped_train.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/train/" + +} + +train_config: { + optimizer: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate: { + initial_learning_rate: 0.0002 + decay_steps: 30570 # 1856 steps per epoch * 15 epochs + decay_factor: 0.8 + staircase: true + } + } + weight_decay: 0.0001 + } + use_moving_average: false + + } + inter_op_parallelism_threads: 4 + intra_op_parallelism_threads: 4 + steps: 326080 # 1856 steps per epoch * 160 epochs + steps_per_eval: 10190 # 1856 steps per epoch * 5 epochs + save_checkpoints_secs : 1800 # half hour + save_summary_steps : 10 + enable_mixed_precision: false + loss_scale_factor : 512.0 + clear_metrics_every_epoch: false +} + +eval_input_reader: { + record_file_path: "/data/sets/kitti_second/kitti_val.tfrecord" + class_names: [ "PEDESTRIAN"] + batch_size: 1 + max_num_epochs : 200 + prefetch_size : 25 + max_number_of_voxels: 12000 + shuffle_points: false + num_workers: 3 + anchor_area_threshold: 1 + remove_environment: false + kitti_info_path: "/nfs/151/gpu/gunji/point_pillars/second/ped_val.trch" + kitti_root_path: "/nfs/151/gpu/gunji/argodataset/argoverse-tracking/val/" + +} diff --git a/second/core/non_max_suppression/nms_gpu.py b/second/core/non_max_suppression/nms_gpu.py index b9600e59..7b574fcb 100644 --- a/second/core/non_max_suppression/nms_gpu.py +++ b/second/core/non_max_suppression/nms_gpu.py @@ -1,653 +1,655 @@ -import math -from pathlib import Path - -import numba -import numpy as np -from numba import cuda - -from second.utils.buildtools.pybind11_build import load_pb11 - -try: - from second.core.non_max_suppression.nms import non_max_suppression -except: - current_dir = Path(__file__).resolve().parents[0] - load_pb11( - ["../cc/nms/nms_kernel.cu.cc", "../cc/nms/nms.cc"], - current_dir / "nms.so", - current_dir, - cuda=True) - from second.core.non_max_suppression.nms import non_max_suppression - - -@cuda.jit('(float32[:], float32[:])', device=True, inline=True) -def iou_device(a, b): - left = max(a[0], b[0]) - right = min(a[2], b[2]) - top = max(a[1], b[1]) - bottom = min(a[3], b[3]) - width = max(right - left + 1, 0.) - height = max(bottom - top + 1, 0.) 
- interS = width * height - Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1) - Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1) - return interS / (Sa + Sb - interS) - - -@cuda.jit('(int64, float32, float32[:, :], uint64[:])') -def nms_kernel_v2(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): - threadsPerBlock = 8 * 8 - row_start = cuda.blockIdx.y - col_start = cuda.blockIdx.x - tx = cuda.threadIdx.x - row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) - col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) - block_boxes = cuda.shared.array( - shape=(threadsPerBlock, 5), dtype=numba.float32) - dev_box_idx = threadsPerBlock * col_start + tx - if (tx < col_size): - block_boxes[tx, 0] = dev_boxes[dev_box_idx, 0] - block_boxes[tx, 1] = dev_boxes[dev_box_idx, 1] - block_boxes[tx, 2] = dev_boxes[dev_box_idx, 2] - block_boxes[tx, 3] = dev_boxes[dev_box_idx, 3] - block_boxes[tx, 4] = dev_boxes[dev_box_idx, 4] - cuda.syncthreads() - if (cuda.threadIdx.x < row_size): - cur_box_idx = threadsPerBlock * row_start + cuda.threadIdx.x - # cur_box = dev_boxes + cur_box_idx * 5; - i = 0 - t = 0 - start = 0 - if (row_start == col_start): - start = tx + 1 - for i in range(start, col_size): - if (iou_device(dev_boxes[cur_box_idx], block_boxes[i]) > - nms_overlap_thresh): - t |= 1 << i - col_blocks = ((n_boxes) // (threadsPerBlock) + ( - (n_boxes) % (threadsPerBlock) > 0)) - dev_mask[cur_box_idx * col_blocks + col_start] = t - - -@cuda.jit('(int64, float32, float32[:], uint64[:])') -def nms_kernel(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): - threadsPerBlock = 8 * 8 - row_start = cuda.blockIdx.y - col_start = cuda.blockIdx.x - tx = cuda.threadIdx.x - row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) - col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) - block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) - dev_box_idx = threadsPerBlock * col_start + tx - if (tx < col_size): - block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] - block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] - block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] - block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] - block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] - cuda.syncthreads() - if (tx < row_size): - cur_box_idx = threadsPerBlock * row_start + tx - # cur_box = dev_boxes + cur_box_idx * 5; - t = 0 - start = 0 - if (row_start == col_start): - start = tx + 1 - for i in range(start, col_size): - iou = iou_device(dev_boxes[cur_box_idx * 5:cur_box_idx * 5 + 4], - block_boxes[i * 5:i * 5 + 4]) - if (iou > nms_overlap_thresh): - t |= 1 << i - col_blocks = ((n_boxes) // (threadsPerBlock) + ( - (n_boxes) % (threadsPerBlock) > 0)) - dev_mask[cur_box_idx * col_blocks + col_start] = t - - -@numba.jit(nopython=True) -def div_up(m, n): - return m // n + (m % n > 0) - - -@numba.jit(nopython=True) -def nms_postprocess(keep_out, mask_host, boxes_num): - threadsPerBlock = 8 * 8 - col_blocks = div_up(boxes_num, threadsPerBlock) - remv = np.zeros((col_blocks), dtype=np.uint64) - num_to_keep = 0 - for i in range(boxes_num): - nblock = i // threadsPerBlock - inblock = i % threadsPerBlock - mask = np.array(1 << inblock, dtype=np.uint64) - if not (remv[nblock] & mask): - keep_out[num_to_keep] = i - num_to_keep += 1 - # unsigned long long *p = &mask_host[0] + i * col_blocks; - for j in range(nblock, col_blocks): - remv[j] |= mask_host[i * col_blocks + j] - # remv[j] |= p[j]; - return num_to_keep - - -def nms_gpu(dets, nms_overlap_thresh, 
device_id=0): - """nms in gpu. - - Args: - dets ([type]): [description] - nms_overlap_thresh ([type]): [description] - device_id ([type], optional): Defaults to 0. [description] - - Returns: - [type]: [description] - """ - - boxes_num = dets.shape[0] - keep_out = np.zeros([boxes_num], dtype=np.int32) - scores = dets[:, 4] - order = scores.argsort()[::-1].astype(np.int32) - boxes_host = dets[order, :] - - threadsPerBlock = 8 * 8 - col_blocks = div_up(boxes_num, threadsPerBlock) - cuda.select_device(device_id) - mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64) - blockspergrid = (div_up(boxes_num, threadsPerBlock), - div_up(boxes_num, threadsPerBlock)) - stream = cuda.stream() - with stream.auto_synchronize(): - boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream) - mask_dev = cuda.to_device(mask_host, stream) - nms_kernel[blockspergrid, threadsPerBlock, stream]( - boxes_num, nms_overlap_thresh, boxes_dev, mask_dev) - mask_dev.copy_to_host(mask_host, stream=stream) - # stream.synchronize() - num_out = nms_postprocess(keep_out, mask_host, boxes_num) - keep = keep_out[:num_out] - return list(order[keep]) - - -def nms_gpu_cc(dets, nms_overlap_thresh, device_id=0): - boxes_num = dets.shape[0] - keep = np.zeros(boxes_num, dtype=np.int32) - scores = dets[:, 4] - order = scores.argsort()[::-1].astype(np.int32) - sorted_dets = dets[order, :] - num_out = non_max_suppression(sorted_dets, keep, nms_overlap_thresh, - device_id) - keep = keep[:num_out] - return list(order[keep]) - - -@cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True) -def trangle_area(a, b, c): - return ( - (a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0])) / 2.0 - - -@cuda.jit('(float32[:], int32)', device=True, inline=True) -def area(int_pts, num_of_inter): - area_val = 0.0 - for i in range(num_of_inter - 2): - area_val += abs( - trangle_area(int_pts[:2], int_pts[2 * i + 2:2 * i + 4], - int_pts[2 * i + 4:2 * i + 6])) - return area_val - - -@cuda.jit('(float32[:], int32)', device=True, inline=True) -def sort_vertex_in_convex_polygon(int_pts, num_of_inter): - if num_of_inter > 0: - center = cuda.local.array((2, ), dtype=numba.float32) - center[:] = 0.0 - for i in range(num_of_inter): - center[0] += int_pts[2 * i] - center[1] += int_pts[2 * i + 1] - center[0] /= num_of_inter - center[1] /= num_of_inter - v = cuda.local.array((2, ), dtype=numba.float32) - vs = cuda.local.array((16, ), dtype=numba.float32) - for i in range(num_of_inter): - v[0] = int_pts[2 * i] - center[0] - v[1] = int_pts[2 * i + 1] - center[1] - d = math.sqrt(v[0] * v[0] + v[1] * v[1]) - v[0] = v[0] / d - v[1] = v[1] / d - if v[1] < 0: - v[0] = -2 - v[0] - vs[i] = v[0] - j = 0 - temp = 0 - for i in range(1, num_of_inter): - if vs[i - 1] > vs[i]: - temp = vs[i] - tx = int_pts[2 * i] - ty = int_pts[2 * i + 1] - j = i - while j > 0 and vs[j - 1] > temp: - vs[j] = vs[j - 1] - int_pts[j * 2] = int_pts[j * 2 - 2] - int_pts[j * 2 + 1] = int_pts[j * 2 - 1] - j -= 1 - - vs[j] = temp - int_pts[j * 2] = tx - int_pts[j * 2 + 1] = ty - - -@cuda.jit( - '(float32[:], float32[:], int32, int32, float32[:])', - device=True, - inline=True) -def line_segment_intersection(pts1, pts2, i, j, temp_pts): - A = cuda.local.array((2, ), dtype=numba.float32) - B = cuda.local.array((2, ), dtype=numba.float32) - C = cuda.local.array((2, ), dtype=numba.float32) - D = cuda.local.array((2, ), dtype=numba.float32) - - A[0] = pts1[2 * i] - A[1] = pts1[2 * i + 1] - - B[0] = pts1[2 * ((i + 1) % 4)] - B[1] = pts1[2 * ((i + 1) % 4) + 1] - - C[0] = 
pts2[2 * j] - C[1] = pts2[2 * j + 1] - - D[0] = pts2[2 * ((j + 1) % 4)] - D[1] = pts2[2 * ((j + 1) % 4) + 1] - BA0 = B[0] - A[0] - BA1 = B[1] - A[1] - DA0 = D[0] - A[0] - CA0 = C[0] - A[0] - DA1 = D[1] - A[1] - CA1 = C[1] - A[1] - acd = DA1 * CA0 > CA1 * DA0 - bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0]) - if acd != bcd: - abc = CA1 * BA0 > BA1 * CA0 - abd = DA1 * BA0 > BA1 * DA0 - if abc != abd: - DC0 = D[0] - C[0] - DC1 = D[1] - C[1] - ABBA = A[0] * B[1] - B[0] * A[1] - CDDC = C[0] * D[1] - D[0] * C[1] - DH = BA1 * DC0 - BA0 * DC1 - Dx = ABBA * DC0 - BA0 * CDDC - Dy = ABBA * DC1 - BA1 * CDDC - temp_pts[0] = Dx / DH - temp_pts[1] = Dy / DH - return True - return False - - -@cuda.jit( - '(float32[:], float32[:], int32, int32, float32[:])', - device=True, - inline=True) -def line_segment_intersection_v1(pts1, pts2, i, j, temp_pts): - a = cuda.local.array((2, ), dtype=numba.float32) - b = cuda.local.array((2, ), dtype=numba.float32) - c = cuda.local.array((2, ), dtype=numba.float32) - d = cuda.local.array((2, ), dtype=numba.float32) - - a[0] = pts1[2 * i] - a[1] = pts1[2 * i + 1] - - b[0] = pts1[2 * ((i + 1) % 4)] - b[1] = pts1[2 * ((i + 1) % 4) + 1] - - c[0] = pts2[2 * j] - c[1] = pts2[2 * j + 1] - - d[0] = pts2[2 * ((j + 1) % 4)] - d[1] = pts2[2 * ((j + 1) % 4) + 1] - - area_abc = trangle_area(a, b, c) - area_abd = trangle_area(a, b, d) - - if area_abc * area_abd >= 0: - return False - - area_cda = trangle_area(c, d, a) - area_cdb = area_cda + area_abc - area_abd - - if area_cda * area_cdb >= 0: - return False - t = area_cda / (area_abd - area_abc) - - dx = t * (b[0] - a[0]) - dy = t * (b[1] - a[1]) - temp_pts[0] = a[0] + dx - temp_pts[1] = a[1] + dy - return True - - -@cuda.jit('(float32, float32, float32[:])', device=True, inline=True) -def point_in_quadrilateral(pt_x, pt_y, corners): - ab0 = corners[2] - corners[0] - ab1 = corners[3] - corners[1] - - ad0 = corners[6] - corners[0] - ad1 = corners[7] - corners[1] - - ap0 = pt_x - corners[0] - ap1 = pt_y - corners[1] - - abab = ab0 * ab0 + ab1 * ab1 - abap = ab0 * ap0 + ab1 * ap1 - adad = ad0 * ad0 + ad1 * ad1 - adap = ad0 * ap0 + ad1 * ap1 - - return abab >= abap and abap >= 0 and adad >= adap and adap >= 0 - - -@cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True) -def quadrilateral_intersection(pts1, pts2, int_pts): - num_of_inter = 0 - for i in range(4): - if point_in_quadrilateral(pts1[2 * i], pts1[2 * i + 1], pts2): - int_pts[num_of_inter * 2] = pts1[2 * i] - int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1] - num_of_inter += 1 - if point_in_quadrilateral(pts2[2 * i], pts2[2 * i + 1], pts1): - int_pts[num_of_inter * 2] = pts2[2 * i] - int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1] - num_of_inter += 1 - temp_pts = cuda.local.array((2, ), dtype=numba.float32) - for i in range(4): - for j in range(4): - has_pts = line_segment_intersection(pts1, pts2, i, j, temp_pts) - if has_pts: - int_pts[num_of_inter * 2] = temp_pts[0] - int_pts[num_of_inter * 2 + 1] = temp_pts[1] - num_of_inter += 1 - - return num_of_inter - - -@cuda.jit('(float32[:], float32[:])', device=True, inline=True) -def rbbox_to_corners(corners, rbbox): - # generate clockwise corners and rotate it clockwise - angle = rbbox[4] - a_cos = math.cos(angle) - a_sin = math.sin(angle) - center_x = rbbox[0] - center_y = rbbox[1] - x_d = rbbox[2] - y_d = rbbox[3] - corners_x = cuda.local.array((4, ), dtype=numba.float32) - corners_y = cuda.local.array((4, ), dtype=numba.float32) - corners_x[0] = -x_d / 2 - corners_x[1] = -x_d / 2 - 
corners_x[2] = x_d / 2 - corners_x[3] = x_d / 2 - corners_y[0] = -y_d / 2 - corners_y[1] = y_d / 2 - corners_y[2] = y_d / 2 - corners_y[3] = -y_d / 2 - for i in range(4): - corners[2 * i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x - corners[2 * i + - 1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y - - -@cuda.jit('(float32[:], float32[:])', device=True, inline=True) -def inter(rbbox1, rbbox2): - corners1 = cuda.local.array((8, ), dtype=numba.float32) - corners2 = cuda.local.array((8, ), dtype=numba.float32) - intersection_corners = cuda.local.array((16, ), dtype=numba.float32) - - rbbox_to_corners(corners1, rbbox1) - rbbox_to_corners(corners2, rbbox2) - - num_intersection = quadrilateral_intersection(corners1, corners2, - intersection_corners) - sort_vertex_in_convex_polygon(intersection_corners, num_intersection) - # print(intersection_corners.reshape([-1, 2])[:num_intersection]) - - return area(intersection_corners, num_intersection) - - -@cuda.jit('(float32[:], float32[:])', device=True, inline=True) -def devRotateIoU(rbox1, rbox2): - area1 = rbox1[2] * rbox1[3] - area2 = rbox2[2] * rbox2[3] - area_inter = inter(rbox1, rbox2) - return area_inter / (area1 + area2 - area_inter) - - -@cuda.jit('(int64, float32, float32[:], uint64[:])') -def rotate_nms_kernel(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): - threadsPerBlock = 8 * 8 - row_start = cuda.blockIdx.y - col_start = cuda.blockIdx.x - tx = cuda.threadIdx.x - row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) - col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) - block_boxes = cuda.shared.array(shape=(64 * 6, ), dtype=numba.float32) - dev_box_idx = threadsPerBlock * col_start + tx - if (tx < col_size): - block_boxes[tx * 6 + 0] = dev_boxes[dev_box_idx * 6 + 0] - block_boxes[tx * 6 + 1] = dev_boxes[dev_box_idx * 6 + 1] - block_boxes[tx * 6 + 2] = dev_boxes[dev_box_idx * 6 + 2] - block_boxes[tx * 6 + 3] = dev_boxes[dev_box_idx * 6 + 3] - block_boxes[tx * 6 + 4] = dev_boxes[dev_box_idx * 6 + 4] - block_boxes[tx * 6 + 5] = dev_boxes[dev_box_idx * 6 + 5] - cuda.syncthreads() - if (tx < row_size): - cur_box_idx = threadsPerBlock * row_start + tx - # cur_box = dev_boxes + cur_box_idx * 5; - t = 0 - start = 0 - if (row_start == col_start): - start = tx + 1 - for i in range(start, col_size): - iou = devRotateIoU(dev_boxes[cur_box_idx * 6:cur_box_idx * 6 + 5], - block_boxes[i * 6:i * 6 + 5]) - # print('iou', iou, cur_box_idx, i) - if (iou > nms_overlap_thresh): - t |= 1 << i - col_blocks = ((n_boxes) // (threadsPerBlock) + ( - (n_boxes) % (threadsPerBlock) > 0)) - dev_mask[cur_box_idx * col_blocks + col_start] = t - - -def rotate_nms_gpu(dets, nms_overlap_thresh, device_id=0): - """nms in gpu. WARNING: this function can provide right result - but its performance isn't be tested - - Args: - dets ([type]): [description] - nms_overlap_thresh ([type]): [description] - device_id ([type], optional): Defaults to 0. 
[description] - - Returns: - [type]: [description] - """ - dets = dets.astype(np.float32) - boxes_num = dets.shape[0] - keep_out = np.zeros([boxes_num], dtype=np.int32) - scores = dets[:, 5] - order = scores.argsort()[::-1].astype(np.int32) - boxes_host = dets[order, :] - - threadsPerBlock = 8 * 8 - col_blocks = div_up(boxes_num, threadsPerBlock) - cuda.select_device(device_id) - # mask_host shape: boxes_num * col_blocks * sizeof(np.uint64) - mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64) - blockspergrid = (div_up(boxes_num, threadsPerBlock), - div_up(boxes_num, threadsPerBlock)) - stream = cuda.stream() - with stream.auto_synchronize(): - boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream) - mask_dev = cuda.to_device(mask_host, stream) - rotate_nms_kernel[blockspergrid, threadsPerBlock, stream]( - boxes_num, nms_overlap_thresh, boxes_dev, mask_dev) - mask_dev.copy_to_host(mask_host, stream=stream) - num_out = nms_postprocess(keep_out, mask_host, boxes_num) - keep = keep_out[:num_out] - return list(order[keep]) - - -@cuda.jit('(int64, int64, float32[:], float32[:], float32[:])', fastmath=False) -def rotate_iou_kernel(N, K, dev_boxes, dev_query_boxes, dev_iou): - threadsPerBlock = 8 * 8 - row_start = cuda.blockIdx.x - col_start = cuda.blockIdx.y - tx = cuda.threadIdx.x - row_size = min(N - row_start * threadsPerBlock, threadsPerBlock) - col_size = min(K - col_start * threadsPerBlock, threadsPerBlock) - block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) - block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) - - dev_query_box_idx = threadsPerBlock * col_start + tx - dev_box_idx = threadsPerBlock * row_start + tx - if (tx < col_size): - block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0] - block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1] - block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2] - block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3] - block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4] - if (tx < row_size): - block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] - block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] - block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] - block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] - block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] - cuda.syncthreads() - if tx < row_size: - for i in range(col_size): - offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i - dev_iou[offset] = devRotateIoU(block_qboxes[i * 5:i * 5 + 5], - block_boxes[tx * 5:tx * 5 + 5]) - - -def rotate_iou_gpu(boxes, query_boxes, device_id=0): - """rotated box iou running in gpu. 500x faster than cpu version - (take 5ms in one example with numba.cuda code). - convert from [this project]( - https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation). - - Args: - boxes (float tensor: [N, 5]): rbboxes. format: centers, dims, - angles(clockwise when positive) - query_boxes (float tensor: [K, 5]): [description] - device_id (int, optional): Defaults to 0. 
[description] - - Returns: - [type]: [description] - """ - box_dtype = boxes.dtype - boxes = boxes.astype(np.float32) - query_boxes = query_boxes.astype(np.float32) - N = boxes.shape[0] - K = query_boxes.shape[0] - iou = np.zeros((N, K), dtype=np.float32) - if N == 0 or K == 0: - return iou - threadsPerBlock = 8 * 8 - cuda.select_device(device_id) - blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock)) - - stream = cuda.stream() - with stream.auto_synchronize(): - boxes_dev = cuda.to_device(boxes.reshape([-1]), stream) - query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream) - iou_dev = cuda.to_device(iou.reshape([-1]), stream) - rotate_iou_kernel[blockspergrid, threadsPerBlock, stream]( - N, K, boxes_dev, query_boxes_dev, iou_dev) - iou_dev.copy_to_host(iou.reshape([-1]), stream=stream) - return iou.astype(boxes.dtype) - - -@cuda.jit('(float32[:], float32[:], int32)', device=True, inline=True) -def devRotateIoUEval(rbox1, rbox2, criterion=-1): - area1 = rbox1[2] * rbox1[3] - area2 = rbox2[2] * rbox2[3] - area_inter = inter(rbox1, rbox2) - if criterion == -1: - return area_inter / (area1 + area2 - area_inter) - elif criterion == 0: - return area_inter / area1 - elif criterion == 1: - return area_inter / area2 - else: - return area_inter - - -@cuda.jit( - '(int64, int64, float32[:], float32[:], float32[:], int32)', - fastmath=False) -def rotate_iou_kernel_eval(N, - K, - dev_boxes, - dev_query_boxes, - dev_iou, - criterion=-1): - threadsPerBlock = 8 * 8 - row_start = cuda.blockIdx.x - col_start = cuda.blockIdx.y - tx = cuda.threadIdx.x - row_size = min(N - row_start * threadsPerBlock, threadsPerBlock) - col_size = min(K - col_start * threadsPerBlock, threadsPerBlock) - block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) - block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) - - dev_query_box_idx = threadsPerBlock * col_start + tx - dev_box_idx = threadsPerBlock * row_start + tx - if (tx < col_size): - block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0] - block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1] - block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2] - block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3] - block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4] - if (tx < row_size): - block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] - block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] - block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] - block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] - block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] - cuda.syncthreads() - if tx < row_size: - for i in range(col_size): - offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i - dev_iou[offset] = devRotateIoUEval(block_qboxes[i * 5:i * 5 + 5], - block_boxes[tx * 5:tx * 5 + 5], - criterion) - - -def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0): - """rotated box iou running in gpu. 500x faster than cpu version - (take 5ms in one example with numba.cuda code). - convert from [this project]( - https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation). - - Args: - boxes (float tensor: [N, 5]): rbboxes. format: centers, dims, - angles(clockwise when positive) - query_boxes (float tensor: [K, 5]): [description] - device_id (int, optional): Defaults to 0. 
[description] - - Returns: - [type]: [description] - """ - box_dtype = boxes.dtype - boxes = boxes.astype(np.float32) - query_boxes = query_boxes.astype(np.float32) - N = boxes.shape[0] - K = query_boxes.shape[0] - iou = np.zeros((N, K), dtype=np.float32) - if N == 0 or K == 0: - return iou - threadsPerBlock = 8 * 8 - cuda.select_device(device_id) - blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock)) - - stream = cuda.stream() - with stream.auto_synchronize(): - boxes_dev = cuda.to_device(boxes.reshape([-1]), stream) - query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream) - iou_dev = cuda.to_device(iou.reshape([-1]), stream) - rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream]( - N, K, boxes_dev, query_boxes_dev, iou_dev, criterion) - iou_dev.copy_to_host(iou.reshape([-1]), stream=stream) - return iou.astype(boxes.dtype) +import math +from pathlib import Path + +import numba +import numpy as np +from numba import cuda + +from second.utils.buildtools.pybind11_build import load_pb11 + +try: + from second.core.non_max_suppression.nms import non_max_suppression +except: + current_dir = Path(__file__).resolve().parents[0] + load_pb11( + ["../cc/nms/nms_kernel.cu.cc", "../cc/nms/nms.cc"], + current_dir / "nms.so", + current_dir, + cuda=True) + from second.core.non_max_suppression.nms import non_max_suppression + + +@cuda.jit('(float32[:], float32[:])', device=True, inline=True) +def iou_device(a, b): + left = max(a[0], b[0]) + right = min(a[2], b[2]) + top = max(a[1], b[1]) + bottom = min(a[3], b[3]) + width = max(right - left + 1, 0.) + height = max(bottom - top + 1, 0.) + interS = width * height + Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1) + Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1) + return interS / (Sa + Sb - interS) + + +@cuda.jit('(int64, float32, float32[:, :], uint64[:])') +def nms_kernel_v2(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): + threadsPerBlock = 8 * 8 + row_start = cuda.blockIdx.y + col_start = cuda.blockIdx.x + tx = cuda.threadIdx.x + row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) + col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) + block_boxes = cuda.shared.array( + shape=(threadsPerBlock, 5), dtype=numba.float32) + dev_box_idx = threadsPerBlock * col_start + tx + if (tx < col_size): + block_boxes[tx, 0] = dev_boxes[dev_box_idx, 0] + block_boxes[tx, 1] = dev_boxes[dev_box_idx, 1] + block_boxes[tx, 2] = dev_boxes[dev_box_idx, 2] + block_boxes[tx, 3] = dev_boxes[dev_box_idx, 3] + block_boxes[tx, 4] = dev_boxes[dev_box_idx, 4] + cuda.syncthreads() + if (cuda.threadIdx.x < row_size): + cur_box_idx = threadsPerBlock * row_start + cuda.threadIdx.x + # cur_box = dev_boxes + cur_box_idx * 5; + i = 0 + t = 0 + start = 0 + if (row_start == col_start): + start = tx + 1 + for i in range(start, col_size): + if (iou_device(dev_boxes[cur_box_idx], block_boxes[i]) > + nms_overlap_thresh): + t |= 1 << i + col_blocks = ((n_boxes) // (threadsPerBlock) + ( + (n_boxes) % (threadsPerBlock) > 0)) + dev_mask[cur_box_idx * col_blocks + col_start] = t + + +@cuda.jit('(int64, float32, float32[:], uint64[:])') +def nms_kernel(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): + threadsPerBlock = 8 * 8 + row_start = cuda.blockIdx.y + col_start = cuda.blockIdx.x + tx = cuda.threadIdx.x + row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) + col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) + block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) + 
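+    # Tiled bitmask NMS: each block compares a row tile of up to 64 boxes
+    # against a column tile of up to 64 boxes. The column tile is staged into
+    # block_boxes below (5 floats per box: x1, y1, x2, y2, score), and each
+    # thread then writes one 64-bit suppression mask that nms_postprocess
+    # reduces on the host.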
dev_box_idx = threadsPerBlock * col_start + tx + if (tx < col_size): + block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] + block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] + block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] + block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] + block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] + cuda.syncthreads() + if (tx < row_size): + cur_box_idx = threadsPerBlock * row_start + tx + # cur_box = dev_boxes + cur_box_idx * 5; + t = 0 + start = 0 + if (row_start == col_start): + start = tx + 1 + for i in range(start, col_size): + iou = iou_device(dev_boxes[cur_box_idx * 5:cur_box_idx * 5 + 4], + block_boxes[i * 5:i * 5 + 4]) + if (iou > nms_overlap_thresh): + t |= 1 << i + col_blocks = ((n_boxes) // (threadsPerBlock) + ( + (n_boxes) % (threadsPerBlock) > 0)) + dev_mask[cur_box_idx * col_blocks + col_start] = t + + +@numba.jit(nopython=True) +def div_up(m, n): + return m // n + (m % n > 0) + + +@numba.jit(nopython=True) +def nms_postprocess(keep_out, mask_host, boxes_num): + threadsPerBlock = 8 * 8 + col_blocks = div_up(boxes_num, threadsPerBlock) + remv = np.zeros((col_blocks), dtype=np.uint64) + num_to_keep = 0 + for i in range(boxes_num): + nblock = i // threadsPerBlock + inblock = i % threadsPerBlock + mask = np.array(1 << inblock, dtype=np.uint64) + if not (remv[nblock] & mask): + keep_out[num_to_keep] = i + num_to_keep += 1 + # unsigned long long *p = &mask_host[0] + i * col_blocks; + for j in range(nblock, col_blocks): + remv[j] |= mask_host[i * col_blocks + j] + # remv[j] |= p[j]; + return num_to_keep + + +def nms_gpu(dets, nms_overlap_thresh, device_id=1): + """nms in gpu. + + Args: + dets ([type]): [description] + nms_overlap_thresh ([type]): [description] + device_id ([type], optional): Defaults to 0. 
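+        Note: `dets` is expected to be a float32 [N, 5] array of
+        (x1, y1, x2, y2, score) rows (the signature default is device_id=1).
+        Example (requires a CUDA device; values are illustrative):
+
+            dets = np.array([[0., 0., 10., 10., 0.9],
+                             [1., 1., 11., 11., 0.8]], dtype=np.float32)
+            keep = nms_gpu(dets, 0.5)  # -> [0]; the overlapping box is dropped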
[description] + + Returns: + [type]: [description] + """ + + boxes_num = dets.shape[0] + #print(boxes_num) + keep_out = np.zeros([boxes_num], dtype=np.int32) + scores = dets[:, 4] + order = scores.argsort()[::-1].astype(np.int32) + boxes_host = dets[order, :] + + threadsPerBlock = 8 * 8 + col_blocks = div_up(boxes_num, threadsPerBlock) + cuda.select_device(device_id) + mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64) + blockspergrid = (div_up(boxes_num, threadsPerBlock), + div_up(boxes_num, threadsPerBlock)) + #print(blockspergrid) + stream = cuda.stream() + with stream.auto_synchronize(): + boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream) + mask_dev = cuda.to_device(mask_host, stream) + nms_kernel[blockspergrid, threadsPerBlock, stream]( + boxes_num, nms_overlap_thresh, boxes_dev, mask_dev) + mask_dev.copy_to_host(mask_host, stream=stream) + # stream.synchronize() + num_out = nms_postprocess(keep_out, mask_host, boxes_num) + keep = keep_out[:num_out] + return list(order[keep]) + + +def nms_gpu_cc(dets, nms_overlap_thresh, device_id=1): + boxes_num = dets.shape[0] + keep = np.zeros(boxes_num, dtype=np.int32) + scores = dets[:, 4] + order = scores.argsort()[::-1].astype(np.int32) + sorted_dets = dets[order, :] + num_out = non_max_suppression(sorted_dets, keep, nms_overlap_thresh, + device_id) + keep = keep[:num_out] + return list(order[keep]) + + +@cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True) +def trangle_area(a, b, c): + return ( + (a[0] - c[0]) * (b[1] - c[1]) - (a[1] - c[1]) * (b[0] - c[0])) / 2.0 + + +@cuda.jit('(float32[:], int32)', device=True, inline=True) +def area(int_pts, num_of_inter): + area_val = 0.0 + for i in range(num_of_inter - 2): + area_val += abs( + trangle_area(int_pts[:2], int_pts[2 * i + 2:2 * i + 4], + int_pts[2 * i + 4:2 * i + 6])) + return area_val + + +@cuda.jit('(float32[:], int32)', device=True, inline=True) +def sort_vertex_in_convex_polygon(int_pts, num_of_inter): + if num_of_inter > 0: + center = cuda.local.array((2, ), dtype=numba.float32) + center[:] = 0.0 + for i in range(num_of_inter): + center[0] += int_pts[2 * i] + center[1] += int_pts[2 * i + 1] + center[0] /= num_of_inter + center[1] /= num_of_inter + v = cuda.local.array((2, ), dtype=numba.float32) + vs = cuda.local.array((16, ), dtype=numba.float32) + for i in range(num_of_inter): + v[0] = int_pts[2 * i] - center[0] + v[1] = int_pts[2 * i + 1] - center[1] + d = math.sqrt(v[0] * v[0] + v[1] * v[1]) + v[0] = v[0] / d + v[1] = v[1] / d + if v[1] < 0: + v[0] = -2 - v[0] + vs[i] = v[0] + j = 0 + temp = 0 + for i in range(1, num_of_inter): + if vs[i - 1] > vs[i]: + temp = vs[i] + tx = int_pts[2 * i] + ty = int_pts[2 * i + 1] + j = i + while j > 0 and vs[j - 1] > temp: + vs[j] = vs[j - 1] + int_pts[j * 2] = int_pts[j * 2 - 2] + int_pts[j * 2 + 1] = int_pts[j * 2 - 1] + j -= 1 + + vs[j] = temp + int_pts[j * 2] = tx + int_pts[j * 2 + 1] = ty + + +@cuda.jit( + '(float32[:], float32[:], int32, int32, float32[:])', + device=True, + inline=True) +def line_segment_intersection(pts1, pts2, i, j, temp_pts): + A = cuda.local.array((2, ), dtype=numba.float32) + B = cuda.local.array((2, ), dtype=numba.float32) + C = cuda.local.array((2, ), dtype=numba.float32) + D = cuda.local.array((2, ), dtype=numba.float32) + + A[0] = pts1[2 * i] + A[1] = pts1[2 * i + 1] + + B[0] = pts1[2 * ((i + 1) % 4)] + B[1] = pts1[2 * ((i + 1) % 4) + 1] + + C[0] = pts2[2 * j] + C[1] = pts2[2 * j + 1] + + D[0] = pts2[2 * ((j + 1) % 4)] + D[1] = pts2[2 * ((j + 1) % 4) + 1] + BA0 = B[0] - 
A[0] + BA1 = B[1] - A[1] + DA0 = D[0] - A[0] + CA0 = C[0] - A[0] + DA1 = D[1] - A[1] + CA1 = C[1] - A[1] + acd = DA1 * CA0 > CA1 * DA0 + bcd = (D[1] - B[1]) * (C[0] - B[0]) > (C[1] - B[1]) * (D[0] - B[0]) + if acd != bcd: + abc = CA1 * BA0 > BA1 * CA0 + abd = DA1 * BA0 > BA1 * DA0 + if abc != abd: + DC0 = D[0] - C[0] + DC1 = D[1] - C[1] + ABBA = A[0] * B[1] - B[0] * A[1] + CDDC = C[0] * D[1] - D[0] * C[1] + DH = BA1 * DC0 - BA0 * DC1 + Dx = ABBA * DC0 - BA0 * CDDC + Dy = ABBA * DC1 - BA1 * CDDC + temp_pts[0] = Dx / DH + temp_pts[1] = Dy / DH + return True + return False + + +@cuda.jit( + '(float32[:], float32[:], int32, int32, float32[:])', + device=True, + inline=True) +def line_segment_intersection_v1(pts1, pts2, i, j, temp_pts): + a = cuda.local.array((2, ), dtype=numba.float32) + b = cuda.local.array((2, ), dtype=numba.float32) + c = cuda.local.array((2, ), dtype=numba.float32) + d = cuda.local.array((2, ), dtype=numba.float32) + + a[0] = pts1[2 * i] + a[1] = pts1[2 * i + 1] + + b[0] = pts1[2 * ((i + 1) % 4)] + b[1] = pts1[2 * ((i + 1) % 4) + 1] + + c[0] = pts2[2 * j] + c[1] = pts2[2 * j + 1] + + d[0] = pts2[2 * ((j + 1) % 4)] + d[1] = pts2[2 * ((j + 1) % 4) + 1] + + area_abc = trangle_area(a, b, c) + area_abd = trangle_area(a, b, d) + + if area_abc * area_abd >= 0: + return False + + area_cda = trangle_area(c, d, a) + area_cdb = area_cda + area_abc - area_abd + + if area_cda * area_cdb >= 0: + return False + t = area_cda / (area_abd - area_abc) + + dx = t * (b[0] - a[0]) + dy = t * (b[1] - a[1]) + temp_pts[0] = a[0] + dx + temp_pts[1] = a[1] + dy + return True + + +@cuda.jit('(float32, float32, float32[:])', device=True, inline=True) +def point_in_quadrilateral(pt_x, pt_y, corners): + ab0 = corners[2] - corners[0] + ab1 = corners[3] - corners[1] + + ad0 = corners[6] - corners[0] + ad1 = corners[7] - corners[1] + + ap0 = pt_x - corners[0] + ap1 = pt_y - corners[1] + + abab = ab0 * ab0 + ab1 * ab1 + abap = ab0 * ap0 + ab1 * ap1 + adad = ad0 * ad0 + ad1 * ad1 + adap = ad0 * ap0 + ad1 * ap1 + + return abab >= abap and abap >= 0 and adad >= adap and adap >= 0 + + +@cuda.jit('(float32[:], float32[:], float32[:])', device=True, inline=True) +def quadrilateral_intersection(pts1, pts2, int_pts): + num_of_inter = 0 + for i in range(4): + if point_in_quadrilateral(pts1[2 * i], pts1[2 * i + 1], pts2): + int_pts[num_of_inter * 2] = pts1[2 * i] + int_pts[num_of_inter * 2 + 1] = pts1[2 * i + 1] + num_of_inter += 1 + if point_in_quadrilateral(pts2[2 * i], pts2[2 * i + 1], pts1): + int_pts[num_of_inter * 2] = pts2[2 * i] + int_pts[num_of_inter * 2 + 1] = pts2[2 * i + 1] + num_of_inter += 1 + temp_pts = cuda.local.array((2, ), dtype=numba.float32) + for i in range(4): + for j in range(4): + has_pts = line_segment_intersection(pts1, pts2, i, j, temp_pts) + if has_pts: + int_pts[num_of_inter * 2] = temp_pts[0] + int_pts[num_of_inter * 2 + 1] = temp_pts[1] + num_of_inter += 1 + + return num_of_inter + + +@cuda.jit('(float32[:], float32[:])', device=True, inline=True) +def rbbox_to_corners(corners, rbbox): + # generate clockwise corners and rotate it clockwise + angle = rbbox[4] + a_cos = math.cos(angle) + a_sin = math.sin(angle) + center_x = rbbox[0] + center_y = rbbox[1] + x_d = rbbox[2] + y_d = rbbox[3] + corners_x = cuda.local.array((4, ), dtype=numba.float32) + corners_y = cuda.local.array((4, ), dtype=numba.float32) + corners_x[0] = -x_d / 2 + corners_x[1] = -x_d / 2 + corners_x[2] = x_d / 2 + corners_x[3] = x_d / 2 + corners_y[0] = -y_d / 2 + corners_y[1] = y_d / 2 + corners_y[2] = y_d / 2 + 
corners_y[3] = -y_d / 2 + for i in range(4): + corners[2 * i] = a_cos * corners_x[i] + a_sin * corners_y[i] + center_x + corners[2 * i + + 1] = -a_sin * corners_x[i] + a_cos * corners_y[i] + center_y + + +@cuda.jit('(float32[:], float32[:])', device=True, inline=True) +def inter(rbbox1, rbbox2): + corners1 = cuda.local.array((8, ), dtype=numba.float32) + corners2 = cuda.local.array((8, ), dtype=numba.float32) + intersection_corners = cuda.local.array((16, ), dtype=numba.float32) + + rbbox_to_corners(corners1, rbbox1) + rbbox_to_corners(corners2, rbbox2) + + num_intersection = quadrilateral_intersection(corners1, corners2, + intersection_corners) + sort_vertex_in_convex_polygon(intersection_corners, num_intersection) + # print(intersection_corners.reshape([-1, 2])[:num_intersection]) + + return area(intersection_corners, num_intersection) + + +@cuda.jit('(float32[:], float32[:])', device=True, inline=True) +def devRotateIoU(rbox1, rbox2): + area1 = rbox1[2] * rbox1[3] + area2 = rbox2[2] * rbox2[3] + area_inter = inter(rbox1, rbox2) + return area_inter / (area1 + area2 - area_inter) + + +@cuda.jit('(int64, float32, float32[:], uint64[:])') +def rotate_nms_kernel(n_boxes, nms_overlap_thresh, dev_boxes, dev_mask): + threadsPerBlock = 8 * 8 + row_start = cuda.blockIdx.y + col_start = cuda.blockIdx.x + tx = cuda.threadIdx.x + row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock) + col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock) + block_boxes = cuda.shared.array(shape=(64 * 6, ), dtype=numba.float32) + dev_box_idx = threadsPerBlock * col_start + tx + if (tx < col_size): + block_boxes[tx * 6 + 0] = dev_boxes[dev_box_idx * 6 + 0] + block_boxes[tx * 6 + 1] = dev_boxes[dev_box_idx * 6 + 1] + block_boxes[tx * 6 + 2] = dev_boxes[dev_box_idx * 6 + 2] + block_boxes[tx * 6 + 3] = dev_boxes[dev_box_idx * 6 + 3] + block_boxes[tx * 6 + 4] = dev_boxes[dev_box_idx * 6 + 4] + block_boxes[tx * 6 + 5] = dev_boxes[dev_box_idx * 6 + 5] + cuda.syncthreads() + if (tx < row_size): + cur_box_idx = threadsPerBlock * row_start + tx + # cur_box = dev_boxes + cur_box_idx * 5; + t = 0 + start = 0 + if (row_start == col_start): + start = tx + 1 + for i in range(start, col_size): + iou = devRotateIoU(dev_boxes[cur_box_idx * 6:cur_box_idx * 6 + 5], + block_boxes[i * 6:i * 6 + 5]) + # print('iou', iou, cur_box_idx, i) + if (iou > nms_overlap_thresh): + t |= 1 << i + col_blocks = ((n_boxes) // (threadsPerBlock) + ( + (n_boxes) % (threadsPerBlock) > 0)) + dev_mask[cur_box_idx * col_blocks + col_start] = t + + +def rotate_nms_gpu(dets, nms_overlap_thresh, device_id=1): + """nms in gpu. WARNING: this function can provide right result + but its performance isn't be tested + + Args: + dets ([type]): [description] + nms_overlap_thresh ([type]): [description] + device_id ([type], optional): Defaults to 0. 
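+        Note: `dets` is expected to be float32 [N, 6] with rows
+        (x_center, y_center, width, length, angle, score); the indices returned
+        refer to the original, unsorted order (signature default: device_id=1).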
[description] + + Returns: + [type]: [description] + """ + dets = dets.astype(np.float32) + boxes_num = dets.shape[0] + keep_out = np.zeros([boxes_num], dtype=np.int32) + scores = dets[:, 5] + order = scores.argsort()[::-1].astype(np.int32) + boxes_host = dets[order, :] + + threadsPerBlock = 8 * 8 + col_blocks = div_up(boxes_num, threadsPerBlock) + cuda.select_device(device_id) + # mask_host shape: boxes_num * col_blocks * sizeof(np.uint64) + mask_host = np.zeros((boxes_num * col_blocks, ), dtype=np.uint64) + blockspergrid = (div_up(boxes_num, threadsPerBlock), + div_up(boxes_num, threadsPerBlock)) + stream = cuda.stream() + with stream.auto_synchronize(): + boxes_dev = cuda.to_device(boxes_host.reshape([-1]), stream) + mask_dev = cuda.to_device(mask_host, stream) + rotate_nms_kernel[blockspergrid, threadsPerBlock, stream]( + boxes_num, nms_overlap_thresh, boxes_dev, mask_dev) + mask_dev.copy_to_host(mask_host, stream=stream) + num_out = nms_postprocess(keep_out, mask_host, boxes_num) + keep = keep_out[:num_out] + return list(order[keep]) + + +@cuda.jit('(int64, int64, float32[:], float32[:], float32[:])', fastmath=False) +def rotate_iou_kernel(N, K, dev_boxes, dev_query_boxes, dev_iou): + threadsPerBlock = 8 * 8 + row_start = cuda.blockIdx.x + col_start = cuda.blockIdx.y + tx = cuda.threadIdx.x + row_size = min(N - row_start * threadsPerBlock, threadsPerBlock) + col_size = min(K - col_start * threadsPerBlock, threadsPerBlock) + block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) + block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) + + dev_query_box_idx = threadsPerBlock * col_start + tx + dev_box_idx = threadsPerBlock * row_start + tx + if (tx < col_size): + block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0] + block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1] + block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2] + block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3] + block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4] + if (tx < row_size): + block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] + block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] + block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] + block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] + block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] + cuda.syncthreads() + if tx < row_size: + for i in range(col_size): + offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i + dev_iou[offset] = devRotateIoU(block_qboxes[i * 5:i * 5 + 5], + block_boxes[tx * 5:tx * 5 + 5]) + + +def rotate_iou_gpu(boxes, query_boxes, device_id=1): + """rotated box iou running in gpu. 500x faster than cpu version + (take 5ms in one example with numba.cuda code). + convert from [this project]( + https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation). + + Args: + boxes (float tensor: [N, 5]): rbboxes. format: centers, dims, + angles(clockwise when positive) + query_boxes (float tensor: [K, 5]): [description] + device_id (int, optional): Defaults to 0. 
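+        Note: the result is an [N, K] array in which entry (i, k) is the IoU
+        between boxes[i] and query_boxes[k].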
[description] + + Returns: + [type]: [description] + """ + box_dtype = boxes.dtype + boxes = boxes.astype(np.float32) + query_boxes = query_boxes.astype(np.float32) + N = boxes.shape[0] + K = query_boxes.shape[0] + iou = np.zeros((N, K), dtype=np.float32) + if N == 0 or K == 0: + return iou + threadsPerBlock = 8 * 8 + cuda.select_device(device_id) + blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock)) + + stream = cuda.stream() + with stream.auto_synchronize(): + boxes_dev = cuda.to_device(boxes.reshape([-1]), stream) + query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream) + iou_dev = cuda.to_device(iou.reshape([-1]), stream) + rotate_iou_kernel[blockspergrid, threadsPerBlock, stream]( + N, K, boxes_dev, query_boxes_dev, iou_dev) + iou_dev.copy_to_host(iou.reshape([-1]), stream=stream) + return iou.astype(boxes.dtype) + + +@cuda.jit('(float32[:], float32[:], int32)', device=True, inline=True) +def devRotateIoUEval(rbox1, rbox2, criterion=-1): + area1 = rbox1[2] * rbox1[3] + area2 = rbox2[2] * rbox2[3] + area_inter = inter(rbox1, rbox2) + if criterion == -1: + return area_inter / (area1 + area2 - area_inter) + elif criterion == 0: + return area_inter / area1 + elif criterion == 1: + return area_inter / area2 + else: + return area_inter + + +@cuda.jit( + '(int64, int64, float32[:], float32[:], float32[:], int32)', + fastmath=False) +def rotate_iou_kernel_eval(N, + K, + dev_boxes, + dev_query_boxes, + dev_iou, + criterion=-1): + threadsPerBlock = 8 * 8 + row_start = cuda.blockIdx.x + col_start = cuda.blockIdx.y + tx = cuda.threadIdx.x + row_size = min(N - row_start * threadsPerBlock, threadsPerBlock) + col_size = min(K - col_start * threadsPerBlock, threadsPerBlock) + block_boxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) + block_qboxes = cuda.shared.array(shape=(64 * 5, ), dtype=numba.float32) + + dev_query_box_idx = threadsPerBlock * col_start + tx + dev_box_idx = threadsPerBlock * row_start + tx + if (tx < col_size): + block_qboxes[tx * 5 + 0] = dev_query_boxes[dev_query_box_idx * 5 + 0] + block_qboxes[tx * 5 + 1] = dev_query_boxes[dev_query_box_idx * 5 + 1] + block_qboxes[tx * 5 + 2] = dev_query_boxes[dev_query_box_idx * 5 + 2] + block_qboxes[tx * 5 + 3] = dev_query_boxes[dev_query_box_idx * 5 + 3] + block_qboxes[tx * 5 + 4] = dev_query_boxes[dev_query_box_idx * 5 + 4] + if (tx < row_size): + block_boxes[tx * 5 + 0] = dev_boxes[dev_box_idx * 5 + 0] + block_boxes[tx * 5 + 1] = dev_boxes[dev_box_idx * 5 + 1] + block_boxes[tx * 5 + 2] = dev_boxes[dev_box_idx * 5 + 2] + block_boxes[tx * 5 + 3] = dev_boxes[dev_box_idx * 5 + 3] + block_boxes[tx * 5 + 4] = dev_boxes[dev_box_idx * 5 + 4] + cuda.syncthreads() + if tx < row_size: + for i in range(col_size): + offset = row_start * threadsPerBlock * K + col_start * threadsPerBlock + tx * K + i + dev_iou[offset] = devRotateIoUEval(block_qboxes[i * 5:i * 5 + 5], + block_boxes[tx * 5:tx * 5 + 5], + criterion) + + +def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=1): + """rotated box iou running in gpu. 500x faster than cpu version + (take 5ms in one example with numba.cuda code). + convert from [this project]( + https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation). + + Args: + boxes (float tensor: [N, 5]): rbboxes. format: centers, dims, + angles(clockwise when positive) + query_boxes (float tensor: [K, 5]): [description] + device_id (int, optional): Defaults to 0. 
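+        criterion (int, optional): -1 returns plain IoU; 0 and 1 instead
+            normalize the intersection by the area of only one box (0: the
+            `query_boxes` side, 1: the `boxes` side, following
+            devRotateIoUEval's argument order). Defaults to -1.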
[description] + + Returns: + [type]: [description] + """ + box_dtype = boxes.dtype + boxes = boxes.astype(np.float32) + query_boxes = query_boxes.astype(np.float32) + N = boxes.shape[0] + K = query_boxes.shape[0] + iou = np.zeros((N, K), dtype=np.float32) + if N == 0 or K == 0: + return iou + threadsPerBlock = 8 * 8 + cuda.select_device(device_id) + blockspergrid = (div_up(N, threadsPerBlock), div_up(K, threadsPerBlock)) + + stream = cuda.stream() + with stream.auto_synchronize(): + boxes_dev = cuda.to_device(boxes.reshape([-1]), stream) + query_boxes_dev = cuda.to_device(query_boxes.reshape([-1]), stream) + iou_dev = cuda.to_device(iou.reshape([-1]), stream) + rotate_iou_kernel_eval[blockspergrid, threadsPerBlock, stream]( + N, K, boxes_dev, query_boxes_dev, iou_dev, criterion) + iou_dev.copy_to_host(iou.reshape([-1]), stream=stream) + return iou.astype(boxes.dtype) diff --git a/second/data/dataset.py b/second/data/dataset.py index 645faf12..5037f8c4 100644 --- a/second/data/dataset.py +++ b/second/data/dataset.py @@ -8,7 +8,15 @@ from second.core import box_np_ops from second.core import preprocess as prep from second.data import kitti_common as kitti -from second.data.preprocess import _read_and_prep_v9 +from second.data.preprocess import _read_and_prep_v9, quaternion_to_euler +import torch + +import argoverse +from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader +from argoverse.map_representation.map_api import ArgoverseMap +import copy + + class Dataset(object): @@ -27,15 +35,18 @@ def __len__(self): class KittiDataset(Dataset): - def __init__(self, info_path, root_path, num_point_features, + def __init__(self, info_path, root_path, class_names, num_point_features, target_assigner, feature_map_size, prep_func): - with open(info_path, 'rb') as f: - infos = pickle.load(f) - #self._kitti_infos = kitti.filter_infos_by_used_classes(infos, class_names) + #with open(info_path, 'rb') as f: + # infos = pickle.load(f) self._root_path = root_path - self._kitti_infos = infos + self.argoverse_loader = ArgoverseTrackingLoader(root_path) + self.am = ArgoverseMap() + + self.inform = info_path self._num_point_features = num_point_features - print("remain number of infos:", len(self._kitti_infos)) + self.class_names = class_names + #print("remain number of infos:", len(self._kitti_infos)) # generate anchors cache # [352, 400] ret = target_assigner.generate_anchors(feature_map_size) @@ -51,18 +62,101 @@ def __init__(self, info_path, root_path, num_point_features, "matched_thresholds": matched_thresholds, "unmatched_thresholds": unmatched_thresholds, } + self._prep_func = partial(prep_func, anchor_cache=anchor_cache) def __len__(self): - return len(self._kitti_infos) + return len(self.inform["index_list"]) @property def kitti_infos(self): - return self._kitti_infos + ret = [] + for info in self.inform["index_list"]: + annos = {} + undr_scr = info.find('_') + seq = int(info[:undr_scr]) + frame = int(info[undr_scr+1:]) + argoverse_data = self.argoverse_loader[seq] + + ## converting to kitti camera coordinate system + objects = argoverse_data.get_label_object(frame) + + city_name = argoverse_data.city_name + city_to_egovehicle_se3 = argoverse_data.get_pose(frame) + am = self.am + + gt_location = [] + dims = [] + gt_rots = [] + gt_names = [] + gt_bbox = [] + gt_occ = [] + gt_trunc = [] + gt_alpha = [] + + for obj in objects: + if obj.label_class in self.class_names: + corners = obj.as_3d_bbox() + center = np.mean(corners,axis=0) + center[2] = corners[2,2] + + #if 
np.abs(center[0]) > 32 or np.abs(center[1]) > 32 : + # continue + + gt_location.append(center) + gt_names.append(obj.label_class) + quat = obj.quaternion + yaw,pitch,roll = quaternion_to_euler(quat) + if np.pi/2<= yaw <=np.pi: + yaw = 3*np.pi/2 -yaw + else: + yaw = -yaw-np.pi/2 + assert -np.pi <= yaw <= np.pi + + gt_rots.append(yaw) + dims.append([obj.length,obj.height,obj.width]) + gt_bbox.append([100,100,200,200]) + gt_occ.append(0) + gt_trunc.append(0) + gt_alpha.append(0) + + gt_location,dims,gt_rots,gt_names = np.array(gt_location), np.array(dims), np.array(gt_rots), np.array(gt_names) + gt_bbox, gt_occ, gt_trunc, gt_alpha = np.array(gt_bbox), np.array(gt_occ), np.array(gt_trunc) ,np.array(gt_alpha) + + roi_locations = copy.deepcopy(gt_location) + ''' + if self.inform["include_roi"] or self.inform["dr_area"]: + roi_locs = city_to_egovehicle_se3.transform_point_cloud(roi_locations) # put into city coords + + if self.inform["include_roi"]: + roi_locs_flag = am.remove_non_roi_points(roi_locs, city_name) # remove non-driveable region + + if not self.inform["include_roi"] and self.inform["dr_area"]: + roi_locs_flag = am.remove_non_driveable_area_points(roi_locs, city_name) # remove non-driveable region + + gt_location,dims,gt_rots,gt_names = ( gt_location[roi_locs_flag] , dims[roi_locs_flag] , + gt_rots[roi_locs_flag] , gt_names[roi_locs_flag] ) + + gt_bbox, gt_occ, gt_trunc, gt_alpha = ( gt_bbox[roi_locs_flag], gt_occ[roi_locs_flag], gt_trunc[roi_locs_flag], gt_alpha[roi_locs_flag] ) + ''' + gt_location[:,2] -= 1.73 + gt_location[:,[0,1,2]] = gt_location[:,[1,2,0]] + gt_location[:,[0,1]] = -gt_location[:,[0,1]] + + annos["name"], annos["location"], annos["dimensions"], annos["rotation_y"] = gt_names, gt_location, dims, gt_rots + annos["bbox"], annos["alpha"], annos["occluded"], annos["truncated"] = gt_bbox, gt_alpha, gt_occ, gt_trunc + ret.append(annos) + + return ret def __getitem__(self, idx): + info = { "index": self.inform["index_list"][idx], "road_map": self.inform["road_map"] , "include_roadmap": self.inform["include_roadmap"], "include_road_points": self.inform["include_road_points"] ,"include_roi": self.inform["include_roi"], + "dr_area": self.inform["dr_area"]} return _read_and_prep_v9( - info=self._kitti_infos[idx], + info=info, + class_names=self.class_names, root_path=self._root_path, num_point_features=self._num_point_features, + argoverse_loader = self.argoverse_loader, + argoverse_map = self.am, prep_func=self._prep_func) diff --git a/second/data/preprocess.py b/second/data/preprocess.py index 436f55dd..511efda0 100644 --- a/second/data/preprocess.py +++ b/second/data/preprocess.py @@ -11,6 +11,12 @@ from second.core.geometry import points_in_convex_polygon_3d_jit from second.core.point_cloud.bev_ops import points_to_bev from second.data import kitti_common as kitti +import argoverse +from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader +from argoverse.map_representation.map_api import ArgoverseMap +import copy +import math +import torch def merge_second_batch(batch_list, _unused=False): @@ -41,6 +47,20 @@ def merge_second_batch(batch_list, _unused=False): ret[key] = np.stack(elems, axis=0) return ret +def quaternion_to_euler(quat): + w,x,y,z = quat + + t0 = +2.0 * (w * x + y * z) + t1 = +1.0 - 2.0 * (x * x + y * y) + roll = math.atan2(t0, t1) + t2 = +2.0 * (w * y - z * x) + t2 = +1.0 if t2 > +1.0 else t2 + t2 = -1.0 if t2 < -1.0 else t2 + pitch = math.asin(t2) + t3 = +2.0 * (w * z + x * y) + t4 = +1.0 - 2.0 * (y * y + z * z) + yaw = 
math.atan2(t3, t4) + return yaw, pitch, roll def prep_pointcloud(input_dict, root_path, @@ -48,7 +68,7 @@ def prep_pointcloud(input_dict, target_assigner, db_sampler=None, max_voxels=20000, - class_names=['Car'], + class_names=['PEDESTRIAN'], remove_outside_points=False, training=True, create_targets=True, @@ -79,113 +99,41 @@ def prep_pointcloud(input_dict, min_gt_point_dict=None, bev_only=False, use_group_id=False, - out_dtype=np.float32): + out_dtype=np.float32 + ): """convert point cloud to voxels, create targets if ground truths exists. """ points = input_dict["points"] + pc_range = voxel_generator.point_cloud_range + + pts_x, pts_y, pts_z = points[:, 0], points[:, 1], points[:, 2] + range_flag = ( (pts_x >= pc_range[0]) & (pts_x <= pc_range[3]) + & (pts_y >= pc_range[1]) & (pts_y <= pc_range[4]) + & (pts_z >= pc_range[2]) & (pts_z <= pc_range[5]) ) + + points = points[range_flag] + + if training: gt_boxes = input_dict["gt_boxes"] gt_names = input_dict["gt_names"] - difficulty = input_dict["difficulty"] + ## group_ids ? np.arange(num_gt,dtype=np.int32) num_gt - number of objects (of all categories) in annotated lidar frame + group_ids = None if use_group_id and "group_ids" in input_dict: group_ids = input_dict["group_ids"] - rect = input_dict["rect"] - Trv2c = input_dict["Trv2c"] - P2 = input_dict["P2"] - unlabeled_training = unlabeled_db_sampler is not None - image_idx = input_dict["image_idx"] - - if reference_detections is not None: - C, R, T = box_np_ops.projection_matrix_to_CRT_kitti(P2) - frustums = box_np_ops.get_frustum_v2(reference_detections, C) - frustums -= T - # frustums = np.linalg.inv(R) @ frustums.T - frustums = np.einsum('ij, akj->aki', np.linalg.inv(R), frustums) - frustums = box_np_ops.camera_to_lidar(frustums, rect, Trv2c) - surfaces = box_np_ops.corner_to_surfaces_3d_jit(frustums) - masks = points_in_convex_polygon_3d_jit(points, surfaces) - points = points[masks.any(-1)] - - if remove_outside_points and not lidar_input: - image_shape = input_dict["image_shape"] - points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, - image_shape) - if remove_environment is True and training: - selected = kitti.keep_arrays_by_name(gt_names, class_names) - gt_boxes = gt_boxes[selected] - gt_names = gt_names[selected] - difficulty = difficulty[selected] - if group_ids is not None: - group_ids = group_ids[selected] - points = prep.remove_points_outside_boxes(points, gt_boxes) - if training: - # print(gt_names) - selected = kitti.drop_arrays_by_name(gt_names, ["DontCare"]) - gt_boxes = gt_boxes[selected] - gt_names = gt_names[selected] - difficulty = difficulty[selected] - if group_ids is not None: - group_ids = group_ids[selected] - - gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, rect, Trv2c) - if remove_unknown: - remove_mask = difficulty == -1 - """ - gt_boxes_remove = gt_boxes[remove_mask] - gt_boxes_remove[:, 3:6] += 0.25 - points = prep.remove_points_in_boxes(points, gt_boxes_remove) - """ - keep_mask = np.logical_not(remove_mask) - gt_boxes = gt_boxes[keep_mask] - gt_names = gt_names[keep_mask] - difficulty = difficulty[keep_mask] - if group_ids is not None: - group_ids = group_ids[keep_mask] + + + #unlabeled_training = unlabeled_db_sampler is not None + + if training: + + gt_boxes_mask = np.array( [n in class_names for n in gt_names], dtype=np.bool_) - if db_sampler is not None: - sampled_dict = db_sampler.sample_all( - root_path, - gt_boxes, - gt_names, - num_point_features, - random_crop, - gt_group_ids=group_ids, - rect=rect, - Trv2c=Trv2c, - P2=P2) - - 
if sampled_dict is not None: - sampled_gt_names = sampled_dict["gt_names"] - sampled_gt_boxes = sampled_dict["gt_boxes"] - sampled_points = sampled_dict["points"] - sampled_gt_masks = sampled_dict["gt_masks"] - # gt_names = gt_names[gt_boxes_mask].tolist() - gt_names = np.concatenate([gt_names, sampled_gt_names], axis=0) - # gt_names += [s["name"] for s in sampled] - gt_boxes = np.concatenate([gt_boxes, sampled_gt_boxes]) - gt_boxes_mask = np.concatenate( - [gt_boxes_mask, sampled_gt_masks], axis=0) - if group_ids is not None: - sampled_group_ids = sampled_dict["group_ids"] - group_ids = np.concatenate([group_ids, sampled_group_ids]) - - if remove_points_after_sample: - points = prep.remove_points_in_boxes( - points, sampled_gt_boxes) - - points = np.concatenate([sampled_points, points], axis=0) - # unlabeled_mask = np.zeros((gt_boxes.shape[0], ), dtype=np.bool_) - if without_reflectivity: - used_point_axes = list(range(num_point_features)) - used_point_axes.pop(3) - points = points[:, used_point_axes] - pc_range = voxel_generator.point_cloud_range - if bev_only: # set z and h to limits - gt_boxes[:, 2] = pc_range[2] - gt_boxes[:, 5] = pc_range[5] - pc_range[2] + #print(gt_boxes_mask.shape,gt_boxes.shape,"before") + prep.noise_per_object_v3_( gt_boxes, points, @@ -195,14 +143,23 @@ def prep_pointcloud(input_dict, global_random_rot_range=global_random_rot_range, group_ids=group_ids, num_try=100) + #print(gt_boxes_mask.shape,gt_boxes.shape,"after") + # should remove unrelated objects after noise per object gt_boxes = gt_boxes[gt_boxes_mask] gt_names = gt_names[gt_boxes_mask] + + if group_ids is not None: group_ids = group_ids[gt_boxes_mask] + + + gt_classes = np.array( [class_names.index(n) + 1 for n in gt_names], dtype=np.int32) - + + + #need to check the output gt_boxes, points = prep.random_flip(gt_boxes, points) gt_boxes, points = prep.global_rotation( gt_boxes, points, rotation=global_rotation_noise) @@ -211,7 +168,8 @@ def prep_pointcloud(input_dict, # Global translation gt_boxes, points = prep.global_translate(gt_boxes, points, global_loc_noise_std) - + + bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] mask = prep.filter_gt_box_outside_range(gt_boxes, bv_range) gt_boxes = gt_boxes[mask] @@ -222,17 +180,16 @@ def prep_pointcloud(input_dict, # limit rad to [-pi, pi] gt_boxes[:, 6] = box_np_ops.limit_period( gt_boxes[:, 6], offset=0.5, period=2 * np.pi) + #assert -np.pi/2 <= g <= np.pi/2 if shuffle_points: # shuffle is a little slow. 
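+        # (in-place permutation; it only changes which points are kept once a
+        # voxel reaches its per-voxel point cap during voxelization)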
np.random.shuffle(points) - # [0, -40, -3, 70.4, 40, 1] voxel_size = voxel_generator.voxel_size pc_range = voxel_generator.point_cloud_range grid_size = voxel_generator.grid_size - # [352, 400] - + voxels, coordinates, num_points = voxel_generator.generate( points, max_voxels) @@ -242,11 +199,8 @@ def prep_pointcloud(input_dict, 'coordinates': coordinates, "num_voxels": np.array([voxels.shape[0]], dtype=np.int64) } - example.update({ - 'rect': rect, - 'Trv2c': Trv2c, - 'P2': P2, - }) + + # if not lidar_input: feature_map_size = grid_size[:2] // out_size_factor feature_map_size = [*feature_map_size, 1][::-1] @@ -264,8 +218,7 @@ def prep_pointcloud(input_dict, anchors_bv = box_np_ops.rbbox2d_to_near_bbox( anchors[:, [0, 1, 3, 4, 6]]) example["anchors"] = anchors - # print("debug", anchors.shape, matched_thresholds.shape) - # anchors_bv = anchors_bv.reshape([-1, 4]) + anchors_mask = None if anchor_area_threshold >= 0: coors = coordinates @@ -276,8 +229,8 @@ def prep_pointcloud(input_dict, anchors_area = box_np_ops.fused_get_anchors_area( dense_voxel_map, anchors_bv, voxel_size, pc_range, grid_size) anchors_mask = anchors_area > anchor_area_threshold - # example['anchors_mask'] = anchors_mask.astype(np.uint8) example['anchors_mask'] = anchors_mask + if generate_bev: bev_vxsize = voxel_size.copy() bev_vxsize[:2] /= 2 @@ -303,58 +256,120 @@ def prep_pointcloud(input_dict, return example -def _read_and_prep_v9(info, root_path, num_point_features, prep_func): +def _read_and_prep_v9(info, class_names,root_path, num_point_features, argoverse_loader ,argoverse_map,prep_func): """read data from KITTI-format infos, then call prep function. """ # velodyne_path = str(pathlib.Path(root_path) / info['velodyne_path']) # velodyne_path += '_reduced' - v_path = pathlib.Path(root_path) / info['velodyne_path'] - v_path = v_path.parent.parent / ( - v_path.parent.stem + "_reduced") / v_path.name + fr_seq = info["index"] + undr_scr = fr_seq.find('_') + seq = int(fr_seq[:undr_scr]) + frame = int(fr_seq[undr_scr+1:]) + argoverse_data = argoverse_loader[seq] + + ## converting to kitti camera coordinate system + ## all input points + points = argoverse_data.get_lidar(frame) + roi_pts = copy.deepcopy(points) + objects = argoverse_data.get_label_object(frame) - points = np.fromfile( - str(v_path), dtype=np.float32, - count=-1).reshape([-1, num_point_features]) - image_idx = info['image_idx'] - rect = info['calib/R0_rect'].astype(np.float32) - Trv2c = info['calib/Tr_velo_to_cam'].astype(np.float32) - P2 = info['calib/P2'].astype(np.float32) + city_name = argoverse_data.city_name + city_to_egovehicle_se3 = argoverse_data.get_pose(frame) + am = argoverse_map + + + + if info["include_roi"] or info["dr_area"] or not info["include_road_points"]: + roi_pts = city_to_egovehicle_se3.transform_point_cloud(roi_pts) # put into city coords + + if info["include_roi"]: + roi_pts_flag = am.remove_non_roi_points(roi_pts, city_name) # remove non-driveable region + roi_pts = roi_pts[roi_pts_flag] + + if not info["include_roi"] and info["dr_area"]: + roi_pts_flag = am.remove_non_driveable_area_points(roi_pts, city_name) # remove non-driveable region + roi_pts = roi_pts[roi_pts_flag] + + + if not info["include_road_points"]: + roi_pts = am.remove_ground_surface(roi_pts, city_name) # remove ground surface + # convert city to lidar co-ordinates + if info["include_roi"] or info["dr_area"] or not info["include_road_points"]: + roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(roi_pts) + + + roi_pts[:,2] = roi_pts[:,2] - 1.73 + + + 
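+    # The fixed 1.73 m shift drops the cloud so the ground sits near z = -1.73,
+    # mimicking the KITTI velodyne origin that the configured point-cloud range
+    # assumes (1.73 m is KITTI's nominal sensor height; an assumption here).
+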
gt_location = []
+    dims = []
+    gt_rots = []
+    gt_names = []
+
+    for obj in objects:
+        if obj.label_class in class_names:
+            corners = obj.as_3d_bbox()
+            center = np.mean(corners, axis=0)
+            center[2] = corners[2, 2]
+            gt_location.append(center)
+            gt_names.append(obj.label_class)
+            quat = obj.quaternion
+            yaw, pitch, roll = quaternion_to_euler(quat)
+            if np.pi/2 <= yaw <= np.pi:
+                yaw = 3*np.pi/2 - yaw
+            else:
+                yaw = -yaw - np.pi/2
+            assert -np.pi <= yaw <= np.pi
+
+            gt_rots.append(yaw)
+            dims.append([obj.width, obj.length, obj.height])
+
+    gt_location = np.array(gt_location)
+
+    if info["include_roi"]:
+        roi_locs = city_to_egovehicle_se3.transform_point_cloud(gt_location)  # put into city coords
+        # mask selecting box centers that lie inside the mapped region of interest
+        roi_locs_flag = am.remove_non_roi_points(roi_locs, city_name)
+
+    if not info["include_roi"] and info["dr_area"]:
+        roi_locs = city_to_egovehicle_se3.transform_point_cloud(gt_location)  # put into city coords
+        # mask selecting box centers that lie inside the driveable area
+        roi_locs_flag = am.remove_non_driveable_area_points(roi_locs, city_name)
+
+    dims = np.array(dims)
+    gt_rots = np.array(gt_rots)
+    gt_location[:, 2] -= 1.73
+    gt_boxes = np.concatenate([gt_location, dims, gt_rots[..., np.newaxis]], axis=1).astype(np.float32)
+    gt_names = np.array(gt_names)
+
+    if info["include_roi"] or info["dr_area"]:
+        gt_boxes = gt_boxes[roi_locs_flag]
+        gt_names = gt_names[roi_locs_flag]
     input_dict = {
-        'points': points,
-        'rect': rect,
-        'Trv2c': Trv2c,
-        'P2': P2,
-        'image_shape': np.array(info["img_shape"], dtype=np.int32),
-        'image_idx': image_idx,
-        'image_path': info['img_path'],
-        # 'pointcloud_num_features': num_point_features,
+        'points': roi_pts,
+        'pointcloud_num_features': num_point_features,
+        'gt_boxes': gt_boxes,
+        'gt_names': gt_names,
     }
-    if 'annos' in info:
-        annos = info['annos']
-        # we need other objects to avoid collision when sample
-        annos = kitti.remove_dontcare(annos)
-        loc = annos["location"]
-        dims = annos["dimensions"]
-        rots = annos["rotation_y"]
-        gt_names = annos["name"]
-        # print(gt_names, len(loc))
-        gt_boxes = np.concatenate(
-            [loc, dims, rots[..., np.newaxis]], axis=1).astype(np.float32)
-        # gt_boxes = box_np_ops.box_camera_to_lidar(gt_boxes, rect, Trv2c)
-        difficulty = annos["difficulty"]
-        input_dict.update({
-            'gt_boxes': gt_boxes,
-            'gt_names': gt_names,
-            'difficulty': difficulty,
-        })
+    '''
     if 'group_ids' in annos:
         input_dict['group_ids'] = annos["group_ids"]
+    '''
     example = prep_func(input_dict=input_dict)
-    example["image_idx"] = image_idx
-    example["image_shape"] = input_dict["image_shape"]
     if "anchors_mask" in example:
         example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
+    example["image_idx"] = fr_seq
+    example["image_shape"] = np.array([400, 400], dtype=np.int32)
+    if info["include_roadmap"]:
+        example["road_map"] = torch.load(info["road_map"] + fr_seq)
+    else:
+        example["road_map"] = None
+    example["include_roadmap"] = info["include_roadmap"]
+    #torch.save(example,"./network_input_examples/" + info)
     return example
diff --git a/second/get_tracking_result.py b/second/get_tracking_result.py
new file mode 100644
index 00000000..8a569a6e
--- /dev/null
+++ b/second/get_tracking_result.py
@@ -0,0 +1,92 @@
+import argoverse
+from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
+from argoverse.map_representation.map_api import ArgoverseMap
+import pickle
+import math
+import numpy as np
+import os
+import argparse
+
+parser = argparse.ArgumentParser(description="arg parser")
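+
+## NOTE: despite its name, --model_path points at the pickled detections
+## written by pp_inference.py (its --save_path), not at a network checkpoint.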
+parser.add_argument('--model_path',type=str,help='specify path where result file is stored.') +parser.add_argument('--sv_dir',type=str,help='specify path to store the results.') +parser.add_argument('--set',type=str,default='val',help='test/train/val') + +#parser.add_argument('--thresh',type=float,help='specify threshold for mAP evaluation') +#parser.add_argument('--det_thresh',type=float,help='specify detection threshold for filtering detections') +args = parser.parse_args() + + + +## NOTE : if file already exists, this code will append on file +## for storing in form of world coordinates +## second entry 1 for pedestrian, 2 for car + +#sv_dir = "./AB3DMOT/data/argo/car_3d_det_val_96/" + +def track_format(): + f = open(args.model_path,"rb") + result = pickle.load(f) + print(result[0].keys()) + + sv_dir = args.sv_dir + + if not os.path.exists(sv_dir): + os.mkdir(sv_dir) + else: + for fls in os.listdir(sv_dir): + os.remove(sv_dir+fls) + + save_path= "" + + root_dir = os.path.join('./../../argodataset/argoverse-tracking/', args.set) + argoverse_loader = ArgoverseTrackingLoader(root_dir) + + am = ArgoverseMap() + + for res_cnt,res in enumerate(result): + if len(res['image_idx'])==0: + continue + if res_cnt%100 == 0: + print(res_cnt) + fr_seq = res['image_idx'][0] + undr_scr = fr_seq.find('_') + seq = int(fr_seq[:undr_scr]) + frame = int(fr_seq[undr_scr+1:]) + argoverse_data = argoverse_loader[seq] + seq_id = argoverse_loader.log_list[seq] + #save_path = os.path.join("./point_pillars/second/sample_result_ab3dmot_format/",seq_id+".txt") + save_path = os.path.join(sv_dir,seq_id+".txt") + + city_name = argoverse_data.city_name + city_to_egovehicle_se3 = argoverse_data.get_pose(frame) + + file = "" + if not os.path.exists(save_path): + file = open(save_path,"w") + else: + file = open(save_path,"a") + + for i in range(len(res['name'])): + r_y = res['rotation_y'][i] + if r_y > np.pi : r_y -= 2*np.pi + if r_y < -np.pi : r_y += 2*np.pi + + x,y,z = res['location'][i][2], -res['location'][i][0], 1.73 - res['location'][i][1] + #print(z) + roi_pts = city_to_egovehicle_se3.transform_point_cloud(np.array([[x,y,z]])) # put into city coords + x,y,z = roi_pts[0] + x,y,z = -y,-z,x + + + file.write( str(frame) + ", 1 ," + str(res['bbox'][i][0]) + " , " + str(res['bbox'][i][1]) + " , " + + str(res['bbox'][i][2]) + " , " + str(res['bbox'][i][3]) + " , " + str(res['score'][i]) + " , " + + str(res['dimensions'][i][1]) + " , " + str(res['dimensions'][i][2]) + " , " + str(res['dimensions'][i][0]) + + " , " + str(x) + " , " + str(y) + " , " + + str(z) + " , " + str(r_y) + + " , " + str(res['alpha'][i]) + " " + str("\n") ) + file.close() + + +if __name__ == '__main__': + track_format() \ No newline at end of file diff --git a/second/ped_train.trch b/second/ped_train.trch new file mode 100644 index 00000000..cdbaa30c Binary files /dev/null and b/second/ped_train.trch differ diff --git a/second/ped_val.trch b/second/ped_val.trch new file mode 100644 index 00000000..ca8c769e Binary files /dev/null and b/second/ped_val.trch differ diff --git a/second/pp_inference.py b/second/pp_inference.py new file mode 100644 index 00000000..7b9e8e11 --- /dev/null +++ b/second/pp_inference.py @@ -0,0 +1,267 @@ +pth = '/users/gpu/gunji/.local/lib/python3.6/site-packages' +import sys +if pth in sys.path: + sys.path.remove(pth) + +print("pp_inference",sys.path) +import argoverse +from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader +from argoverse.map_representation.map_api import ArgoverseMap +import pathlib 
+import shutil +from second.data.preprocess import merge_second_batch, prep_pointcloud +from pathlib import Path + +import numpy as np +import torch +import os + +import torchplus +from second.core import box_np_ops +from second.core.inference import InferenceContext +from second.builder import target_assigner_builder, voxel_builder +from second.pytorch.builder import box_coder_builder, second_builder +from second.pytorch.models.voxelnet import VoxelNet +from second.pytorch.train import predict_kitti_to_anno, example_convert_to_torch +import fire +import copy +import abc +import contextlib + +import numpy as np +from google.protobuf import text_format + +from second.data.preprocess import merge_second_batch, prep_pointcloud +from second.protos import pipeline_pb2 +from numba.errors import NumbaDeprecationWarning, NumbaWarning, NumbaPerformanceWarning +import warnings +import pickle +warnings.simplefilter('ignore', category=NumbaDeprecationWarning) +warnings.simplefilter('ignore', category=NumbaWarning) +warnings.simplefilter('ignore', category=NumbaPerformanceWarning) + + +import argparse +parser = argparse.ArgumentParser(description="arg parser") +parser.add_argument('--device', type=int, default=1, help='specify the gpu device for training') +parser.add_argument('--model_path', type=str, help='specify path to load model from.') +parser.add_argument('--save_path', type=str, help='specify path to save evaluation results.') +parser.add_argument('--config_path', type=str, help='config path') +parser.add_argument('--model_dir', type=str, help='model dir') +parser.add_argument('--include_roi',type=int,default=1) +parser.add_argument('--include_road_points',type=int,default=0) +parser.add_argument('--dr_area',type=int,default=0) +parser.add_argument('--set', default='val',type=str, help='val/test/train') + + + +args = parser.parse_args() +print(args.device) +torch.cuda.set_device(args.device) + + + +def test(config_path=args.config_path, + model_dir = args.model_dir, + result_path=None, + create_folder=False, + pickle_result=True, + include_roadmap=False, + device=1): + """train a VoxelNet model specified by a config file. 
+ """ + if create_folder: + if pathlib.Path(model_dir).exists(): + model_dir = torchplus.train.create_folder(model_dir) + + model_dir = pathlib.Path(model_dir) + model_dir.mkdir(parents=True, exist_ok=True) + eval_checkpoint_dir = model_dir / 'eval_checkpoints' + eval_checkpoint_dir.mkdir(parents=True, exist_ok=True) + if result_path is None: + result_path = model_dir / 'results' + config_file_bkp = "pipeline.config" + config = pipeline_pb2.TrainEvalPipelineConfig() + with open(config_path, "r") as f: + proto_str = f.read() + text_format.Merge(proto_str, config) + shutil.copyfile(config_path, str(model_dir / config_file_bkp)) + input_cfg = config.train_input_reader + eval_input_cfg = config.eval_input_reader + model_cfg = config.model.second + train_cfg = config.train_config + batch_size = 1 + class_names = list(input_cfg.class_names) + ###################### + # BUILD VOXEL GENERATOR + ###################### + voxel_generator = voxel_builder.build(model_cfg.voxel_generator) + grid_size = voxel_generator.grid_size + ###################### + # BUILD TARGET ASSIGNER + ###################### + bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]] + box_coder = box_coder_builder.build(model_cfg.box_coder) + target_assigner_cfg = model_cfg.target_assigner + target_assigner = target_assigner_builder.build(target_assigner_cfg, + bv_range, box_coder) + ###################### + # BUILD NET + ###################### + center_limit_range = model_cfg.post_center_limit_range + net = second_builder.build(model_cfg, voxel_generator, target_assigner,include_roadmap) + net.cuda().eval() + + print("num_trainable parameters:", len(list(net.parameters()))) + # for n, p in net.named_parameters(): + # print(n, p.shape) + + #torchplus.train.try_restore_latest_checkpoints(model_dir, [net]) + torchplus.train.restore(args.model_path, net) + #torchplus.train.restore("./ped_models_56/voxelnet-275130.tckpt",net) + out_size_factor = model_cfg.rpn.layer_strides[0] / model_cfg.rpn.upsample_strides[0] + print(out_size_factor) + #out_size_factor *= model_cfg.middle_feature_extractor.downsample_factor + out_size_factor = int(out_size_factor) + feature_map_size = grid_size[:2] // out_size_factor + feature_map_size = [*feature_map_size, 1][::-1] + print(feature_map_size) + ret = target_assigner.generate_anchors(feature_map_size) + #anchors_dict = target_assigner.generate_anchors_dict(feature_map_size) + anchors = ret["anchors"] + anchors = anchors.reshape([-1, 7]) + matched_thresholds = ret["matched_thresholds"] + unmatched_thresholds = ret["unmatched_thresholds"] + anchors_bv = box_np_ops.rbbox2d_to_near_bbox( + anchors[:, [0, 1, 3, 4, 6]]) + anchor_cache = { + "anchors": anchors, + "anchors_bv": anchors_bv, + "matched_thresholds": matched_thresholds, + "unmatched_thresholds": unmatched_thresholds, + #"anchors_dict": anchors_dict, + } + + + am = ArgoverseMap() + dt_annos = [] + + root_dir = os.path.join('./../../argodataset/argoverse-tracking/',args.set) + argoverse_loader = ArgoverseTrackingLoader(root_dir) + + prog_cnt = 0 + for seq in range(len(argoverse_loader)): + argoverse_data = argoverse_loader[seq] + nlf = argoverse_data.num_lidar_frame + for frame in range(nlf): + prog_cnt += 1 + if prog_cnt%50 ==0: + print(prog_cnt) + points = argoverse_data.get_lidar(frame) + roi_pts = copy.deepcopy(points) + city_name = argoverse_data.city_name + city_to_egovehicle_se3 = argoverse_data.get_pose(frame) + + + ''' + roi_pts = city_to_egovehicle_se3.transform_point_cloud(roi_pts) # put into city coords + #non roi + roi_pts_flag 
= am.remove_non_roi_points(roi_pts, city_name) # remove non-driveable region + roi_pts = roi_pts[roi_pts_flag] + roi_pts = am.remove_ground_surface(roi_pts, city_name) # remove ground surface + + # convert city to lidar co-ordinates + + roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(roi_pts) + ''' + if args.include_roi or args.dr_area or not args.include_road_points: + roi_pts = city_to_egovehicle_se3.transform_point_cloud(roi_pts) # put into city coords + + if args.include_roi: + roi_pts_flag = am.remove_non_roi_points(roi_pts, city_name) # remove non-driveable region + roi_pts = roi_pts[roi_pts_flag] + + if not args.include_roi and args.dr_area: + roi_pts_flag = am.remove_non_driveable_area_points(roi_pts, city_name) # remove non-driveable region + roi_pts = roi_pts[roi_pts_flag] + + + if not args.include_road_points: + roi_pts = am.remove_ground_surface(roi_pts, city_name) # remove ground surface + + # convert city to lidar co-ordinates + if args.include_roi or args.dr_area or not args.include_road_points: + roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(roi_pts) + + + + + roi_pts[:,2] = roi_pts[:,2] - 1.73 + + pts_x, pts_y, pts_z = roi_pts[:,0], roi_pts[:,1], roi_pts[:,2] + + + input_dict = { + 'points': roi_pts, + 'pointcloud_num_features': 3, + } + + out_size_factor = model_cfg.rpn.layer_strides[0] // model_cfg.rpn.upsample_strides[0] + + + example = prep_pointcloud( + input_dict=input_dict, + root_path=None, + voxel_generator=voxel_generator, + target_assigner=target_assigner, + max_voxels=input_cfg.max_number_of_voxels, + class_names=list(input_cfg.class_names), + training=False, + create_targets=False, + shuffle_points=input_cfg.shuffle_points, + generate_bev=False, + without_reflectivity=model_cfg.without_reflectivity, + num_point_features=model_cfg.num_point_features, + anchor_area_threshold=input_cfg.anchor_area_threshold, + anchor_cache=anchor_cache, + out_size_factor=out_size_factor, + out_dtype=np.float32) + + + + if "anchors_mask" in example: + example["anchors_mask"] = example["anchors_mask"].astype(np.uint8) + example["image_idx"] = str(seq) + "_" + str(frame) + example["image_shape"] = np.array([400,400],dtype=np.int32) + example["road_map"] = None + example["include_roadmap"] = False + example["points"] = roi_pts + #torch.save(example,"./network_input_examples/" + info) + example = merge_second_batch([example]) + + example_torch = example_convert_to_torch(example,device=args.device) + try: + result_annos = predict_kitti_to_anno( + net, example_torch, input_cfg.class_names, + model_cfg.post_center_limit_range, model_cfg.lidar_input) + except: + print(seq,frame) + continue + dt_annos += result_annos + + if pickle_result: + sdi = args.save_path.rfind('/') + save_dir = args.save_path[:sdi] + if not os.path.exists(save_dir): + os.mkdir(save_dir) + + with open( args.save_path, 'wb') as f: + pickle.dump(dt_annos, f) + + +if __name__ == '__main__': + test() + + + + \ No newline at end of file diff --git a/second/protos/input_reader_pb2.py b/second/protos/input_reader_pb2.py index 8f831b1a..6ffdd530 100644 --- a/second/protos/input_reader_pb2.py +++ b/second/protos/input_reader_pb2.py @@ -224,7 +224,8 @@ has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), + serialized_options=None, file=DESCRIPTOR), + ], extensions=[ ], diff --git a/second/pytorch/builder/input_reader_builder.py 
b/second/pytorch/builder/input_reader_builder.py index a9345808..9b293de4 100644 --- a/second/pytorch/builder/input_reader_builder.py +++ b/second/pytorch/builder/input_reader_builder.py @@ -48,6 +48,7 @@ def dataset(self): def build(input_reader_config, model_config, + options, training, voxel_generator, target_assigner=None) -> DatasetWrapper: @@ -66,7 +67,6 @@ def build(input_reader_config, if not isinstance(input_reader_config, input_reader_pb2.InputReader): raise ValueError('input_reader_config not of type ' 'input_reader_pb2.InputReader.') - dataset = dataset_builder.build(input_reader_config, model_config, - training, voxel_generator, target_assigner) + dataset = dataset_builder.build(input_reader_config, model_config,options,training, voxel_generator, target_assigner) dataset = DatasetWrapper(dataset) return dataset diff --git a/second/pytorch/builder/second_builder.py b/second/pytorch/builder/second_builder.py index 20d0d4bf..b22b5878 100644 --- a/second/pytorch/builder/second_builder.py +++ b/second/pytorch/builder/second_builder.py @@ -21,7 +21,7 @@ def build(model_cfg: second_pb2.VoxelNet, voxel_generator, - target_assigner) -> VoxelNet: + target_assigner,include_roadmap=False) -> VoxelNet: """build second pytorch instance. """ if not isinstance(model_cfg, second_pb2.VoxelNet): @@ -50,6 +50,7 @@ def build(model_cfg: second_pb2.VoxelNet, voxel_generator, direction_loss_weight = model_cfg.direction_loss_weight net = VoxelNet( + include_roadmap, dense_shape, num_class=num_class, vfe_class_name=model_cfg.voxel_feature_extractor.module_class_name, diff --git a/second/pytorch/core/box_torch_ops.py b/second/pytorch/core/box_torch_ops.py index 0c92f57b..1fcfa4ea 100644 --- a/second/pytorch/core/box_torch_ops.py +++ b/second/pytorch/core/box_torch_ops.py @@ -452,11 +452,12 @@ def nms(bboxes, scores, indices = torch.topk(scores, k=pre_max_size) bboxes = bboxes[indices] dets = torch.cat([bboxes, scores.unsqueeze(-1)], dim=1) + device = dets.get_device() dets_np = dets.data.cpu().numpy() if len(dets_np) == 0: keep = np.array([], dtype=np.int64) else: - ret = np.array(nms_gpu(dets_np, iou_threshold), dtype=np.int64) + ret = np.array(nms_gpu(dets_np, iou_threshold,device), dtype=np.int64) keep = ret[:post_max_size] if keep.shape[0] == 0: return None diff --git a/second/pytorch/inference.py b/second/pytorch/inference.py index 4e1e38d6..4d8406ee 100644 --- a/second/pytorch/inference.py +++ b/second/pytorch/inference.py @@ -12,6 +12,7 @@ from second.pytorch.train import predict_kitti_to_anno, example_convert_to_torch + class TorchInferenceContext(InferenceContext): def __init__(self): super().__init__() diff --git a/second/pytorch/models/pointpillars.py b/second/pytorch/models/pointpillars.py index 6d975d1e..a79be695 100644 --- a/second/pytorch/models/pointpillars.py +++ b/second/pytorch/models/pointpillars.py @@ -11,6 +11,7 @@ from second.pytorch.utils import get_paddings_indicator from torchplus.nn import Empty from torchplus.tools import change_default_args +import numpy as np class PFNLayer(nn.Module): @@ -133,7 +134,8 @@ def forward(self, features, num_voxels, coors): mask = get_paddings_indicator(num_voxels, voxel_count, axis=0) mask = torch.unsqueeze(mask, -1).type_as(features) features *= mask - + + #print(features.shape,num_voxels.shape,torch.min(num_voxels),torch.max(num_voxels)) # Forward pass through PFNLayers for pfn in self.pfn_layers: features = pfn(features) @@ -160,15 +162,16 @@ def __init__(self, self.nx = output_shape[3] self.nchannels = num_input_features - def forward(self, 
voxel_features, coords, batch_size): + def forward(self, voxel_features, coords, batch_size,road_map,include_roadmap): # batch_canvas will be the final output. batch_canvas = [] for batch_itt in range(batch_size): # Create the canvas for this sample - canvas = torch.zeros(self.nchannels, self.nx * self.ny, dtype=voxel_features.dtype, + canvas = torch.zeros(self.nchannels + int(include_roadmap), self.nx * self.ny, dtype=voxel_features.dtype, device=voxel_features.device) - + #print(road_map.shape) + road_mask = road_map[batch_itt] # Only include non-empty pillars batch_mask = coords[:, 0] == batch_itt this_coords = coords[batch_mask, :] @@ -178,8 +181,9 @@ def forward(self, voxel_features, coords, batch_size): voxels = voxels.t() # Now scatter the blob back to the canvas. - canvas[:, indices] = voxels - + canvas[:self.nchannels, indices] = voxels + if include_roadmap: + canvas[self.nchannels] = torch.from_numpy(np.reshape(road_mask,-1)).float().to('cuda') # Append to a list for later stacking. batch_canvas.append(canvas) @@ -187,6 +191,7 @@ def forward(self, voxel_features, coords, batch_size): batch_canvas = torch.stack(batch_canvas, 0) # Undo the column stacking to final 4-dim tensor - batch_canvas = batch_canvas.view(batch_size, self.nchannels, self.ny, self.nx) + batch_canvas = batch_canvas.view(batch_size, self.nchannels + int(include_roadmap), self.ny, self.nx) + #print(batch_canvas.shape) return batch_canvas diff --git a/second/pytorch/models/voxelnet.py b/second/pytorch/models/voxelnet.py index 3e496727..05de2460 100644 --- a/second/pytorch/models/voxelnet.py +++ b/second/pytorch/models/voxelnet.py @@ -3,7 +3,7 @@ from functools import reduce import numpy as np -import sparseconvnet as scn +#import sparseconvnet as scn import torch from torch import nn from torch.nn import functional as F @@ -188,7 +188,7 @@ def forward(self, features, num_voxels, coors): voxelwise = torch.max(features, dim=1)[0] return voxelwise - +''' class SparseMiddleExtractor(nn.Module): def __init__(self, output_shape, @@ -258,7 +258,7 @@ def forward(self, voxel_features, coors, batch_size): N, C, D, H, W = ret.shape ret = ret.view(N, C * D, H, W) return ret - +''' class ZeroPad3d(nn.ConstantPad3d): def __init__(self, padding): @@ -314,6 +314,7 @@ def forward(self, voxel_features, coors, batch_size): class RPN(nn.Module): def __init__(self, + include_roadmap, use_norm=True, num_class=2, layer_nums=[3, 5, 5], @@ -379,7 +380,7 @@ def __init__(self, self.block1 = Sequential( nn.ZeroPad2d(1), Conv2d( - num_input_filters, num_filters[0], 3, stride=layer_strides[0]), + num_input_filters + int(include_roadmap), num_filters[0], 3, stride=layer_strides[0]), BatchNorm2d(num_filters[0]), nn.ReLU(), ) @@ -473,6 +474,8 @@ def forward(self, x, bev=None): "box_preds": box_preds, "cls_preds": cls_preds, } + #if not self.training: + # ret_dict["final_tensor"] = x.data().cpu() if self._use_direction_classifier: dir_cls_preds = self.conv_dir_cls(x) dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous() @@ -488,6 +491,7 @@ class LossNormType(Enum): class VoxelNet(nn.Module): def __init__(self, + include_roadmap, output_shape, num_class=2, num_input_features=4, @@ -616,6 +620,7 @@ def __init__(self, } rpn_class = rpn_class_dict[rpn_class_name] self.rpn = rpn_class( + include_roadmap, use_norm=True, num_class=num_class, layer_nums=rpn_layer_nums, @@ -670,7 +675,7 @@ def forward(self, example): preds_dict = self.sparse_rpn(voxel_features, coors, batch_size_dev) else: spatial_features = self.middle_feature_extractor( - 
diff --git a/second/pytorch/models/voxelnet.py b/second/pytorch/models/voxelnet.py
index 3e496727..05de2460 100644
--- a/second/pytorch/models/voxelnet.py
+++ b/second/pytorch/models/voxelnet.py
@@ -3,7 +3,7 @@
 from functools import reduce
 
 import numpy as np
-import sparseconvnet as scn
+# import sparseconvnet as scn
 import torch
 from torch import nn
 from torch.nn import functional as F
@@ -188,7 +188,7 @@ def forward(self, features, num_voxels, coors):
         voxelwise = torch.max(features, dim=1)[0]
         return voxelwise
 
-
+'''
 class SparseMiddleExtractor(nn.Module):
     def __init__(self,
                  output_shape,
@@ -258,7 +258,7 @@ def forward(self, voxel_features, coors, batch_size):
         N, C, D, H, W = ret.shape
         ret = ret.view(N, C * D, H, W)
         return ret
-
+'''
 
 class ZeroPad3d(nn.ConstantPad3d):
     def __init__(self, padding):
@@ -314,6 +314,7 @@ def forward(self, voxel_features, coors, batch_size):
 
 class RPN(nn.Module):
     def __init__(self,
+                 include_roadmap,
                  use_norm=True,
                  num_class=2,
                  layer_nums=[3, 5, 5],
@@ -379,7 +380,7 @@ def __init__(self,
         self.block1 = Sequential(
             nn.ZeroPad2d(1),
             Conv2d(
-                num_input_filters, num_filters[0], 3, stride=layer_strides[0]),
+                num_input_filters + int(include_roadmap), num_filters[0], 3, stride=layer_strides[0]),
             BatchNorm2d(num_filters[0]),
             nn.ReLU(),
         )
@@ -473,6 +474,8 @@ def forward(self, x, bev=None):
             "box_preds": box_preds,
             "cls_preds": cls_preds,
         }
+        # if not self.training:
+        #     ret_dict["final_tensor"] = x.data().cpu()
         if self._use_direction_classifier:
             dir_cls_preds = self.conv_dir_cls(x)
             dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
@@ -488,6 +491,7 @@ class LossNormType(Enum):
 
 class VoxelNet(nn.Module):
     def __init__(self,
+                 include_roadmap,
                  output_shape,
                  num_class=2,
                  num_input_features=4,
@@ -616,6 +620,7 @@ def __init__(self,
         }
         rpn_class = rpn_class_dict[rpn_class_name]
         self.rpn = rpn_class(
+            include_roadmap,
             use_norm=True,
             num_class=num_class,
             layer_nums=rpn_layer_nums,
@@ -670,7 +675,7 @@ def forward(self, example):
             preds_dict = self.sparse_rpn(voxel_features, coors, batch_size_dev)
         else:
             spatial_features = self.middle_feature_extractor(
-                voxel_features, coors, batch_size_dev)
+                voxel_features, coors, batch_size_dev, example["road_map"], example["include_roadmap"])
             if self._use_bev:
                 preds_dict = self.rpn(spatial_features, example["bev_map"])
             else:
@@ -748,9 +753,9 @@ def predict(self, example, preds_dict):
         batch_anchors = example["anchors"].view(batch_size, -1, 7)
         self._total_inference_count += batch_size
-        batch_rect = example["rect"]
-        batch_Trv2c = example["Trv2c"]
-        batch_P2 = example["P2"]
+        # batch_rect = example["rect"]
+        # batch_Trv2c = example["Trv2c"]
+        # batch_P2 = example["P2"]
         if "anchors_mask" not in example:
             batch_anchors_mask = [None] * batch_size
         else:
@@ -778,9 +783,8 @@
             batch_dir_preds = [None] * batch_size
 
         predictions_dicts = []
-        for box_preds, cls_preds, dir_preds, rect, Trv2c, P2, img_idx, a_mask in zip(
-                batch_box_preds, batch_cls_preds, batch_dir_preds, batch_rect,
-                batch_Trv2c, batch_P2, batch_imgidx, batch_anchors_mask
+        for box_preds, cls_preds, dir_preds, img_idx, a_mask in zip(
+                batch_box_preds, batch_cls_preds, batch_dir_preds,
+                batch_imgidx, batch_anchors_mask
         ):
             if a_mask is not None:
                 box_preds = box_preds[a_mask]
@@ -918,6 +922,8 @@ def predict(self, example, preds_dict):
                 final_box_preds = box_preds
                 final_scores = scores
                 final_labels = label_preds
+
+                '''
                 final_box_preds_camera = box_torch_ops.box_lidar_to_camera(
                     final_box_preds, rect, Trv2c)
                 locs = final_box_preds_camera[:, :3]
@@ -938,6 +944,15 @@ def predict(self, example, preds_dict):
                 # box_2d_preds = torch.stack([minx, miny, maxx, maxy], dim=1)
                 box_2d_preds = torch.cat([minxy, maxxy], dim=1)
                 # predictions
+                '''
+                box_2d_preds = [100, 100, 200, 200] * final_box_preds.shape[0]
+                box_2d_preds = torch.from_numpy(np.reshape(box_2d_preds, (-1, 4))).float().to('cuda')
+                final_box_preds_camera = torch.zeros(final_box_preds.shape).float().to('cuda')
+                final_box_preds_camera[:] = final_box_preds.detach()[:]
+                final_box_preds_camera[:, [0, 1, 2]] = final_box_preds_camera[:, [1, 2, 0]]
+                final_box_preds_camera[:, [0, 1]] = -final_box_preds_camera[:, [0, 1]]
+                final_box_preds_camera[:, [3, 4, 5]] = final_box_preds_camera[:, [4, 5, 3]]
+
                 predictions_dict = {
                     "bbox": box_2d_preds,
                     "box3d_camera": final_box_preds_camera,
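Since this Argoverse pipeline carries no KITTI camera calibration, `predict()` above replaces the lidar-to-camera projection with a fixed axis permutation plus dummy 2D boxes, just enough to keep the KITTI-format writer working. The same remapping in isolation; the axis convention is read off the diff, not a documented interface:

```python
import torch

# Sketch of the axis shuffle used in place of box_lidar_to_camera: the lidar
# box (x, y, z, w, l, h, yaw) is remapped into a pseudo-camera frame.
final_box_preds = torch.rand(5, 7)  # dummy lidar-frame boxes

cam = final_box_preds.clone()
cam[:, [0, 1, 2]] = cam[:, [1, 2, 0]]   # permute the translation axes
cam[:, [0, 1]] = -cam[:, [0, 1]]        # flip the two horizontal axes
cam[:, [3, 4, 5]] = cam[:, [4, 5, 3]]   # permute the box dimensions to match
```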
diff --git a/second/pytorch/train.py b/second/pytorch/train.py
index 4fcf0902..df0ea39e 100644
--- a/second/pytorch/train.py
+++ b/second/pytorch/train.py
@@ -1,10 +1,10 @@
+import sys
 import os
 import pathlib
 import pickle
 import shutil
 import time
 from functools import partial
-
 import fire
 import numpy as np
 import torch
@@ -21,7 +21,22 @@
                             second_builder)
 from second.utils.eval import get_coco_eval_result, get_official_eval_result
 from second.utils.progress_bar import ProgressBar
+from numba.errors import NumbaDeprecationWarning, NumbaWarning, NumbaPerformanceWarning
+import warnings
+
+warnings.simplefilter('ignore', category=NumbaDeprecationWarning)
+warnings.simplefilter('ignore', category=NumbaWarning)
+warnings.simplefilter('ignore', category=NumbaPerformanceWarning)
+
+import argparse
+parser = argparse.ArgumentParser(description="arg parser")
+parser.add_argument('--device', type=int, default=1, help='specify the gpu device for training')
+# parser.add_argument('--config_path', type=str, default="abc")
+# parser.add_argument('--model_dir', type=str, default="def")
+# parser.add_argument('--include_roadmap', type=bool, default=False)
+args, unknown = parser.parse_known_args()
+torch.cuda.set_device(args.device)
 
 def _get_pos_neg_loss(cls_loss, labels):
     # cls_loss: [N, num_anchors, num_class]
@@ -62,7 +77,7 @@ def flat_nested_json_dict(json_dict, sep=".") -> dict:
 
 def example_convert_to_torch(example, dtype=torch.float32,
                              device=None) -> dict:
-    device = device or torch.device("cuda:0")
+    device = device or torch.device("cuda:" + str(args.device))
     example_torch = {}
     float_names = [
         "voxels", "anchors", "reg_targets", "reg_weights", "bev_map", "rect",
@@ -89,7 +104,12 @@ def train(config_path,
           create_folder=False,
           display_step=50,
           summary_step=5,
-          pickle_result=True):
+          pickle_result=True,
+          include_roadmap=False,
+          dr_area=False,
+          include_roi=True,
+          include_road_points=False,
+          device=1):
     """train a VoxelNet model specified by a config file.
     """
     if create_folder:
@@ -130,9 +150,9 @@ def train(config_path,
     # BUILD NET
     ######################
     center_limit_range = model_cfg.post_center_limit_range
-    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
+    net = second_builder.build(model_cfg, voxel_generator, target_assigner, include_roadmap)
     net.cuda()
-    # net_train = torch.nn.DataParallel(net).cuda()
+    # net = torch.nn.DataParallel(net).cuda()
     print("num_trainable parameters:", len(list(net.parameters())))
     # for n, p in net.named_parameters():
    #     print(n, p.shape)
@@ -165,16 +185,25 @@ def train(config_path,
     ######################
     # PREPARE INPUT
     ######################
+    options = {}
+    options["include_roadmap"] = include_roadmap
+    options["include_roi"] = include_roi
+    options["dr_area"] = dr_area
+    options["include_road_points"] = include_road_points
+    # print(options)
+
     dataset = input_reader_builder.build(
         input_cfg,
         model_cfg,
+        options,
         training=True,
         voxel_generator=voxel_generator,
         target_assigner=target_assigner)
     eval_dataset = input_reader_builder.build(
         eval_input_cfg,
         model_cfg,
+        options,
         training=False,
         voxel_generator=voxel_generator,
         target_assigner=target_assigner)
@@ -243,7 +272,6 @@ def _worker_init_fn(worker_id):
             example_torch = example_convert_to_torch(example, float_dtype)
 
             batch_size = example["anchors"].shape[0]
-
             ret_dict = net(example_torch)
 
             # box_preds = ret_dict["box_preds"]
@@ -305,7 +333,7 @@ def _worker_init_fn(worker_id):
                 metrics["num_anchors"] = int(num_anchors)
                 metrics["lr"] = float(
                     mixed_optimizer.param_groups[0]['lr'])
-                metrics["image_idx"] = example['image_idx'][0]
+                # metrics["image_idx"] = example['image_idx'][0]
                 flatted_metrics = flat_nested_json_dict(metrics)
                 flatted_summarys = flat_nested_json_dict(metrics, "/")
                 for k, v in flatted_summarys.items():
@@ -331,15 +359,15 @@ def _worker_init_fn(worker_id):
                 print(log_str)
             ckpt_elasped_time = time.time() - ckpt_start_time
             if ckpt_elasped_time > train_cfg.save_checkpoints_secs:
-                torchplus.train.save_models(model_dir, [net, optimizer],
-                                            net.get_global_step())
+                # torchplus.train.save_models(model_dir, [net, optimizer],
+                #                             net.get_global_step())
                 ckpt_start_time = time.time()
         total_step_elapsed += steps
         torchplus.train.save_models(model_dir, [net, optimizer],
                                     net.get_global_step())
 
         # Ensure that all evaluation points are saved forever
-        torchplus.train.save_models(eval_checkpoint_dir, [net, optimizer], net.get_global_step(), max_to_keep=100)
+        # torchplus.train.save_models(eval_checkpoint_dir, [net, optimizer], net.get_global_step(), max_to_keep=100)
 
         net.eval()
         result_path_step = result_path / f"step_{net.get_global_step()}"
@@ -381,10 +409,11 @@ def _worker_init_fn(worker_id):
             f'generate label finished({sec_per_ex:.2f}/s). start eval:',
             file=logf)
         gt_annos = [
-            info["annos"] for info in eval_dataset.dataset.kitti_infos
+            info for info in eval_dataset.dataset.kitti_infos
         ]
         if not pickle_result:
             dt_annos = kitti.get_label_annos(result_path_step)
+            # print("dt_annos", dt_annos)
         result, mAPbbox, mAPbev, mAP3d, mAPaos = get_official_eval_result(gt_annos, dt_annos, class_names,
                                                                           return_data=True)
         print(result, file=logf)
@@ -394,10 +423,10 @@ def _worker_init_fn(worker_id):
         for i, class_name in enumerate(class_names):
             writer.add_scalar('bev_ap:{}'.format(class_name), mAPbev[i, 1, 0], global_step)
             writer.add_scalar('3d_ap:{}'.format(class_name), mAP3d[i, 1, 0], global_step)
-            writer.add_scalar('aos_ap:{}'.format(class_name), mAPaos[i, 1, 0], global_step)
+            # writer.add_scalar('aos_ap:{}'.format(class_name), mAPaos[i, 1, 0], global_step)
         writer.add_scalar('bev_map', np.mean(mAPbev[:, 1, 0]), global_step)
         writer.add_scalar('3d_map', np.mean(mAP3d[:, 1, 0]), global_step)
-        writer.add_scalar('aos_map', np.mean(mAPaos[:, 1, 0]), global_step)
+        # writer.add_scalar('aos_map', np.mean(mAPaos[:, 1, 0]), global_step)
         result = get_coco_eval_result(gt_annos, dt_annos, class_names)
         print(result, file=logf)
@@ -471,7 +500,7 @@ def _predict_kitti_to_file(net,
                 result_lines.append(result_line)
         else:
             result_lines = []
-        result_file = f"{result_save_path}/{kitti.get_image_index_str(img_idx)}.txt"
+        result_file = f"{result_save_path}/{img_idx}.txt"
         result_str = '\n'.join(result_lines)
         with open(result_file, 'w') as f:
             f.write(result_str)
@@ -504,17 +533,21 @@ def predict_kitti_to_anno(net,
             for box, box_lidar, bbox, score, label in zip(
                     box_preds, box_preds_lidar, box_2d_preds, scores,
                     label_preds):
+                '''
                 if not lidar_input:
                     if bbox[0] > image_shape[1] or bbox[1] > image_shape[0]:
                         continue
                     if bbox[2] < 0 or bbox[3] < 0:
                         continue
+                # print(img_shape)
+
                 if center_limit_range is not None:
                     limit_range = np.array(center_limit_range)
                     if (np.any(box_lidar[:3] < limit_range[:3])
                             or np.any(box_lidar[:3] > limit_range[3:])):
                         continue
+                '''
                 bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
                 bbox[:2] = np.maximum(bbox[:2], [0, 0])
                 anno["name"].append(class_names[int(label)])
@@ -545,7 +578,7 @@ def predict_kitti_to_anno(net,
             annos.append(kitti.empty_result_anno())
         num_example = annos[-1]["name"].shape[0]
         annos[-1]["image_idx"] = np.array(
-            [img_idx] * num_example, dtype=np.int64)
+            [img_idx] * num_example)
     return annos
@@ -555,7 +588,10 @@ def evaluate(config_path,
              predict_test=False,
              ckpt_path=None,
              ref_detfile=None,
-             pickle_result=True):
+             pickle_result=True,
+             include_roadmap=False,
+             device=1):
+    print(config_path)
     model_dir = pathlib.Path(model_dir)
     if predict_test:
         result_name = 'predict_test'
@@ -600,6 +636,7 @@ def evaluate(config_path,
     eval_dataset = input_reader_builder.build(
         input_cfg,
         model_cfg,
+        {"include_roadmap": include_roadmap, "include_roi": True, "dr_area": False, "include_road_points": False},  # build() expects the options dict, not a bare bool
         training=False,
         voxel_generator=voxel_generator,
         target_assigner=target_assigner)
@@ -643,9 +680,10 @@ def evaluate(config_path,
     print(f"avg forward time per example: {net.avg_forward_time:.3f}")
     print(f"avg postprocess time per example: {net.avg_postprocess_time:.3f}")
     if not predict_test:
-        gt_annos = [info["annos"] for info in eval_dataset.dataset.kitti_infos]
+        gt_annos = [info for info in eval_dataset.dataset.kitti_infos]
         if not pickle_result:
             dt_annos = kitti.get_label_annos(result_path_step)
+            # print(dt_annos)
         result = get_official_eval_result(gt_annos, dt_annos, class_names)
         print(result)
         result = get_coco_eval_result(gt_annos, dt_annos, class_names)
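A side note on the CLI plumbing added at the top of train.py: `parse_known_args()` lets argparse peel off `--device` at import time before `fire.Fire` dispatches the remaining arguments, which is also why `train()` and `evaluate()` declare a matching `device` kwarg for fire to bind. A small standalone demonstration of the split:

```python
import argparse

# parse_known_args() consumes only the flags it knows and returns the rest
# untouched, so a second dispatcher (fire, in train.py) can handle them.
parser = argparse.ArgumentParser()
parser.add_argument('--device', type=int, default=1)
args, unknown = parser.parse_known_args(
    ['--device', '0', 'train', '--config_path=./cfg.proto'])
print(args.device)  # 0
print(unknown)      # ['train', '--config_path=./cfg.proto']
```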
diff --git a/second/pytorch/utils.py b/second/pytorch/utils.py
index e49cbadc..fddd117b 100644
--- a/second/pytorch/utils.py
+++ b/second/pytorch/utils.py
@@ -11,7 +11,6 @@ def get_paddings_indicator(actual_num, max_num, axis=0):
     Returns:
         [type]: [description]
     """
-
     actual_num = torch.unsqueeze(actual_num, axis + 1)
     # tiled_actual_num: [N, M, 1]
     max_num_shape = [1] * len(actual_num.shape)
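For context on the function touched above: `get_paddings_indicator` builds the per-pillar validity mask that zeroes out padded point slots before the PFN layers. A tiny illustration of the mask it produces (equivalent logic written inline, not the library code itself):

```python
import torch

# Each pillar holds up to max_num points; slots beyond the actual count are
# zero padding and must be masked out before the max-pool over points.
actual_num = torch.tensor([2, 0, 3])  # real point counts for 3 pillars
max_num = 4
mask = torch.arange(max_num).view(1, -1) < actual_num.view(-1, 1)
# tensor([[ True,  True, False, False],
#         [False, False, False, False],
#         [ True,  True,  True, False]])
```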
diff --git a/second/result_vis.py b/second/result_vis.py
new file mode 100644
index 00000000..d51560c2
--- /dev/null
+++ b/second/result_vis.py
@@ -0,0 +1,205 @@
+import json
+import math
+import os
+import os.path
+import operator
+import pickle
+import sys
+import time
+from pprint import pprint
+
+import cv2
+import numpy as np
+import torch
+from torch.utils.data import Dataset, DataLoader
+from torchvision import transforms
+import matplotlib.pyplot as plt
+
+import argoverse
+from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
+from argoverse.map_representation.map_api import ArgoverseMap
+from shapely.geometry.polygon import Polygon
+
+am = ArgoverseMap()
+
+
+def quaternion_to_euler(quat):
+    # Argoverse stores rotations as (w, x, y, z) quaternions.
+    w, x, y, z = quat
+
+    t0 = +2.0 * (w * x + y * z)
+    t1 = +1.0 - 2.0 * (x * x + y * y)
+    roll = math.atan2(t0, t1)
+    t2 = +2.0 * (w * y - z * x)
+    t2 = +1.0 if t2 > +1.0 else t2
+    t2 = -1.0 if t2 < -1.0 else t2
+    pitch = math.asin(t2)
+    t3 = +2.0 * (w * z + x * y)
+    t4 = +1.0 - 2.0 * (y * y + z * z)
+    yaw = math.atan2(t3, t4)
+    return yaw, pitch, roll
+
+
+def center_to_argo_2d_box(location, dimensions, yaw):
+    # Convert (center, size, heading) triples into the four BEV corners of each box.
+    crnrs = []
+    for i in range(len(location)):
+        x, y, z = location[i]
+        l, h, w = dimensions[i]
+        corners = np.array([[l / 2, w / 2], [l / 2, -w / 2], [-l / 2, -w / 2], [-l / 2, w / 2]])
+
+        rot_mat = np.array([[np.cos(yaw[i]), -np.sin(yaw[i])],
+                            [np.sin(yaw[i]), np.cos(yaw[i])]])
+        rot_corners = np.dot(rot_mat, corners.T).T
+        rot_corners = rot_corners + [x, y]
+        crnrs.append(rot_corners)
+    return crnrs
+
+
+track_id = {}
+gt_track_id = {}
+cnt, gt_cnt = 1, 1
+argoverse_loader = ArgoverseTrackingLoader("./../../argodataset/argoverse-tracking/val")
+
+for seq in range(len(argoverse_loader)):
+    argoverse_data = argoverse_loader[seq]
+    seq_name = argoverse_loader.log_list[seq]
+    print(seq_name)
+    nlf = argoverse_data.num_lidar_frame
+    gt_to_pred = {}
+    cur_time = time.time()
+    for frame in range(nlf):
+        points = argoverse_data.get_lidar(frame)
+        gt_objects = argoverse_data.get_label_object(frame)
+
+        # Rotate the cloud into the BEV axis convention used by the detector.
+        points[:, [0, 1, 2]] = points[:, [1, 2, 0]]
+        points[:, [0, 1]] = -points[:, [0, 1]]
+
+        # Rasterize the points onto a 500 m x 500 m canvas at 0.1 m per cell.
+        bev = np.ones((5000, 5000, 3))
+        for pt in points:
+            x, z = int(np.ceil((pt[0] + 250) / 0.1)), int(np.ceil((pt[2] + 250) / 0.1))
+            if 0 <= x < 5000 and 0 <= z < 5000:
+                bev[x, z] = 0
+
+        lt = argoverse_data.lidar_timestamp_list[frame]
+        path = "./../../AB3DMOT/argo_results/val_final757k_t25.0_a22_h3_ioudot10.0_ka3_v0.3_d2.0/" + seq_name + "/per_sweep_annotations_amodal/tracked_object_labels_" + str(lt) + ".json"
+
+        print(time.time() - cur_time)
+        cur_time = time.time()
+        objects = json.load(open(path, 'r'))
+        save_corners = []
+
+        obj_centers, obj_ids, obj_corners = [], [], []
+
+        tp_cnt, fp_cnt, fn_cnt, sw_cnt = 0, 0, 0, 0
+        for obj in objects:
+            if obj['label_class'] == "VEHICLE":
+                center = [[obj['center']['x'], obj['center']['y'], obj['center']['z']]]
+                dimensions = [[obj['length'], obj['height'], obj['width']]]
+                quat = (obj['rotation']['w'], obj['rotation']['x'], obj['rotation']['y'], obj['rotation']['z'])
+
+                yaw, pitch, roll = quaternion_to_euler(quat)
+                corners = center_to_argo_2d_box(center, dimensions, [yaw])[0]
+                corners = corners[:, [0, 1]]
+                corners[:, [1]] = -corners[:, [1]]
+
+                if obj['track_label_uuid'] not in track_id:
+                    track_id[obj['track_label_uuid']] = cnt
+                    if cnt == 3:
+                        print(obj['track_label_uuid'])
+                    cnt += 1
+
+                obj_centers.append(center[0])
+                obj_ids.append(track_id[obj['track_label_uuid']])
+                obj_corners.append(corners)
+
+        obj_centers = np.array(obj_centers)
+        mark_obj = [False] * len(obj_centers)
+        for gt_obj in gt_objects:
+            if gt_obj.label_class == "VEHICLE":
+                points = gt_obj.as_3d_bbox()
+                centroid = np.reshape(np.mean(points, axis=0), (1, 3))
+                test_vec = (centroid - obj_centers)
+                l2_dist = np.linalg.norm(test_vec, axis=1)
+                closest_det, closest_id = np.min(l2_dist), np.argmin(l2_dist)
+
+                if gt_obj.track_id not in gt_track_id:
+                    gt_track_id[gt_obj.track_id] = gt_cnt
+                    gt_cnt += 1
+
+                corners = gt_obj.as_2d_bbox()
+                corners = corners[:, [0, 1]]
+                corners[:, [1]] = -corners[:, [1]]
+                corners[[2, 3], :] = corners[[3, 2], :]
+
+                # note: yaw here is still the heading of the last detection processed above
+                save_corners.append((corners, yaw))
+                corners = (corners + [250, 250]) / 0.1
+                xp, yp = np.mean(corners, axis=0)
+                font = cv2.FONT_HERSHEY_SIMPLEX
+                bottomLeftCornerOfText = (int(xp - 30), int(yp - 10))
+                fontScale = 1.2
+                fontColor = (0, 0, 255)
+                lineType = 8
+
+                # Match each ground-truth box to its closest detection within 2 m.
+                if closest_det < 2 and not mark_obj[closest_id]:
+                    if gt_obj.track_id not in gt_to_pred:
+                        gt_to_pred[gt_obj.track_id] = obj_ids[closest_id]
+
+                    mark_obj[closest_id] = True
+
+                    if gt_to_pred[gt_obj.track_id] != obj_ids[closest_id]:
+                        sw_cnt += 1
+                        cv2.polylines(bev, np.int32([corners]), True, (255, 255, 0), 2)
+                        cv2.putText(bev, "SW", bottomLeftCornerOfText, font, fontScale, fontColor, lineType)
+                    else:
+                        tp_cnt += 1
+                        cv2.putText(bev, "TP", bottomLeftCornerOfText, font, fontScale, fontColor, lineType)
+                        cv2.polylines(bev, np.int32([corners]), True, (255, 0, 0), 2)
+                else:
+                    fn_cnt += 1
+                    cv2.putText(bev, "FN", bottomLeftCornerOfText, font, fontScale, fontColor, lineType)
+                    cv2.polylines(bev, np.int32([corners]), True, (0, 0, 255), 2)
+
+        # Any detection left unmatched is counted as a false positive.
+        for fp_id, tps in enumerate(mark_obj):
+            if not tps:
+                fp_cnt += 1
+                fp_corners = obj_corners[fp_id]
+                fp_corners = (fp_corners + [250, 250]) / 0.1
+                xp, yp = np.mean(fp_corners, axis=0)
+                cv2.polylines(bev, np.int32([fp_corners]), True, (0, 255, 0), 2)
+                font = cv2.FONT_HERSHEY_SIMPLEX
+                bottomLeftCornerOfText = (int(xp - 30), int(yp - 10))
+                fontScale = 1.2
+                fontColor = (0, 0, 255)
+                lineType = 8
+                cv2.putText(bev, "FP", bottomLeftCornerOfText, font, fontScale, fontColor, lineType)
+
+        fontScale = 1.5
+        cv2.putText(bev, f"TP:{int(tp_cnt)}", (10, 100), font, fontScale, (0, 0, 0), lineType)
+        cv2.putText(bev, f"FN:{fn_cnt}", (10, 150), font, fontScale, (0, 0, 0), lineType)
+        cv2.putText(bev, f"FP:{fp_cnt}", (10, 200), font, fontScale, (0, 0, 0), lineType)
+        cv2.putText(bev, f"SW:{sw_cnt}", (10, 250), font, fontScale, (0, 0, 0), lineType)
+
+        plt.figure(figsize=(20, 20))
+        plt.imshow(bev)
+        os.makedirs(f"./track_analysis/{seq_name}/", exist_ok=True)
+        plt.savefig(f"./track_analysis/{seq_name}/pred_" + str(frame) + ".png")
+
+print(cnt, gt_cnt)
\ No newline at end of file
diff --git a/second/sample.trch b/second/sample.trch
new file mode 100644
index 00000000..d455d8b4
Binary files /dev/null and b/second/sample.trch differ
diff --git a/second/sample_val.trch b/second/sample_val.trch
new file mode 100644
index 00000000..53ab593e
Binary files /dev/null and b/second/sample_val.trch differ
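The `quaternion_to_euler` helper in result_vis.py above assumes Argoverse's (w, x, y, z) quaternion order. A quick sanity check of its yaw formula on a known rotation:

```python
import math

# A rotation of 90 degrees about +z should come back as yaw = pi/2.
theta = math.pi / 2
w, x, y, z = math.cos(theta / 2), 0.0, 0.0, math.sin(theta / 2)  # (w, x, y, z)

yaw = math.atan2(2.0 * (w * z + x * y), 1.0 - 2.0 * (y * y + z * z))
print(round(yaw, 6))  # 1.570796, i.e. pi/2
```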
diff --git a/second/test.trch b/second/test.trch
new file mode 100644
index 00000000..d18fcb4a
Binary files /dev/null and b/second/test.trch differ
diff --git a/second/train.trch b/second/train.trch
new file mode 100644
index 00000000..e15dd305
Binary files /dev/null and b/second/train.trch differ
diff --git a/second/utils/eval.py b/second/utils/eval.py
index 49c7421d..48e355cc 100644
--- a/second/utils/eval.py
+++ b/second/utils/eval.py
@@ -37,7 +37,7 @@ def get_thresholds(scores: np.ndarray, num_gt, num_sample_pts=41):
 
 def clean_data(gt_anno, dt_anno, current_class, difficulty):
-    CLASS_NAMES = ['car', 'pedestrian', 'cyclist', 'van', 'person_sitting', 'car', 'tractor', 'trailer']
+    CLASS_NAMES = ['VEHICLE', 'PEDESTRIAN', 'cyclist', 'van', 'person_sitting', 'car', 'tractor', 'trailer']
     MIN_HEIGHT = [40, 25, 25]
     MAX_OCCLUSION = [0, 1, 2]
     MAX_TRUNCATION = [0.15, 0.3, 0.5]
@@ -56,7 +56,7 @@ def clean_data(gt_anno, dt_anno, current_class, difficulty):
         elif (current_cls_name == "Pedestrian".lower()
               and "Person_sitting".lower() == gt_name):
             valid_class = 0
-        elif (current_cls_name == "Car".lower() and "Van".lower() == gt_name):
+        elif (current_cls_name == "VEHICLE".lower() and "Van".lower() == gt_name):
             valid_class = 0
         else:
             valid_class = -1
@@ -88,7 +88,7 @@ def clean_data(gt_anno, dt_anno, current_class, difficulty):
             ignored_dt.append(0)
         else:
             ignored_dt.append(-1)
-
+
     return num_valid_gt, ignored_gt, ignored_dt, dc_bboxes
@@ -661,12 +661,14 @@ def do_eval(gt_annos, dt_annos, current_class, min_overlaps,
     mAP_bbox = []
     mAP_aos = []
+    '''
     for i in range(3):  # i = difficulty
         ret = eval_class(gt_annos, dt_annos, current_class, i, 0,
                          min_overlaps[0], compute_aos)
         mAP_bbox.append(get_mAP(ret["precision"]))
         if compute_aos:
             mAP_aos.append(get_mAP(ret["orientation"]))
+    '''
     mAP_bev = []
     for i in range(3):
         ret = eval_class(gt_annos, dt_annos, current_class, i, 1,
@@ -694,13 +696,15 @@ def do_eval_v2(gt_annos,
                compute_aos=False,
                difficultys=[0, 1, 2]):
     # min_overlaps: [num_minoverlap, metric, num_class]
-    ret = eval_class_v3(gt_annos, dt_annos, current_classes, difficultys, 0,
-                        min_overlaps, compute_aos)
+
+    ## ret = eval_class_v3(gt_annos, dt_annos, current_classes, difficultys, 0,
+    ##                     min_overlaps, compute_aos)
     # ret: [num_class, num_diff, num_minoverlap, num_sample_points]
-    mAP_bbox = get_mAP_v2(ret["precision"])
-    mAP_aos = None
-    if compute_aos:
-        mAP_aos = get_mAP_v2(ret["orientation"])
+    ## mAP_bbox = get_mAP_v2(ret["precision"])
+    ## mAP_aos = None
+    ## if compute_aos:
+    ##     mAP_aos = get_mAP_v2(ret["orientation"])
+    mAP_bbox, mAP_aos = None, None
     ret = eval_class_v3(gt_annos, dt_annos, current_classes, difficultys, 1,
                         min_overlaps)
     mAP_bev = get_mAP_v2(ret["precision"])
@@ -720,7 +724,7 @@ def do_coco_style_eval(gt_annos, dt_annos, current_classes, overlap_ranges,
     mAP_bbox, mAP_bev, mAP_3d, mAP_aos = do_eval_v2(
         gt_annos, dt_annos, current_classes, min_overlaps, compute_aos)
     # ret: [num_class, num_diff, num_minoverlap]
-    mAP_bbox = mAP_bbox.mean(-1)
+    # mAP_bbox = mAP_bbox.mean(-1)
     mAP_bev = mAP_bev.mean(-1)
     mAP_3d = mAP_3d.mean(-1)
     if mAP_aos is not None:
@@ -797,8 +801,8 @@ def get_official_eval_result(gt_annos, dt_annos, current_classes, difficultys=[0, 1, 2], return_data=False):
                              [0.5, 0.25, 0.25, 0.5, 0.25, 0.5, 0.5, 0.5]])
     min_overlaps = np.stack([overlap_0_7, overlap_0_5], axis=0)  # [2, 3, 5]
     class_to_name = {
-        0: 'Car',
-        1: 'Pedestrian',
+        0: 'VEHICLE',
+        1: 'PEDESTRIAN',
         2: 'Cyclist',
         3: 'Van',
         4: 'Person_sitting',
@@ -834,19 +838,24 @@ def get_official_eval_result(gt_annos, dt_annos, current_classes, difficultys=[0, 1, 2], return_data=False):
             result += print_str(
                 (f"{class_to_name[curcls]} "
                  "AP@{:.2f}, {:.2f}, {:.2f}:".format(*min_overlaps[i, :, j])))
+
+            '''
             result += print_str((f"bbox AP:{mAPbbox[j, 0, i]:.2f}, "
                                  f"{mAPbbox[j, 1, i]:.2f}, "
                                  f"{mAPbbox[j, 2, i]:.2f}"))
-            result += print_str((f"bev AP:{mAPbev[j, 0, i]:.2f}, "
+            '''
+            result += print_str((f"bev AP:{mAPbev[j, 0, i]:.2f}, "
                                  f"{mAPbev[j, 1, i]:.2f}, "
                                  f"{mAPbev[j, 2, i]:.2f}"))
             result += print_str((f"3d AP:{mAP3d[j, 0, i]:.2f}, "
                                  f"{mAP3d[j, 1, i]:.2f}, "
                                  f"{mAP3d[j, 2, i]:.2f}"))
+            '''
             if compute_aos:
                 result += print_str((f"aos AP:{mAPaos[j, 0, i]:.2f}, "
                                      f"{mAPaos[j, 1, i]:.2f}, "
                                      f"{mAPaos[j, 2, i]:.2f}"))
+            '''
     if return_data:
         return result, mAPbbox, mAPbev, mAP3d, mAPaos
     else:
@@ -855,8 +864,8 @@
 
 def get_coco_eval_result(gt_annos, dt_annos, current_classes):
     class_to_name = {
-        0: 'Car',
-        1: 'Pedestrian',
+        0: 'VEHICLE',
+        1: 'PEDESTRIAN',
         2: 'Cyclist',
         3: 'Van',
         4: 'Person_sitting',
@@ -917,17 +926,21 @@ def get_coco_eval_result(gt_annos, dt_annos, current_classes):
         result += print_str(
             (f"{class_to_name[curcls]} "
              "coco AP@{:.2f}:{:.2f}:{:.2f}:".format(*o_range)))
+        '''
         result += print_str((f"bbox AP:{mAPbbox[j, 0]:.2f}, "
                              f"{mAPbbox[j, 1]:.2f}, "
                              f"{mAPbbox[j, 2]:.2f}"))
+        '''
         result += print_str((f"bev AP:{mAPbev[j, 0]:.2f}, "
                              f"{mAPbev[j, 1]:.2f}, "
                              f"{mAPbev[j, 2]:.2f}"))
         result += print_str((f"3d AP:{mAP3d[j, 0]:.2f}, "
                              f"{mAP3d[j, 1]:.2f}, "
                              f"{mAP3d[j, 2]:.2f}"))
+        '''
         if compute_aos:
             result += print_str((f"aos AP:{mAPaos[j, 0]:.2f}, "
                                  f"{mAPaos[j, 1]:.2f}, "
                                  f"{mAPaos[j, 2]:.2f}"))
+        '''
     return result
diff --git a/second/val.trch b/second/val.trch
new file mode 100644
index 00000000..27c0f217
Binary files /dev/null and b/second/val.trch differ
diff --git a/second/val_full.trch b/second/val_full.trch
new file mode 100644
index 00000000..3d0cd7f5
Binary files /dev/null and b/second/val_full.trch differ
diff --git a/torchplus/train/checkpoint.py b/torchplus/train/checkpoint.py
index 4db43ccf..b922e17d 100644
--- a/torchplus/train/checkpoint.py
+++ b/torchplus/train/checkpoint.py
@@ -104,7 +104,8 @@ def save(model_dir,
         min_step = min([get_step(name) for name in all_ckpts])
         ckpt_to_delete = "{}-{}.tckpt".format(model_name, min_step)
         all_ckpts.remove(ckpt_to_delete)
-        os.remove(str(Path(model_dir) / ckpt_to_delete))
+        if os.path.exists(str(Path(model_dir) / ckpt_to_delete)):
+            os.remove(str(Path(model_dir) / ckpt_to_delete))
         all_ckpts_filename = _ordered_unique([Path(f).name for f in all_ckpts])
         ckpt_info_dict['all_ckpts'][model_name] = all_ckpts_filename
     with open(ckpt_info_path, 'w') as f:
@@ -142,6 +143,7 @@ def try_restore_latest_checkpoints(model_dir, models):
     name_to_model = _get_name_to_model_map(models)
     for name, model in name_to_model.items():
         latest_ckpt = latest_checkpoint(model_dir, name)
+        print(latest_ckpt)
         if latest_ckpt is not None:
             restore(latest_ckpt, model)
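Finally, the checkpoint.py change above makes stale-checkpoint deletion idempotent, so a checkpoint already removed (for example by hand, between runs) no longer crashes saving. The same guard as a standalone helper; the file path is hypothetical:

```python
from pathlib import Path

def remove_if_exists(path: Path) -> None:
    # Deleting a checkpoint that was already cleaned up is a no-op
    # instead of raising FileNotFoundError.
    if path.exists():
        path.unlink()

remove_if_exists(Path("models/voxelnet-100.tckpt"))  # hypothetical path
```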