14
14
import sys
15
15
16
16
class DaSiamRPNTracker :
17
- #initialization of used values, initial bounding box, used network
18
- def __init__ (self , im , target_pos , target_sz , net , kernel_r1 , kernel_cls1 ):
17
+ # Initialization of the used values and loading of the network and kernel models
18
+ def __init__ (self , net = "dasiamrpn_model.onnx" , kernel_r1 = "dasiamrpn_kernel_r1.onnx" , kernel_cls1 = "dasiamrpn_kernel_cls1.onnx" ):
19
19
self .windowing = "cosine"
20
20
self .exemplar_size = 127
21
21
self .instance_size = 271
@@ -28,42 +28,52 @@ def __init__(self, im, target_pos, target_sz, net, kernel_r1, kernel_cls1):
28
28
self .penalty_k = 0.055
29
29
self .window_influence = 0.42
30
30
self .lr = 0.295
31
+ self .score = []
32
+ if self .windowing == "cosine" :
33
+ self .window = np .outer (np .hanning (self .score_size ), np .hanning (self .score_size ))
34
+ elif self .windowing == "uniform" :
35
+ self .window = np .ones ((self .score_size , self .score_size ))
36
+ self .window = np .tile (self .window .flatten (), self .anchor_num )
37
+ # Loading the network's and kernels' models
38
+ self .net = cv .dnn .readNet (net )
39
+ self .kernel_r1 = cv .dnn .readNet (kernel_r1 )
40
+ self .kernel_cls1 = cv .dnn .readNet (kernel_cls1 )
41
+
42
+ def init (self , im , init_bb ):
43
+ target_pos , target_sz = np .array ([init_bb [0 ], init_bb [1 ]]), np .array ([init_bb [2 ], init_bb [3 ]])
31
44
self .im_h = im .shape [0 ]
32
45
self .im_w = im .shape [1 ]
33
46
self .target_pos = target_pos
34
47
self .target_sz = target_sz
35
48
self .avg_chans = np .mean (im , axis = (0 , 1 ))
36
- self .net = net
37
- self .score = []
38
49
50
+ # When generating the ONNX model from the pre-trained .pth model
51
+ # we use only one state of the network. In our case it is the state
52
+ # for big bounding boxes, so we were forced to add an assertion for
53
+ # too small bounding boxes - the current state of the network cannot
54
+ # work properly with such small bounding boxes
39
55
if ((self .target_sz [0 ] * self .target_sz [1 ]) / float (self .im_h * self .im_w )) < 0.004 :
40
- raise AssertionError ("Initializing BB is too small-try to restart tracker with larger BB" )
56
+ raise AssertionError (
57
+ "Initializing BB is too small-try to restart tracker with larger BB" )
41
58
42
59
self .anchor = self .__generate_anchor ()
43
60
wc_z = self .target_sz [0 ] + self .context_amount * sum (self .target_sz )
44
61
hc_z = self .target_sz [1 ] + self .context_amount * sum (self .target_sz )
45
62
s_z = round (np .sqrt (wc_z * hc_z ))
46
-
47
63
z_crop = self .__get_subwindow_tracking (im , self .exemplar_size , s_z )
48
64
z_crop = z_crop .transpose (2 , 0 , 1 ).reshape (1 , 3 , 127 , 127 ).astype (np .float32 )
49
65
self .net .setInput (z_crop )
50
66
z_f = self .net .forward ('63' )
51
- kernel_r1 .setInput (z_f )
52
- r1 = kernel_r1 .forward ()
53
- kernel_cls1 .setInput (z_f )
54
- cls1 = kernel_cls1 .forward ()
67
+ self . kernel_r1 .setInput (z_f )
68
+ r1 = self . kernel_r1 .forward ()
69
+ self . kernel_cls1 .setInput (z_f )
70
+ cls1 = self . kernel_cls1 .forward ()
55
71
r1 = r1 .reshape (20 , 256 , 4 , 4 )
56
72
cls1 = cls1 .reshape (10 , 256 , 4 , 4 )
57
73
self .net .setParam (self .net .getLayerId ('65' ), 0 , r1 )
58
74
self .net .setParam (self .net .getLayerId ('68' ), 0 , cls1 )
59
75
60
- if self .windowing == "cosine" :
61
- self .window = np .outer (np .hanning (self .score_size ), np .hanning (self .score_size ))
62
- elif self .windowing == "uniform" :
63
- self .window = np .ones ((self .score_size , self .score_size ))
64
- self .window = np .tile (self .window .flatten (), self .anchor_num )
65
-
66
- #creating anchor for tracking bounding box
76
+ # Creating anchor for tracking bounding box
67
77
def __generate_anchor (self ):
68
78
self .anchor = np .zeros ((self .anchor_num , 4 ), dtype = np .float32 )
69
79
size = self .total_stride * self .total_stride
@@ -86,8 +96,8 @@ def __generate_anchor(self):
86
96
self .anchor [:, 0 ], self .anchor [:, 1 ] = xx .astype (np .float32 ), yy .astype (np .float32 )
87
97
return self .anchor
88
98
89
- #track function
90
- def track (self , im ):
99
+ # Function for updating tracker state
100
+ def update (self , im ):
91
101
wc_z = self .target_sz [1 ] + self .context_amount * sum (self .target_sz )
92
102
hc_z = self .target_sz [0 ] + self .context_amount * sum (self .target_sz )
93
103
s_z = np .sqrt (wc_z * hc_z )
@@ -96,7 +106,7 @@ def track(self, im):
96
106
pad = d_search / scale_z
97
107
s_x = round (s_z + 2 * pad )
98
108
99
- #region preprocessing
109
+ # Region preprocessing part
100
110
x_crop = self .__get_subwindow_tracking (im , self .instance_size , s_x )
101
111
x_crop = x_crop .transpose (2 , 0 , 1 ).reshape (1 , 3 , 271 , 271 ).astype (np .float32 )
102
112
self .score = self .__tracker_eval (x_crop , scale_z )
@@ -105,7 +115,12 @@ def track(self, im):
105
115
self .target_sz [0 ] = max (10 , min (self .im_w , self .target_sz [0 ]))
106
116
self .target_sz [1 ] = max (10 , min (self .im_h , self .target_sz [1 ]))
107
117
108
- #update bounding box position
118
+ cx , cy = self .target_pos
119
+ w , h = self .target_sz
120
+ updated_bb = (cx , cy , w , h )
121
+ return True , updated_bb
122
+
123
+ # Function for updating position of the bounding box
109
124
def __tracker_eval (self , x_crop , scale_z ):
110
125
target_size = self .target_sz * scale_z
111
126
self .net .setInput (x_crop )
@@ -160,7 +175,7 @@ def __softmax(self, x):
160
175
y = e_x / e_x .sum (axis = 0 )
161
176
return y
162
177
163
- #evaluations with cropped image
178
+ # Reshaping the cropped image for use in the model
164
179
def __get_subwindow_tracking (self , im , model_size , original_sz ):
165
180
im_sz = im .shape
166
181
c = (original_sz + 1 ) / 2
@@ -171,19 +186,20 @@ def __get_subwindow_tracking(self, im, model_size, original_sz):
171
186
left_pad = int (max (0. , - context_xmin ))
172
187
top_pad = int (max (0. , - context_ymin ))
173
188
right_pad = int (max (0. , context_xmax - im_sz [1 ] + 1 ))
174
- bottom_pad = int (max (0. , context_ymax - im_sz [0 ] + 1 ))
189
+ bot_pad = int (max (0. , context_ymax - im_sz [0 ] + 1 ))
175
190
context_xmin += left_pad
176
191
context_xmax += left_pad
177
192
context_ymin += top_pad
178
193
context_ymax += top_pad
179
194
r , c , k = im .shape
180
195
181
- if any ([top_pad , bottom_pad , left_pad , right_pad ]):
182
- te_im = np .zeros ((r + top_pad + bottom_pad , c + left_pad + right_pad , k ), np .uint8 )
196
+ if any ([top_pad , bot_pad , left_pad , right_pad ]):
197
+ te_im = np .zeros ((
198
+ r + top_pad + bot_pad , c + left_pad + right_pad , k ), np .uint8 )
183
199
te_im [top_pad :top_pad + r , left_pad :left_pad + c , :] = im
184
200
if top_pad :
185
201
te_im [0 :top_pad , left_pad :left_pad + c , :] = self .avg_chans
186
- if bottom_pad :
202
+ if bot_pad :
187
203
te_im [r + top_pad :, left_pad :left_pad + c , :] = self .avg_chans
188
204
if left_pad :
189
205
te_im [:, 0 :left_pad , :] = self .avg_chans
@@ -195,23 +211,22 @@ def __get_subwindow_tracking(self, im, model_size, original_sz):
195
211
196
212
if not np .array_equal (model_size , original_sz ):
197
213
im_patch_original = cv .resize (im_patch_original , (model_size , model_size ))
198
-
199
214
return im_patch_original
200
215
201
- #function for reading paths, bounding box drawing, showing results
216
+ # Sample for using DaSiamRPN tracker
202
217
def main ():
203
218
parser = argparse .ArgumentParser (description = "Run tracker" )
219
+ parser .add_argument ("--input" , type = str , help = "Full path to input (empty for camera)" )
204
220
parser .add_argument ("--net" , type = str , default = "dasiamrpn_model.onnx" , help = "Full path to onnx model of net" )
205
221
parser .add_argument ("--kernel_r1" , type = str , default = "dasiamrpn_kernel_r1.onnx" , help = "Full path to onnx model of kernel_r1" )
206
222
parser .add_argument ("--kernel_cls1" , type = str , default = "dasiamrpn_kernel_cls1.onnx" , help = "Full path to onnx model of kernel_cls1" )
207
- parser .add_argument ("--input" , type = str , help = "Full path to input. Do not use if input is camera" )
208
223
args = parser .parse_args ()
209
224
point1 = ()
210
225
point2 = ()
211
226
mark = True
212
227
drawing = False
213
228
cx , cy , w , h = 0.0 , 0.0 , 0 , 0
214
-
229
+ # Function for drawing a bounding box during the video stream
215
230
def get_bb (event , x , y , flag , param ):
216
231
nonlocal point1 , point2 , cx , cy , w , h , drawing , mark
217
232
@@ -233,12 +248,7 @@ def get_bb(event, x, y, flag, param):
233
248
h = abs (point1 [1 ] - point2 [1 ])
234
249
mark = False
235
250
236
- #loading network`s and kernel`s models
237
- net = cv .dnn .readNet (args .net )
238
- kernel_r1 = cv .dnn .readNet (args .kernel_r1 )
239
- kernel_cls1 = cv .dnn .readNet (args .kernel_cls1 )
240
-
241
- #initializing bounding box
251
+ # Creating window for visualization
242
252
cap = cv .VideoCapture (args .input if args .input else 0 )
243
253
cv .namedWindow ("DaSiamRPN" )
244
254
cv .setMouseCallback ("DaSiamRPN" , get_bb )
@@ -257,17 +267,17 @@ def get_bb(event, x, y, flag, param):
257
267
cv .imshow ("DaSiamRPN" , twin )
258
268
cv .waitKey (40 )
259
269
260
- target_pos , target_sz = np .array ([cx , cy ]), np .array ([w , h ])
261
- tracker = DaSiamRPNTracker (frame , target_pos , target_sz , net , kernel_r1 , kernel_cls1 )
270
+ init_bb = (cx , cy , w , h )
271
+ tracker = DaSiamRPNTracker (args .net , args .kernel_r1 , args .kernel_cls1 )
272
+ tracker .init (frame , init_bb )
262
273
263
- #tracking loop
274
+ # Tracking loop
264
275
while cap .isOpened ():
265
276
has_frame , frame = cap .read ()
266
277
if not has_frame :
267
278
sys .exit (0 )
268
- tracker .track (frame )
269
- w , h = tracker .target_sz
270
- cx , cy = tracker .target_pos
279
+ _ , new_bb = tracker .update (frame )
280
+ cx , cy , w , h = new_bb
271
281
cv .rectangle (frame , (int (cx - w // 2 ), int (cy - h // 2 )), (int (cx - w // 2 ) + int (w ), int (cy - h // 2 ) + int (h )),(0 , 255 , 255 ), 3 )
272
282
cv .imshow ("DaSiamRPN" , frame )
273
283
key = cv .waitKey (1 )
0 commit comments