@@ -86,16 +86,16 @@ namespace dl_infra {
86
86
}
87
87
88
88
ConvLayer::ConvLayer (WorkloadParams* workloadParams, int index_in_network, int total_layers_in_nw,
89
- Timer* timer, TensorMgr* tensor_mgr, engine eng, stream s,
89
+ Timer* timer, TensorMgr* tensor_mgr, engine * eng, stream * s,
90
90
int input_tensor_dims[], int filter_tensor_dims[], int output_tensor_dims[]): workloadParams_(workloadParams) {
91
91
92
92
Tracer::func_begin (" ConvLayer::ConvLayer" );
93
93
94
94
index_in_network_ = index_in_network;
95
95
total_layers_in_nw_ = total_layers_in_nw;
96
96
timer_ = timer;
97
- eng_ = std::move ( eng) ;
98
- s_ = std::move (s) ;
97
+ eng_ = eng;
98
+ s_ = s ;
99
99
tensor_mgr_ = tensor_mgr;
100
100
101
101
input_tensor_dims_ = input_tensor_dims;
@@ -106,9 +106,9 @@ namespace dl_infra {
106
106
}
107
107
108
108
// Convenience constructor: same as the primary constructor (to which it
// delegates) but additionally wires up the next layer in the network so
// layers can be chained. The engine/stream pointers are non-owning; the
// caller retains ownership of the shared oneDNN engine and stream.
ConvLayer::ConvLayer(WorkloadParams* workloadParams, int index_in_network, int total_layers_in_nw,
        Timer* timer, TensorMgr* tensor_mgr, IConvLayer* nextConvLayer, engine* eng, stream* s,
        int input_tensor_dims[], int filter_tensor_dims[], int output_tensor_dims[])
        : ConvLayer(workloadParams, index_in_network, total_layers_in_nw, timer, tensor_mgr,
                    eng, s, input_tensor_dims, filter_tensor_dims, output_tensor_dims) {
    nextConvLayer_ = nextConvLayer;
}  // note: stray ';' after the body removed — a function definition needs none
114
114
@@ -132,7 +132,7 @@ namespace dl_infra {
132
132
#ifdef DEVICE_TIMER
133
133
Time start = get_time_now ();
134
134
#endif
135
- conv_pd = convolution_forward::primitive_desc (eng_,
135
+ conv_pd = convolution_forward::primitive_desc (* eng_,
136
136
prop_kind::forward_inference, algo,
137
137
tensor_mgr_->getTensorBagAt (index_in_network_)->conv_src_md ,
138
138
tensor_mgr_->getTensorBagAt (index_in_network_)->conv_weights_md ,
@@ -151,10 +151,33 @@ namespace dl_infra {
151
151
timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " CONV_FORWARD CREATION" );
152
152
#endif
153
153
createWorkspace ();
154
-
154
+ reorderWeightsIfRequired ();
155
+
155
156
Tracer::func_end (" ConvLayer::initialize" );
156
157
}
157
158
159
+ void ConvLayer::reorderWeightsIfRequired () {
160
+ need_reorder_weights_ = conv_pd.weights_desc () != tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ .get_desc ();
161
+ // if(need_reorder_weights_)
162
+ // std::cout << "need_reorder_weights_" << std::endl;
163
+ auto conv_weights_mem = need_reorder_weights_ ? memory (conv_pd.weights_desc (), *eng_) : tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ ;
164
+
165
+ if (need_reorder_weights_) {
166
+ #ifdef DEVICE_TIMER
167
+ start = get_time_now ();
168
+ #endif
169
+ auto reorder_weights = reorder (tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ , conv_weights_mem);
170
+ reorder_weights.execute (*s_,
171
+ {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ },
172
+ {DNNL_ARG_TO, conv_weights_mem}});
173
+ s_->wait (); // wait for the reorder to complete
174
+ tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ = conv_weights_mem;
175
+ #ifdef DEVICE_TIMER
176
+ timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " REORDER WEIGHTS" );
177
+ #endif
178
+ }
179
+ }
180
+
158
181
void ConvLayer::doIOTensorAndWSAllocs () {
159
182
Tracer::func_begin (" ConvLayer::doTensorAndWSAllocs" );
160
183
@@ -167,7 +190,7 @@ namespace dl_infra {
167
190
#ifdef DEVICE_TIMER
168
191
Time start = get_time_now ();
169
192
#endif
170
- auto sycl_queue = dnnl::sycl_interop::get_queue (dnnl::stream (eng_));
193
+ auto sycl_queue = dnnl::sycl_interop::get_queue (dnnl::stream (* eng_));
171
194
sycl::free (tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ .get_data_handle (), sycl_queue);
172
195
#ifdef DEVICE_TIMER
173
196
timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " FREE_INPUT_DEV_PTR" );
@@ -194,7 +217,7 @@ namespace dl_infra {
194
217
#ifdef DEVICE_TIMER
195
218
Time start = get_time_now ();
196
219
#endif
197
- conv_scratchpad_mem_ = memory (conv_pd.scratchpad_desc (), eng_);
220
+ conv_scratchpad_mem_ = memory (conv_pd.scratchpad_desc (), * eng_);
198
221
#ifdef DEVICE_TIMER
199
222
timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " MEMALLOC_SCRATCHPAD_DEV_MEM" );
200
223
#endif
@@ -225,6 +248,8 @@ namespace dl_infra {
225
248
need_reorder_src_ = conv_pd.src_desc () != tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ .get_desc ();
226
249
227
250
// need_reorder_weights_ = conv_pd.weights_desc() != tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_.get_desc();
251
+ // if(need_reorder_weights_)
252
+ // std::cout << "need_reorder_weights_" << std::endl;
228
253
229
254
if (index_in_network_ == total_layers_in_nw_-1 ) {
230
255
need_reorder_dst_ = conv_pd.dst_desc () != tensor_mgr_->getTensorBagAt (index_in_network_)->dst_mem_ .get_desc ();
@@ -239,13 +264,14 @@ namespace dl_infra {
239
264
#ifdef DEVICE_TIMER
240
265
start = get_time_now ();
241
266
#endif
242
- auto conv_src_mem = need_reorder_src_ ? memory (conv_pd.src_desc (), eng_) : tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ ;
243
- // auto conv_weights_mem = need_reorder_weights_ ? memory(conv_pd.weights_desc(), eng_) : tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
267
+ auto conv_src_mem = need_reorder_src_ ? memory (conv_pd.src_desc (), *eng_) : tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ ;
268
+ // auto conv_weights_mem = need_reorder_weights_ ? memory(conv_pd.weights_desc(), *eng_) : tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
269
+ auto conv_weights_mem = tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ ;
244
270
245
271
// in this workload we will forego reordering of weights
246
272
// we will assume that the pre-trained weights have been created in the memory format as determined by conv_pd.weights_desc()
247
- auto conv_weights_mem = tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ ;
248
- auto conv_dst_mem = memory (conv_pd.dst_desc (), eng_, tensor_mgr_->getTensorBagAt (index_in_network_)->dst_mem_ .get_data_handle ());
273
+ // auto conv_weights_mem = tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
274
+ auto conv_dst_mem = memory (conv_pd.dst_desc (), * eng_, tensor_mgr_->getTensorBagAt (index_in_network_)->dst_mem_ .get_data_handle ());
249
275
tensor_mgr_->getTensorBagAt (index_in_network_)->dst_mem_ = conv_dst_mem;
250
276
#ifdef DEVICE_TIMER
251
277
timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " REORDERED MEM CREATE" );
@@ -257,8 +283,8 @@ namespace dl_infra {
257
283
#endif
258
284
auto reorder_src = reorder (tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ , conv_src_mem);
259
285
reorder_src.execute (
260
- s_, {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ }, {DNNL_ARG_TO, conv_src_mem}});
261
- s_. wait (); // wait for the reorder to complete
286
+ * s_, {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ }, {DNNL_ARG_TO, conv_src_mem}});
287
+ s_-> wait (); // wait for the reorder to complete
262
288
#ifdef DEVICE_TIMER
263
289
timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " REORDER SRC" );
264
290
#endif
@@ -267,10 +293,10 @@ namespace dl_infra {
267
293
// if (need_reorder_weights_) {
268
294
// //start = get_time_now();
269
295
// auto reorder_weights = reorder(tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_, conv_weights_mem);
270
- // reorder_weights.execute(s_,
296
+ // reorder_weights.execute(* s_,
271
297
// {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_},
272
298
// {DNNL_ARG_TO, conv_weights_mem}});
273
- // s_. wait(); // wait for the reorder to complete
299
+ // s_-> wait(); // wait for the reorder to complete
274
300
// timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "REORDER WEIGHTS");
275
301
// }
276
302
// }
@@ -281,10 +307,10 @@ namespace dl_infra {
281
307
// conv_.execute(s_,
282
308
// {{DNNL_ARG_SRC, tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_}, {DNNL_ARG_WEIGHTS, tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_},
283
309
// {DNNL_ARG_DST, tensor_mgr_->getTensorBagAt(index_in_network_)->dst_mem_}});
284
- conv_.execute (s_,
310
+ conv_.execute (* s_,
285
311
{{DNNL_ARG_SRC, conv_src_mem}, {DNNL_ARG_WEIGHTS, conv_weights_mem},
286
312
{DNNL_ARG_DST, conv_dst_mem}});
287
- s_. wait ();
313
+ s_-> wait ();
288
314
#ifdef DEVICE_TIMER
289
315
timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " CONV_FORWARD EXECUTION" );
290
316
#endif
0 commit comments