Skip to content

Commit 23b0362

Browse files
authored
Merge pull request #79 from oneapi-src/ratul/dl-cifar/segfault_fix
dl-cifar - seg fault fix
2 parents 8543915 + b6611f1 commit 23b0362

File tree

6 files changed

+106
-54
lines changed

6 files changed

+106
-54
lines changed

dl-mnist/SYCL/conv_layer.onednn.cpp

Lines changed: 45 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -86,16 +86,16 @@ namespace dl_infra {
8686
}
8787

8888
ConvLayer::ConvLayer(WorkloadParams* workloadParams, int index_in_network, int total_layers_in_nw,
89-
Timer* timer, TensorMgr* tensor_mgr, engine eng, stream s,
89+
Timer* timer, TensorMgr* tensor_mgr, engine *eng, stream *s,
9090
int input_tensor_dims[], int filter_tensor_dims[], int output_tensor_dims[]): workloadParams_(workloadParams) {
9191

9292
Tracer::func_begin("ConvLayer::ConvLayer");
9393

9494
index_in_network_ = index_in_network;
9595
total_layers_in_nw_ = total_layers_in_nw;
9696
timer_ = timer;
97-
eng_ = std::move(eng);
98-
s_ = std::move(s);
97+
eng_ = eng;
98+
s_ = s;
9999
tensor_mgr_ = tensor_mgr;
100100

101101
input_tensor_dims_ = input_tensor_dims;
@@ -106,9 +106,9 @@ namespace dl_infra {
106106
}
107107

108108
ConvLayer::ConvLayer(WorkloadParams* workloadParams, int index_in_network, int total_layers_in_nw,
109-
Timer* timer, TensorMgr* tensor_mgr, IConvLayer* nextConvLayer, engine eng, stream s,
109+
Timer* timer, TensorMgr* tensor_mgr, IConvLayer* nextConvLayer, engine *eng, stream *s,
110110
int input_tensor_dims[], int filter_tensor_dims[], int output_tensor_dims[])
111-
: ConvLayer(workloadParams, index_in_network, total_layers_in_nw, timer, tensor_mgr, std::move(eng), std::move(s), input_tensor_dims, filter_tensor_dims, output_tensor_dims) {
111+
: ConvLayer(workloadParams, index_in_network, total_layers_in_nw, timer, tensor_mgr, eng, s, input_tensor_dims, filter_tensor_dims, output_tensor_dims) {
112112
nextConvLayer_ = nextConvLayer;
113113
};
114114

@@ -132,7 +132,7 @@ namespace dl_infra {
132132
#ifdef DEVICE_TIMER
133133
Time start = get_time_now();
134134
#endif
135-
conv_pd = convolution_forward::primitive_desc(eng_,
135+
conv_pd = convolution_forward::primitive_desc(*eng_,
136136
prop_kind::forward_inference, algo,
137137
tensor_mgr_->getTensorBagAt(index_in_network_)->conv_src_md,
138138
tensor_mgr_->getTensorBagAt(index_in_network_)->conv_weights_md,
@@ -151,10 +151,33 @@ namespace dl_infra {
151151
timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "CONV_FORWARD CREATION");
152152
#endif
153153
createWorkspace();
154-
154+
reorderWeightsIfRequired();
155+
155156
Tracer::func_end("ConvLayer::initialize");
156157
}
157158

159+
void ConvLayer::reorderWeightsIfRequired() {
160+
need_reorder_weights_ = conv_pd.weights_desc() != tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_.get_desc();
161+
// if(need_reorder_weights_)
162+
// std::cout << "need_reorder_weights_" << std::endl;
163+
auto conv_weights_mem = need_reorder_weights_ ? memory(conv_pd.weights_desc(), *eng_) : tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
164+
165+
if (need_reorder_weights_) {
166+
#ifdef DEVICE_TIMER
167+
start = get_time_now();
168+
#endif
169+
auto reorder_weights = reorder(tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_, conv_weights_mem);
170+
reorder_weights.execute(*s_,
171+
{{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_},
172+
{DNNL_ARG_TO, conv_weights_mem}});
173+
s_->wait(); // wait for the reorder to complete
174+
tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_ = conv_weights_mem;
175+
#ifdef DEVICE_TIMER
176+
timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "REORDER WEIGHTS");
177+
#endif
178+
}
179+
}
180+
158181
void ConvLayer::doIOTensorAndWSAllocs() {
159182
Tracer::func_begin("ConvLayer::doTensorAndWSAllocs");
160183

@@ -167,7 +190,7 @@ namespace dl_infra {
167190
#ifdef DEVICE_TIMER
168191
Time start = get_time_now();
169192
#endif
170-
auto sycl_queue = dnnl::sycl_interop::get_queue(dnnl::stream(eng_));
193+
auto sycl_queue = dnnl::sycl_interop::get_queue(dnnl::stream(*eng_));
171194
sycl::free(tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_.get_data_handle(), sycl_queue);
172195
#ifdef DEVICE_TIMER
173196
timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "FREE_INPUT_DEV_PTR");
@@ -194,7 +217,7 @@ namespace dl_infra {
194217
#ifdef DEVICE_TIMER
195218
Time start = get_time_now();
196219
#endif
197-
conv_scratchpad_mem_ = memory(conv_pd.scratchpad_desc(), eng_);
220+
conv_scratchpad_mem_ = memory(conv_pd.scratchpad_desc(), *eng_);
198221
#ifdef DEVICE_TIMER
199222
timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "MEMALLOC_SCRATCHPAD_DEV_MEM");
200223
#endif
@@ -225,6 +248,8 @@ namespace dl_infra {
225248
need_reorder_src_ = conv_pd.src_desc() != tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_.get_desc();
226249

227250
//need_reorder_weights_ = conv_pd.weights_desc() != tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_.get_desc();
251+
// if(need_reorder_weights_)
252+
// std::cout << "need_reorder_weights_" << std::endl;
228253

229254
if(index_in_network_ == total_layers_in_nw_-1) {
230255
need_reorder_dst_ = conv_pd.dst_desc() != tensor_mgr_->getTensorBagAt(index_in_network_)->dst_mem_.get_desc();
@@ -239,13 +264,14 @@ namespace dl_infra {
239264
#ifdef DEVICE_TIMER
240265
start = get_time_now();
241266
#endif
242-
auto conv_src_mem = need_reorder_src_ ? memory(conv_pd.src_desc(), eng_) : tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_;
243-
//auto conv_weights_mem = need_reorder_weights_ ? memory(conv_pd.weights_desc(), eng_) : tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
267+
auto conv_src_mem = need_reorder_src_ ? memory(conv_pd.src_desc(), *eng_) : tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_;
268+
//auto conv_weights_mem = need_reorder_weights_ ? memory(conv_pd.weights_desc(), *eng_) : tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
269+
auto conv_weights_mem = tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
244270

245271
// in this workload we will forego reordering of weights
246272
// we will assume that the pre-trained weights have been created in the memory format as determined by conv_pd.weights_desc()
247-
auto conv_weights_mem = tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
248-
auto conv_dst_mem = memory(conv_pd.dst_desc(), eng_, tensor_mgr_->getTensorBagAt(index_in_network_)->dst_mem_.get_data_handle());
273+
//auto conv_weights_mem = tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
274+
auto conv_dst_mem = memory(conv_pd.dst_desc(), *eng_, tensor_mgr_->getTensorBagAt(index_in_network_)->dst_mem_.get_data_handle());
249275
tensor_mgr_->getTensorBagAt(index_in_network_)->dst_mem_ = conv_dst_mem;
250276
#ifdef DEVICE_TIMER
251277
timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "REORDERED MEM CREATE");
@@ -257,8 +283,8 @@ namespace dl_infra {
257283
#endif
258284
auto reorder_src = reorder(tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_, conv_src_mem);
259285
reorder_src.execute(
260-
s_, {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_}, {DNNL_ARG_TO, conv_src_mem}});
261-
s_.wait(); // wait for the reorder to complete
286+
*s_, {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_}, {DNNL_ARG_TO, conv_src_mem}});
287+
s_->wait(); // wait for the reorder to complete
262288
#ifdef DEVICE_TIMER
263289
timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "REORDER SRC");
264290
#endif
@@ -267,10 +293,10 @@ namespace dl_infra {
267293
// if (need_reorder_weights_) {
268294
// //start = get_time_now();
269295
// auto reorder_weights = reorder(tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_, conv_weights_mem);
270-
// reorder_weights.execute(s_,
296+
// reorder_weights.execute(*s_,
271297
// {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_},
272298
// {DNNL_ARG_TO, conv_weights_mem}});
273-
// s_.wait(); // wait for the reorder to complete
299+
// s_->wait(); // wait for the reorder to complete
274300
// timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "REORDER WEIGHTS");
275301
// }
276302
//}
@@ -281,10 +307,10 @@ namespace dl_infra {
281307
// conv_.execute(s_,
282308
// {{DNNL_ARG_SRC, tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_}, {DNNL_ARG_WEIGHTS, tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_},
283309
// {DNNL_ARG_DST, tensor_mgr_->getTensorBagAt(index_in_network_)->dst_mem_}});
284-
conv_.execute(s_,
310+
conv_.execute(*s_,
285311
{{DNNL_ARG_SRC, conv_src_mem}, {DNNL_ARG_WEIGHTS, conv_weights_mem},
286312
{DNNL_ARG_DST, conv_dst_mem}});
287-
s_.wait();
313+
s_->wait();
288314
#ifdef DEVICE_TIMER
289315
timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "CONV_FORWARD EXECUTION");
290316
#endif

dl-mnist/SYCL/conv_layer.onednn.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ namespace dl_infra {
4747
private:
4848
int index_in_network_, total_layers_in_nw_;
4949
Timer* timer_;
50-
engine eng_;
51-
stream s_;
50+
engine *eng_;
51+
stream *s_;
5252

5353
TensorMgr* tensor_mgr_;
5454

@@ -71,7 +71,7 @@ namespace dl_infra {
7171
bool add_mem_transfer_time_ = false;
7272

7373
bool need_reorder_src_ = false;
74-
//bool need_reorder_weights_ = false;
74+
bool need_reorder_weights_ = false;
7575
bool need_reorder_dst_ = false;
7676

7777
void write_to_dnnl_memory(void *handle, dnnl::memory &mem);
@@ -83,10 +83,10 @@ namespace dl_infra {
8383

8484
public:
8585
ConvLayer(WorkloadParams* workloadParams, int index_in_network, int total_layers_in_nw,
86-
Timer* timer, TensorMgr* tensor_mgr, engine eng, stream s,
86+
Timer* timer, TensorMgr* tensor_mgr, engine *eng, stream *s,
8787
int input_tensor_dims[], int filter_tensor_dims[], int output_tensor_dims[]);
8888
ConvLayer(WorkloadParams* workloadParams, int index_in_network, int total_layers_in_nw,
89-
Timer* timer, TensorMgr* tensor_mgr, IConvLayer* nextConvLayer, engine eng, stream s,
89+
Timer* timer, TensorMgr* tensor_mgr, IConvLayer* nextConvLayer, engine *eng, stream *s,
9090
int input_tensor_dims[], int filter_tensor_dims[], int output_tensor_dims[]);
9191
~ConvLayer();
9292

@@ -106,6 +106,7 @@ namespace dl_infra {
106106
void createWorkspace();
107107
void createTensorDescriptors();
108108
void createTensors();
109+
void reorderWeightsIfRequired();
109110

110111
void calculateStrideDims();
111112
};

dl-mnist/SYCL/dl_network_mgr.onednn.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ namespace dl_infra {
5353
class DlNetworkMgr {
5454
private:
5555
Timer* timer_, *dataFileReadTimer_;
56-
engine eng_;
57-
stream s_;
56+
engine *eng_;
57+
stream *s_;
5858
WorkloadParams* workloadParams_;
5959
//WorkloadParams::TensorMemPolicy tensorMemPolicy_;
6060

@@ -69,8 +69,8 @@ namespace dl_infra {
6969
void initializeNetwork(string networkName);
7070

7171
public:
72-
DlNetworkMgr(WorkloadParams* workloadParams, engine eng, stream s, Timer* timer, Timer* dataFileReadTimer)
73-
: workloadParams_(workloadParams), eng_(std::move(eng)), s_(std::move(s)), timer_(timer), tensorMgr(0), dataFileReadTimer_(dataFileReadTimer) {}
72+
DlNetworkMgr(WorkloadParams* workloadParams, engine* eng, stream* s, Timer* timer, Timer* dataFileReadTimer)
73+
: workloadParams_(workloadParams), eng_(eng), s_(s), timer_(timer), tensorMgr(0), dataFileReadTimer_(dataFileReadTimer) {}
7474
void createDLNetwork(string networkName, int no_of_conv_layers, int *conv_dims);
7575
void executeInferenceRun(string networkName);
7676
};

dl-mnist/SYCL/main.onednn.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,39 +52,39 @@ int main(int argc, const char** argv) {
5252

5353
cout << endl << "\t\tWelcome to DL-MNIST workload: SYCL version." << endl << endl;
5454
cout << "=======================================================================" << endl;
55-
sycl::device* dht = new sycl::device(sycl::gpu_selector());
55+
sycl::device* dht = new sycl::device(sycl::gpu_selector_v);
5656
#ifdef DEVICE_TIMER
5757
start = get_time_now();
5858
#endif
59-
sycl::context context(*dht);
59+
sycl::context *context = new sycl::context(*dht);
6060
#ifdef DEVICE_TIMER
6161
timer->recordOpTimeTaken(1000, calculate_op_time_taken(start), "CREATE_SYCL_CONTEXT");
6262
#endif
6363
//auto propList = sycl::property_list{sycl::property::queue::in_order()};
6464
#ifdef DEVICE_TIMER
6565
start = get_time_now();
6666
#endif
67-
sycl::queue deviceQueue1(context, *dht);
67+
sycl::queue *deviceQueue1 = new sycl::queue(*context, *dht);
6868
#ifdef DEVICE_TIMER
6969
timer->recordOpTimeTaken(1000, calculate_op_time_taken(start), "CREATE_SYCL_QUEUE");
7070
#endif
7171
#ifdef DEVICE_TIMER
7272
start = get_time_now();
7373
#endif
74-
//engine eng(engine::kind::gpu, 0);
75-
engine eng = dnnl::sycl_interop::make_engine(*dht, context);
74+
engine eng = dnnl::sycl_interop::make_engine(*dht, *context);
75+
7676
#ifdef DEVICE_TIMER
7777
timer->recordOpTimeTaken(1000, calculate_op_time_taken(start), "CREATE_ONEDNN_ENGINE");
7878
#endif
7979
#ifdef DEVICE_TIMER
8080
start = get_time_now();
8181
#endif
8282
//stream s(eng);
83-
stream s = dnnl::sycl_interop::make_stream(eng, deviceQueue1);
83+
stream s = dnnl::sycl_interop::make_stream(eng, *deviceQueue1);
8484
#ifdef DEVICE_TIMER
8585
timer->recordOpTimeTaken(1000, calculate_op_time_taken(start), "CREATE_ONEDNN STREAM");
8686
#endif
87-
SYCL sycl(dnnl::sycl_interop::get_queue(s).get_device());
87+
SYCL sycl(*dht);
8888
sycl.DisplayProperties();
8989
cout << "=======================================================================" << endl;
9090
cout << endl;
@@ -146,7 +146,7 @@ int main(int argc, const char** argv) {
146146
cout.precision(3);
147147

148148
int noOfIterations = workload_params.getNoOfIterations();
149-
DlNetworkMgr* dlNetworkMgr = new DlNetworkMgr(&workload_params, eng, s, timer, dataFileReadTimer);
149+
DlNetworkMgr* dlNetworkMgr = new DlNetworkMgr(&workload_params, &eng, &s, timer, dataFileReadTimer);
150150

151151
string networkName1_1 = "nw_1.1";
152152
dlNetworkMgr->createDLNetwork(networkName1_1, 10, (int *)&conv_dims1);
@@ -173,6 +173,11 @@ int main(int argc, const char** argv) {
173173
cout << "Final time across all networks: " << timer->getTotalOpTime() << " s" << std::endl;
174174
#endif
175175
delete dlNetworkMgr;
176+
177+
delete dht;
178+
delete context;
179+
delete deviceQueue1;
180+
176181
delete timer;
177182

178183
std::cout << "dl-mnist - total time for whole calculation: " << calculate_op_time_taken(wallClockStart) - dataFileReadTimer->getTotalOpTime()<< " s" << std::endl;

0 commit comments

Comments (0)