@@ -295,9 +295,20 @@ def init_sepconv1d(self, layer):
295
295
else :
296
296
layer .set_attr ('strategy' , 'latency' )
297
297
298
- layer .set_attr (
299
- 'n_partitions' , 1
300
- ) # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
298
+ out_width = layer .get_output_variable ().shape [0 ]
299
+ chosen_pf = layer .model .config .get_layer_config_value (layer , 'ParallelizationFactor' , 1 )
300
+ valid_pf = self .get_valid_conv_partition_splits (1 , out_width )
301
+ if chosen_pf not in valid_pf :
302
+ closest_pf = self .get_closest_reuse_factor (valid_pf , chosen_pf )
303
+ valid_pf_str = ',' .join (map (str , valid_pf ))
304
+ print (
305
+ f'WARNING: Invalid ParallelizationFactor={ chosen_pf } in layer "{ layer .name } ".'
306
+ f'Using ParallelizationFactor={ closest_pf } instead. Valid ParallelizationFactor(s): { valid_pf_str } .'
307
+ )
308
+ else :
309
+ closest_pf = chosen_pf
310
+ layer .set_attr ('n_partitions' , out_width // closest_pf )
311
+
301
312
layer .set_attr ('implementation' , layer .model .config .get_conv_implementation (layer ).lower ())
302
313
303
314
# Set the output type of the depthwise phase
@@ -350,9 +361,21 @@ def init_sepconv2d(self, layer):
350
361
else :
351
362
layer .set_attr ('strategy' , 'latency' )
352
363
353
- layer .set_attr (
354
- 'n_partitions' , 1
355
- ) # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
364
+ out_height = layer .get_output_variable ().shape [0 ]
365
+ out_width = layer .get_output_variable ().shape [1 ]
366
+ chosen_pf = layer .model .config .get_layer_config_value (layer , 'ParallelizationFactor' , 1 )
367
+ valid_pf = self .get_valid_conv_partition_splits (out_height , out_width )
368
+ if chosen_pf not in valid_pf :
369
+ closest_pf = self .get_closest_reuse_factor (valid_pf , chosen_pf )
370
+ valid_pf_str = ',' .join (map (str , valid_pf ))
371
+ print (
372
+ f'WARNING: Invalid ParallelizationFactor={ chosen_pf } in layer "{ layer .name } ".'
373
+ f'Using ParallelizationFactor={ closest_pf } instead. Valid ParallelizationFactor(s): { valid_pf_str } .'
374
+ )
375
+ else :
376
+ closest_pf = chosen_pf
377
+ layer .set_attr ('n_partitions' , out_height * out_width // closest_pf )
378
+
356
379
layer .set_attr ('implementation' , layer .model .config .get_conv_implementation (layer ).lower ())
357
380
358
381
# Set the output type of the depthwise phase
@@ -373,9 +396,21 @@ def init_depconv2d(self, layer):
373
396
else :
374
397
layer .set_attr ('strategy' , 'latency' )
375
398
376
- layer .set_attr (
377
- 'n_partitions' , 1
378
- ) # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
399
+ out_height = layer .get_output_variable ().shape [0 ]
400
+ out_width = layer .get_output_variable ().shape [1 ]
401
+ chosen_pf = layer .model .config .get_layer_config_value (layer , 'ParallelizationFactor' , 1 )
402
+ valid_pf = self .get_valid_conv_partition_splits (out_height , out_width )
403
+ if chosen_pf not in valid_pf :
404
+ closest_pf = self .get_closest_reuse_factor (valid_pf , chosen_pf )
405
+ valid_pf_str = ',' .join (map (str , valid_pf ))
406
+ print (
407
+ f'WARNING: Invalid ParallelizationFactor={ chosen_pf } in layer "{ layer .name } ".'
408
+ f'Using ParallelizationFactor={ closest_pf } instead. Valid ParallelizationFactor(s): { valid_pf_str } .'
409
+ )
410
+ else :
411
+ closest_pf = chosen_pf
412
+ layer .set_attr ('n_partitions' , out_height * out_width // closest_pf )
413
+
379
414
layer .set_attr ('implementation' , layer .model .config .get_conv_implementation (layer ).lower ())
380
415
381
416
def _set_pooling_accum_t (self , layer , pool_size ):
0 commit comments