@@ -751,6 +751,177 @@ def _convert_greater(builder, node, graph, err):
751
751
output_name = node .outputs [0 ],
752
752
)
753
753
754
+ def _convert_gru (builder , node , graph , err ): # type: (NeuralNetworkBuilder, Node, Graph, ErrorHandling) -> None
755
+ '''
756
+ convert to CoreML GRU Layer:
757
+ https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L3104
758
+ '''
759
+
760
+ def get_weights (W , W_name , R , R_name , B ):
761
+ '''
762
+ Helper routine to return weights in CoreML LSTM required format
763
+ '''
764
+ W = np .expand_dims (np .expand_dims (W , 3 ), 3 )
765
+ R = np .expand_dims (np .expand_dims (R , 3 ), 3 )
766
+
767
+ if W is None :
768
+ err .missing_initializer (node ,
769
+ "Weight tensor: {} not found in the graph initializer" .format (W_name ))
770
+ if R is None :
771
+ err .missing_initializer (node ,
772
+ "Weight tensor: {} not found in the graph initializer" .format (R_name ))
773
+
774
+ W_z , W_r , W_h = np .split (np .squeeze (W ), 3 ) #type: ignore
775
+ R_z , R_r , R_h = np .split (np .squeeze (R ), 3 ) #type: ignore
776
+
777
+ W_x = [W_z , W_r , W_h ]
778
+ W_h = [R_z , R_r , R_h ]
779
+ b = None
780
+ if B is not None :
781
+ b_Wz , b_Wr , b_Wh , b_Rz , b_Rr , b_Rh = np .split (np .squeeze (B ), 6 ) #type: ignore
782
+ b = [b_Wz + b_Rz , b_Wr + b_Rr , b_Wh + b_Rh ]
783
+
784
+ return W_x , W_h , b
785
+
786
+ def expand_dim (node_name , input_name , output_name , axes ):
787
+ builder .add_expand_dims (
788
+ name = node_name ,
789
+ input_name = input_name ,
790
+ output_name = output_name ,
791
+ axes = axes
792
+ )
793
+
794
+ # Read attributes
795
+ # activation alpha and beta
796
+ if 'activation_alpha' in node .attrs or 'activation_beta' in node .attrs :
797
+ err .unsupported_feature_warning (node , "Activation parameter alpha and beta are currently not used" )
798
+
799
+ inner_activation = 'SIGMOID'
800
+ output_activation = 'TANH'
801
+
802
+ if 'activations' in node .attrs :
803
+ activations_list = node .attrs ['activations' ]
804
+
805
+ if len (activations_list ) < 2 :
806
+ err .unsupported_op_configuration (builder , node , graph , "Error in ONNX model: Less number of activations provided" )
807
+
808
+ inner_activation = activations_list [0 ].upper ()
809
+ output_activation = activations_list [1 ].upper ()
810
+
811
+ # Extract direction from ONNX attribute
812
+ direction = node .attrs .get ('direction' , 'forward' )
813
+ if direction == 'bidirectional' :
814
+ return err .unsupported_op_configuration (builder , node , graph , "Bidirectional GRU not supported!! Please consider adding custom conversion function/layer" )
815
+
816
+ hidden_size = node .attrs .get ('hidden_size' )
817
+
818
+ # Read inputs
819
+ W_name = node .inputs [1 ]
820
+ R_name = node .inputs [2 ]
821
+ B = None
822
+ if len (node .inputs ) > 3 :
823
+ B_name = node .inputs [3 ]
824
+ B = node .input_tensors .get (B_name , None )
825
+
826
+ if W_name not in node .input_tensors or R_name not in node .input_tensors :
827
+ return err .unsupported_op_configuration (builder , node , graph , "Input and Recursion weights must be known!! Please consider adding custom conversion function/layer" )
828
+
829
+ W = node .input_tensors .get (W_name , None )
830
+ R = node .input_tensors .get (R_name , None )
831
+
832
+ # Get weights for forward direction
833
+ W_x , W_h , b = get_weights (W , W_name , R , R_name , B )
834
+
835
+ # shape of input
836
+ input_size = W_x [0 ].shape [1 ]
837
+
838
+ # Get input and output for hidden and cell
839
+ input_h = node .inputs [5 ] if len (node .inputs ) > 5 else node .inputs [0 ] + '_h_input'
840
+ output_h = node .outputs [1 ] if len (node .outputs ) > 1 else node .outputs [0 ] + '_h_output'
841
+ output_h_5d = output_h + '_5d'
842
+
843
+ if len (node .inputs ) < 6 :
844
+ # if input is not present in the network, load they as constant
845
+ if node .inputs [0 ] not in graph .shape_dict :
846
+ err .unsupported_op_configuration (builder , node , graph , "Input shape not represented within Graph" )
847
+
848
+ # Input is represented as [Seq Len, Batch Size, Input Size]
849
+ batch_size = graph .shape_dict [node .inputs [0 ]][1 ]
850
+ builder .add_load_constant_nd (
851
+ name = node .name + '_load_initial_h' ,
852
+ output_name = input_h ,
853
+ constant_value = 0.0 ,
854
+ shape = [1 , batch_size , hidden_size ]
855
+ )
856
+
857
+ # CoreML GRU expects 5-d tensor
858
+ # Expand dimensions of input to 5-d for compatibility
859
+ input_rank = builder ._get_rank (node .inputs [0 ])
860
+ if input_rank == - 1 :
861
+ return err .unsupported_op_configuration (builder , node , graph , "Rank unknown for input" )
862
+
863
+ if input_rank < 5 :
864
+ add_nodes = 5 - input_rank
865
+
866
+ expand_dim (node .name + '_expand_in_0' , node .inputs [0 ], node .inputs [0 ]+ '_expand_out_0' , [input_rank ])
867
+ expand_dim (node .name + '_expand_in_h_0' , input_h , input_h + '_expand_out_h_0' , [input_rank ])
868
+
869
+ for i in range (1 , add_nodes ):
870
+ i_str = str (i )
871
+ i_p_str = str (i - 1 )
872
+ expand_dim (node .name + '_expand_in_' + i_str , node .inputs [0 ]+ '_expand_out_' + i_p_str , node .inputs [0 ]+ '_expand_out_' + i_str , [input_rank + i ])
873
+ expand_dim (node .name + '_expand_in_h_' + i_str , input_h + '_expand_out_h_' + i_p_str , input_h + '_expand_out_h_' + i_str , [input_rank + i ])
874
+
875
+ builder .add_gru (
876
+ name = node .name ,
877
+ W_h = W_h ,
878
+ W_x = W_x ,
879
+ b = b ,
880
+ hidden_size = hidden_size ,
881
+ input_size = input_size ,
882
+ input_names = [node .inputs [0 ] + '_expand_out_' + str (add_nodes - 1 ), input_h + '_expand_out_h_' + str (add_nodes - 1 )],
883
+ output_names = [node .outputs [0 ]+ '_5d_out' , output_h_5d ],
884
+ inner_activation = inner_activation ,
885
+ activation = output_activation ,
886
+ output_all = True ,
887
+ reverse_input = (direction == 'reverse' )
888
+ )
889
+
890
+ # CoreML output is [Seq Len, Batch Size, Num Dir * Hidden Size, 1, 1]
891
+ # Return output as [Seq Len, Num Dir, Batch Size, Hidden Size]
892
+ # Following steps:
893
+ # a. Reshape and split hidden size for direction [Seq Len, Batch Size, Num Dir, Hidden Size, 1]
894
+ # b. Squeeze last dimension [Seq Len, Batch Size, Num Dir, Hidden Size]
895
+ # c. Permute to fix the order [Seq Len, Num Dir, Batch Size, Hidden Size, 1]
896
+ builder .add_rank_preserving_reshape (
897
+ name = node .name + '_reshape_' ,
898
+ input_name = node .outputs [0 ]+ '_5d_out' ,
899
+ output_name = node .outputs [0 ]+ '_5d_reshaped' ,
900
+ output_shape = [0 , 0 , 1 , - 1 , 0 ]
901
+ )
902
+
903
+ builder .add_squeeze (
904
+ name = node .name + '_squeeze_out' ,
905
+ input_name = node .outputs [0 ]+ '_5d_reshaped' ,
906
+ output_name = node .outputs [0 ]+ '_4d' ,
907
+ axes = [- 1 ]
908
+ )
909
+
910
+ builder .add_transpose (
911
+ name = node .name + '_transpose' ,
912
+ axes = [0 , 2 , 1 , 3 ],
913
+ input_name = node .outputs [0 ] + '_4d' ,
914
+ output_name = node .outputs [0 ]
915
+ )
916
+
917
+ # Squeeze dimensions of output_h
918
+ builder .add_squeeze (
919
+ name = node .name + '_squeeze_out_h' ,
920
+ input_name = output_h_5d ,
921
+ output_name = output_h ,
922
+ axes = [- 1 , - 2 ]
923
+ )
924
+
754
925
def _convert_identity (builder , node , graph , err ):
755
926
'''
756
927
convert to CoreML Linear Activation Layer:
@@ -937,19 +1108,18 @@ def expand_dim(node_name, input_name, output_name, axes):
937
1108
if rank == - 1 :
938
1109
return err .unsupported_op_configuration (builder , node , graph , "Rank unknown for input" )
939
1110
if rank < 5 :
940
- total_dims = rank
941
- add_nodes = 5 - total_dims
1111
+ add_nodes = 5 - rank
942
1112
943
- expand_dim (node .name + '_expand_in_0' , node .inputs [0 ], node .inputs [0 ]+ '_expand_out_0' , [total_dims ])
944
- expand_dim (node .name + '_expand_in_h_0' , input_h , input_h + '_expand_out_h_0' , [total_dims ])
945
- expand_dim (node .name + '_expand_in_c_0' , input_c , input_c + '_expand_out_c_0' , [total_dims ])
1113
+ expand_dim (node .name + '_expand_in_0' , node .inputs [0 ], node .inputs [0 ]+ '_expand_out_0' , [rank ])
1114
+ expand_dim (node .name + '_expand_in_h_0' , input_h , input_h + '_expand_out_h_0' , [rank ])
1115
+ expand_dim (node .name + '_expand_in_c_0' , input_c , input_c + '_expand_out_c_0' , [rank ])
946
1116
947
1117
for i in range (1 , add_nodes ):
948
1118
i_str = str (i )
949
1119
i_p_str = str (i - 1 )
950
- expand_dim (node .name + '_expand_in_' + i_str , node .inputs [0 ]+ '_expand_out_' + i_p_str , node .inputs [0 ]+ '_expand_out_' + i_str , [total_dims + i ])
951
- expand_dim (node .name + '_expand_in_h_' + i_str , input_h + '_expand_out_h_' + i_p_str , input_h + '_expand_out_h_' + i_str , [total_dims + i ])
952
- expand_dim (node .name + '_expand_in_c_' + i_str , input_c + '_expand_out_c_' + i_p_str , input_c + '_expand_out_c_' + i_str , [total_dims + i ])
1120
+ expand_dim (node .name + '_expand_in_' + i_str , node .inputs [0 ]+ '_expand_out_' + i_p_str , node .inputs [0 ]+ '_expand_out_' + i_str , [rank + i ])
1121
+ expand_dim (node .name + '_expand_in_h_' + i_str , input_h + '_expand_out_h_' + i_p_str , input_h + '_expand_out_h_' + i_str , [rank + i ])
1122
+ expand_dim (node .name + '_expand_in_c_' + i_str , input_c + '_expand_out_c_' + i_p_str , input_c + '_expand_out_c_' + i_str , [rank + i ])
953
1123
954
1124
if direction == 1 :
955
1125
# Peephole from ONNX are of shape [Num Dir, 3 * hidden_size]
@@ -1288,7 +1458,6 @@ def _convert_mul(builder, node, graph, err):
1288
1458
load_input_constants (builder , node , graph , err )
1289
1459
add_broadcastable_op_chain (builder , node , err , builder .add_multiply_broadcastable )
1290
1460
1291
-
1292
1461
def _convert_nonzero (builder , node , graph , err ):
1293
1462
'''
1294
1463
convert to CoreML Where Non Zero Layer:
@@ -1588,6 +1757,84 @@ def _convert_reverse_sequence(builder, node, graph, err):
1588
1757
output_name = node .outputs [0 ]
1589
1758
)
1590
1759
1760
+ def _convert_roialign (builder , node , graph , err ):
1761
+ '''
1762
+ convert to CoreML CropResize and Pooling Layer:
1763
+ https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L2239
1764
+ https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L1702
1765
+ '''
1766
+
1767
+ target_height = node .attrs .get ('output_height' , 1 )
1768
+ target_width = node .attrs .get ('output_width' , 1 )
1769
+ mode = node .attrs .get ('mode' , 'AVERAGE' ).upper ()
1770
+ sampling_ratio = node .attrs .get ('sampling_ratio' , 0 )
1771
+ spatial_scale = node .attrs .get ('sampling_scale' , 1.0 )
1772
+
1773
+ if node .inputs [2 ] in graph .inputs :
1774
+ graph .inputs .remove (node .inputs [2 ])
1775
+
1776
+ builder .add_expand_dims (
1777
+ name = node .name + '_expand_0' ,
1778
+ input_name = node .inputs [0 ],
1779
+ output_name = node .inputs [0 ]+ '_expanded' ,
1780
+ axes = [0 ]
1781
+ )
1782
+ node .inputs [0 ] += '_expanded'
1783
+
1784
+ builder .add_expand_dims (
1785
+ name = node .name + '_expand_2' ,
1786
+ input_name = node .inputs [2 ],
1787
+ output_name = node .inputs [2 ]+ '_expanded' ,
1788
+ axes = [1 ]
1789
+ )
1790
+ node .inputs [2 ] += '_expanded'
1791
+
1792
+ builder .add_concat_nd (
1793
+ name = node .name + '_concat_indices' ,
1794
+ input_names = [node .inputs [2 ], node .inputs [1 ]],
1795
+ output_name = node .inputs [1 ]+ '_rois' ,
1796
+ axis = 1
1797
+ )
1798
+ node .inputs [1 ] += '_rois'
1799
+
1800
+ builder .add_expand_dims (
1801
+ name = node .name + '_expand_1' ,
1802
+ input_name = node .inputs [1 ],
1803
+ output_name = node .inputs [1 ]+ '_expanded' ,
1804
+ axes = [1 , 3 , 4 ]
1805
+ )
1806
+ node .inputs [1 ] += '_expanded'
1807
+
1808
+ builder .add_crop_resize (
1809
+ name = node .name + '_crop_resize' ,
1810
+ input_names = [node .inputs [0 ], node .inputs [1 ]],
1811
+ output_name = node .outputs [0 ]+ '_crop_resized' ,
1812
+ target_height = target_height * sampling_ratio ,
1813
+ target_width = target_width * sampling_ratio ,
1814
+ mode = 'ROI_ALIGN_MODE' ,
1815
+ box_indices_mode = 'CORNERS_WIDTH_FIRST' ,
1816
+ spatial_scale = spatial_scale
1817
+ )
1818
+
1819
+ builder .add_squeeze (
1820
+ name = node .name + '_squeeze' ,
1821
+ input_name = node .outputs [0 ]+ '_crop_resized' ,
1822
+ output_name = node .outputs [0 ]+ '_crop_resized_squeezed' ,
1823
+ axes = [1 ]
1824
+ )
1825
+
1826
+ builder .add_pooling (
1827
+ name = node .name + '_pool' ,
1828
+ height = sampling_ratio ,
1829
+ width = sampling_ratio ,
1830
+ layer_type = mode ,
1831
+ input_name = node .outputs [0 ]+ '_crop_resized_squeezed' ,
1832
+ output_name = node .outputs [0 ],
1833
+ stride_height = sampling_ratio ,
1834
+ stride_width = sampling_ratio ,
1835
+ padding_type = 'VALID'
1836
+ )
1837
+
1591
1838
def _convert_round (builder , node , graph , err ):
1592
1839
'''
1593
1840
convert to CoreML Round Layer:
@@ -1886,6 +2133,27 @@ def _convert_tile(builder, node, graph, err):
1886
2133
reps = node .input_tensors [node .inputs [1 ]]
1887
2134
)
1888
2135
2136
+ def _convert_topk (builder , node , graph , err ):
2137
+ '''
2138
+ convert to CoreML TopK Layer:
2139
+ https://github.com/apple/coremltools/blob/655b3be5cc0d42c3c4fa49f0f0e4a93a26b3e492/mlmodel/format/NeuralNetwork.proto#L5190
2140
+ '''
2141
+ load_input_constants (builder , node , graph , err )
2142
+ axis = node .attrs .get ("axis" , - 1 )
2143
+ bottom_k = node .attrs .get ("largest" , True ) == False
2144
+ # NOTE: Sorted order attribute is currently ignored in CoreML
2145
+ sorted_order = node .attrs .get ("sorted" , True )
2146
+ if "sorted" in node .attrs :
2147
+ err .unsupported_feature_warning (node , "Sorted Order attribute('sorted') is currently ignored in CoreML 3.0" )
2148
+
2149
+ builder .add_topk (
2150
+ name = node .name ,
2151
+ input_names = node .inputs ,
2152
+ output_names = node .outputs ,
2153
+ axis = axis ,
2154
+ use_bottom_k = bottom_k
2155
+ )
2156
+
1889
2157
def _convert_transpose (builder , node , graph , err ):
1890
2158
'''
1891
2159
convert to CoreML Transpose Layer:
@@ -1956,6 +2224,7 @@ def _convert_unsqueeze(builder, node, graph, err):
1956
2224
"Gather" : _convert_gather ,
1957
2225
"Gemm" : _convert_gemm ,
1958
2226
"Greater" : _convert_greater ,
2227
+ "GRU" : _convert_gru ,
1959
2228
"GlobalAveragePool" : _convert_pool ,
1960
2229
"GlobalMaxPool" : _convert_pool ,
1961
2230
"HardSigmoid" : _convert_hardsigmoid ,
@@ -2000,6 +2269,7 @@ def _convert_unsqueeze(builder, node, graph, err):
2000
2269
"Reshape" : _convert_reshape ,
2001
2270
"Resize" : _convert_resize ,
2002
2271
"ReverseSequence" : _convert_reverse_sequence ,
2272
+ "RoiAlign" : _convert_roialign ,
2003
2273
"Round" : _convert_round ,
2004
2274
"Scatter" : _convert_scatter ,
2005
2275
"Selu" : _convert_selu ,
@@ -2020,6 +2290,7 @@ def _convert_unsqueeze(builder, node, graph, err):
2020
2290
"Tanh" : _convert_tanh ,
2021
2291
"ThresholdedRelu" : _convert_thresholdedrelu ,
2022
2292
"Tile" : _convert_tile ,
2293
+ "TopK" : _convert_topk ,
2023
2294
"Transpose" : _convert_transpose ,
2024
2295
"Unsqueeze" : _convert_unsqueeze ,
2025
2296
"Upsample" : _convert_upsample ,
0 commit comments