Skip to content

Commit 8f21ff1

Browse files
author
Robert Muchsel
authored
ConvTranspose2d fixes, bias allocation, AI87 updates (#107)
* ConvTranspose2d fixes
* Allow picking bias from x16 that is not actively processing
* AI87 updates
* Fix kernel tests for TF Conv1d
1 parent 450865b commit 8f21ff1

21 files changed

+580
-257
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# MAX78000 Model Training and Synthesis
22

3-
_February 10, 2021_
3+
_February 11, 2021_
44

55
The Maxim Integrated AI project is comprised of four repositories:
66

@@ -688,7 +688,7 @@ The MAX78000 hardware does not support arbitrary network parameters. Specificall
688688
* `ConvTranspose2d`:
689689

690690
* Kernel sizes must be 3×3.
691-
* Padding can be 0, 1, or 2.
691+
* Padding must be 1 *(Note: hardware supports additional padding modes, but there is no direct equivalent in PyTorch)*.
692692
* Stride is fixed to [2, 2]. Output padding is fixed to 1.
693693

694694
* A programmable layer-specific shift operator is available at the output of a convolution, see [`output_shift` (Optional)](#output_shift \(Optional\)).
@@ -1558,7 +1558,7 @@ Example:
15581558

15591559
For layers that use a bias, this key can specify one or more bias memories that should be used. By default, the software uses a “Fit First Descending (FFD)” allocation algorithm that considers largest bias lengths first, and then the layer number, and places each bias in the available group with the most available space, descending to the smallest bias length.
15601560

1561-
“Available groups” is layer specific and is a list of the groups that have enabled processors for the respective layer. `bias_group` must reference one or more of the available groups. This check can be overridden using the command line option `--ignore-bias-groups` that allows any group or list of groups for any layer.
1561+
“Available groups” is layer specific and is a list of the groups that have enabled processors for the respective layer. `bias_group` must reference one or more of the available groups.
15621562

15631563
`bias_group` can be a list of integers or a single integer.
15641564

README.pdf

-480 Bytes
Binary file not shown.

izer/apbaccess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def write_mem(
175175
for (addr, val) in self.data_mem[group][proc][mem]:
176176
f.write(f'@{addr:04x} {val}\n')
177177

178-
if self.kernel_mem is not None and not self.zero_sram:
178+
if self.kernel_mem is not None:
179179
try:
180180
target_dir = target_dir = os.path.join(base_directory, test_name, 'masks')
181181
os.makedirs(target_dir, exist_ok=False)

izer/checkpoint.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def load(
115115
weight_min.append(w_min)
116116
weight_max.append(w_max)
117117

118-
if op == 'conv2d' and operator[seq] == opn.CONVTRANSPOSE2D:
118+
if operator[seq] == opn.CONVTRANSPOSE2D:
119119
# For ConvTranspose2d, flip the weights as follows:
120120
w = np.flip(w, axis=(2, 3)).swapaxes(0, 1)
121121

@@ -211,8 +211,8 @@ def load(
211211
if verbose:
212212
print(f'Checkpoint for epoch {checkpoint["epoch"]}, model {checkpoint["arch"]} - '
213213
'weight and bias data:')
214-
print(' InCh OutCh Weights Quant Shift Min Max Size '
215-
'Key Bias Quant Min Max Size Key')
214+
print(' InCh OutCh Weights Quant Shift Min Max Size '
215+
'Key Bias Quant Min Max Size Key')
216216
for ll in range(layers):
217217
if ll < len(weights) and weights[ll] is not None:
218218
weight_shape = str(weights[ll].shape)
@@ -227,10 +227,10 @@ def load(
227227
print(f'{input_channels[ll]:5} {output_channels[ll]:5} '
228228
f'{weight_shape:15} '
229229
f'{quant[ll]:5} {output_shift_shape:5} '
230-
f'{weight_min[ll]:4} {weight_max[ll]:3} {weight_size[ll]:6} '
230+
f'{weight_min[ll]:4} {weight_max[ll]:4} {weight_size[ll]:6} '
231231
f'{weight_keys[ll]:35} '
232232
f'{bias_shape:10} '
233-
f'{bias_quant[ll]:5} {bias_min[ll]:4} {bias_max[ll]:3} {bias_size[ll]:4} '
233+
f'{bias_quant[ll]:5} {bias_min[ll]:4} {bias_max[ll]:4} {bias_size[ll]:4} '
234234
f'{bias_keys[ll]:25}')
235235
print(f'TOTAL: {layers} layers, {param_count:,} parameters, {param_size:,} bytes')
236236

izer/commandline.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ def get_parser():
198198
help="specify start offset for weights (debug, default: 0)")
199199
group.add_argument('--ignore-bias-groups', action='store_true', default=False,
200200
help="do not force `bias_group` to use an active group (default: false)")
201+
group.add_argument('--kernel-format', default='{0:4}', metavar='S',
202+
help="print format for kernels (default: '{0:4}')")
201203

202204
# RTL sim
203205
group = parser.add_argument_group('RTL simulation')

izer/compute.py

Lines changed: 48 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def conv2d(
6666
fractional_stride,
6767
output_pad,
6868
groups=1,
69-
debug=False,
69+
debug=False, # pylint: disable=unused-argument
7070
):
7171
"""
7272
Compute a 2D convolution.
@@ -77,80 +77,24 @@ def conv2d(
7777
in_channels = input_size[0]
7878
out_channels = output_size[0]
7979

80-
if debug:
81-
# Slow route using pure Python
82-
ref = np.full(shape=output_size, fill_value=np.nan, dtype=np.int64)
83-
debug_print('k,c,x,y,weight,data,prod,cacc,acc')
84-
85-
for k in range(out_channels):
86-
for y in range(-pad[0],
87-
input_size[1] - dilation[0] * (kernel_size[0] - 1) + pad[0],
88-
stride[0]):
89-
for y_frac in range(fractional_stride[0]):
90-
for x in range(-pad[1],
91-
input_size[2] - dilation[1] * (kernel_size[1] - 1) + pad[1],
92-
stride[1]):
93-
for x_frac in range(fractional_stride[1]):
94-
val = np.int64(0)
95-
c = 0
96-
while True:
97-
dc = c if groups == 1 else c + k * (in_channels // groups)
98-
sval = np.int(0)
99-
for h in range(kernel_size[0]):
100-
for w in range(kernel_size[1]):
101-
ypos = (y + pad[0])*fractional_stride[0] - pad[0] \
102-
+ y_frac + h * dilation[0]
103-
yd, yr = divmod(ypos, fractional_stride[0])
104-
xpos = (x + pad[1])*fractional_stride[1] - pad[1] \
105-
+ x_frac + w * dilation[1]
106-
xd, xr = divmod(xpos, fractional_stride[1])
107-
if yr == 0 and 0 <= yd < input_size[1] and \
108-
xr == 0 and 0 <= xd < input_size[2]:
109-
prod = weight[k][c][h][w] * data[dc][yd][xd]
110-
sval += prod
111-
val += prod
112-
stats.true_macc += 1
113-
debug_print(
114-
f'{k},{c},{x},{y},{weight[k][c][h][w]},'
115-
f'{data[dc][yd][xd]},{prod},{sval},{val}'
116-
)
117-
c += 16
118-
if c >= in_channels // groups:
119-
c = (c + 1) % 16
120-
if c in (0, in_channels // groups):
121-
break
122-
123-
if bias is not None:
124-
val += bias[k]
125-
debug_print(
126-
f' adding bias: {bias[k]} -> result: {val}'
127-
)
128-
129-
ref[k][
130-
((y + pad[0])*fractional_stride[0] + y_frac) // stride[0]
131-
][
132-
((x + pad[1])*fractional_stride[1] + x_frac) // stride[1]
133-
] = val
134-
135-
# Fast computation using NumPy
136-
13780
# Stretch data for fractionally-strided convolution
13881
if fractional_stride[0] > 1 or fractional_stride[1] > 1:
13982
ndata = np.zeros((data.shape[0],
140-
data.shape[1] * fractional_stride[0],
141-
data.shape[2] * fractional_stride[1]),
83+
data.shape[1] * fractional_stride[0] - 1,
84+
data.shape[2] * fractional_stride[1] - 1),
14285
dtype=data.dtype)
14386
ndata[:, 0::fractional_stride[0], 0::fractional_stride[1]] = data
14487
data = ndata
14588

146-
# Create zero padding around data and stretch weights for dilation.
89+
# Create zero padding around data
14790
if pad[0] or pad[1] or output_pad[0] or output_pad[1]:
14891
data = np.pad(data, pad_width=((0, 0),
149-
(pad[0], pad[0]),
150-
(pad[1], pad[1])),
92+
(pad[0], pad[0] + output_pad[0]),
93+
(pad[1], pad[1] + output_pad[1])),
15194
mode='constant', constant_values=0)
15295

15396
if dilation[0] > 1 or dilation[1] > 1:
97+
# Stretch weights for dilation
15498
nweight = np.zeros((weight.shape[0], weight.shape[1],
15599
(kernel_size[0] - 1) * dilation[0] + 1,
156100
(kernel_size[1] - 1) * dilation[1] + 1),
@@ -182,15 +126,51 @@ def conv2d(
182126
for k in range(out_channels):
183127
output[k] += bias[k]
184128

185-
if debug:
186-
if not (ref == output).all():
187-
eprint('NumPy <-> Python mismatch in compute.conv2d')
188-
189-
assert output.shape == tuple(output_size), f'Shape mismatch: {output.shape} vs {output_size}'
129+
assert output.shape == tuple(output_size), \
130+
f'Shape mismatch: NumPy result {output.shape} vs expected {output_size}'
190131

191132
return output
192133

193134

135+
def convtranspose2d(
136+
data,
137+
weight,
138+
bias,
139+
input_size,
140+
output_size,
141+
kernel_size,
142+
stride,
143+
pad,
144+
dilation,
145+
fractional_stride,
146+
output_pad,
147+
groups=1,
148+
debug=False,
149+
):
150+
"""
151+
Compute a transposed 2D convolution.
152+
"""
153+
154+
return conv2d(
155+
data,
156+
weight,
157+
bias,
158+
input_size,
159+
output_size,
160+
kernel_size,
161+
stride,
162+
(
163+
dilation[0] * (kernel_size[0] - 1) - pad[0],
164+
dilation[1] * (kernel_size[1] - 1) - pad[1]
165+
),
166+
dilation,
167+
fractional_stride,
168+
output_pad,
169+
groups,
170+
debug,
171+
)
172+
173+
194174
def conv1d(
195175
data,
196176
weight,

izer/izer.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ def main():
229229
pool = params['pool'][:layers]
230230
pool_stride = params['pool_stride'][:layers]
231231
padding = params['padding'][:layers]
232+
output_padding = params['output_padding'][:layers]
232233
stride = params['stride'][:layers]
233234
dilation = params['dilation'][:layers]
234235
big_data = params['big_data'][:layers]
@@ -399,14 +400,12 @@ def main():
399400
(pooled_size[1] - dilation[ll][1] * (kernel_size[ll][1] - 1)
400401
- 1 + 2 * padding[ll][1]) // stride[ll][1] + 1]
401402
elif operator[ll] == op.CONVTRANSPOSE2D:
402-
# output padding is always 1
403-
output_padding = 1
404403
output_dim[ll] = [(pooled_size[0] - 1) * stride[ll][0] - 2 * padding[ll][0]
405404
+ dilation[ll][0] * (kernel_size[ll][0] - 1)
406-
+ output_padding + 1,
405+
+ output_padding[ll][0] + 1,
407406
(pooled_size[1] - 1) * stride[ll][1] - 2 * padding[ll][1]
408407
+ dilation[ll][1] * (kernel_size[ll][1] - 1)
409-
+ output_padding + 1]
408+
+ output_padding[ll][1] + 1]
410409
else: # Element-wise
411410
output_dim[ll] = [pooled_size[0], pooled_size[1]]
412411
if flatten[ll]:
@@ -594,6 +593,8 @@ def main():
594593
fifo_go=args.fifo_go,
595594
pretend_zero_sram=args.pretend_zero_sram,
596595
ignore_bias_groups=args.ignore_bias_groups,
596+
output_padding=output_padding,
597+
kernel_format=args.kernel_format,
597598
)
598599
if not args.embedded_code and args.autogen.lower() != 'none':
599600
rtlsim.append_regression(

izer/kbias.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def load(
3232
processor_map,
3333
output_processor_map,
3434
out_expand,
35+
groups_used,
3536
debug, # pylint: disable=unused-argument
3637
):
3738
"""
@@ -74,7 +75,7 @@ def load(
7475
if ll == 0 and streaming[ll] and not tc.dev.SUPPORT_STREAM_BIAS:
7576
bias_len[ll] += 1 # Work around a problem on AI85
7677

77-
bias_map += [(ll, group_map[ll] if bias_group_map[ll] is None else bias_group_map[ll],
78+
bias_map += [(ll, groups_used if bias_group_map[ll] is None else bias_group_map[ll],
7879
bias_len[ll])]
7980
continue
8081

izer/kernels.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ def load( # pylint: disable=too-many-branches,too-many-statements
7979
api=False,
8080
start_offs=0,
8181
bypass=None,
82+
zero_sram=False,
8283
):
8384
"""
8485
Stack `kernel` values and write them to C code (for `embedded_code` if `True` or
@@ -133,12 +134,15 @@ def load( # pylint: disable=too-many-branches,too-many-statements
133134
in_exp = 1
134135
in_chan = in_expand_thresh[ll]
135136
elif calcx4[ll]:
137+
# FIXME for output channels % 4 != 0
136138
kernel_reshaped = kernel[ll].reshape(
137-
output_chan[ll],
139+
output_chan[ll] // 4,
140+
4,
138141
in_expand[ll],
139-
-1,
140-
).swapaxes(0, 1).reshape(
141-
kernel[ll].shape,
142+
in_expand_thresh[ll],
143+
kernel_size[ll][0] * kernel_size[ll][1],
144+
).transpose(0, 2, 1, 3, -1).reshape(
145+
kernel[ll].shape
142146
)
143147
in_exp = in_expand[ll]
144148
in_chan = input_chan[ll]
@@ -151,15 +155,15 @@ def load( # pylint: disable=too-many-branches,too-many-statements
151155
kernel_reshaped = kernel_reshaped.copy().clip(-1, 0)
152156

153157
if np.ndim(kernel_reshaped) > 2:
154-
if kernel_reshaped.shape[-2] != kernel_size[ll][0] \
155-
or kernel_reshaped.shape[-1] != kernel_size[ll][1]:
158+
if kernel_reshaped.shape[-1] != kernel_size[ll][0] \
159+
or kernel_reshaped.shape[-2] != kernel_size[ll][1]:
156160
eprint(f'The configured kernel dimensions ({kernel_size[ll][0]}x'
157-
f'{kernel_size[ll][1]}) for layer {ll} do not match the binary weights '
158-
f'({kernel_reshaped.shape[-2]}x{kernel_reshaped.shape[-1]})!')
161+
f'{kernel_size[ll][1]}) for layer {ll} do not match the weights file '
162+
f'({kernel_reshaped.shape[-1]}x{kernel_reshaped.shape[-2]})!')
159163
else:
160164
if kernel_reshaped.shape[-1] != kernel_size[ll][0]:
161165
eprint(f'The configured kernel dimensions ({kernel_size[ll][0]}) '
162-
f'for layer {ll} do not match the binary weights '
166+
f'for layer {ll} do not match the weights file '
163167
f'({kernel_reshaped.shape[-1]})!')
164168

165169
proc_map = processor_map[ll]
@@ -419,7 +423,8 @@ def add_kernel_data(ll, p, col_target, b):
419423
ll = kernel_map[p][col]
420424
if ll != _INVALID_VALUE:
421425
k = kernel_data[p][col]
422-
apb.write_kern(ll, p, col, k, calcx4=calcx4[ll])
426+
if not zero_sram or np.any(k != 0):
427+
apb.write_kern(ll, p, col, k, calcx4=calcx4[ll])
423428
apb.function_footer() # load_weights()
424429

425430
if embedded_code or mexpress:

0 commit comments

Comments
 (0)