Skip to content

Commit d9189ff

Browse files
author
Robert Muchsel
authored
Fit-First Descending (FFD) bias allocation; move default quantization check; AI87 RTL sims and fixes (#100)
1 parent 236a67d commit d9189ff

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

55 files changed

+2090
-372
lines changed

.gitignore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
/etc/
1010
/go
1111
/include/
12-
/lib/
13-
/lib64/
12+
/lib
13+
/lib64
1414
/ninja-python-distributions
1515
/pip-selfcheck.json
1616
/pyvenv.cfg

gen-demos-max78000.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ COMMON_ARGS="--device $DEVICE --compact-data --mexpress --timer 0 --display-chec
1010
./ai8xize.py --verbose --log --test-dir $TARGET --prefix cifar-100-mixed --checkpoint-file trained/ai85-cifar100-qat-mixed-q.pth.tar --config-file networks/cifar100-simple.yaml --softmax $COMMON_ARGS --boost 2.5 "$@"
1111
./ai8xize.py --verbose --log --test-dir $TARGET --prefix cifar-100-simplewide2x-mixed --checkpoint-file trained/ai85-cifar100-simplenetwide2x-qat-mixed-q.pth.tar --config-file networks/cifar100-simplewide2x.yaml --softmax $COMMON_ARGS --boost 2.5 "$@"
1212
./ai8xize.py --verbose --log --test-dir $TARGET --prefix cifar-100-residual --checkpoint-file trained/ai85-cifar100-residual-qat8-q.pth.tar --config-file networks/cifar100-ressimplenet.yaml --softmax $COMMON_ARGS --boost 2.5 "$@"
13-
./ai8xize.py --verbose --log --test-dir $TARGET --prefix kws20 --checkpoint-file trained/ai85-kws20-qat8-q.pth.tar --config-file networks/kws20-hwc.yaml --softmax $COMMON_ARGS "$@"
14-
./ai8xize.py --verbose --log --test-dir $TARGET --prefix kws20_v2 --checkpoint-file trained/ai85-kws20_v2-qat8-q.pth.tar --config-file networks/kws20-v2-hwc.yaml --softmax $COMMON_ARGS "$@"
1513
./ai8xize.py --verbose --log --test-dir $TARGET --prefix kws20_v3 --checkpoint-file trained/ai85-kws20_v3-qat8-q.pth.tar --config-file networks/kws20-v3-hwc.yaml --softmax $COMMON_ARGS "$@"
1614
./ai8xize.py --verbose --log --test-dir $TARGET --prefix faceid --checkpoint-file trained/ai85-faceid-qat8-q.pth.tar --config-file networks/faceid.yaml --fifo $COMMON_ARGS "$@"
1715
./ai8xize.py --verbose --log --test-dir $TARGET --prefix cats-dogs --checkpoint-file trained/ai85-catsdogs-qat8-q.pth.tar --config-file networks/cats-dogs-chw.yaml --softmax $COMMON_ARGS "$@"

izer/apbaccess.py

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"""
1010
import os
1111

12-
from . import toplevel
12+
from . import kernels, toplevel
1313
from . import tornadocnn as tc
1414
from . import unload
1515
from .eprint import eprint, wprint
@@ -72,6 +72,7 @@ def __init__(
7272
output_width=8,
7373
bias=False,
7474
wfi=True,
75+
zero_sram=False,
7576
):
7677
"""
7778
Create an APB class object that writes to memfile.
@@ -105,7 +106,6 @@ def __init__(
105106
self.blocklevel = blocklevel
106107
self.measure_energy = measure_energy
107108
self.timer = timer
108-
self.mexpress = mexpress
109109
self.pll = pll
110110
self.boost = boost
111111
self.forever = forever
@@ -121,6 +121,7 @@ def __init__(
121121
self.output_width = output_width
122122
self.bias = bias
123123
self.wfi = wfi
124+
self.zero_sram = zero_sram
124125

125126
self.data = 0
126127
self.num = 0
@@ -174,7 +175,7 @@ def write_mem(
174175
for (addr, val) in self.data_mem[group][proc][mem]:
175176
f.write(f'@{addr:04x} {val}\n')
176177

177-
if self.kernel_mem is not None:
178+
if self.kernel_mem is not None and not self.zero_sram:
178179
try:
179180
target_dir = target_dir = os.path.join(base_directory, test_name, 'masks')
180181
os.makedirs(target_dir, exist_ok=False)
@@ -231,6 +232,7 @@ def write(
231232
no_verify=False,
232233
fifo=None,
233234
base=None,
235+
fifo_wait=True,
234236
): # pylint: disable=unused-argument
235237
"""
236238
Write address `addr` and data `val` to the output file.
@@ -462,22 +464,11 @@ def write_kern(
462464
"""
463465
assert p < tc.dev.MAX_PROC
464466
assert idx < tc.dev.mask_width(p)
465-
if not calcx4:
466-
addr = tc.dev.C_GROUP_OFFS * (p // tc.dev.P_NUMPRO) \
467-
+ tc.dev.C_MRAM_BASE \
468-
+ (p % tc.dev.P_NUMPRO) * tc.dev.MASK_OFFS * 16 + idx * 16
469-
idx_x4 = idx
470-
else:
471-
if idx < tc.dev.MASK_WIDTH_SMALL:
472-
idx_x4 = (idx % 4) * (tc.dev.MASK_WIDTH_SMALL // 4) + idx // 4
473-
else:
474-
idx -= tc.dev.MASK_WIDTH_SMALL
475-
idx_x4 = (idx % 4) * ((tc.dev.MASK_WIDTH_LARGE - tc.dev.MASK_WIDTH_SMALL) // 4) \
476-
+ idx // 4
477-
idx += tc.dev.MASK_WIDTH_SMALL
478-
addr = tc.dev.C_GROUP_OFFS * (p // tc.dev.P_NUMPRO) \
479-
+ tc.dev.C_MRAM_BASE \
480-
+ (p % tc.dev.P_NUMPRO) * tc.dev.MASK_OFFS * 16 + idx_x4 * 16
467+
468+
idx_x4 = idx if not calcx4 else kernels.calcx4_index(idx)
469+
addr = tc.dev.C_GROUP_OFFS * (p // tc.dev.P_NUMPRO) \
470+
+ tc.dev.C_MRAM_BASE \
471+
+ (p % tc.dev.P_NUMPRO) * tc.dev.MASK_OFFS * 16 + idx_x4 * 16
481472

482473
if not verify_only:
483474
if self.kernel_mem is not None:
@@ -796,6 +787,7 @@ def write(
796787
no_verify=False,
797788
fifo=None,
798789
base=None,
790+
fifo_wait=True,
799791
): # pylint: disable=unused-argument
800792
"""
801793
Write address `addr` and data `val` to the .mem file.
@@ -911,6 +903,7 @@ def write(
911903
no_verify=False,
912904
fifo=None,
913905
base=None,
906+
fifo_wait=True,
914907
):
915908
"""
916909
Write address `addr` and data `val` to the .c file.
@@ -940,17 +933,19 @@ def write(
940933
else:
941934
if not self.fast_fifo:
942935
addr = self.apb_base + tc.dev.C_FIFO_BASE
943-
self.memfile.write(f'{indent}while (((*((volatile uint32_t *) '
944-
f'0x{addr + tc.dev.FIFO_STAT*4:08x})'
945-
f' & {1 << fifo})) != 0); // Wait for FIFO {fifo}\n')
936+
if fifo_wait:
937+
self.memfile.write(f'{indent}while (((*((volatile uint32_t *) '
938+
f'0x{addr + tc.dev.FIFO_STAT*4:08x})'
939+
f' & {1 << fifo})) != 0); // Wait for FIFO {fifo}\n')
946940
self.memfile.write(f'{indent}*((volatile uint32_t *) '
947941
f'0x{addr + tc.dev.FIFO_REG*4 + fifo*4:08x}) = '
948942
f'{val};{comment}\n')
949943
else:
950944
addr = tc.dev.FAST_FIFO_BASE
951-
self.memfile.write(f'{indent}while (((*((volatile uint32_t *) '
952-
f'0x{addr + tc.dev.FAST_FIFO_SR*4:08x})'
953-
f' & 2)) != 0); // Wait for FIFO\n')
945+
if fifo_wait:
946+
self.memfile.write(f'{indent}while (((*((volatile uint32_t *) '
947+
f'0x{addr + tc.dev.FAST_FIFO_SR*4:08x})'
948+
f' & 2)) != 0); // Wait for FIFO\n')
954949
self.memfile.write(f'{indent}*((volatile uint32_t *) '
955950
f'0x{addr + tc.dev.FAST_FIFO_DR*4:08x}) = '
956951
f'{val};{comment}\n')

izer/camera.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010

1111

1212
VSYNC_LEADIN = 10
13-
VSYNC_HIGH = 5000
14-
VSYNC_LOW = 2000
15-
RETRACE = 318
13+
VSYNC_HIGH = 50 # 5000
14+
VSYNC_LOW = 20 # 2000
15+
RETRACE = 5 # 318
1616
FINAL = 10
1717

1818

izer/checkpoint.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def load(
211211
if verbose:
212212
print(f'Checkpoint for epoch {checkpoint["epoch"]}, model {checkpoint["arch"]} - '
213213
'weight and bias data:')
214-
print('Layer InCh OutCh Weights Quant Shift Min Max Size '
214+
print(' InCh OutCh Weights Quant Shift Min Max Size '
215215
'Key Bias Quant Min Max Size Key')
216216
for ll in range(layers):
217217
if ll < len(weights) and weights[ll] is not None:
@@ -224,8 +224,7 @@ def load(
224224
output_shift_shape = output_shift[ll]
225225
else:
226226
output_shift_shape = 'N/A'
227-
print(f'{ll:4}: '
228-
f'{input_channels[ll]:5} {output_channels[ll]:5} '
227+
print(f'{input_channels[ll]:5} {output_channels[ll]:5} '
229228
f'{weight_shape:15} '
230229
f'{quant[ll]:5} {output_shift_shape:5} '
231230
f'{weight_min[ll]:4} {weight_max[ll]:3} {weight_size[ll]:6} '

izer/cmsisnn.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ def create_net( # pylint: disable=too-many-arguments,too-many-locals,too-many-b
6464
"""
6565
Create the CMSIS NN network.
6666
"""
67+
wprint('CMSIS-NN code generation is unsupported.')
68+
6769
if output_width[-1] != 8:
6870
wprint('CMSIS network generator does not currently support `output_width` that is not 8. '
6971
'Forcing to 8 bit.') # FIXME: Support 32-bit output

izer/commandline.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,12 +207,16 @@ def get_parser():
207207
f"(default: {camera.RETRACE})")
208208
group.add_argument('--input-csv-period', metavar='N', default=80,
209209
help="period for .csv input data (default: 80)")
210+
group.add_argument('--input-pix-clk', metavar='N', default=9,
211+
help="pixel clock for .csv input data (default: 9)")
210212
group.add_argument('--input-sync', action='store_true', default=False,
211213
help="use synchronous camera input (default: false)")
212214
group.add_argument('--input-fifo', action='store_true', default=False,
213215
help="use software FIFO to buffer input (default: false)")
214216
group.add_argument('--autogen', default='None', metavar='S',
215217
help="directory location for autogen_list (default: None)")
218+
group.add_argument('--autogen_list', default='autogen_list', metavar='S',
219+
help="file name for autogen_list")
216220
group.add_argument('--input-filename', default='input', metavar='S',
217221
help="input .mem file name base (default: 'input' -> 'input.mem')")
218222
group.add_argument('--output-filename', default='output', metavar='S',
@@ -370,7 +374,7 @@ def get_parser():
370374
wprint('`--unload` is no longer needed, and is ignored.')
371375

372376
if args.allow_streaming:
373-
wprint('`--allow-streaming` is not supported.')
377+
wprint('`--allow-streaming` is unsupported.')
374378

375379
# Set disabled legacy arguments
376380
args.unload = False

izer/compute.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ def pool2d(
289289
pool,
290290
stride,
291291
average,
292+
dilation=(1, 1),
292293
floor=True,
293294
debug=False,
294295
):
@@ -305,7 +306,8 @@ def pool2d(
305306
for row in range(0, output_size[1]*stride[0], stride[0]):
306307
for col in range(0, output_size[2]*stride[1], stride[1]):
307308
if average:
308-
avg = np.average(data[c][row:row+pool[0], col:col+pool[1]])
309+
avg = np.average(data[c][row:row+pool[0]*dilation[0]:dilation[0],
310+
col:col+pool[1]*dilation[1]:dilation[1]])
309311
if floor:
310312
if avg < 0:
311313
val = np.ceil(avg).astype(np.int64).clip(min=-128, max=127)
@@ -314,20 +316,26 @@ def pool2d(
314316
else:
315317
val = np.floor(avg + 0.5).astype(np.int64).clip(min=-128, max=127)
316318
else:
317-
val = np.amax(data[c][row:row+pool[0], col:col+pool[1]])
319+
val = np.amax(data[c][row:row+pool[0]*dilation[0]:dilation[0],
320+
col:col+pool[1]*dilation[1]:dilation[1]])
318321
ref[c][row//stride[0]][col//stride[1]] = val
319322

320323
# Fast computation using NumPy
321-
data_pad = data[:, :(data.shape[1] - pool[0]) // stride[0] * stride[0] + pool[0],
322-
:(data.shape[2] - pool[1]) // stride[1] * stride[1] + pool[1], ...]
324+
data_pad = data[
325+
:,
326+
:(data.shape[1] - pool[0] + dilation[0] - 1) // stride[0] * stride[0] + pool[0],
327+
:(data.shape[2] - pool[1] + dilation[1] - 1) // stride[1] * stride[1] + pool[1],
328+
...
329+
]
323330
h, w = data_pad.strides[1:]
324331

325332
view = as_strided(data_pad,
326333
shape=(data_pad.shape[0],
327-
1 + (data_pad.shape[1]-pool[0]) // stride[0],
328-
1 + (data_pad.shape[2]-pool[1]) // stride[1],
334+
1 + (data_pad.shape[1] - pool[0] - dilation[0] + 1) // stride[0],
335+
1 + (data_pad.shape[2] - pool[1] - dilation[1] + 1) // stride[1],
329336
pool[0], pool[1]),
330-
strides=(data_pad.strides[0], stride[0] * h, stride[1] * w, h, w),
337+
strides=(data_pad.strides[0], stride[0] * h,
338+
stride[1] * w, h * dilation[0], w * dilation[1]),
331339
writeable=False)
332340

333341
if average:
@@ -343,7 +351,7 @@ def pool2d(
343351
if not match:
344352
eprint('NumPy <-> Python mismatch in compute.pool2d')
345353

346-
assert pooled.shape == tuple(output_size)
354+
assert pooled.shape == tuple(output_size), f'shape mismatch {pooled.shape} vs {output_size}'
347355

348356
return pooled
349357

@@ -355,6 +363,7 @@ def pool1d(
355363
pool,
356364
stride,
357365
average,
366+
dilation=1,
358367
floor=True,
359368
debug=False,
360369
): # pylint: disable=unused-argument
@@ -367,7 +376,7 @@ def pool1d(
367376
for c in range(input_size[0]):
368377
for x in range(0, output_size[1]*stride, stride):
369378
if average:
370-
avg = np.average(data[c][x:x+pool])
379+
avg = np.average(data[c][x:x+pool*dilation:dilation])
371380
if avg < 0:
372381
val = np.ceil(avg).astype(np.int64).clip(min=-128, max=127)
373382
else:

izer/izer.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,10 @@ def main():
250250
conv_groups = params['conv_groups'][:layers]
251251
write_gap = params['write_gap'][:layers]
252252
bypass = params['bypass'][:layers]
253+
bias_group_map = params['bias_group_map'][:layers]
254+
calcx4 = [True] * layers if args.calcx4 else params['calcx4'][:layers]
255+
readahead = [True] * layers if args.rd_ahead else params['readahead'][:layers]
256+
pool_dilation = params['pool_dilation'][:layers]
253257

254258
# Command line override
255259
if args.input_offset is not None:
@@ -304,6 +308,8 @@ def main():
304308
while ll < layers:
305309
if input_channels[ll] <= 0:
306310
eprint(f'Must specify `in_channels` for layer {ll}.')
311+
if quantization[ll] is None:
312+
quantization[ll] = 8 if not bypass[ll] and operator[ll] != op.NONE else 0 # Defaults
307313
if operator[ll] != op.NONE and not bypass[ll]:
308314
if quantization[ll] == -1:
309315
w = np.abs(weights[ll])
@@ -366,13 +372,13 @@ def main():
366372
eprint(f'{op.string(operator[ll])} in layer {ll} does not support non-square '
367373
f'pooling stride (currently set to '
368374
f'{pool_stride[ll][0]}x{pool_stride[ll][1]}).')
369-
pooled_size = [(input_dim[ll][0] + pool_stride[ll][0] - pool[ll][0])
370-
// pool_stride[ll][0],
371-
(input_dim[ll][1] + pool_stride[ll][1] - pool[ll][1])
372-
// pool_stride[ll][1]]
375+
pooled_size = [(input_dim[ll][0] + pool_stride[ll][0] - pool[ll][0]
376+
- pool_dilation[ll][0] + 1) // pool_stride[ll][0],
377+
(input_dim[ll][1] + pool_stride[ll][1] - pool[ll][1]
378+
- pool_dilation[ll][1] + 1) // pool_stride[ll][1]]
373379
else:
374-
pooled_size = [(input_dim[ll][0] + pool_stride[ll][0] - pool[ll][0])
375-
// pool_stride[ll][0],
380+
pooled_size = [(input_dim[ll][0] + pool_stride[ll][0] - pool[ll][0]
381+
- pool_dilation[ll][0] + 1) // pool_stride[ll][0],
376382
1]
377383

378384
pooled_dim[ll] = pooled_size
@@ -575,24 +581,26 @@ def main():
575581
measure_energy=args.energy,
576582
timer=args.timer,
577583
board_name=args.board_name,
578-
rd_ahead=args.rd_ahead,
579-
calcx4=args.calcx4,
584+
rd_ahead=readahead,
585+
calcx4=calcx4,
580586
rtl_preload=args.rtl_preload,
581587
result_output=args.result_output,
582588
weight_start=args.weight_start,
583589
wfi=args.wfi,
584590
bypass=bypass,
591+
bias_group_map=bias_group_map,
592+
pool_dilation=pool_dilation,
593+
input_pix_clk=args.input_pix_clk,
585594
)
586595
if not args.embedded_code and args.autogen.lower() != 'none':
587596
rtlsim.append_regression(
588597
args.top_level,
589598
tn,
590599
args.queue_name,
591600
args.autogen,
601+
args.autogen_list,
592602
)
593603
else:
594-
wprint('CMSIS-NN code generation is unsupported.')
595-
596604
cmsisnn.create_net(
597605
args.prefix,
598606
args.verbose,

0 commit comments

Comments
 (0)