analogdevicesinc
diff --git a/.gitignore b/.gitignore
Lines changed: 2 additions & 2 deletions
diff --git a/gen-demos-max78000.sh b/gen-demos-max78000.sh
Lines changed: 0 additions & 2 deletions
diff --git a/izer/apbaccess.py b/izer/apbaccess.py
Lines changed: 20 additions & 25 deletions
diff --git a/izer/camera.py b/izer/camera.py
Lines changed: 3 additions & 3 deletions
diff --git a/izer/checkpoint.py b/izer/checkpoint.py
Lines changed: 2 additions & 3 deletions
diff --git a/izer/cmsisnn.py b/izer/cmsisnn.py
Lines changed: 2 additions & 0 deletions
diff --git a/izer/commandline.py b/izer/commandline.py
Lines changed: 5 additions & 1 deletion
diff --git a/izer/compute.py b/izer/compute.py
Lines changed: 18 additions & 9 deletions
diff --git a/izer/izer.py b/izer/izer.py
Lines changed: 18 additions & 10 deletions
@@ -9,8 +9,8 @@
 /etc/
 /go
 /include/
-/lib/
-/lib64/
+/lib
+/lib64
 /ninja-python-distributions
 /pip-selfcheck.json
 /pyvenv.cfg
 
@@ -10,8 +10,6 @@ COMMON_ARGS="--device $DEVICE --compact-data --mexpress --timer 0 --display-chec
 ./ai8xize.py --verbose --log --test-dir $TARGET --prefix cifar-100-mixed --checkpoint-file trained/ai85-cifar100-qat-mixed-q.pth.tar --config-file networks/cifar100-simple.yaml --softmax $COMMON_ARGS --boost 2.5 "$@"
 ./ai8xize.py --verbose --log --test-dir $TARGET --prefix cifar-100-simplewide2x-mixed --checkpoint-file trained/ai85-cifar100-simplenetwide2x-qat-mixed-q.pth.tar --config-file networks/cifar100-simplewide2x.yaml --softmax $COMMON_ARGS --boost 2.5 "$@"
 ./ai8xize.py --verbose --log --test-dir $TARGET --prefix cifar-100-residual --checkpoint-file trained/ai85-cifar100-residual-qat8-q.pth.tar --config-file networks/cifar100-ressimplenet.yaml --softmax $COMMON_ARGS --boost 2.5 "$@"
-./ai8xize.py --verbose --log --test-dir $TARGET --prefix kws20 --checkpoint-file trained/ai85-kws20-qat8-q.pth.tar --config-file networks/kws20-hwc.yaml --softmax $COMMON_ARGS "$@"
-./ai8xize.py --verbose --log --test-dir $TARGET --prefix kws20_v2 --checkpoint-file trained/ai85-kws20_v2-qat8-q.pth.tar --config-file networks/kws20-v2-hwc.yaml --softmax $COMMON_ARGS "$@"
 ./ai8xize.py --verbose --log --test-dir $TARGET --prefix kws20_v3 --checkpoint-file trained/ai85-kws20_v3-qat8-q.pth.tar --config-file networks/kws20-v3-hwc.yaml --softmax $COMMON_ARGS "$@"
 ./ai8xize.py --verbose --log --test-dir $TARGET --prefix faceid --checkpoint-file trained/ai85-faceid-qat8-q.pth.tar --config-file networks/faceid.yaml --fifo $COMMON_ARGS "$@"
 ./ai8xize.py --verbose --log --test-dir $TARGET --prefix cats-dogs --checkpoint-file trained/ai85-catsdogs-qat8-q.pth.tar --config-file networks/cats-dogs-chw.yaml --softmax $COMMON_ARGS "$@"
@@ -9,7 +9,7 @@
 """
 import os
 
-from . import toplevel
+from . import kernels, toplevel
 from . import tornadocnn as tc
 from . import unload
 from .eprint import eprint, wprint
@@ -72,6 +72,7 @@ def __init__(
             output_width=8,
             bias=False,
             wfi=True,
+            zero_sram=False,
     ):
         """
         Create an APB class object that writes to memfile.
@@ -105,7 +106,6 @@ def __init__(
         self.blocklevel = blocklevel
         self.measure_energy = measure_energy
         self.timer = timer
-        self.mexpress = mexpress
         self.pll = pll
         self.boost = boost
         self.forever = forever
@@ -121,6 +121,7 @@ def __init__(
         self.output_width = output_width
         self.bias = bias
         self.wfi = wfi
+        self.zero_sram = zero_sram
 
         self.data = 0
         self.num = 0
@@ -174,7 +175,7 @@ def write_mem(
                                 for (addr, val) in self.data_mem[group][proc][mem]:
                                     f.write(f'@{addr:04x} {val}\n')
 
-        if self.kernel_mem is not None:
+        if self.kernel_mem is not None and not self.zero_sram:
             try:
                 target_dir = target_dir = os.path.join(base_directory, test_name, 'masks')
                 os.makedirs(target_dir, exist_ok=False)
@@ -231,6 +232,7 @@ def write(
             no_verify=False,
             fifo=None,
             base=None,
+            fifo_wait=True,
     ):  # pylint: disable=unused-argument
         """
         Write address `addr` and data `val` to the output file.
@@ -462,22 +464,11 @@ def write_kern(
         """
         assert p < tc.dev.MAX_PROC
         assert idx < tc.dev.mask_width(p)
-        if not calcx4:
-            addr = tc.dev.C_GROUP_OFFS * (p // tc.dev.P_NUMPRO) \
-                + tc.dev.C_MRAM_BASE \
-                + (p % tc.dev.P_NUMPRO) * tc.dev.MASK_OFFS * 16 + idx * 16
-            idx_x4 = idx
-        else:
-            if idx < tc.dev.MASK_WIDTH_SMALL:
-                idx_x4 = (idx % 4) * (tc.dev.MASK_WIDTH_SMALL // 4) + idx // 4
-            else:
-                idx -= tc.dev.MASK_WIDTH_SMALL
-                idx_x4 = (idx % 4) * ((tc.dev.MASK_WIDTH_LARGE - tc.dev.MASK_WIDTH_SMALL) // 4) \
-                    + idx // 4
-                idx += tc.dev.MASK_WIDTH_SMALL
-            addr = tc.dev.C_GROUP_OFFS * (p // tc.dev.P_NUMPRO) \
-                + tc.dev.C_MRAM_BASE \
-                + (p % tc.dev.P_NUMPRO) * tc.dev.MASK_OFFS * 16 + idx_x4 * 16
+
+        idx_x4 = idx if not calcx4 else kernels.calcx4_index(idx)
+        addr = tc.dev.C_GROUP_OFFS * (p // tc.dev.P_NUMPRO) \
+            + tc.dev.C_MRAM_BASE \
+            + (p % tc.dev.P_NUMPRO) * tc.dev.MASK_OFFS * 16 + idx_x4 * 16
 
         if not verify_only:
             if self.kernel_mem is not None:
@@ -796,6 +787,7 @@ def write(
             no_verify=False,
             fifo=None,
             base=None,
+            fifo_wait=True,
     ):  # pylint: disable=unused-argument
         """
         Write address `addr` and data `val` to the .mem file.
@@ -911,6 +903,7 @@ def write(
             no_verify=False,
             fifo=None,
             base=None,
+            fifo_wait=True,
     ):
         """
         Write address `addr` and data `val` to the .c file.
@@ -940,17 +933,19 @@ def write(
         else:
             if not self.fast_fifo:
                 addr = self.apb_base + tc.dev.C_FIFO_BASE
-                self.memfile.write(f'{indent}while (((*((volatile uint32_t *) '
-                                   f'0x{addr + tc.dev.FIFO_STAT*4:08x})'
-                                   f' & {1 << fifo})) != 0); // Wait for FIFO {fifo}\n')
+                if fifo_wait:
+                    self.memfile.write(f'{indent}while (((*((volatile uint32_t *) '
+                                       f'0x{addr + tc.dev.FIFO_STAT*4:08x})'
+                                       f' & {1 << fifo})) != 0); // Wait for FIFO {fifo}\n')
                 self.memfile.write(f'{indent}*((volatile uint32_t *) '
                                    f'0x{addr + tc.dev.FIFO_REG*4 + fifo*4:08x}) = '
                                    f'{val};{comment}\n')
             else:
                 addr = tc.dev.FAST_FIFO_BASE
-                self.memfile.write(f'{indent}while (((*((volatile uint32_t *) '
-                                   f'0x{addr + tc.dev.FAST_FIFO_SR*4:08x})'
-                                   f' & 2)) != 0); // Wait for FIFO\n')
+                if fifo_wait:
+                    self.memfile.write(f'{indent}while (((*((volatile uint32_t *) '
+                                       f'0x{addr + tc.dev.FAST_FIFO_SR*4:08x})'
+                                       f' & 2)) != 0); // Wait for FIFO\n')
                 self.memfile.write(f'{indent}*((volatile uint32_t *) '
                                    f'0x{addr + tc.dev.FAST_FIFO_DR*4:08x}) = '
                                    f'{val};{comment}\n')
 
@@ -10,9 +10,9 @@
 
 
 VSYNC_LEADIN = 10
-VSYNC_HIGH = 5000
-VSYNC_LOW = 2000
-RETRACE = 318
+VSYNC_HIGH = 50  # 5000
+VSYNC_LOW = 20  # 2000
+RETRACE = 5  # 318
 FINAL = 10
 
 
 
@@ -211,7 +211,7 @@ def load(
     if verbose:
         print(f'Checkpoint for epoch {checkpoint["epoch"]}, model {checkpoint["arch"]} - '
               'weight and bias data:')
-        print('Layer  InCh OutCh  Weights         Quant Shift  Min Max   Size '
+        print(' InCh OutCh  Weights         Quant Shift  Min Max   Size '
               'Key                                 Bias       Quant  Min Max Size Key')
         for ll in range(layers):
             if ll < len(weights) and weights[ll] is not None:
@@ -224,8 +224,7 @@ def load(
                     output_shift_shape = output_shift[ll]
                 else:
                     output_shift_shape = 'N/A'
-                print(f'{ll:4}: '
-                      f'{input_channels[ll]:5} {output_channels[ll]:5}  '
+                print(f'{input_channels[ll]:5} {output_channels[ll]:5}  '
                       f'{weight_shape:15} '
                       f'{quant[ll]:5} {output_shift_shape:5} '
                       f'{weight_min[ll]:4} {weight_max[ll]:3} {weight_size[ll]:6} '
 
@@ -64,6 +64,8 @@ def create_net(  # pylint: disable=too-many-arguments,too-many-locals,too-many-b
     """
     Create the CMSIS NN network.
     """
+    wprint('CMSIS-NN code generation is unsupported.')
+
     if output_width[-1] != 8:
         wprint('CMSIS network generator does not currently support `output_width` that is not 8. '
                'Forcing to 8 bit.')  # FIXME: Support 32-bit output
 
@@ -207,12 +207,16 @@ def get_parser():
                             f"(default: {camera.RETRACE})")
     group.add_argument('--input-csv-period', metavar='N', default=80,
                        help="period for .csv input data (default: 80)")
+    group.add_argument('--input-pix-clk', metavar='N', default=9,
+                       help="pixel clock for .csv input data (default: 9)")
     group.add_argument('--input-sync', action='store_true', default=False,
                        help="use synchronous camera input (default: false)")
     group.add_argument('--input-fifo', action='store_true', default=False,
                        help="use software FIFO to buffer input (default: false)")
     group.add_argument('--autogen', default='None', metavar='S',
                        help="directory location for autogen_list (default: None)")
+    group.add_argument('--autogen_list', default='autogen_list', metavar='S',
+                       help="file name for autogen_list")
     group.add_argument('--input-filename', default='input', metavar='S',
                        help="input .mem file name base (default: 'input' -> 'input.mem')")
     group.add_argument('--output-filename', default='output', metavar='S',
@@ -370,7 +374,7 @@ def get_parser():
         wprint('`--unload` is no longer needed, and is ignored.')
 
     if args.allow_streaming:
-        wprint('`--allow-streaming` is not supported.')
+        wprint('`--allow-streaming` is unsupported.')
 
     # Set disabled legacy arguments
     args.unload = False
 
@@ -289,6 +289,7 @@ def pool2d(
         pool,
         stride,
         average,
+        dilation=(1, 1),
         floor=True,
         debug=False,
 ):
@@ -305,7 +306,8 @@ def pool2d(
             for row in range(0, output_size[1]*stride[0], stride[0]):
                 for col in range(0, output_size[2]*stride[1], stride[1]):
                     if average:
-                        avg = np.average(data[c][row:row+pool[0], col:col+pool[1]])
+                        avg = np.average(data[c][row:row+pool[0]*dilation[0]:dilation[0],
+                                                 col:col+pool[1]*dilation[1]:dilation[1]])
                         if floor:
                             if avg < 0:
                                 val = np.ceil(avg).astype(np.int64).clip(min=-128, max=127)
@@ -314,20 +316,26 @@ def pool2d(
                         else:
                             val = np.floor(avg + 0.5).astype(np.int64).clip(min=-128, max=127)
                     else:
-                        val = np.amax(data[c][row:row+pool[0], col:col+pool[1]])
+                        val = np.amax(data[c][row:row+pool[0]*dilation[0]:dilation[0],
+                                              col:col+pool[1]*dilation[1]:dilation[1]])
                     ref[c][row//stride[0]][col//stride[1]] = val
 
     # Fast computation using NumPy
-    data_pad = data[:, :(data.shape[1] - pool[0]) // stride[0] * stride[0] + pool[0],
-                    :(data.shape[2] - pool[1]) // stride[1] * stride[1] + pool[1], ...]
+    data_pad = data[
+        :,
+        :(data.shape[1] - pool[0] + dilation[0] - 1) // stride[0] * stride[0] + pool[0],
+        :(data.shape[2] - pool[1] + dilation[1] - 1) // stride[1] * stride[1] + pool[1],
+        ...
+    ]
     h, w = data_pad.strides[1:]
 
     view = as_strided(data_pad,
                       shape=(data_pad.shape[0],
-                             1 + (data_pad.shape[1]-pool[0]) // stride[0],
-                             1 + (data_pad.shape[2]-pool[1]) // stride[1],
+                             1 + (data_pad.shape[1] - pool[0] - dilation[0] + 1) // stride[0],
+                             1 + (data_pad.shape[2] - pool[1] - dilation[1] + 1) // stride[1],
                              pool[0], pool[1]),
-                      strides=(data_pad.strides[0], stride[0] * h, stride[1] * w, h, w),
+                      strides=(data_pad.strides[0], stride[0] * h,
+                               stride[1] * w, h * dilation[0], w * dilation[1]),
                       writeable=False)
 
     if average:
@@ -343,7 +351,7 @@ def pool2d(
         if not match:
             eprint('NumPy <-> Python mismatch in compute.pool2d')
 
-    assert pooled.shape == tuple(output_size)
+    assert pooled.shape == tuple(output_size), f'shape mismatch {pooled.shape} vs {output_size}'
 
     return pooled
 
@@ -355,6 +363,7 @@ def pool1d(
         pool,
         stride,
         average,
+        dilation=1,
         floor=True,
         debug=False,
 ):  # pylint: disable=unused-argument
@@ -367,7 +376,7 @@ def pool1d(
     for c in range(input_size[0]):
         for x in range(0, output_size[1]*stride, stride):
             if average:
-                avg = np.average(data[c][x:x+pool])
+                avg = np.average(data[c][x:x+pool*dilation:dilation])
                 if avg < 0:
                     val = np.ceil(avg).astype(np.int64).clip(min=-128, max=127)
                 else:
 
@@ -250,6 +250,10 @@ def main():
     conv_groups = params['conv_groups'][:layers]
     write_gap = params['write_gap'][:layers]
     bypass = params['bypass'][:layers]
+    bias_group_map = params['bias_group_map'][:layers]
+    calcx4 = [True] * layers if args.calcx4 else params['calcx4'][:layers]
+    readahead = [True] * layers if args.rd_ahead else params['readahead'][:layers]
+    pool_dilation = params['pool_dilation'][:layers]
 
     # Command line override
     if args.input_offset is not None:
@@ -304,6 +308,8 @@ def main():
     while ll < layers:
         if input_channels[ll] <= 0:
             eprint(f'Must specify `in_channels` for layer {ll}.')
+        if quantization[ll] is None:
+            quantization[ll] = 8 if not bypass[ll] and operator[ll] != op.NONE else 0  # Defaults
         if operator[ll] != op.NONE and not bypass[ll]:
             if quantization[ll] == -1:
                 w = np.abs(weights[ll])
@@ -366,13 +372,13 @@ def main():
                 eprint(f'{op.string(operator[ll])} in layer {ll} does not support non-square '
                        f'pooling stride (currently set to '
                        f'{pool_stride[ll][0]}x{pool_stride[ll][1]}).')
-            pooled_size = [(input_dim[ll][0] + pool_stride[ll][0] - pool[ll][0])
-                           // pool_stride[ll][0],
-                           (input_dim[ll][1] + pool_stride[ll][1] - pool[ll][1])
-                           // pool_stride[ll][1]]
+            pooled_size = [(input_dim[ll][0] + pool_stride[ll][0] - pool[ll][0]
+                            - pool_dilation[ll][0] + 1) // pool_stride[ll][0],
+                           (input_dim[ll][1] + pool_stride[ll][1] - pool[ll][1]
+                            - pool_dilation[ll][1] + 1) // pool_stride[ll][1]]
         else:
-            pooled_size = [(input_dim[ll][0] + pool_stride[ll][0] - pool[ll][0])
-                           // pool_stride[ll][0],
+            pooled_size = [(input_dim[ll][0] + pool_stride[ll][0] - pool[ll][0]
+                            - pool_dilation[ll][0] + 1) // pool_stride[ll][0],
                            1]
 
         pooled_dim[ll] = pooled_size
@@ -575,24 +581,26 @@ def main():
             measure_energy=args.energy,
             timer=args.timer,
             board_name=args.board_name,
-            rd_ahead=args.rd_ahead,
-            calcx4=args.calcx4,
+            rd_ahead=readahead,
+            calcx4=calcx4,
             rtl_preload=args.rtl_preload,
             result_output=args.result_output,
             weight_start=args.weight_start,
             wfi=args.wfi,
             bypass=bypass,
+            bias_group_map=bias_group_map,
+            pool_dilation=pool_dilation,
+            input_pix_clk=args.input_pix_clk,
         )
         if not args.embedded_code and args.autogen.lower() != 'none':
             rtlsim.append_regression(
                 args.top_level,
                 tn,
                 args.queue_name,
                 args.autogen,
+                args.autogen_list,
             )
     else:
-        wprint('CMSIS-NN code generation is unsupported.')
-
         cmsisnn.create_net(
             args.prefix,
             args.verbose,