Prevent malformed kernel headers on Windows when not using --mexpress (#244)

Robert Muchsel · web-flow · commit 7cea14ae5fca · 2022-06-15T17:40:25.000-05:00
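* Prevent malformed kernel headers on Windows when not using --mexpress: store kernel bytes as `uint8`, assert that each kernel value is a `numpy.int64` in the range 0-255, and mask emitted array words to 32 bits
* Add `--apb` as alias for `--no-pll`
* Lowercase `True`/`False` to `true`/`false` in several help-text defaults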
diff --git a/izer/commandline.py b/izer/commandline.py
@@ -54,14 +54,14 @@ def get_parser() -> argparse.Namespace:
     mgroup = group.add_mutually_exclusive_group()
     mgroup.add_argument('--pll', action='store_true', default=None,
                         help="enable PLL (default: automatic)")
-    mgroup.add_argument('--no-pll', action='store_false', dest='pll',
+    mgroup.add_argument('--no-pll', '--apb', action='store_false', dest='pll',
                         help="disable PLL (default: automatic)")
     mgroup = group.add_mutually_exclusive_group()
     mgroup.add_argument('--balance-speed', action='store_true', default=True,
-                        help="balance data and weight loading speed and power (default: True)")
+                        help="balance data and weight loading speed and power (default: true)")
     mgroup.add_argument('--max-speed', action='store_false', dest='balance_speed',
                         help="load data and weights as fast as possible (MAX78002 only, "
-                             "requires --pll, default: False)")
+                             "requires --pll, default: false)")
     group.add_argument('--config-file', required=True, metavar='S',
                        help="YAML configuration file containing layer configuration")
     group.add_argument('--checkpoint-file', metavar='S',
@@ -188,7 +188,7 @@ def get_parser() -> argparse.Namespace:
                             "default: false)")
     group.add_argument('--no-fifo-wait', dest='fifo_wait', action='store_false', default=True,
                        help="do not check the FIFO for available space (requires matching source "
-                            "speed to inference, default: False)")
+                            "speed to inference, default: false)")
     group.add_argument('--fifo-go', action='store_true', default=False,
                        help="start processing before first FIFO push (default: false)")
     group.add_argument('--slow-load', type=int, metavar='N', default=0,
@@ -282,13 +282,13 @@ def get_parser() -> argparse.Namespace:
     group.add_argument('--debug-snoop', action='store_true', default=False,
                        help="insert snoop register debug code (default: False)")
     group.add_argument('--snoop-loop', action='store_true', default=False,
-                       help="insert snoop loop (default: False)")
+                       help="insert snoop loop (default: false)")
     group.add_argument('--ignore-hw-limits', action='store_true', default=False,
-                       help="ignore certain hardware limits (default: False)")
+                       help="ignore certain hardware limits (default: false)")
     group.add_argument('--ignore-bn', action='store_true', default=False,
-                       help="ignore BatchNorm weights in checkpoint file (default: False)")
+                       help="ignore BatchNorm weights in checkpoint file (default: false)")
     group.add_argument('--ignore-activation', action='store_true', default=False,
-                       help="ignore activations in YAML file (default: False)")
+                       help="ignore activations in YAML file (default: false)")
     group.add_argument('--no-greedy-kernel', action='store_false', dest='greedy_kernel_allocator',
                        default=True,
                        help="do not use greedy kernel memory allocator (default: use)")
diff --git a/izer/kernels.py b/izer/kernels.py
@@ -100,7 +100,7 @@ def load(  # pylint: disable=too-many-branches,too-many-statements
     kernel_map = np.full((tc.dev.MAX_PROC, tc.dev.MASK_WIDTH_LARGE),
                          fill_value=_INVALID_VALUE, dtype=np.int64)
     kernels_used = np.zeros((tc.dev.MAX_PROC, tc.dev.MASK_WIDTH_LARGE), dtype=np.int64)
-    kernel_data = np.zeros((tc.dev.MAX_PROC, tc.dev.MASK_WIDTH_LARGE, 9), dtype=np.int8)
+    kernel_data = np.zeros((tc.dev.MAX_PROC, tc.dev.MASK_WIDTH_LARGE, 9), dtype=np.uint8)
     # There are four 32-bit words per 9-byte kernel.
     # The value map is initialized with zeros so we can later ignore unused entries and use
     # memcpy() on initialized and uninitialized data.
@@ -438,6 +438,8 @@ def add_kernel_data(ll, p, col_target, b):
                     kernel_map[p][col] = ll
 
                 assert kernels_used[p][col] <= 8
+                assert isinstance(b, np.int64), f'Kernel is type {type(b)} instead of numpy.int64'
+                assert 0 <= b <= 255, f'Trying to add kernel value {b}'
                 kernel_data[p][col][8 - kernels_used[p][col]] = b & 0xff
                 kernels_used[p][col] += 1
 
@@ -457,7 +459,7 @@ def add_kernel_data(ll, p, col_target, b):
                 col_target, col_bytes = divmod(start_col * ksize * in_exp, 9)
                 # Pad out the leftovers
                 for _ in range(col_bytes // qfactor):  # FIXME for quantization
-                    col_target = add_kernel_data(ll, p, col_target, 0)
+                    col_target = add_kernel_data(ll, p, col_target, np.int64(0))
 
                 out_range = out_expand[ll] if conv_groups[ll] == 1 else 1
                 for expand in range(out_range):
@@ -506,8 +508,10 @@ def add_kernel_data(ll, p, col_target, b):
                                                 & (2**abs(quantization[ll])-1)
                                             if not flatten[ll]:
                                                 k |= this_kern << (i * abs(quantization[ll]))
-                                            else:
+                                            elif len(k) > 0:
                                                 k = np.append(k, this_kern)
+                                            else:
+                                                k = this_kern
                                         n += 1
                                     mask >>= 1
                                 if debug:
@@ -525,8 +529,8 @@ def add_kernel_data(ll, p, col_target, b):
                                             ),
                                         )
                                     for i in range(0, len(k) // qfactor):
-                                        e = 0
-                                        for j in range(qfactor):
+                                        e = k[i * qfactor]
+                                        for j in range(1, qfactor):
                                             e |= k[i * qfactor + j] << (j * abs(quantization[ll]))
                                         col_target = add_kernel_data(ll, p, col_target, e)
                                 else:
@@ -536,7 +540,7 @@ def add_kernel_data(ll, p, col_target, b):
 
                             else:  # When expanding, need to pad with zero kernels if needed
                                 for _ in range(ksize // qfactor):
-                                    col_target = add_kernel_data(ll, p, col_target, 0)
+                                    col_target = add_kernel_data(ll, p, col_target, np.int64(0))
 
                         # Consume kernels
                         if not flatten[ll]:
@@ -552,7 +556,7 @@ def add_kernel_data(ll, p, col_target, b):
                    and kernels_used[p][kern_offs[ll] + col_target] > 0:  # Partials
                     col_target += 1
                 while col_target - start_col < kern_len[ll]:
-                    col_target = add_kernel_data(ll, p, col_target, 0)
+                    col_target = add_kernel_data(ll, p, col_target, np.int64(0))
                 if flatten[ll]:
                     kern_len[ll] = col_target
                 elif not state.new_kernel_loader:
diff --git a/izer/toplevel.py b/izer/toplevel.py
@@ -888,7 +888,7 @@ def c_define(
     prefix, formatting = fmt.split('%')
     memfile.write(f'#define {define_name} {{ \\\n  ')
     for i, e in enumerate(array):
-        memfile.write(f'{prefix}{e:{formatting}}')
+        memfile.write(f'{prefix}{e & 0xffffffff:{formatting}}')
         if i + 1 < len(array):
             memfile.write(', ')
             if (i + 1) % columns == 0: