
Commit 236a67d

Author: Robert Muchsel
Add softmax function to allow the last layer to use 4/2/1 bit weights (#104)
1 parent 965d7b8 commit 236a67d

File tree: 6 files changed (+47, -5 lines)


README.md

Lines changed: 4 additions & 2 deletions
@@ -1,6 +1,6 @@
 # MAX78000 Model Training and Synthesis
 
-_January 28, 2021_
+_February 1, 2021_
 
 The Maxim Integrated AI project is comprised of four repositories:
 
@@ -1442,6 +1442,8 @@ The 32-bit intermediate result is multiplied by $2^{totalshift}$, where the tota
 
 Using `output_shift` can help normalize data, particularly when using small weights. By default, `output_shift` is generated by the training software, and it is used for batch normalization as well as quantization-aware training.
 
+*Note:* When using 32-bit wide outputs in the final layer, no hardware shift is performed and `output_shift` is ignored.
+
 Example:
 `output_shift: 2`
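For a concrete reading of the note and the example above: with `output_shift: 2`, 2 is added to the total shift, so the 32-bit intermediate result is multiplied by an extra factor of $2^2 = 4$ (assuming no other contributions to the total shift); with a 32-bit wide final-layer output, however, the hardware shift is skipped entirely.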

@@ -1814,7 +1816,7 @@ To run another inference, ensure all groups are disabled (stopping the state mac
 
 `ai8xize.py` can generate a call to a software Softmax function using the command line switch `--softmax`. That function is provided in the `assets/device-all` folder. To use the provided software Softmax on MAX78000/MAX78002, the last layer output should be 32-bit wide (`output_width: 32`).
 
-The software Softmax function is optimized for processing time and it quantizes the input.
+The software Softmax function is optimized for processing time and it quantizes the input. When the last layer uses weights that are not 8-bits, the software function used will shift the input values first.
 
 ![softmax](docs/softmax.png)
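For intuition, here is a minimal floating-point sketch of the shifted, 2-based Softmax this commit adds. The function name `ref_softmax2_shift` is made up for illustration; the 16384 divisor and the base-2 exponent follow the Q17.14 comments in `assets/device-all/softmax.c` below, but this is not the shipped fixed-point routine:

```c
#include <math.h>
#include <stdint.h>

/* Reference: y_i = 2^((x_i * 2^shift) / 16384) / sum_j 2^((x_j * 2^shift) / 16384) */
static void ref_softmax2_shift(const int32_t *x, int n, int shift, double *y)
{
    double sum = 0.0;
    for (int i = 0; i < n; i++) {
        /* The pre-shift compensates for narrow (4/2/1-bit) last-layer weights */
        y[i] = exp2((double)x[i] * (double)(1 << shift) / 16384.0);
        sum += y[i];
    }
    for (int i = 0; i < n; i++)
        y[i] /= sum;  /* normalize so the outputs sum to 1 */
}
```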

README.pdf

1.55 KB
Binary file not shown.

assets/device-all/softmax.c

Lines changed: 32 additions & 0 deletions
@@ -131,6 +131,38 @@ void softmax_q17p14_q15(const q31_t * vec_in, const uint16_t dim_vec, q15_t * p_
 
 }
 
+/**
+ * @brief Q17.14 fixed point softmax function with input shift, returns Q15
+ * @param[in]  vec_in   pointer to input vector
+ * @param[in]  dim_vec  input vector dimension
+ * @param[in]  in_shift input vector shift count
+ * @param[out] p_out    pointer to output vector
+ * @return none.
+ *
+ * @details
+ *
+ *  Here, instead of typical e based softmax, we use
+ *  2-based softmax, i.e.,:
+ *
+ *  y_i = 2^(x_i/16384) / sum(2^(x_j/16384))
+ *
+ *  The relative output will be different here.
+ *  But mathematically, the gradient will be the same
+ *  with a log(2) scaling factor.
+ */
+
+void softmax_shift_q17p14_q15(q31_t * vec_in, const uint16_t dim_vec, uint8_t in_shift, q15_t * p_out)
+{
+    int16_t i;
+
+    for (i = 0; i < dim_vec; i++)
+    {
+        vec_in[i] <<= in_shift;
+    }
+
+    softmax_q17p14_q15(vec_in, dim_vec, p_out);
+}
+
 /**
  * @} end of Softmax group
  */
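A minimal usage sketch for the new routine follows. The buffer names, the `CNN_NUM_OUTPUTS` value, and the shift of 4 (corresponding to 4-bit final-layer weights) are illustrative assumptions; note that the function shifts `vec_in` in place:

```c
#include <stdint.h>

typedef int32_t q31_t;
typedef int16_t q15_t;

#define CNN_NUM_OUTPUTS 10  /* assumed number of classes */

/* Provided by assets/device-all/softmax.c */
void softmax_shift_q17p14_q15(q31_t *vec_in, const uint16_t dim_vec,
                              uint8_t in_shift, q15_t *p_out);

static q31_t ml_data[CNN_NUM_OUTPUTS];     /* 32-bit wide last-layer output */
static q15_t ml_softmax[CNN_NUM_OUTPUTS];  /* Q15 probabilities */

void classify(void)
{
    /* ... run inference and unload the 32-bit results into ml_data ... */

    /* 4-bit last-layer weights: shift inputs left by 8 - 4 = 4 first */
    softmax_shift_q17p14_q15(ml_data, CNN_NUM_OUTPUTS, 4, ml_softmax);
}
```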

assets/device-all/templatecnn.h

Lines changed: 3 additions & 1 deletion
@@ -29,8 +29,10 @@ typedef int16_t q15_t;
 #define SYS_START LED_On(0)
 #define SYS_COMPLETE LED_Off(0)
 
-/* Unload data from accelerator and run software SoftMax */
+/* Run software SoftMax on unloaded data */
 void softmax_q17p14_q15(const q31_t * vec_in, const uint16_t dim_vec, q15_t * p_out);
+/* Shift the input, then calculate SoftMax */
+void softmax_shift_q17p14_q15(q31_t * vec_in, const uint16_t dim_vec, uint8_t in_shift, q15_t * p_out);
 
 /* Stopwatch - holds the runtime when accelerator finishes */
 extern volatile uint32_t cnn_time;

izer/max7800x.py

Lines changed: 1 addition & 0 deletions
@@ -2265,6 +2265,7 @@ def run_eltwise(
     if softmax:
         apb.softmax_layer(
             output_width=output_width[final_layer],
+            shift=8 - abs(quantization[final_layer]) if not bypass[final_layer] else 0,
         )
 
     summary_stats = '/*\n' + \
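To make the new argument concrete: `8 - abs(quantization[final_layer])` evaluates to 0 for 8-bit final-layer weights, 4 for 4-bit, 6 for 2-bit, and 7 for 1-bit, so narrower weights have their outputs scaled up by $2^{shift}$ before Softmax, while a bypassed final layer passes 0 (no shift).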

izer/toplevel.py

Lines changed: 7 additions & 2 deletions
@@ -772,6 +772,7 @@ def main(
 def softmax_layer(
         memfile,
         output_width=8,
+        shift=0,
 ):
     """
     Write the call to the softmax layer to `memfile`.
@@ -786,8 +787,12 @@ def softmax_layer(
     memfile.write(f'  cnn_unload((uint32_t *) ml_data{"32" if output_width != 32 else ""});\n')
 
     if output_width == 32:
-        memfile.write('  softmax_q17p14_q15((const q31_t *) ml_data, '
-                      'CNN_NUM_OUTPUTS, ml_softmax);\n')
+        if shift == 0:
+            memfile.write('  softmax_q17p14_q15((const q31_t *) ml_data, '
+                          'CNN_NUM_OUTPUTS, ml_softmax);\n')
+        else:
+            memfile.write('  softmax_shift_q17p14_q15((q31_t *) ml_data, '
+                          f'CNN_NUM_OUTPUTS, {shift}, ml_softmax);\n')
     else:
         memfile.write('  arm_softmax_q7_q15((const q7_t *) ml_data32, '
                       'CNN_NUM_OUTPUTS, ml_softmax);\n')
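Assuming a final layer with 4-bit weights (`shift=4`) and `output_width: 32`, the generated `main.c` would then contain calls along these lines (reconstructed from the `memfile.write` strings above):

```c
cnn_unload((uint32_t *) ml_data);
softmax_shift_q17p14_q15((q31_t *) ml_data, CNN_NUM_OUTPUTS, 4, ml_softmax);
```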
