Skip to content

Commit 258cf48

Browse files
Authored by cad-audio, Nileshkumar Vaishnav, and rascani
authored
Adding streaming_conv reference code. (#2521)
* Adding streaming_conv reference code. * Streaming conv support under xtensa * Enabled HiFi5 support for streaming conv kernel. * Add bazel build rules for streaming_conv * Fix formatting * Fix dims_shape array type * Fix VP6 build * Fix asan error * Disable streaming conv test on HIFIMINI & VP6 --------- Co-authored-by: Nileshkumar Vaishnav <nileshv@cadence.com> Co-authored-by: RJ Ascani <rjascani@google.com>
1 parent ba203cf commit 258cf48

15 files changed

+1303
-12
lines changed

tensorflow/lite/micro/kernels/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ tflm_kernel_cc_library(
282282
"split_v.cc",
283283
"squared_difference.cc",
284284
"squeeze.cc",
285+
"streaming_conv.cc",
286+
"streaming_conv_common.cc",
285287
"strided_slice.cc",
286288
"strided_slice_common.cc",
287289
"sub.cc",
@@ -321,6 +323,7 @@ tflm_kernel_cc_library(
321323
"reduce.h",
322324
"reshape.h",
323325
"softmax.h",
326+
"streaming_conv.h",
324327
"strided_slice.h",
325328
"sub.h",
326329
"svdf.h",

tensorflow/lite/micro/kernels/conv.cc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,4 @@ TFLMRegistration Register_CONV_2D() {
147147
return tflite::micro::RegisterOp(ConvInit, ConvPrepare, ConvEval);
148148
}
149149

150-
TFLMRegistration Register_STREAMING_CONV_2D() {
151-
// TODO(rjascani): These should be replaced with Streaming wrapper functions.
152-
return tflite::micro::RegisterOp(ConvInit, ConvPrepare, ConvEval);
153-
}
154-
155150
} // namespace tflite

tensorflow/lite/micro/kernels/conv_test.cc

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,18 @@ static TfLiteConvParams common_conv_params = {
5656
kTfLiteNoType // quantized_bias_type
5757
};
5858

59+
#if not defined(HIFIMINI) && not defined(VISION_P6)
60+
static TfLiteConvParams common_streaming_conv_params = {
61+
kTfLitePaddingValid, // padding
62+
1, // stride_width
63+
1, // stride_height
64+
kTfLiteActNone, // activation
65+
1, // dilation_width_factor
66+
1, // dilation_height_factor
67+
kTfLiteNoType // quantized_bias_type
68+
};
69+
#endif
70+
5971
} // namespace
6072
} // namespace testing
6173
} // namespace tflite
@@ -216,6 +228,55 @@ TF_LITE_MICRO_TEST(HybridModeIsError) {
216228
tflite::Register_CONV_2D(), output_data));
217229
}
218230

231+
#if not defined(HIFIMINI) && not defined(VISION_P6)
232+
TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel64bBiasStreaming) {
233+
constexpr int kInputElementsStreamConv = 20;
234+
static int kInputShapeStreamConv[] = {4, 2, 5, 1, 2};
235+
static const float kInputDataStreamConv[kInputElementsStreamConv] = {
236+
1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5};
237+
238+
constexpr int kFilterElementsStreamConv = 8;
239+
static int kFilterShapeStreamConv[] = {4, 1, 2, 2, 2};
240+
static const float kFilterDataStreamConv[kFilterElementsStreamConv] = {
241+
1, 1, 1, 1, 1, 1, 1, 1};
242+
243+
constexpr int kBiasElementsStreamConv = 1;
244+
static int kBiasShapeStreamConv[] = {1, 1};
245+
static const float kBiasDataStreamConv[kBiasElementsStreamConv] = {0};
246+
247+
constexpr int kOutputElementsStreamConv = 8;
248+
static int kOutputShapeStreamConv[] = {4, 2, 4, 1, 1};
249+
static const float kGoldenDataStreamConv[kOutputElementsStreamConv] = {
250+
6, 10, 14, 18, 12, 20, 28, 36};
251+
252+
int16_t output_data[kOutputElementsStreamConv];
253+
254+
const float input_scale = 0.5f;
255+
const float output_scale = 1.0f;
256+
const int input_zero_point = 0;
257+
const int output_zero_point = 0;
258+
259+
int16_t input_quantized[kInputElementsStreamConv];
260+
int8_t filter_quantized[kFilterElementsStreamConv];
261+
std::int64_t bias_quantized[kBiasElementsStreamConv];
262+
int16_t golden_quantized[kOutputElementsStreamConv];
263+
int zero_points[kBiasElementsStreamConv + 1];
264+
float scales[kBiasElementsStreamConv + 1];
265+
266+
TF_LITE_MICRO_EXPECT_EQ(
267+
kTfLiteOk,
268+
tflite::testing::TestConvQuantizedPerChannel(
269+
kInputShapeStreamConv, kInputDataStreamConv, input_quantized,
270+
input_scale, input_zero_point, kFilterShapeStreamConv,
271+
kFilterDataStreamConv, filter_quantized, kBiasShapeStreamConv,
272+
kBiasDataStreamConv, bias_quantized, scales, zero_points,
273+
kOutputShapeStreamConv, kGoldenDataStreamConv, golden_quantized,
274+
output_scale, output_zero_point,
275+
&tflite::testing::common_streaming_conv_params,
276+
tflite::Register_STREAMING_CONV_2D(), output_data));
277+
}
278+
#endif
279+
219280
TF_LITE_MICRO_TEST(SimpleTestQuantized16x8PerChannel64bBias) {
220281
const int output_dims_count = 12;
221282
int16_t output_data[output_dims_count];
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include "tensorflow/lite/micro/kernels/streaming_conv.h"
17+
18+
#include "tensorflow/lite/c/builtin_op_data.h"
19+
#include "tensorflow/lite/c/common.h"
20+
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
21+
#include "tensorflow/lite/kernels/internal/reference/conv.h"
22+
#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
23+
#include "tensorflow/lite/kernels/kernel_util.h"
24+
#include "tensorflow/lite/micro/kernels/kernel_util.h"
25+
#include "tensorflow/lite/micro/micro_log.h"
26+
27+
namespace tflite {
28+
namespace {
29+
30+
// Slides the persistent streaming-conv input window one column to the left and
// appends the newest input column.
//
// The kernel keeps a window of the last `kw` input columns per row: `pbuf` is
// laid out as [ih][kw][ic], while each call delivers one new column of input
// in `pinp`, laid out as [ih][1][ic].
//
// pbuf: persistent state buffer of ih * kw * ic int16 elements, updated in
//       place.
// pinp: newest input column, ih * ic elements.
// ih:   input height (rows).
// ic:   input channels.
// kw:   filter (window) width in columns.
void updateStreamingConvBuffer(int16_t* pbuf, const int16_t* pinp, int ih,
                               int ic, int kw) {
  /* Step 1: drop the oldest column by sliding the remaining (kw - 1) columns
   * toward the front of each row. memmove is required because source and
   * destination overlap within a row. */
  const int strip_elems = (kw - 1) * ic;
  int16_t* row_dst = pbuf;
  const int16_t* row_src = pbuf + ic;
  for (int i = 0; i < ih; i++) {
    std::memmove(row_dst, row_src, strip_elems * sizeof(int16_t));
    row_dst += kw * ic;
    row_src += kw * ic;
  }

  /* Step 2: write the incoming column into the rightmost slot of each row.
   * These regions never overlap, so a bulk memcpy per row replaces the
   * original element-by-element loop. */
  int16_t* dst = pbuf + ic * (kw - 1);
  const int16_t* src = pinp;
  for (int i = 0; i < ih; i++) {
    std::memcpy(dst, src, ic * sizeof(int16_t));
    dst += kw * ic;
    src += ic;
  }
}
59+
60+
template <typename AccumScalar>
61+
inline void StreamingConvPerChannel(
62+
const ConvParams& params, const int32_t* output_multiplier,
63+
const int32_t* output_shift, const RuntimeShape& input_shape,
64+
const int16_t* input_data, const RuntimeShape& filter_shape,
65+
const int8_t* filter_data, const RuntimeShape& bias_shape,
66+
const AccumScalar* bias_data, const RuntimeShape& output_shape,
67+
int16_t* output_data, int16_t* input_state) {
68+
// Get parameters.
69+
const int stride_width = params.stride_width;
70+
const int stride_height = params.stride_height;
71+
const int dilation_width_factor = params.dilation_width_factor;
72+
const int dilation_height_factor = params.dilation_height_factor;
73+
const int pad_width = params.padding_values.width;
74+
const int pad_height = params.padding_values.height;
75+
76+
// Set min and max value of the output.
77+
const int32_t output_activation_min = params.quantized_activation_min;
78+
const int32_t output_activation_max = params.quantized_activation_max;
79+
80+
// Consistency check.
81+
TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
82+
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
83+
TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
84+
TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
85+
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
86+
const int input_depth = input_shape.Dims(3);
87+
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
88+
if (bias_data) {
89+
TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
90+
}
91+
92+
// Check dimensions of the tensors.
93+
const int input_height = input_shape.Dims(1);
94+
int input_width = input_shape.Dims(2);
95+
const int filter_height = filter_shape.Dims(1);
96+
const int filter_width = filter_shape.Dims(2);
97+
const int filter_input_depth = filter_shape.Dims(3);
98+
99+
/* Update streaming conv buffer with input data */
100+
input_width = filter_width;
101+
const int32_t dims_shape[4] = {1, input_height, filter_width, input_depth};
102+
RuntimeShape input_state_shape(4, dims_shape);
103+
104+
const int groups = input_depth / filter_input_depth;
105+
TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0);
106+
const int filters_per_group = output_depth / groups;
107+
const int output_height = output_shape.Dims(1);
108+
const int output_width = output_shape.Dims(2);
109+
for (int batch = 0; batch < batches; ++batch) {
110+
updateStreamingConvBuffer(input_state,
111+
&input_data[Offset(input_shape, batch, 0, 0, 0)],
112+
input_height, input_depth, filter_width);
113+
for (int out_y = 0; out_y < output_height; ++out_y) {
114+
const int in_y_origin = (out_y * stride_height) - pad_height;
115+
for (int out_x = 0; out_x < output_width; ++out_x) {
116+
const int in_x_origin = (out_x * stride_width) - pad_width;
117+
for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
118+
auto group = out_channel / filters_per_group;
119+
AccumScalar acc = 0;
120+
for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
121+
const int in_y = in_y_origin + dilation_height_factor * filter_y;
122+
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
123+
const int in_x = in_x_origin + dilation_width_factor * filter_x;
124+
125+
// Zero padding by omitting the areas outside the image.
126+
const bool is_point_inside_image =
127+
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) &&
128+
(in_y < input_height);
129+
130+
if (!is_point_inside_image) {
131+
continue;
132+
}
133+
134+
for (int in_channel = 0; in_channel < filter_input_depth;
135+
++in_channel) {
136+
int32_t input_val = input_state[Offset(
137+
input_state_shape, 0, in_y, in_x,
138+
in_channel + group * filter_input_depth)];
139+
int32_t filter_val = filter_data[Offset(
140+
filter_shape, out_channel, filter_y, filter_x, in_channel)];
141+
// Accumulate with 64 bits accumulator.
142+
// int64_t += int8_t * int16_t so the highest value we can
143+
// get from each accumulation is [-127, 127] * ([-32768,
144+
// 32767] -
145+
// [-32768, 32767]), which is [-8322945, 8322945].
146+
// log2(8322945) = 22.99.
147+
acc += filter_val * input_val;
148+
}
149+
}
150+
}
151+
if (bias_data) {
152+
acc += bias_data[out_channel];
153+
}
154+
int32_t scaled_acc = MultiplyByQuantizedMultiplier(
155+
acc, output_multiplier[out_channel], output_shift[out_channel]);
156+
scaled_acc = std::max(scaled_acc, output_activation_min);
157+
scaled_acc = std::min(scaled_acc, output_activation_max);
158+
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
159+
static_cast<int16_t>(scaled_acc);
160+
}
161+
}
162+
}
163+
}
164+
}
165+
166+
// Eval entry point for the streaming conv kernel. Supports int16 activations
// with int8 filters and an optional int32 or int64 bias.
TfLiteStatus StreamingConvEval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kConvInputTensor);
  const TfLiteEvalTensor* filter =
      tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
  const TfLiteEvalTensor* bias =
      (NumInputs(node) == 3)
          ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
          : nullptr;
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);

  TFLITE_DCHECK(node->builtin_data != nullptr);
  const auto& params =
      *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
  TFLITE_DCHECK(node->user_data != nullptr);
  const auto& sdata =
      *(static_cast<const OpDataStreamingConv*>(node->user_data));
  const auto& data = sdata.op_data;

  // Only int16 activations are supported; in/out types already match.
  if (input->type != kTfLiteInt16) {
    MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
                input->type);
    return kTfLiteError;
  }

  // Shared invocation; only the bias pointer type differs between paths.
  // NOTE(review): cast kept from the original — input_state's declared type
  // lives in streaming_conv.h; confirm a named cast is applicable.
  auto run_conv = [&](auto bias_ptr) {
    StreamingConvPerChannel(
        StreamingConvParamsQuantized(params, data),
        data.per_channel_output_multiplier, data.per_channel_output_shift,
        tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<int16_t>(input),
        tflite::micro::GetTensorShape(filter),
        tflite::micro::GetTensorData<int8_t>(filter),
        tflite::micro::GetTensorShape(bias), bias_ptr,
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<int16_t>(output),
        (int16_t*)sdata.input_state);
  };

  // A missing bias takes the 32-bit path with a null bias pointer.
  if (bias == nullptr || bias->type == kTfLiteInt32) {
    run_conv(tflite::micro::GetOptionalTensorData<std::int32_t>(bias));
  } else if (bias->type == kTfLiteInt64) {
    run_conv(tflite::micro::GetOptionalTensorData<std::int64_t>(bias));
  } else {
    MicroPrintf("Bias type %s (%d) not supported.",
                TfLiteTypeGetName(bias->type), bias->type);
    return kTfLiteError;
  }
  return kTfLiteOk;
}
228+
229+
} // namespace
230+
231+
TFLMRegistration Register_STREAMING_CONV_2D() {
232+
return tflite::micro::RegisterOp(StreamingConvInit, StreamingConvPrepare,
233+
StreamingConvEval);
234+
}
235+
236+
} // namespace tflite

0 commit comments

Comments
 (0)