Skip to content

Commit 57f96f2

Browse files
authored
Added number to string benches for json_writer (#7864)
# Which issue does this PR close?

- Closes: None

# Rationale for this change

It is suggested to merge benches before merging a speed optimization (see #7819).

# What changes are included in this PR?

Adding benches that convert arrays of the following types to strings:

- i64
- i32
- f64
- f32
- i64, i32, f64, f32 (mixed)

# Are these changes tested?

I am not sure we are testing benches.

# Are there any user-facing changes?

No
1 parent 985ec7e commit 57f96f2

File tree

1 file changed

+139
-1
lines changed

1 file changed

+139
-1
lines changed

arrow/benches/json_writer.rs

Lines changed: 139 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ use arrow::util::bench_util::{
2525
use arrow::util::test_util::seedable_rng;
2626
use arrow_array::{Array, ListArray, RecordBatch, StructArray};
2727
use arrow_buffer::{BooleanBuffer, NullBuffer, OffsetBuffer};
28-
use arrow_json::LineDelimitedWriter;
28+
use arrow_json::{LineDelimitedWriter, ReaderBuilder};
2929
use rand::Rng;
30+
use serde::Serialize;
3031
use std::sync::Arc;
3132

3233
const NUM_ROWS: usize = 65536;
@@ -181,6 +182,138 @@ fn bench_struct_list(c: &mut Criterion) {
181182
do_bench(c, "bench_struct_list", &batch)
182183
}
183184

185+
fn do_number_to_string_bench<S: Serialize>(
186+
name: &str,
187+
c: &mut Criterion,
188+
schema: Arc<Schema>,
189+
rows: Vec<S>,
190+
) {
191+
c.bench_function(name, |b| {
192+
b.iter(|| {
193+
let mut decoder = ReaderBuilder::new(schema.clone())
194+
.with_coerce_primitive(true) // important for coercion
195+
.build_decoder()
196+
.expect("Failed to build decoder");
197+
198+
decoder.serialize(&rows).expect("Failed to serialize rows");
199+
200+
decoder
201+
.flush()
202+
.expect("Failed to flush")
203+
.expect("No RecordBatch produced");
204+
})
205+
});
206+
}
207+
208+
fn bench_i64_to_string(c: &mut Criterion) {
209+
#[derive(Serialize)]
210+
struct TestRow {
211+
val: i64,
212+
}
213+
214+
let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, false)]));
215+
216+
let a_bunch_of_numbers = create_primitive_array::<Int64Type>(NUM_ROWS, 0.0);
217+
218+
let rows: Vec<TestRow> = (0..NUM_ROWS)
219+
.map(|i| TestRow {
220+
val: a_bunch_of_numbers.value(i),
221+
})
222+
.collect();
223+
224+
do_number_to_string_bench("i64_to_string", c, schema, rows)
225+
}
226+
227+
fn bench_i32_to_string(c: &mut Criterion) {
228+
#[derive(Serialize)]
229+
struct TestRow {
230+
val: i32,
231+
}
232+
233+
let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, false)]));
234+
235+
let a_bunch_of_numbers = create_primitive_array::<Int32Type>(NUM_ROWS, 0.0);
236+
237+
let rows: Vec<TestRow> = (0..NUM_ROWS)
238+
.map(|i| TestRow {
239+
val: a_bunch_of_numbers.value(i),
240+
})
241+
.collect();
242+
243+
do_number_to_string_bench("i32_to_string", c, schema, rows)
244+
}
245+
246+
fn bench_f32_to_string(c: &mut Criterion) {
247+
#[derive(Serialize)]
248+
struct TestRow {
249+
val: f32,
250+
}
251+
252+
let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, false)]));
253+
254+
let a_bunch_of_numbers = create_primitive_array::<Float32Type>(NUM_ROWS, 0.0);
255+
256+
let rows: Vec<TestRow> = (0..NUM_ROWS)
257+
.map(|i| TestRow {
258+
val: a_bunch_of_numbers.value(i),
259+
})
260+
.collect();
261+
262+
do_number_to_string_bench("f32_to_string", c, schema, rows)
263+
}
264+
265+
fn bench_f64_to_string(c: &mut Criterion) {
266+
#[derive(Serialize)]
267+
struct TestRow {
268+
val: f64,
269+
}
270+
271+
let schema = Arc::new(Schema::new(vec![Field::new("val", DataType::Utf8, false)]));
272+
273+
let a_bunch_of_numbers = create_primitive_array::<Float64Type>(NUM_ROWS, 0.0);
274+
275+
let rows: Vec<TestRow> = (0..NUM_ROWS)
276+
.map(|i| TestRow {
277+
val: a_bunch_of_numbers.value(i),
278+
})
279+
.collect();
280+
281+
do_number_to_string_bench("f64_to_string", c, schema, rows)
282+
}
283+
284+
fn bench_mixed_numbers_to_string(c: &mut Criterion) {
285+
#[derive(Serialize)]
286+
struct TestRow {
287+
val1: f64,
288+
val2: f32,
289+
val3: i64,
290+
val4: i32,
291+
}
292+
293+
let schema = Arc::new(Schema::new(vec![
294+
Field::new("val1", DataType::Utf8, false),
295+
Field::new("val2", DataType::Utf8, false),
296+
Field::new("val3", DataType::Utf8, false),
297+
Field::new("val4", DataType::Utf8, false),
298+
]));
299+
300+
let f64_array = create_primitive_array::<Float64Type>(NUM_ROWS, 0.0);
301+
let f32_array = create_primitive_array::<Float32Type>(NUM_ROWS, 0.0);
302+
let i64_array = create_primitive_array::<Int64Type>(NUM_ROWS, 0.0);
303+
let i32_array = create_primitive_array::<Int32Type>(NUM_ROWS, 0.0);
304+
305+
let rows: Vec<TestRow> = (0..NUM_ROWS)
306+
.map(|i| TestRow {
307+
val1: f64_array.value(i),
308+
val2: f32_array.value(i),
309+
val3: i64_array.value(i),
310+
val4: i32_array.value(i),
311+
})
312+
.collect();
313+
314+
do_number_to_string_bench("mixed_numbers_to_string", c, schema, rows)
315+
}
316+
184317
fn criterion_benchmark(c: &mut Criterion) {
185318
bench_integer(c);
186319
bench_float(c);
@@ -192,6 +325,11 @@ fn criterion_benchmark(c: &mut Criterion) {
192325
bench_list(c);
193326
bench_nullable_list(c);
194327
bench_struct_list(c);
328+
bench_f64_to_string(c);
329+
bench_f32_to_string(c);
330+
bench_i64_to_string(c);
331+
bench_i32_to_string(c);
332+
bench_mixed_numbers_to_string(c);
195333
}
196334

197335
criterion_group!(benches, criterion_benchmark);

0 commit comments

Comments
 (0)