Skip to content

Commit 514847f

Browse files
committed
Add StringView benchmark for regexp_is_match
Signed-off-by: Tai Le Manh <[email protected]>
1 parent 595d64c commit 514847f

File tree

1 file changed: 51 additions and 10 deletions

arrow/benches/comparison_kernels.rs

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,18 @@
1717

1818
#[macro_use]
1919
extern crate criterion;
20-
use arrow::util::test_util::seedable_rng;
21-
use criterion::Criterion;
22-
2320
extern crate arrow;
2421

22+
use std::time::Duration;
23+
2524
use arrow::compute::kernels::cmp::*;
2625
use arrow::util::bench_util::*;
26+
use arrow::util::test_util::seedable_rng;
2727
use arrow::{array::*, datatypes::Float32Type, datatypes::Int32Type};
2828
use arrow_buffer::IntervalMonthDayNano;
2929
use arrow_string::like::*;
3030
use arrow_string::regexp::regexp_is_match_utf8_scalar;
31+
use criterion::{Criterion, SamplingMode};
3132
use rand::rngs::StdRng;
3233
use rand::Rng;
3334

@@ -53,6 +54,15 @@ fn bench_nilike_utf8_scalar(arr_a: &StringArray, value_b: &str) {
5354
nilike(arr_a, &StringArray::new_scalar(value_b)).unwrap();
5455
}
5556

57+
fn bench_regexp_is_match_utf8view_scalar(arr_a: &StringViewArray, value_b: &str) {
58+
regexp_is_match_utf8_scalar(
59+
criterion::black_box(arr_a),
60+
criterion::black_box(value_b),
61+
None,
62+
)
63+
.unwrap();
64+
}
65+
5666
fn bench_regexp_is_match_utf8_scalar(arr_a: &StringArray, value_b: &str) {
5767
regexp_is_match_utf8_scalar(
5868
criterion::black_box(arr_a),
@@ -78,6 +88,7 @@ fn add_benchmark(c: &mut Criterion) {
7888
let arr_month_day_nano_b = create_month_day_nano_array_with_seed(SIZE, 0.0, 43);
7989

8090
let arr_string = create_string_array::<i32>(SIZE, 0.0);
91+
let arr_string_view = create_string_view_array(SIZE, 0.0);
8192

8293
let scalar = Float32Array::from(vec![1.0]);
8394

@@ -322,35 +333,65 @@ fn add_benchmark(c: &mut Criterion) {
322333
});
323334

324335
// StringArray: NOT ILIKE benchmarks, followed by regexp_is_match benchmarks (StringArray and StringViewArray)
336+
let mut group = c.benchmark_group("StringArray: NOT ILIKE benchmarks".to_string());
337+
group.sampling_mode(SamplingMode::Flat);
338+
group.sample_size(60);
339+
group.measurement_time(Duration::from_secs(6));
325340

326-
c.bench_function("nilike_utf8 scalar equals", |b| {
341+
group.bench_function("nilike_utf8 scalar equals", |b| {
327342
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "xxXX"))
328343
});
329344

330-
c.bench_function("nilike_utf8 scalar contains", |b| {
345+
group.bench_function("nilike_utf8 scalar contains", |b| {
331346
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xxXX%"))
332347
});
333348

334-
c.bench_function("nilike_utf8 scalar ends with", |b| {
349+
group.bench_function("nilike_utf8 scalar ends with", |b| {
335350
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xXXx"))
336351
});
337352

338-
c.bench_function("nilike_utf8 scalar starts with", |b| {
353+
group.bench_function("nilike_utf8 scalar starts with", |b| {
339354
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "XXXx%"))
340355
});
341356

342-
c.bench_function("nilike_utf8 scalar complex", |b| {
357+
group.bench_function("nilike_utf8 scalar complex", |b| {
343358
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xx_xX%xXX"))
344359
});
345360

346-
c.bench_function("regexp_matches_utf8 scalar starts with", |b| {
361+
group.bench_function("regexp_matches_utf8 scalar starts with", |b| {
347362
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, "^xx"))
348363
});
349364

350-
c.bench_function("regexp_matches_utf8 scalar ends with", |b| {
365+
group.bench_function("regexp_matches_utf8 scalar contains", |b| {
366+
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, ".*xx.*"))
367+
});
368+
369+
group.bench_function("regexp_matches_utf8 scalar ends with", |b| {
351370
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, "xx$"))
352371
});
353372

373+
group.bench_function("regexp_matches_utf8 scalar complex", |b| {
374+
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, ".*xx.xX.*xXX"))
375+
});
376+
377+
group.bench_function("regexp_matches_utf8view scalar starts with", |b| {
378+
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, "^xx"))
379+
});
380+
381+
group.bench_function("regexp_matches_utf8view scalar contains", |b| {
382+
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, ".*xx.*"))
383+
});
384+
385+
group.bench_function("regexp_matches_utf8view scalar ends with", |b| {
386+
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, "xx$"))
387+
});
388+
389+
group.bench_function("regexp_matches_utf8view scalar complex", |b| {
390+
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, ".*xx.xX.*xXX"))
391+
});
392+
393+
group.finish();
394+
354395
// DictionaryArray benchmarks
355396

356397
let strings = create_string_array::<i32>(20, 0.);

0 commit comments

Comments
 (0)