Skip to content

Commit e80deea

Browse files
committed
Add StringView benchmark for regexp_is_match
Signed-off-by: Tai Le Manh <[email protected]>
1 parent 595d64c commit e80deea

File tree

1 file changed, with 55 additions and 5 deletions.

arrow/benches/comparison_kernels.rs

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,18 @@
1717

1818
#[macro_use]
1919
extern crate criterion;
20-
use arrow::util::test_util::seedable_rng;
21-
use criterion::Criterion;
22-
2320
extern crate arrow;
2421

22+
use std::time::Duration;
23+
2524
use arrow::compute::kernels::cmp::*;
2625
use arrow::util::bench_util::*;
26+
use arrow::util::test_util::seedable_rng;
2727
use arrow::{array::*, datatypes::Float32Type, datatypes::Int32Type};
2828
use arrow_buffer::IntervalMonthDayNano;
2929
use arrow_string::like::*;
3030
use arrow_string::regexp::regexp_is_match_utf8_scalar;
31+
use criterion::{Criterion, SamplingMode};
3132
use rand::rngs::StdRng;
3233
use rand::Rng;
3334

@@ -53,6 +54,15 @@ fn bench_nilike_utf8_scalar(arr_a: &StringArray, value_b: &str) {
5354
nilike(arr_a, &StringArray::new_scalar(value_b)).unwrap();
5455
}
5556

57+
/// Benchmark body: runs `regexp_is_match_utf8_scalar` once over a `StringViewArray`.
///
/// * `arr_a` - the string-view array passed as the first argument to the kernel.
/// * `value_b` - the scalar pattern passed as the second argument to the kernel.
///
/// Both inputs are wrapped in `criterion::black_box` before being handed to the kernel.
/// The third argument is always `None`
/// (NOTE(review): presumably the optional regex flags - confirm against the kernel's signature).
/// The returned value is unwrapped and then discarded, so this function panics if the
/// kernel call does not succeed.
fn bench_regexp_is_match_utf8view_scalar(arr_a: &StringViewArray, value_b: &str) {
58+
regexp_is_match_utf8_scalar(
59+
criterion::black_box(arr_a),
60+
criterion::black_box(value_b),
61+
None,
62+
)
63+
.unwrap();
64+
}
65+
5666
fn bench_regexp_is_match_utf8_scalar(arr_a: &StringArray, value_b: &str) {
5767
regexp_is_match_utf8_scalar(
5868
criterion::black_box(arr_a),
@@ -78,6 +88,7 @@ fn add_benchmark(c: &mut Criterion) {
7888
let arr_month_day_nano_b = create_month_day_nano_array_with_seed(SIZE, 0.0, 43);
7989

8090
let arr_string = create_string_array::<i32>(SIZE, 0.0);
91+
let arr_string_view = create_string_view_array(SIZE, 0.0);
8192

8293
let scalar = Float32Array::from(vec![1.0]);
8394

@@ -343,14 +354,53 @@ fn add_benchmark(c: &mut Criterion) {
343354
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xx_xX%xXX"))
344355
});
345356

346-
c.bench_function("regexp_matches_utf8 scalar starts with", |b| {
357+
// StringArray: regexp_matches_utf8 scalar benchmarks
358+
let mut group =
359+
c.benchmark_group("StringArray: regexp_matches_utf8 scalar benchmarks".to_string());
360+
group.sampling_mode(SamplingMode::Flat);
361+
group.sample_size(60);
362+
group.measurement_time(Duration::from_secs(8));
363+
364+
group.bench_function("regexp_matches_utf8 scalar starts with", |b| {
347365
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, "^xx"))
348366
});
349367

350-
c.bench_function("regexp_matches_utf8 scalar ends with", |b| {
368+
group.bench_function("regexp_matches_utf8 scalar contains", |b| {
369+
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, ".*xx.*"))
370+
});
371+
372+
group.bench_function("regexp_matches_utf8 scalar ends with", |b| {
351373
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, "xx$"))
352374
});
353375

376+
group.bench_function("regexp_matches_utf8 scalar complex", |b| {
377+
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, ".*x{2}.xX.*xXX"))
378+
});
379+
380+
group.finish();
381+
382+
// StringViewArray: regexp_matches_utf8view scalar benchmarks
383+
group =
384+
c.benchmark_group("StringViewArray: regexp_matches_utf8view scalar benchmarks".to_string());
385+
386+
group.bench_function("regexp_matches_utf8view scalar starts with", |b| {
387+
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, "^xx"))
388+
});
389+
390+
group.bench_function("regexp_matches_utf8view scalar contains", |b| {
391+
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, ".*xx.*"))
392+
});
393+
394+
group.bench_function("regexp_matches_utf8view scalar ends with", |b| {
395+
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, "xx$"))
396+
});
397+
398+
group.bench_function("regexp_matches_utf8view scalar complex", |b| {
399+
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, ".*x{2}.xX.*xXX"))
400+
});
401+
402+
group.finish();
403+
354404
// DictionaryArray benchmarks
355405

356406
let strings = create_string_array::<i32>(20, 0.);

0 commit comments

Comments
 (0)