1717
1818#[ macro_use]
1919extern crate criterion;
20- use arrow:: util:: test_util:: seedable_rng;
21- use criterion:: Criterion ;
22-
2320extern crate arrow;
2421
22+ use std:: time:: Duration ;
23+
2524use arrow:: compute:: kernels:: cmp:: * ;
2625use arrow:: util:: bench_util:: * ;
26+ use arrow:: util:: test_util:: seedable_rng;
2727use arrow:: { array:: * , datatypes:: Float32Type , datatypes:: Int32Type } ;
2828use arrow_buffer:: IntervalMonthDayNano ;
2929use arrow_string:: like:: * ;
3030use arrow_string:: regexp:: regexp_is_match_utf8_scalar;
31+ use criterion:: { Criterion , SamplingMode } ;
3132use rand:: rngs:: StdRng ;
3233use rand:: Rng ;
3334
@@ -53,6 +54,15 @@ fn bench_nilike_utf8_scalar(arr_a: &StringArray, value_b: &str) {
5354 nilike ( arr_a, & StringArray :: new_scalar ( value_b) ) . unwrap ( ) ;
5455}
5556
57+ fn bench_regexp_is_match_utf8view_scalar ( arr_a : & StringViewArray , value_b : & str ) {
58+ regexp_is_match_utf8_scalar (
59+ criterion:: black_box ( arr_a) ,
60+ criterion:: black_box ( value_b) ,
61+ None ,
62+ )
63+ . unwrap ( ) ;
64+ }
65+
5666fn bench_regexp_is_match_utf8_scalar ( arr_a : & StringArray , value_b : & str ) {
5767 regexp_is_match_utf8_scalar (
5868 criterion:: black_box ( arr_a) ,
@@ -78,6 +88,7 @@ fn add_benchmark(c: &mut Criterion) {
7888 let arr_month_day_nano_b = create_month_day_nano_array_with_seed ( SIZE , 0.0 , 43 ) ;
7989
8090 let arr_string = create_string_array :: < i32 > ( SIZE , 0.0 ) ;
91+ let arr_string_view = create_string_view_array ( SIZE , 0.0 ) ;
8192
8293 let scalar = Float32Array :: from ( vec ! [ 1.0 ] ) ;
8394
@@ -343,14 +354,53 @@ fn add_benchmark(c: &mut Criterion) {
343354 b. iter ( || bench_nilike_utf8_scalar ( & arr_string, "%xx_xX%xXX" ) )
344355 } ) ;
345356
346- c. bench_function ( "regexp_matches_utf8 scalar starts with" , |b| {
357+ // StringArray: regexp_matches_utf8 scalar benchmarks
358+ let mut group =
359+ c. benchmark_group ( "StringArray: regexp_matches_utf8 scalar benchmarks" . to_string ( ) ) ;
360+ group. sampling_mode ( SamplingMode :: Flat ) ;
361+ group. sample_size ( 60 ) ;
362+ group. measurement_time ( Duration :: from_secs ( 8 ) ) ;
363+
364+ group. bench_function ( "regexp_matches_utf8 scalar starts with" , |b| {
347365 b. iter ( || bench_regexp_is_match_utf8_scalar ( & arr_string, "^xx" ) )
348366 } ) ;
349367
350- c. bench_function ( "regexp_matches_utf8 scalar ends with" , |b| {
368+ group. bench_function ( "regexp_matches_utf8 scalar contains" , |b| {
369+ b. iter ( || bench_regexp_is_match_utf8_scalar ( & arr_string, ".*xx.*" ) )
370+ } ) ;
371+
372+ group. bench_function ( "regexp_matches_utf8 scalar ends with" , |b| {
351373 b. iter ( || bench_regexp_is_match_utf8_scalar ( & arr_string, "xx$" ) )
352374 } ) ;
353375
376+ group. bench_function ( "regexp_matches_utf8 scalar complex" , |b| {
377+ b. iter ( || bench_regexp_is_match_utf8_scalar ( & arr_string, ".*x{2}.xX.*xXX" ) )
378+ } ) ;
379+
380+ group. finish ( ) ;
381+
382+ // StringViewArray: regexp_matches_utf8view scalar benchmarks
383+ group =
384+ c. benchmark_group ( "StringViewArray: regexp_matches_utf8view scalar benchmarks" . to_string ( ) ) ;
385+
386+ group. bench_function ( "regexp_matches_utf8view scalar starts with" , |b| {
387+ b. iter ( || bench_regexp_is_match_utf8view_scalar ( & arr_string_view, "^xx" ) )
388+ } ) ;
389+
390+ group. bench_function ( "regexp_matches_utf8view scalar contains" , |b| {
391+ b. iter ( || bench_regexp_is_match_utf8view_scalar ( & arr_string_view, ".*xx.*" ) )
392+ } ) ;
393+
394+ group. bench_function ( "regexp_matches_utf8view scalar ends with" , |b| {
395+ b. iter ( || bench_regexp_is_match_utf8view_scalar ( & arr_string_view, "xx$" ) )
396+ } ) ;
397+
398+ group. bench_function ( "regexp_matches_utf8view scalar complex" , |b| {
399+ b. iter ( || bench_regexp_is_match_utf8view_scalar ( & arr_string_view, ".*x{2}.xX.*xXX" ) )
400+ } ) ;
401+
402+ group. finish ( ) ;
403+
354404 // DictionaryArray benchmarks
355405
356406 let strings = create_string_array :: < i32 > ( 20 , 0. ) ;
0 commit comments