1717
1818#[ macro_use]
1919extern crate criterion;
20- use arrow:: util:: test_util:: seedable_rng;
21- use criterion:: Criterion ;
22-
2320extern crate arrow;
2421
22+ use std:: time:: Duration ;
23+
2524use arrow:: compute:: kernels:: cmp:: * ;
2625use arrow:: util:: bench_util:: * ;
26+ use arrow:: util:: test_util:: seedable_rng;
2727use arrow:: { array:: * , datatypes:: Float32Type , datatypes:: Int32Type } ;
2828use arrow_buffer:: IntervalMonthDayNano ;
2929use arrow_string:: like:: * ;
3030use arrow_string:: regexp:: regexp_is_match_utf8_scalar;
31+ use criterion:: { Criterion , SamplingMode } ;
3132use rand:: rngs:: StdRng ;
3233use rand:: Rng ;
3334
@@ -53,6 +54,15 @@ fn bench_nilike_utf8_scalar(arr_a: &StringArray, value_b: &str) {
5354 nilike ( arr_a, & StringArray :: new_scalar ( value_b) ) . unwrap ( ) ;
5455}
5556
57+ fn bench_regexp_is_match_utf8view_scalar ( arr_a : & StringViewArray , value_b : & str ) {
58+ regexp_is_match_utf8_scalar (
59+ criterion:: black_box ( arr_a) ,
60+ criterion:: black_box ( value_b) ,
61+ None ,
62+ )
63+ . unwrap ( ) ;
64+ }
65+
5666fn bench_regexp_is_match_utf8_scalar ( arr_a : & StringArray , value_b : & str ) {
5767 regexp_is_match_utf8_scalar (
5868 criterion:: black_box ( arr_a) ,
@@ -78,6 +88,7 @@ fn add_benchmark(c: &mut Criterion) {
7888 let arr_month_day_nano_b = create_month_day_nano_array_with_seed ( SIZE , 0.0 , 43 ) ;
7989
8090 let arr_string = create_string_array :: < i32 > ( SIZE , 0.0 ) ;
91+ let arr_string_view = create_string_view_array ( SIZE , 0.0 ) ;
8192
8293 let scalar = Float32Array :: from ( vec ! [ 1.0 ] ) ;
8394
@@ -322,35 +333,65 @@ fn add_benchmark(c: &mut Criterion) {
322333 } ) ;
323334
324335 // StringArray: NOT ILIKE benchmarks
336+ let mut group = c. benchmark_group ( "StringArray: NOT ILIKE benchmarks" . to_string ( ) ) ;
337+ group. sampling_mode ( SamplingMode :: Flat ) ;
338+ group. sample_size ( 60 ) ;
339+ group. measurement_time ( Duration :: from_secs ( 6 ) ) ;
325340
326- c . bench_function ( "nilike_utf8 scalar equals" , |b| {
341+ group . bench_function ( "nilike_utf8 scalar equals" , |b| {
327342 b. iter ( || bench_nilike_utf8_scalar ( & arr_string, "xxXX" ) )
328343 } ) ;
329344
330- c . bench_function ( "nilike_utf8 scalar contains" , |b| {
345+ group . bench_function ( "nilike_utf8 scalar contains" , |b| {
331346 b. iter ( || bench_nilike_utf8_scalar ( & arr_string, "%xxXX%" ) )
332347 } ) ;
333348
334- c . bench_function ( "nilike_utf8 scalar ends with" , |b| {
349+ group . bench_function ( "nilike_utf8 scalar ends with" , |b| {
335350 b. iter ( || bench_nilike_utf8_scalar ( & arr_string, "%xXXx" ) )
336351 } ) ;
337352
338- c . bench_function ( "nilike_utf8 scalar starts with" , |b| {
353+ group . bench_function ( "nilike_utf8 scalar starts with" , |b| {
339354 b. iter ( || bench_nilike_utf8_scalar ( & arr_string, "XXXx%" ) )
340355 } ) ;
341356
342- c . bench_function ( "nilike_utf8 scalar complex" , |b| {
357+ group . bench_function ( "nilike_utf8 scalar complex" , |b| {
343358 b. iter ( || bench_nilike_utf8_scalar ( & arr_string, "%xx_xX%xXX" ) )
344359 } ) ;
345360
346- c . bench_function ( "regexp_matches_utf8 scalar starts with" , |b| {
361+ group . bench_function ( "regexp_matches_utf8 scalar starts with" , |b| {
347362 b. iter ( || bench_regexp_is_match_utf8_scalar ( & arr_string, "^xx" ) )
348363 } ) ;
349364
350- c. bench_function ( "regexp_matches_utf8 scalar ends with" , |b| {
365+ group. bench_function ( "regexp_matches_utf8 scalar contains" , |b| {
366+ b. iter ( || bench_regexp_is_match_utf8_scalar ( & arr_string, ".*xx.*" ) )
367+ } ) ;
368+
369+ group. bench_function ( "regexp_matches_utf8 scalar ends with" , |b| {
351370 b. iter ( || bench_regexp_is_match_utf8_scalar ( & arr_string, "xx$" ) )
352371 } ) ;
353372
373+ group. bench_function ( "regexp_matches_utf8 scalar complex" , |b| {
374+ b. iter ( || bench_regexp_is_match_utf8_scalar ( & arr_string, ".*xx.xX.*xXX" ) )
375+ } ) ;
376+
377+ group. bench_function ( "regexp_matches_utf8view scalar starts with" , |b| {
378+ b. iter ( || bench_regexp_is_match_utf8view_scalar ( & arr_string_view, "^xx" ) )
379+ } ) ;
380+
381+ group. bench_function ( "regexp_matches_utf8view scalar contains" , |b| {
382+ b. iter ( || bench_regexp_is_match_utf8view_scalar ( & arr_string_view, ".*xx.*" ) )
383+ } ) ;
384+
385+ group. bench_function ( "regexp_matches_utf8view scalar ends with" , |b| {
386+ b. iter ( || bench_regexp_is_match_utf8view_scalar ( & arr_string_view, "xx$" ) )
387+ } ) ;
388+
389+ group. bench_function ( "regexp_matches_utf8view scalar complex" , |b| {
390+ b. iter ( || bench_regexp_is_match_utf8view_scalar ( & arr_string_view, ".*xx.xX.*xXX" ) )
391+ } ) ;
392+
393+ group. finish ( ) ;
394+
354395 // DictionaryArray benchmarks
355396
356397 let strings = create_string_array :: < i32 > ( 20 , 0. ) ;
0 commit comments