@@ -350,6 +350,7 @@ mod tests {
350350    use  arrow:: datatypes:: Schema ; 
351351    use  arrow:: datatypes:: { DataType ,  Field } ; 
352352    use  datafusion_common:: { config:: ConfigOptions ,  TableReference ,  ToDFSchema } ; 
353+     use  datafusion_common:: { DataFusionError ,  Result } ; 
353354    use  datafusion_expr:: { 
354355        builder:: LogicalTableSource ,  cast,  col,  lit,  AggregateUDF ,  Expr ,  ScalarUDF , 
355356        TableSource ,  WindowUDF , 
@@ -994,6 +995,26 @@ mod tests {
994995        create_physical_expr ( expr,  & df_schema,  schema,  & execution_props) . unwrap ( ) 
995996    } 
996997
998+     // Note the values in the `String` column are: 
999+     // ❯ select * from './parquet-testing/data/data_index_bloom_encoding_stats.parquet'; 
1000+     // +-----------+ 
1001+     // | String    | 
1002+     // +-----------+ 
1003+     // | Hello     | 
1004+     // | This is   | 
1005+     // | a         | 
1006+     // | test      | 
1007+     // | How       | 
1008+     // | are you   | 
1009+     // | doing     | 
1010+     // | today     | 
1011+     // | the quick | 
1012+     // | brown fox | 
1013+     // | jumps     | 
1014+     // | over      | 
1015+     // | the lazy  | 
1016+     // | dog       | 
1017+     // +-----------+ 
9971018    #[ tokio:: test]  
9981019    async  fn  test_row_group_bloom_filter_pruning_predicate_simple_expr ( )  { 
9991020        // load parquet file 
@@ -1002,7 +1023,7 @@ mod tests {
10021023        let  path = format ! ( "{testdata}/{file_name}" ) ; 
10031024        let  data = bytes:: Bytes :: from ( std:: fs:: read ( path) . unwrap ( ) ) ; 
10041025
1005-         // generate pruning predicate 
1026+         // generate pruning predicate `(String = "Hello_Not_Exists")`
10061027        let  schema = Schema :: new ( vec ! [ Field :: new( "String" ,  DataType :: Utf8 ,  false ) ] ) ; 
10071028        let  expr = col ( r#""String""# ) . eq ( lit ( "Hello_Not_Exists" ) ) ; 
10081029        let  expr = logical2physical ( & expr,  & schema) ; 
@@ -1029,7 +1050,7 @@ mod tests {
10291050        let  path = format ! ( "{testdata}/{file_name}" ) ; 
10301051        let  data = bytes:: Bytes :: from ( std:: fs:: read ( path) . unwrap ( ) ) ; 
10311052
1032-         // generate pruning predicate 
1053+         // generate pruning predicate `(String = "Hello_Not_exists" OR String = "Hello_Not_exists2")`  
10331054        let  schema = Schema :: new ( vec ! [ Field :: new( "String" ,  DataType :: Utf8 ,  false ) ] ) ; 
10341055        let  expr = lit ( "1" ) . eq ( lit ( "1" ) ) . and ( 
10351056            col ( r#""String""# ) 
@@ -1091,7 +1112,7 @@ mod tests {
10911112        let  path = format ! ( "{testdata}/{file_name}" ) ; 
10921113        let  data = bytes:: Bytes :: from ( std:: fs:: read ( path) . unwrap ( ) ) ; 
10931114
1094-         // generate pruning predicate 
1115+         // generate pruning predicate `(String = "Hello")`  
10951116        let  schema = Schema :: new ( vec ! [ Field :: new( "String" ,  DataType :: Utf8 ,  false ) ] ) ; 
10961117        let  expr = col ( r#""String""# ) . eq ( lit ( "Hello" ) ) ; 
10971118        let  expr = logical2physical ( & expr,  & schema) ; 
@@ -1110,6 +1131,94 @@ mod tests {
11101131        assert_eq ! ( pruned_row_groups,  row_groups) ; 
11111132    } 
11121133
1134+     #[ tokio:: test]  
1135+     async  fn  test_row_group_bloom_filter_pruning_predicate_with_exists_2_values ( )  { 
1136+         // load parquet file 
1137+         let  testdata = datafusion_common:: test_util:: parquet_test_data ( ) ; 
1138+         let  file_name = "data_index_bloom_encoding_stats.parquet" ; 
1139+         let  path = format ! ( "{testdata}/{file_name}" ) ; 
1140+         let  data = bytes:: Bytes :: from ( std:: fs:: read ( path) . unwrap ( ) ) ; 
1141+ 
1142+         // generate pruning predicate `(String = "Hello") OR (String = "the quick")` 
1143+         let  schema = Schema :: new ( vec ! [ Field :: new( "String" ,  DataType :: Utf8 ,  false ) ] ) ; 
1144+         let  expr = col ( r#""String""# ) 
1145+             . eq ( lit ( "Hello" ) ) 
1146+             . or ( col ( r#""String""# ) . eq ( lit ( "the quick" ) ) ) ; 
1147+         let  expr = logical2physical ( & expr,  & schema) ; 
1148+         let  pruning_predicate =
1149+             PruningPredicate :: try_new ( expr,  Arc :: new ( schema) ) . unwrap ( ) ; 
1150+ 
1151+         let  row_groups = vec ! [ 0 ] ; 
1152+         let  pruned_row_groups = test_row_group_bloom_filter_pruning_predicate ( 
1153+             file_name, 
1154+             data, 
1155+             & pruning_predicate, 
1156+             & row_groups, 
1157+         ) 
1158+         . await 
1159+         . unwrap ( ) ; 
1160+         assert_eq ! ( pruned_row_groups,  row_groups) ; 
1161+     } 
1162+ 
1163+     #[ tokio:: test]  
1164+     async  fn  test_row_group_bloom_filter_pruning_predicate_with_exists_3_values ( )  { 
1165+         // load parquet file 
1166+         let  testdata = datafusion_common:: test_util:: parquet_test_data ( ) ; 
1167+         let  file_name = "data_index_bloom_encoding_stats.parquet" ; 
1168+         let  path = format ! ( "{testdata}/{file_name}" ) ; 
1169+         let  data = bytes:: Bytes :: from ( std:: fs:: read ( path) . unwrap ( ) ) ; 
1170+ 
1171+         // generate pruning predicate `(String = "Hello") OR (String = "the quick") OR (String = "are you")` 
1172+         let  schema = Schema :: new ( vec ! [ Field :: new( "String" ,  DataType :: Utf8 ,  false ) ] ) ; 
1173+         let  expr = col ( r#""String""# ) 
1174+             . eq ( lit ( "Hello" ) ) 
1175+             . or ( col ( r#""String""# ) . eq ( lit ( "the quick" ) ) ) 
1176+             . or ( col ( r#""String""# ) . eq ( lit ( "are you" ) ) ) ; 
1177+         let  expr = logical2physical ( & expr,  & schema) ; 
1178+         let  pruning_predicate =
1179+             PruningPredicate :: try_new ( expr,  Arc :: new ( schema) ) . unwrap ( ) ; 
1180+ 
1181+         let  row_groups = vec ! [ 0 ] ; 
1182+         let  pruned_row_groups = test_row_group_bloom_filter_pruning_predicate ( 
1183+             file_name, 
1184+             data, 
1185+             & pruning_predicate, 
1186+             & row_groups, 
1187+         ) 
1188+         . await 
1189+         . unwrap ( ) ; 
1190+         assert_eq ! ( pruned_row_groups,  row_groups) ; 
1191+     } 
1192+ 
1193+     #[ tokio:: test]  
1194+     async  fn  test_row_group_bloom_filter_pruning_predicate_with_or_not_eq ( )  { 
1195+         // load parquet file 
1196+         let  testdata = datafusion_common:: test_util:: parquet_test_data ( ) ; 
1197+         let  file_name = "data_index_bloom_encoding_stats.parquet" ; 
1198+         let  path = format ! ( "{testdata}/{file_name}" ) ; 
1199+         let  data = bytes:: Bytes :: from ( std:: fs:: read ( path) . unwrap ( ) ) ; 
1200+ 
1201+         // generate pruning predicate `(String = "foo") OR (String != "bar")` 
1202+         let  schema = Schema :: new ( vec ! [ Field :: new( "String" ,  DataType :: Utf8 ,  false ) ] ) ; 
1203+         let  expr = col ( r#""String""# ) 
1204+             . not_eq ( lit ( "foo" ) ) 
1205+             . or ( col ( r#""String""# ) . not_eq ( lit ( "bar" ) ) ) ; 
1206+         let  expr = logical2physical ( & expr,  & schema) ; 
1207+         let  pruning_predicate =
1208+             PruningPredicate :: try_new ( expr,  Arc :: new ( schema) ) . unwrap ( ) ; 
1209+ 
1210+         let  row_groups = vec ! [ 0 ] ; 
1211+         let  pruned_row_groups = test_row_group_bloom_filter_pruning_predicate ( 
1212+             file_name, 
1213+             data, 
1214+             & pruning_predicate, 
1215+             & row_groups, 
1216+         ) 
1217+         . await 
1218+         . unwrap ( ) ; 
1219+         assert_eq ! ( pruned_row_groups,  row_groups) ; 
1220+     } 
1221+ 
11131222    #[ tokio:: test]  
11141223    async  fn  test_row_group_bloom_filter_pruning_predicate_without_bloom_filter ( )  { 
11151224        // load parquet file 
@@ -1118,7 +1227,7 @@ mod tests {
11181227        let  path = format ! ( "{testdata}/{file_name}" ) ; 
11191228        let  data = bytes:: Bytes :: from ( std:: fs:: read ( path) . unwrap ( ) ) ; 
11201229
1121-         // generate pruning predicate 
1230+         // generate pruning predicate on a column without a bloom filter  
11221231        let  schema = Schema :: new ( vec ! [ Field :: new( "string_col" ,  DataType :: Utf8 ,  false ) ] ) ; 
11231232        let  expr = col ( r#""string_col""# ) . eq ( lit ( "0" ) ) ; 
11241233        let  expr = logical2physical ( & expr,  & schema) ; 
0 commit comments