@@ -62,8 +62,8 @@ use datafusion_common::tree_node::{
 };
 use datafusion_common::{plan_err, DataFusionError};
 use datafusion_physical_expr::{PhysicalSortExpr, PhysicalSortRequirement};
-
 use datafusion_physical_plan::repartition::RepartitionExec;
+
 use itertools::izip;
 
 /// This rule inspects [`SortExec`]'s in the given physical plan and removes the
@@ -842,14 +842,16 @@ mod tests {
     use crate::physical_plan::repartition::RepartitionExec;
     use crate::physical_plan::{displayable, get_plan_string, Partitioning};
     use crate::prelude::{SessionConfig, SessionContext};
-    use crate::test::{csv_exec_sorted, stream_exec_ordered};
+    use crate::test::{csv_exec_ordered, csv_exec_sorted, stream_exec_ordered};
 
     use arrow::compute::SortOptions;
     use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
     use datafusion_common::Result;
     use datafusion_expr::JoinType;
     use datafusion_physical_expr::expressions::{col, Column, NotExpr};
 
+    use rstest::rstest;
+
     fn create_test_schema() -> Result<SchemaRef> {
         let nullable_column = Field::new("nullable_col", DataType::Int32, true);
         let non_nullable_column = Field::new("non_nullable_col", DataType::Int32, false);
@@ -2213,12 +2215,19 @@ mod tests {
         Ok(())
     }
 
+    #[rstest]
     #[tokio::test]
-    async fn test_with_lost_ordering_unbounded() -> Result<()> {
+    async fn test_with_lost_ordering_unbounded_bounded(
+        #[values(false, true)] source_unbounded: bool,
+    ) -> Result<()> {
         let schema = create_test_schema3()?;
         let sort_exprs = vec![sort_expr("a", &schema)];
-        // create an unbounded source
-        let source = stream_exec_ordered(&schema, sort_exprs);
+        // create either bounded or unbounded source
+        let source = if source_unbounded {
+            stream_exec_ordered(&schema, sort_exprs)
+        } else {
+            csv_exec_ordered(&schema, sort_exprs)
+        };
         let repartition_rr = repartition_exec(source);
         let repartition_hash = Arc::new(RepartitionExec::try_new(
             repartition_rr,
@@ -2227,50 +2236,71 @@ mod tests {
         let coalesce_partitions = coalesce_partitions_exec(repartition_hash);
         let physical_plan = sort_exec(vec![sort_expr("a", &schema)], coalesce_partitions);
 
-        let expected_input = [
+        // Expected inputs unbounded and bounded
+        let expected_input_unbounded = vec![
             "SortExec: expr=[a@0 ASC]",
             "  CoalescePartitionsExec",
             "    RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10",
             "      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
             "        StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]",
         ];
-        let expected_optimized = [
+        let expected_input_bounded = vec![
+            "SortExec: expr=[a@0 ASC]",
+            "  CoalescePartitionsExec",
+            "    RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10",
+            "      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+            "        CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=true",
+        ];
+
+        // Expected unbounded result (same for with and without flag)
+        let expected_optimized_unbounded = vec![
             "SortPreservingMergeExec: [a@0 ASC]",
             "  RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC",
             "    RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
             "      StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]",
         ];
-        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
-        Ok(())
-    }
 
-    #[tokio::test]
-    async fn test_with_lost_ordering_unbounded_parallelize_off() -> Result<()> {
-        let schema = create_test_schema3()?;
-        let sort_exprs = vec![sort_expr("a", &schema)];
-        // create an unbounded source
-        let source = stream_exec_ordered(&schema, sort_exprs);
-        let repartition_rr = repartition_exec(source);
-        let repartition_hash = Arc::new(RepartitionExec::try_new(
-            repartition_rr,
-            Partitioning::Hash(vec![col("c", &schema).unwrap()], 10),
-        )?) as _;
-        let coalesce_partitions = coalesce_partitions_exec(repartition_hash);
-        let physical_plan = sort_exec(vec![sort_expr("a", &schema)], coalesce_partitions);
-
-        let expected_input = ["SortExec: expr=[a@0 ASC]",
+        // Expected bounded results with and without flag
+        let expected_optimized_bounded = vec![
+            "SortExec: expr=[a@0 ASC]",
             "  CoalescePartitionsExec",
             "    RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10",
             "      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
-            "        StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]",
+            "        CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=true",
         ];
-        let expected_optimized = [
+        let expected_optimized_bounded_parallelize_sort = vec![
             "SortPreservingMergeExec: [a@0 ASC]",
-            "  RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC",
-            "    RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
-            "      StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]",
+            "  SortExec: expr=[a@0 ASC]",
+            "    RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10",
+            "      RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1",
+            "        CsvExec: file_groups={1 group: [[file_path]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], has_header=true",
         ];
-        assert_optimized!(expected_input, expected_optimized, physical_plan, false);
+        let (expected_input, expected_optimized, expected_optimized_sort_parallelize) =
+            if source_unbounded {
+                (
+                    expected_input_unbounded,
+                    expected_optimized_unbounded.clone(),
+                    expected_optimized_unbounded,
+                )
+            } else {
+                (
+                    expected_input_bounded,
+                    expected_optimized_bounded,
+                    expected_optimized_bounded_parallelize_sort,
+                )
+            };
+        assert_optimized!(
+            expected_input,
+            expected_optimized,
+            physical_plan.clone(),
+            false
+        );
+        assert_optimized!(
+            expected_input,
+            expected_optimized_sort_parallelize,
+            physical_plan,
+            true
+        );
         Ok(())
     }
 
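Note: the change above folds the former test_with_lost_ordering_unbounded and test_with_lost_ordering_unbounded_parallelize_off into one rstest-parametrized test that covers both an unbounded source (stream_exec_ordered) and a bounded one (csv_exec_ordered), and asserts the optimized plan with the macro's boolean flag both false and true (the variable names suggest this flag toggles sort parallelization). Below is a minimal, self-contained sketch of the rstest + tokio parametrization pattern the new test relies on; it assumes rstest and tokio (with the macros/rt features) as dev-dependencies, and the function name and body are illustrative only, not part of this commit.

// Sketch of the parametrization pattern: #[values(false, true)] expands this
// single async test into two generated cases, one per value of `source_unbounded`.
use rstest::rstest;

#[rstest]
#[tokio::test]
async fn sketch_parametrized_case(#[values(false, true)] source_unbounded: bool) {
    // Branch on the parameter the same way the real test picks its source
    // operator (stream_exec_ordered vs csv_exec_ordered).
    let source_kind = if source_unbounded { "unbounded" } else { "bounded" };
    assert!(source_kind == "unbounded" || source_kind == "bounded");
}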