@@ -25,8 +25,8 @@ use std::sync::Arc;
2525use  super :: dml:: CopyTo ; 
2626use  super :: DdlStatement ; 
2727use  crate :: dml:: CopyOptions ; 
28- use  crate :: expr:: { Alias ,  Exists ,  InSubquery ,  Placeholder } ; 
29- use  crate :: expr_rewriter:: create_col_from_scalar_expr; 
28+ use  crate :: expr:: { Alias ,  Exists ,  InSubquery ,  Placeholder ,   Sort   as   SortExpr } ; 
29+ use  crate :: expr_rewriter:: { create_col_from_scalar_expr,  normalize_cols } ; 
3030use  crate :: logical_plan:: display:: { GraphvizVisitor ,  IndentVisitor } ; 
3131use  crate :: logical_plan:: extension:: UserDefinedLogicalNode ; 
3232use  crate :: logical_plan:: { DmlStatement ,  Statement } ; 
@@ -163,7 +163,8 @@ impl LogicalPlan {
163163            } )  => projected_schema, 
164164            LogicalPlan :: Projection ( Projection  {  schema,  .. } )  => schema, 
165165            LogicalPlan :: Filter ( Filter  {  input,  .. } )  => input. schema ( ) , 
166-             LogicalPlan :: Distinct ( Distinct  {  input } )  => input. schema ( ) , 
166+             LogicalPlan :: Distinct ( Distinct :: All ( input) )  => input. schema ( ) , 
167+             LogicalPlan :: Distinct ( Distinct :: On ( DistinctOn  {  schema,  .. } ) )  => schema, 
167168            LogicalPlan :: Window ( Window  {  schema,  .. } )  => schema, 
168169            LogicalPlan :: Aggregate ( Aggregate  {  schema,  .. } )  => schema, 
169170            LogicalPlan :: Sort ( Sort  {  input,  .. } )  => input. schema ( ) , 
@@ -367,6 +368,16 @@ impl LogicalPlan {
367368            LogicalPlan :: Unnest ( Unnest  {  column,  .. } )  => { 
368369                f ( & Expr :: Column ( column. clone ( ) ) ) 
369370            } 
371+             LogicalPlan :: Distinct ( Distinct :: On ( DistinctOn  { 
372+                 on_expr, 
373+                 select_expr, 
374+                 sort_expr, 
375+                 ..
376+             } ) )  => on_expr
377+                 . iter ( ) 
378+                 . chain ( select_expr. iter ( ) ) 
379+                 . chain ( sort_expr. clone ( ) . unwrap_or ( vec ! [ ] ) . iter ( ) ) 
380+                 . try_for_each ( f) , 
370381            // plans without expressions 
371382            LogicalPlan :: EmptyRelation ( _) 
372383            | LogicalPlan :: Subquery ( _) 
@@ -377,7 +388,7 @@ impl LogicalPlan {
377388            | LogicalPlan :: Analyze ( _) 
378389            | LogicalPlan :: Explain ( _) 
379390            | LogicalPlan :: Union ( _) 
380-             | LogicalPlan :: Distinct ( _ ) 
391+             | LogicalPlan :: Distinct ( Distinct :: All ( _ ) ) 
381392            | LogicalPlan :: Dml ( _) 
382393            | LogicalPlan :: Ddl ( _) 
383394            | LogicalPlan :: Copy ( _) 
@@ -405,7 +416,9 @@ impl LogicalPlan {
405416            LogicalPlan :: Union ( Union  {  inputs,  .. } )  => { 
406417                inputs. iter ( ) . map ( |arc| arc. as_ref ( ) ) . collect ( ) 
407418            } 
408-             LogicalPlan :: Distinct ( Distinct  {  input } )  => vec ! [ input] , 
419+             LogicalPlan :: Distinct ( 
420+                 Distinct :: All ( input)  | Distinct :: On ( DistinctOn  {  input,  .. } ) , 
421+             )  => vec ! [ input] , 
409422            LogicalPlan :: Explain ( explain)  => vec ! [ & explain. plan] , 
410423            LogicalPlan :: Analyze ( analyze)  => vec ! [ & analyze. input] , 
411424            LogicalPlan :: Dml ( write)  => vec ! [ & write. input] , 
@@ -461,8 +474,11 @@ impl LogicalPlan {
461474                    Ok ( Some ( agg. group_expr . as_slice ( ) [ 0 ] . clone ( ) ) ) 
462475                } 
463476            } 
477+             LogicalPlan :: Distinct ( Distinct :: On ( DistinctOn  {  select_expr,  .. } ) )  => { 
478+                 Ok ( Some ( select_expr[ 0 ] . clone ( ) ) ) 
479+             } 
464480            LogicalPlan :: Filter ( Filter  {  input,  .. } ) 
465-             | LogicalPlan :: Distinct ( Distinct   {   input,  ..  } ) 
481+             | LogicalPlan :: Distinct ( Distinct :: All ( input) ) 
466482            | LogicalPlan :: Sort ( Sort  {  input,  .. } ) 
467483            | LogicalPlan :: Limit ( Limit  {  input,  .. } ) 
468484            | LogicalPlan :: Repartition ( Repartition  {  input,  .. } ) 
@@ -823,10 +839,29 @@ impl LogicalPlan {
823839                inputs :  inputs. iter ( ) . cloned ( ) . map ( Arc :: new) . collect ( ) , 
824840                schema :  schema. clone ( ) , 
825841            } ) ) , 
826-             LogicalPlan :: Distinct ( Distinct  {  .. } )  => { 
827-                 Ok ( LogicalPlan :: Distinct ( Distinct  { 
828-                     input :  Arc :: new ( inputs[ 0 ] . clone ( ) ) , 
829-                 } ) ) 
842+             LogicalPlan :: Distinct ( distinct)  => { 
843+                 let  distinct = match  distinct { 
844+                     Distinct :: All ( _)  => Distinct :: All ( Arc :: new ( inputs[ 0 ] . clone ( ) ) ) , 
845+                     Distinct :: On ( DistinctOn  { 
846+                         on_expr, 
847+                         select_expr, 
848+                         ..
849+                     } )  => { 
850+                         let  sort_expr = expr. split_off ( on_expr. len ( )  + select_expr. len ( ) ) ; 
851+                         let  select_expr = expr. split_off ( on_expr. len ( ) ) ; 
852+                         Distinct :: On ( DistinctOn :: try_new ( 
853+                             expr, 
854+                             select_expr, 
855+                             if  !sort_expr. is_empty ( )  { 
856+                                 Some ( sort_expr) 
857+                             }  else  { 
858+                                 None 
859+                             } , 
860+                             Arc :: new ( inputs[ 0 ] . clone ( ) ) , 
861+                         ) ?) 
862+                     } 
863+                 } ; 
864+                 Ok ( LogicalPlan :: Distinct ( distinct) ) 
830865            } 
831866            LogicalPlan :: Analyze ( a)  => { 
832867                assert ! ( expr. is_empty( ) ) ; 
@@ -1064,7 +1099,9 @@ impl LogicalPlan {
10641099            LogicalPlan :: Subquery ( _)  => None , 
10651100            LogicalPlan :: SubqueryAlias ( SubqueryAlias  {  input,  .. } )  => input. max_rows ( ) , 
10661101            LogicalPlan :: Limit ( Limit  {  fetch,  .. } )  => * fetch, 
1067-             LogicalPlan :: Distinct ( Distinct  {  input } )  => input. max_rows ( ) , 
1102+             LogicalPlan :: Distinct ( 
1103+                 Distinct :: All ( input)  | Distinct :: On ( DistinctOn  {  input,  .. } ) , 
1104+             )  => input. max_rows ( ) , 
10681105            LogicalPlan :: Values ( v)  => Some ( v. values . len ( ) ) , 
10691106            LogicalPlan :: Unnest ( _)  => None , 
10701107            LogicalPlan :: Ddl ( _) 
@@ -1667,9 +1704,21 @@ impl LogicalPlan {
16671704                    LogicalPlan :: Statement ( statement)  => { 
16681705                        write ! ( f,  "{}" ,  statement. display( ) ) 
16691706                    } 
1670-                     LogicalPlan :: Distinct ( Distinct  {  .. } )  => { 
1671-                         write ! ( f,  "Distinct:" ) 
1672-                     } 
1707+                     LogicalPlan :: Distinct ( distinct)  => match  distinct { 
1708+                         Distinct :: All ( _)  => write ! ( f,  "Distinct:" ) , 
1709+                         Distinct :: On ( DistinctOn  { 
1710+                             on_expr, 
1711+                             select_expr, 
1712+                             sort_expr, 
1713+                             ..
1714+                         } )  => write ! ( 
1715+                             f, 
1716+                             "DistinctOn: on_expr=[[{}]], select_expr=[[{}]], sort_expr=[[{}]]" , 
1717+                             expr_vec_fmt!( on_expr) , 
1718+                             expr_vec_fmt!( select_expr) , 
1719+                             if  let  Some ( sort_expr)  = sort_expr {  expr_vec_fmt!( sort_expr)  }  else {  "" . to_string( )  } , 
1720+                         ) , 
1721+                     } , 
16731722                    LogicalPlan :: Explain  {  .. }  => write ! ( f,  "Explain" ) , 
16741723                    LogicalPlan :: Analyze  {  .. }  => write ! ( f,  "Analyze" ) , 
16751724                    LogicalPlan :: Union ( _)  => write ! ( f,  "Union" ) , 
@@ -2132,9 +2181,93 @@ pub struct Limit {
21322181
21332182/// Removes duplicate rows from the input 
21342183#[ derive( Clone ,  PartialEq ,  Eq ,  Hash ) ]  
2135- pub  struct  Distinct  { 
2184+ pub  enum  Distinct  { 
2185+     /// Plain `DISTINCT` referencing all selection expressions 
2186+      All ( Arc < LogicalPlan > ) , 
2187+     /// The `Postgres` addition, allowing separate control over DISTINCT'd and selected columns 
2188+      On ( DistinctOn ) , 
2189+ } 
2190+ 
2191+ /// Removes duplicate rows from the input 
2192+ #[ derive( Clone ,  PartialEq ,  Eq ,  Hash ) ]  
2193+ pub  struct  DistinctOn  { 
2194+     /// The `DISTINCT ON` clause expression list 
2195+      pub  on_expr :  Vec < Expr > , 
2196+     /// The selected projection expression list 
2197+      pub  select_expr :  Vec < Expr > , 
2198+     /// The `ORDER BY` clause, whose initial expressions must match those of the `ON` clause when 
2199+      /// present. Note that those matching expressions actually wrap the `ON` expressions with 
2200+      /// additional info pertaining to the sorting procedure (i.e. ASC/DESC, and NULLS FIRST/LAST). 
2201+      pub  sort_expr :  Option < Vec < Expr > > , 
21362202    /// The logical plan that is being DISTINCT'd 
21372203     pub  input :  Arc < LogicalPlan > , 
2204+     /// The schema description of the DISTINCT ON output 
2205+      pub  schema :  DFSchemaRef , 
2206+ } 
2207+ 
2208+ impl  DistinctOn  { 
2209+     /// Create a new `DistinctOn` struct. 
2210+      pub  fn  try_new ( 
2211+         on_expr :  Vec < Expr > , 
2212+         select_expr :  Vec < Expr > , 
2213+         sort_expr :  Option < Vec < Expr > > , 
2214+         input :  Arc < LogicalPlan > , 
2215+     )  -> Result < Self >  { 
2216+         if  on_expr. is_empty ( )  { 
2217+             return  plan_err ! ( "No `ON` expressions provided" ) ; 
2218+         } 
2219+ 
2220+         let  on_expr = normalize_cols ( on_expr,  input. as_ref ( ) ) ?; 
2221+ 
2222+         let  schema = DFSchema :: new_with_metadata ( 
2223+             exprlist_to_fields ( & select_expr,  & input) ?, 
2224+             input. schema ( ) . metadata ( ) . clone ( ) , 
2225+         ) ?; 
2226+ 
2227+         let  mut  distinct_on = DistinctOn  { 
2228+             on_expr, 
2229+             select_expr, 
2230+             sort_expr :  None , 
2231+             input, 
2232+             schema :  Arc :: new ( schema) , 
2233+         } ; 
2234+ 
2235+         if  let  Some ( sort_expr)  = sort_expr { 
2236+             distinct_on = distinct_on. with_sort_expr ( sort_expr) ?; 
2237+         } 
2238+ 
2239+         Ok ( distinct_on) 
2240+     } 
2241+ 
2242+     /// Try to update `self` with a new sort expressions. 
2243+      /// 
2244+      /// Validates that the sort expressions are a super-set of the `ON` expressions. 
2245+      pub  fn  with_sort_expr ( mut  self ,  sort_expr :  Vec < Expr > )  -> Result < Self >  { 
2246+         let  sort_expr = normalize_cols ( sort_expr,  self . input . as_ref ( ) ) ?; 
2247+ 
2248+         // Check that the left-most sort expressions are the same as the `ON` expressions. 
2249+         let  mut  matched = true ; 
2250+         for  ( on,  sort)  in  self . on_expr . iter ( ) . zip ( sort_expr. iter ( ) )  { 
2251+             match  sort { 
2252+                 Expr :: Sort ( SortExpr  {  expr,  .. } )  => { 
2253+                     if  on != & * * expr { 
2254+                         matched = false ; 
2255+                         break ; 
2256+                     } 
2257+                 } 
2258+                 _ => return  plan_err ! ( "Not a sort expression: {sort}" ) , 
2259+             } 
2260+         } 
2261+ 
2262+         if  self . on_expr . len ( )  > sort_expr. len ( )  || !matched { 
2263+             return  plan_err ! ( 
2264+                 "SELECT DISTINCT ON expressions must match initial ORDER BY expressions" 
2265+             ) ; 
2266+         } 
2267+ 
2268+         self . sort_expr  = Some ( sort_expr) ; 
2269+         Ok ( self ) 
2270+     } 
21382271} 
21392272
21402273/// Aggregates its input based on a set of grouping and aggregate 
0 commit comments