Skip to content

Stack overflow calling EqvalenceProperties::project with certain order #12700

@alamb

Description

@alamb

Describe the bug

Given equivalence sort orders like this:

[
  [a,b,c,d],
  [a,c,b,d], // note b,d are swapped
]

When EqvalenceProperties::project is called to project a, b, c, d the stack overflows

To Reproduce

Reproducer:

    #[test]
    fn project_equivalence_properties_test_multi() ->   Result<()> {
    // test multiple input orderings with equivalence properties
        let input_schema = Arc::new(Schema::new(vec![
            Field::new("a", DataType::Int64, true),
            Field::new("b", DataType::Int64, true),
            Field::new("c", DataType::Int64, true),
            Field::new("d", DataType::Int64, true),
        ]));

        let mut input_properties = EquivalenceProperties::new(Arc::clone(&input_schema));
        // add equivalent ordering [a, b, c, d]
        input_properties.add_new_ordering(vec![
            parse_sort_expr("a", &input_schema),
            parse_sort_expr("b", &input_schema),
            parse_sort_expr("c", &input_schema),
            parse_sort_expr("d", &input_schema),
        ]);

        // add equivalent ordering [a, c, b, d]
        input_properties.add_new_ordering(vec![
            parse_sort_expr("a", &input_schema),
            parse_sort_expr("c", &input_schema),
            parse_sort_expr("b", &input_schema), // NB b and c are swapped
            parse_sort_expr("d", &input_schema),
        ]);

        let proj_exprs = vec![
            (col("a", &input_schema)?, "a".to_string()),
            (col("b", &input_schema)?, "b".to_string()),
            (col("c", &input_schema)?, "c".to_string()),
            (col("d", &input_schema)?, "d".to_string()),
        ];
        let projection_mapping = ProjectionMapping::try_new(&proj_exprs, &input_schema)?;
        let out_properties = input_properties.project(&projection_mapping, input_schema);
        
        assert_eq!(out_properties.to_string(), "order: [[a@0 ASC,b@1 ASC,c@2 ASC]]");
        Ok(())
    }
Full Diff

andrewlamb@Andrews-MacBook-Pro-2:~/Software/datafusion2$ git diff
diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs
index dc59a1eb8..dc3929188 100644
--- a/datafusion/physical-expr/src/equivalence/properties.rs
+++ b/datafusion/physical-expr/src/equivalence/properties.rs
@@ -1755,6 +1761,46 @@ mod tests {
         Ok(())
     }

+    #[test]
+    fn project_equivalence_properties_test_multi() ->   Result<()> {
+    // test multiple input orderings with equivalence properties
+        let input_schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int64, true),
+            Field::new("b", DataType::Int64, true),
+            Field::new("c", DataType::Int64, true),
+            Field::new("d", DataType::Int64, true),
+        ]));
+
+        let mut input_properties = EquivalenceProperties::new(Arc::clone(&input_schema));
+        // add equivalent ordering [a, b, c, d]
+        input_properties.add_new_ordering(vec![
+            parse_sort_expr("a", &input_schema),
+            parse_sort_expr("b", &input_schema),
+            parse_sort_expr("c", &input_schema),
+            parse_sort_expr("d", &input_schema),
+        ]);
+
+        // add equivalent ordering [a, c, b, d]
+        input_properties.add_new_ordering(vec![
+            parse_sort_expr("a", &input_schema),
+            parse_sort_expr("c", &input_schema),
+            parse_sort_expr("b", &input_schema), // NB b and c are swapped
+            parse_sort_expr("d", &input_schema),
+        ]);
+
+        let proj_exprs = vec![
+            (col("a", &input_schema)?, "a".to_string()),
+            (col("b", &input_schema)?, "b".to_string()),
+            (col("c", &input_schema)?, "c".to_string()),
+            (col("d", &input_schema)?, "d".to_string()),
+        ];
+        let projection_mapping = ProjectionMapping::try_new(&proj_exprs, &input_schema)?;
+        let out_properties = input_properties.project(&projection_mapping, input_schema);
+
+        assert_eq!(out_properties.to_string(), "order: [[a@0 ASC,b@1 ASC,c@2 ASC]]");
+        Ok(())
+    }
+
     #[test]
     fn test_join_equivalence_properties() -> Result<()> {
         let schema = create_test_schema()?;
@@ -3108,4 +3154,36 @@ mod tests {
             assert!(lhs_orderings.contains(rhs_ordering), "{}", err_msg);
         }
     }
+
+    /// Converts a string to a physical sort expression
+    ///
+    /// # Example
+    /// * `"a"` -> (`"a"`, `SortOptions::default()`)
+    /// * `"a ASC"` -> (`"a"`, `SortOptions { descending: false, nulls_first: false }`)
+    fn parse_sort_expr(name: &str, schema: &SchemaRef) -> PhysicalSortExpr {
+        let mut parts = name.split_whitespace();
+        let name = parts.next().expect("empty sort expression");
+        let mut sort_expr = PhysicalSortExpr::new(
+            col(name, schema).expect("invalid column name"),
+            SortOptions::default(),
+        );
+
+        if let Some(options) = parts.next() {
+            sort_expr = match options {
+                "ASC" => sort_expr.asc(),
+                "DESC" => sort_expr.desc(),
+                _ => panic!(
+                    "unknown sort options. Expected 'ASC' or 'DESC', got {}",
+                    options
+                ),
+            }
+        }
+
+        assert!(
+            parts.next().is_none(),
+            "unexpected tokens in column name. Expected 'name' / 'name ASC' / 'name DESC' but got  '{name}'"
+        );
+
+        sort_expr
+    }

Then run

cargo test --package datafusion-physical-expr --lib equivalence::properties::tests::project_equivalence_properties_test_multi -- --exact
   Compiling datafusion-physical-expr v42.0.0 (/Users/andrewlamb/Software/datafusion2/datafusion/physical-expr)
    Finished `test` profile [unoptimized + debuginfo] target(s) in 1.65s
     Running unittests src/lib.rs (target/debug/deps/datafusion_physical_expr-a6fc7a49851ed9b5)

running 1 test

thread 'equivalence::properties::tests::project_equivalence_properties_test_multi' has overflowed its stack
fatal runtime error: stack overflow
error: test failed, to rerun pass `-p datafusion-physical-expr --lib`

Caused by:
  process didn't exit successfully: `/Users/andrewlamb/Software/datafusion2/target/debug/deps/datafusion_physical_expr-a6fc7a49851ed9b5 'equivalence::properties::tests::project_equivalence_properties_test_multi' --exact` (signal: 6, SIGABRT: process abort signal)

Expected behavior

Test should pass (expected output will need to be updated)

Additional context

I found this while working on #12446 / #12562

Metadata

Metadata

Assignees

Labels

bugSomething isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions