-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-12161][SQL] Ignore order of predicates in cache matching #10163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
579b5a2
0a09698
85f1ebc
1a2b534
5fcb85c
8f93c6a
6eb6fdd
02fc878
bcb6df0
360bb2b
94837d6
0de3d7e
13ce03f
9f6df41
e63df88
9043afd
da46b1c
ba29f0b
c81aa46
cc8fdfe
b8ba919
a0e8c4a
07128b3
4af3622
99626a4
2efca2f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -228,7 +228,8 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with | |
| } | ||
| } | ||
|
|
||
| case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate { | ||
| case class And(left: Expression, right: Expression) extends BinaryOperator | ||
| with Predicate with PredicateHelper{ | ||
|
|
||
| override def inputType: AbstractDataType = BooleanType | ||
|
|
||
|
|
@@ -252,6 +253,27 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with | |
| } | ||
| } | ||
|
|
||
| override def semanticEquals(other: Expression): Boolean = this.getClass == other.getClass && { | ||
| // Non-deterministic expressions cannot be semantic equal | ||
| if (!deterministic || !other.deterministic) return false | ||
|
|
||
| // We already know both expressions are And, so we can tolerate ordering different | ||
| // Recursively call semanticEquals on subexpressions to check the equivalency of two seqs. | ||
| var elements1 = splitConjunctivePredicates(this) | ||
| val elements2 = splitConjunctivePredicates(other) | ||
| // We can recursively call semanticEquals to check the equivalency for subexpressions, but | ||
| // there is no simple solution to compare the equivalency of sequence of expressions. | ||
| // Expression class doesn't have order, so we couldn't sort them. We can neither use | ||
| // set comparison as Set doesn't support custom compare function, which is semanticEquals. | ||
| // To check the equivalency of elements1 and elements2, we first compare their size. Then | ||
| // for each element in elements2, we remove its first semantically equivalent expression from | ||
| // elements1. If they are semantically equivalent, elements1 should be empty at the end. | ||
| elements1.size == elements2.size && { | ||
| for (e <- elements2) elements1 = removeFirstSemanticEquivalent(elements1, e) | ||
| elements1.isEmpty | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry I may missed something here, can we just write: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider this example There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah I see, this makes sense. cc @liancheng There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| } | ||
| } | ||
|
|
||
| override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { | ||
| val eval1 = left.gen(ctx) | ||
| val eval2 = right.gen(ctx) | ||
|
|
@@ -277,7 +299,8 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with | |
| } | ||
|
|
||
|
|
||
| case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate { | ||
| case class Or(left: Expression, right: Expression) extends BinaryOperator | ||
| with Predicate with PredicateHelper { | ||
|
|
||
| override def inputType: AbstractDataType = BooleanType | ||
|
|
||
|
|
@@ -301,6 +324,26 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator with P | |
| } | ||
| } | ||
|
|
||
| override def semanticEquals(other: Expression): Boolean = this.getClass == other.getClass && { | ||
| // Non-deterministic expressions cannot be semantic equal | ||
| if (!deterministic || !other.deterministic) return false | ||
|
|
||
| // We know both expressions are Or, so we can tolerate ordering different | ||
| var elements1 = splitDisjunctivePredicates(this) | ||
| val elements2 = splitDisjunctivePredicates(other) | ||
| // We can recursively call semanticEquals to check the equivalency for subexpressions, but | ||
| // there is no simple solution to compare the equivalency of sequence of expressions. | ||
| // Expression class doesn't have order, so we couldn't sort them. We can neither use | ||
| // set comparison as Set doesn't support custom compare function, which is semanticEquals. | ||
| // To check the equivalency of elements1 and elements2, we first compare their size. Then | ||
| // for each element in elements2, we remove its first semantically equivalent expression from | ||
| // elements1. If they are semantically equivalent, elements1 should be empty at the end. | ||
| elements1.size == elements2.size && { | ||
| for (e <- elements2) elements1 = removeFirstSemanticEquivalent(elements1, e) | ||
| elements1.isEmpty | ||
| } | ||
| } | ||
|
|
||
| override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { | ||
| val eval1 = left.gen(ctx) | ||
| val eval2 = right.gen(ctx) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -127,33 +127,38 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { | |
| cleanLeft.children.size == cleanRight.children.size && { | ||
| logDebug( | ||
| s"[${cleanRight.cleanArgs.mkString(", ")}] == [${cleanLeft.cleanArgs.mkString(", ")}]") | ||
| cleanRight.cleanArgs == cleanLeft.cleanArgs | ||
| } && | ||
| (cleanLeft.children, cleanRight.children).zipped.forall(_ sameResult _) | ||
| cleanLeft.cleanArgs.zip(cleanRight.cleanArgs).forall { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about we just change this to: then we can just improve |
||
| case (e1: Expression, e2: Expression) => e1 semanticEquals e2 | ||
| case (a1, a2) => a1 == a2 | ||
| } | ||
| } && (cleanLeft.children, cleanRight.children).zipped.forall(_ sameResult _) | ||
| } | ||
|
|
||
| /** Clean an expression so that differences in expression id should not affect equality */ | ||
| def cleanExpression(e: Expression, input: Seq[Attribute]): Expression = e match { | ||
| case a: Alias => | ||
| // As the root of the expression, Alias will always take an arbitrary exprId, we need | ||
| // to erase that for equality testing. | ||
| val cleanedExprId = Alias(a.child, a.name)(ExprId(-1), a.qualifiers) | ||
| BindReferences.bindReference(cleanedExprId, input, allowFailures = true) | ||
| case other => BindReferences.bindReference(other, input, allowFailures = true) | ||
| } | ||
|
|
||
|
|
||
| /** Args that have cleaned such that differences in expression id should not affect equality */ | ||
| protected lazy val cleanArgs: Seq[Any] = { | ||
| val input = children.flatMap(_.output) | ||
| def cleanExpression(e: Expression) = e match { | ||
| case a: Alias => | ||
| // As the root of the expression, Alias will always take an arbitrary exprId, we need | ||
| // to erase that for equality testing. | ||
| val cleanedExprId = Alias(a.child, a.name)(ExprId(-1), a.qualifiers) | ||
| BindReferences.bindReference(cleanedExprId, input, allowFailures = true) | ||
| case other => BindReferences.bindReference(other, input, allowFailures = true) | ||
| } | ||
|
|
||
| productIterator.map { | ||
| // Children are checked using sameResult above. | ||
| case tn: TreeNode[_] if containsChild(tn) => null | ||
| case e: Expression => cleanExpression(e) | ||
| case e: Expression => cleanExpression(e, input) | ||
| case s: Option[_] => s.map { | ||
| case e: Expression => cleanExpression(e) | ||
| case e: Expression => cleanExpression(e, input) | ||
| case other => other | ||
| } | ||
| case s: Seq[_] => s.map { | ||
| case e: Expression => cleanExpression(e) | ||
| case e: Expression => cleanExpression(e, input) | ||
| case other => other | ||
| } | ||
| case other => other | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry I didn't clarify it clearly. I mean we can override
semanticEqualsin concrete expressions likeOr,And, etc. And we don't need to support all commutative operators at once, you can only finish the predicates parts in this PR and open follow-up PRs for other parts(likeAdd,Multiply). Let's do it step-by-step :)