Skip to content

Commit a0af525

Browse files
committed
address comments
1 parent 0c484f1 commit a0af525

File tree

3 files changed

+55
-30
lines changed

3 files changed

+55
-30
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ package org.apache.spark.sql.catalyst.expressions
3030
* by `hashCode`.
3131
* - [[EqualTo]] and [[EqualNullSafe]] are reordered by `hashCode`.
3232
* - Other comparisons ([[GreaterThan]], [[LessThan]]) are reversed by `hashCode`.
33+
* - Elements in [[In]] are reordered by `hashCode`.
3334
*/
3435
object Canonicalize {
3536
def execute(e: Expression): Expression = {
@@ -86,8 +87,7 @@ object Canonicalize {
8687
case Not(LessThanOrEqual(l, r)) => GreaterThan(l, r)
8788

8889
// order the list in the In operator
89-
case In(value, list) =>
90-
In(value, list.sortBy(_.semanticHash()))
90+
case In(value, list) => In(value, list.sortBy(_.hashCode()))
9191

9292
case _ => e
9393
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.expressions
19+
20+
import org.apache.spark.SparkFunSuite
21+
import org.apache.spark.sql.catalyst.dsl.plans._
22+
import org.apache.spark.sql.catalyst.plans.logical.Range
23+
24+
class CanonicalizeSuite extends SparkFunSuite {
25+
26+
test("SPARK-24276: IN expression with different order are semantically equal") {
27+
val range = Range(1, 1, 1, 1)
28+
val idAttr = range.output.head
29+
30+
val in1 = In(idAttr, Seq(Literal(1), Literal(2)))
31+
val in2 = In(idAttr, Seq(Literal(2), Literal(1)))
32+
val in3 = In(idAttr, Seq(Literal(1), Literal(2), Literal(3)))
33+
34+
assert(in1.canonicalized.semanticHash() == in2.canonicalized.semanticHash())
35+
assert(in1.canonicalized.semanticHash() != in3.canonicalized.semanticHash())
36+
37+
assert(range.where(in1).sameResult(range.where(in2)))
38+
assert(!range.where(in1).sameResult(range.where(in3)))
39+
40+
val arrays1 = In(idAttr, Seq(CreateArray(Seq(Literal(1), Literal(2))),
41+
CreateArray(Seq(Literal(2), Literal(1)))))
42+
val arrays2 = In(idAttr, Seq(CreateArray(Seq(Literal(2), Literal(1))),
43+
CreateArray(Seq(Literal(1), Literal(2)))))
44+
val arrays3 = In(idAttr, Seq(CreateArray(Seq(Literal(1), Literal(2))),
45+
CreateArray(Seq(Literal(3), Literal(1)))))
46+
47+
assert(arrays1.canonicalized.semanticHash() == arrays2.canonicalized.semanticHash())
48+
assert(arrays1.canonicalized.semanticHash() != arrays3.canonicalized.semanticHash())
49+
50+
assert(range.where(arrays1).sameResult(range.where(arrays2)))
51+
assert(!range.where(arrays1).sameResult(range.where(arrays3)))
52+
}
53+
}

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2266,34 +2266,6 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
22662266
checkAnswer(df, df.collect())
22672267
}
22682268

2269-
test("SPARK-24276: IN returns sameResult if the order of literals is different") {
2270-
val df = spark.range(1)
2271-
val p1 = df.where($"id".isin(1, 2))
2272-
val p2 = df.where($"id".isin(2, 1))
2273-
val p3 = df.where($"id".isin(1, 2, 3))
2274-
2275-
assert(p1.queryExecution.executedPlan.sameResult(p2.queryExecution.executedPlan))
2276-
assert(!p1.queryExecution.executedPlan.sameResult(p3.queryExecution.executedPlan))
2277-
2278-
val h1 = p1.queryExecution.logical.canonicalized.semanticHash()
2279-
val h2 = p2.queryExecution.logical.canonicalized.semanticHash()
2280-
val h3 = p3.queryExecution.logical.canonicalized.semanticHash()
2281-
assert(h1 == h2)
2282-
assert(h1 != h3)
2283-
2284-
Seq(Array(1, 2)).toDF("id").createOrReplaceTempView("t")
2285-
val arrays1 = sql("select * from t where id in (array(1, 2), array(2, 1))")
2286-
val arrays2 = sql("select * from t where id in (array(2, 1), array(1, 2))")
2287-
val arrays3 = sql("select * from t where id in (array(1, 2), array(3, 1))")
2288-
assert(arrays1.queryExecution.executedPlan.sameResult(arrays2.queryExecution.executedPlan))
2289-
assert(!arrays1.queryExecution.executedPlan.sameResult(arrays3.queryExecution.executedPlan))
2290-
val arraysHash1 = arrays1.queryExecution.logical.canonicalized.semanticHash()
2291-
val arraysHash2 = arrays2.queryExecution.logical.canonicalized.semanticHash()
2292-
val arraysHash3 = arrays3.queryExecution.logical.canonicalized.semanticHash()
2293-
assert(arraysHash1 == arraysHash2)
2294-
assert(arraysHash1 != arraysHash3)
2295-
}
2296-
22972269
test("SPARK-24313: access map with binary keys") {
22982270
val mapWithBinaryKey = map(lit(Array[Byte](1.toByte)), lit(1))
22992271
checkAnswer(spark.range(1).select(mapWithBinaryKey.getItem(Array[Byte](1.toByte))), Row(1))

0 commit comments

Comments
 (0)