Skip to content

Commit 7212897

Browse files
adrian-wang authored and yhuai committed
[SPARK-6201] [SQL] promote string and do widen types for IN
huangjs Actually spark sql will first go through the analysis period, in which we do widen types and promote strings, and then optimization, where constant IN will be converted into INSET. So it turns out that we only need to fix this for IN. Author: Daoyuan Wang <[email protected]> Closes #4945 from adrian-wang/inset and squashes the following commits: 71e05cc [Daoyuan Wang] minor fix 581fa1c [Daoyuan Wang] mysql way f3f7baf [Daoyuan Wang] address comments 5eed4bc [Daoyuan Wang] promote string and do widen types for IN (cherry picked from commit c3eb441) Signed-off-by: Yin Huai <[email protected]>
1 parent f1a5caf commit 7212897

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ trait HiveTypeCoercion {
6969
val typeCoercionRules =
7070
PropagateTypes ::
7171
ConvertNaNs ::
72+
InConversion ::
7273
WidenTypes ::
7374
PromoteStrings ::
7475
DecimalPrecision ::
@@ -287,6 +288,16 @@ trait HiveTypeCoercion {
287288
}
288289
}
289290

291+
/**
292+
* Convert all expressions in in() list to the left operator type
293+
*/
294+
object InConversion extends Rule[LogicalPlan] {
295+
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
296+
case i @ In(a, b) if b.exists(_.dataType != a.dataType) =>
297+
i.makeCopy(Array(a, b.map(Cast(_, a.dataType))))
298+
}
299+
}
300+
290301
// scalastyle:off
291302
/**
292303
* Calculates and propagates precision for fixed-precision decimals. Hive has a number of

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,8 +310,8 @@ object OptimizeIn extends Rule[LogicalPlan] {
310310
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
311311
case q: LogicalPlan => q transformExpressionsDown {
312312
case In(v, list) if !list.exists(!_.isInstanceOf[Literal]) =>
313-
val hSet = list.map(e => e.eval(null))
314-
InSet(v, HashSet() ++ hSet)
313+
val hSet = list.map(e => e.eval(null))
314+
InSet(v, HashSet() ++ hSet)
315315
}
316316
}
317317
}

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,15 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
120120
Row(1, 1) :: Nil)
121121
}
122122

123+
test("SPARK-6201 IN type conversion") {
124+
jsonRDD(sparkContext.parallelize(Seq("{\"a\": \"1\"}}", "{\"a\": \"2\"}}", "{\"a\": \"3\"}}")))
125+
.registerTempTable("d")
126+
127+
checkAnswer(
128+
sql("select * from d where d.a in (1,2)"),
129+
Seq(Row("1"), Row("2")))
130+
}
131+
123132
test("SPARK-3176 Added Parser of SQL ABS()") {
124133
checkAnswer(
125134
sql("SELECT ABS(-1.3)"),

0 commit comments

Comments
 (0)