
Commit bb0ddc2

liancheng authored and pdeyhim committed
[SPARK-2094][SQL] "Exactly once" semantics for DDL and command statements
## Related JIRA issues

- Main issue:
  - [SPARK-2094](https://issues.apache.org/jira/browse/SPARK-2094): Ensure exactly once semantics for DDL/Commands
- Issues resolved as dependencies:
  - [SPARK-2081](https://issues.apache.org/jira/browse/SPARK-2081): Undefine output() from the abstract class Command and implement it in concrete subclasses
  - [SPARK-2128](https://issues.apache.org/jira/browse/SPARK-2128): No plan for DESCRIBE
  - [SPARK-1852](https://issues.apache.org/jira/browse/SPARK-1852): SparkSQL Queries with Sorts run before the user asks them to
- Other related issue:
  - [SPARK-2129](https://issues.apache.org/jira/browse/SPARK-2129): NPE thrown while lookup a view

    Two test cases, `join_view` and `mergejoin_mixed`, within the `HiveCompatibilitySuite` are removed from the whitelist to work around this issue.

## PR Overview

This PR defines physical plans for DDL statements and commands and wraps their side effects in a lazy field `PhysicalCommand.sideEffectResult`, so that they are executed eagerly and exactly once. As a positive side effect, DDL statements and commands can now be turned into proper `SchemaRDD`s, letting users query the execution results.

This PR defines schemas for the following DDL/commands:

- EXPLAIN command
  - `plan`: String, the plan explanation
- SET command
  - `key`: String, the key(s) of the property (or properties) being set or queried
  - `value`: String, the value(s) of the property (or properties) being queried
- Other Hive native command
  - `result`: String, execution result returned by Hive

**NOTE**: We should refine schemas for different native commands by defining physical plans for them in the future.

## Examples

### EXPLAIN command

Taking the "EXPLAIN" command as an example, executing the command yields a `SchemaRDD` at the same time; we can then query the `plan` field with the schema DSL:

```
scala> loadTestTable("src")
...

scala> val q0 = hql("EXPLAIN SELECT key, COUNT(*) FROM src GROUP BY key")
...
q0: org.apache.spark.sql.SchemaRDD =
SchemaRDD[0] at RDD at SchemaRDD.scala:98
== Query Plan ==
ExplainCommandPhysical [plan#11:0]
 Aggregate false, [key#4], [key#4,SUM(PartialCount#6L) AS c_1#2L]
  Exchange (HashPartitioning [key#4:0], 200)
   Exchange (HashPartitioning [key#4:0], 200)
    Aggregate true, [key#4], [key#4,COUNT(1) AS PartialCount#6L]
     HiveTableScan [key#4], (MetastoreRelation default, src, None), None

scala> q0.select('plan).collect()
...
[ExplainCommandPhysical [plan#24:0]
 Aggregate false, [key#17], [key#17,SUM(PartialCount#19L) AS c_1#2L]
  Exchange (HashPartitioning [key#17:0], 200)
   Exchange (HashPartitioning [key#17:0], 200)
    Aggregate true, [key#17], [key#17,COUNT(1) AS PartialCount#19L]
     HiveTableScan [key#17], (MetastoreRelation default, src, None), None]

scala>
```

### SET command

In this example we query all the properties set in `SQLConf`, register the result as a table, and then query the table with HiveQL:

```
scala> val q1 = hql("SET")
...
q1: org.apache.spark.sql.SchemaRDD =
SchemaRDD[7] at RDD at SchemaRDD.scala:98
== Query Plan ==
<SET command: executed by Hive, and noted by SQLContext>

scala> q1.registerAsTable("properties")

scala> hql("SELECT key, value FROM properties ORDER BY key LIMIT 10").foreach(println)
...
== Query Plan ==
TakeOrdered 10, [key#51:0 ASC]
 Project [key#51:0,value#52:1]
  SetCommandPhysical None, None, [key#55:0,value#56:1]), which has no missing parents
14/06/12 12:19:27 INFO scheduler.DAGScheduler: Submitting 1 missing tasks from Stage 5 (SchemaRDD[21] at RDD at SchemaRDD.scala:98
== Query Plan ==
TakeOrdered 10, [key#51:0 ASC]
 Project [key#51:0,value#52:1]
  SetCommandPhysical None, None, [key#55:0,value#56:1])
...
[datanucleus.autoCreateSchema,true]
[datanucleus.autoStartMechanismMode,checked]
[datanucleus.cache.level2,false]
[datanucleus.cache.level2.type,none]
[datanucleus.connectionPoolingType,BONECP]
[datanucleus.fixedDatastore,false]
[datanucleus.identifierFactory,datanucleus1]
[datanucleus.plugin.pluginRegistryBundleCheck,LOG]
[datanucleus.rdbms.useLegacyNativeValueStrategy,true]
[datanucleus.storeManagerType,rdbms]

scala>
```

### "Exactly once" semantics

Finally, an example of the "exactly once" semantics:

```
scala> val q2 = hql("CREATE TABLE t1(key INT, value STRING)")
...
q2: org.apache.spark.sql.SchemaRDD =
SchemaRDD[28] at RDD at SchemaRDD.scala:98
== Query Plan ==
<Native command: executed by Hive>

scala> table("t1")
...
res9: org.apache.spark.sql.SchemaRDD =
SchemaRDD[32] at RDD at SchemaRDD.scala:98
== Query Plan ==
HiveTableScan [key#58,value#59], (MetastoreRelation default, t1, None), None

scala> q2.collect()
...
res10: Array[org.apache.spark.sql.Row] = Array([])

scala>
```

As we can see, the "CREATE TABLE" command is executed eagerly right after the `SchemaRDD` is created, and referencing the `SchemaRDD` again won't trigger a duplicated execution.

Author: Cheng Lian <[email protected]>

Closes apache#1071 from liancheng/exactlyOnceCommand and squashes the following commits:

d005b03 [Cheng Lian] Made "SET key=value" returns the newly set key value pair
f6c7715 [Cheng Lian] Added test cases for DDL/command statement RDDs
1d00937 [Cheng Lian] Makes SchemaRDD DSLs work for DDL/command statement RDDs
5c7e680 [Cheng Lian] Bug fix: wrong type used in pattern matching
48aa2e5 [Cheng Lian] Refined SQLContext.emptyResult as an empty RDD[Row]
cc64f32 [Cheng Lian] Renamed physical plan classes for DDL/commands
74789c1 [Cheng Lian] Fixed failing test cases
0ad343a [Cheng Lian] Added physical plan for DDL and commands to ensure the "exactly once" semantics
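The "exactly once" behavior described above ultimately rests on Scala's `lazy val` semantics: the initializer runs at most once, on first access, and its result is cached thereafter. Below is a minimal, self-contained sketch of that pattern (toy names, not code from this patch) showing why wrapping a side effect in a lazy field makes repeated execution safe:

```scala
// Minimal sketch of the exactly-once pattern behind sideEffectResult:
// the side effect lives in a lazy val, so it runs on first access and never again.
trait ToyCommand {
  // Concrete commands override this with their side effect plus its result rows.
  protected lazy val sideEffectResult: Seq[Any] = Seq.empty
}

class CreateTable(name: String) extends ToyCommand {
  override protected lazy val sideEffectResult: Seq[Any] = {
    println(s"creating table $name") // the side effect, evaluated exactly once
    Seq.empty
  }

  // Every "execution" path goes through the lazy val.
  def execute(): Seq[Any] = sideEffectResult
}

object Demo extends App {
  val cmd = new CreateTable("t1")
  cmd.execute() // prints "creating table t1"
  cmd.execute() // prints nothing: the lazy val is already initialized
}
```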
1 parent fa9a017 commit bb0ddc2

File tree

15 files changed: +251 -167 lines changed


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala

Lines changed: 10 additions & 8 deletions
```diff
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.plans.logical
 
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Attribute}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, BoundReference}
 import org.apache.spark.sql.catalyst.types.StringType
 
 /**
@@ -26,35 +26,37 @@ import org.apache.spark.sql.catalyst.types.StringType
  */
 abstract class Command extends LeafNode {
   self: Product =>
-  def output: Seq[Attribute] = Seq.empty // TODO: SPARK-2081 should fix this
+  def output: Seq[Attribute] = Seq.empty
 }
 
 /**
  * Returned for commands supported by a given parser, but not catalyst. In general these are DDL
  * commands that are passed directly to another system.
  */
-case class NativeCommand(cmd: String) extends Command
+case class NativeCommand(cmd: String) extends Command {
+  override def output =
+    Seq(BoundReference(0, AttributeReference("result", StringType, nullable = false)()))
+}
 
 /**
  * Commands of the form "SET (key) (= value)".
  */
 case class SetCommand(key: Option[String], value: Option[String]) extends Command {
   override def output = Seq(
-    AttributeReference("key", StringType, nullable = false)(),
-    AttributeReference("value", StringType, nullable = false)()
-  )
+    BoundReference(0, AttributeReference("key", StringType, nullable = false)()),
+    BoundReference(1, AttributeReference("value", StringType, nullable = false)()))
 }
 
 /**
  * Returned by a parser when the users only wants to see what query plan would be executed, without
  * actually performing the execution.
  */
 case class ExplainCommand(plan: LogicalPlan) extends Command {
-  override def output = Seq(AttributeReference("plan", StringType, nullable = false)())
+  override def output =
+    Seq(BoundReference(0, AttributeReference("plan", StringType, nullable = false)()))
 }
 
 /**
  * Returned for the "CACHE TABLE tableName" and "UNCACHE TABLE tableName" command.
  */
 case class CacheCommand(tableName: String, doCache: Boolean) extends Command
-
```

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -161,7 +161,7 @@ class FilterPushdownSuite extends OptimizerTest {
 
     comparePlans(optimized, correctAnswer)
   }
-  
+
   test("joins: push down left outer join #1") {
     val x = testRelation.subquery('x)
     val y = testRelation.subquery('y)
```

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 5 additions & 34 deletions
```diff
@@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.{ScalaReflection, dsl}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.catalyst.optimizer.Optimizer
-import org.apache.spark.sql.catalyst.plans.logical.{SetCommand, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 
 import org.apache.spark.sql.columnar.InMemoryRelation
@@ -147,14 +147,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
    *
    * @group userf
    */
-  def sql(sqlText: String): SchemaRDD = {
-    val result = new SchemaRDD(this, parseSql(sqlText))
-    // We force query optimization to happen right away instead of letting it happen lazily like
-    // when using the query DSL. This is so DDL commands behave as expected. This is only
-    // generates the RDD lineage for DML queries, but do not perform any execution.
-    result.queryExecution.toRdd
-    result
-  }
+  def sql(sqlText: String): SchemaRDD = new SchemaRDD(this, parseSql(sqlText))
 
   /** Returns the specified table as a SchemaRDD */
   def table(tableName: String): SchemaRDD =
@@ -259,8 +252,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
   protected[sql] val planner = new SparkPlanner
 
   @transient
-  protected[sql] lazy val emptyResult =
-    sparkContext.parallelize(Seq(new GenericRow(Array[Any]()): Row), 1)
+  protected[sql] lazy val emptyResult = sparkContext.parallelize(Seq.empty[Row], 1)
 
   /**
    * Prepares a planned SparkPlan for execution by binding references to specific ordinals, and
@@ -280,35 +272,14 @@ class SQLContext(@transient val sparkContext: SparkContext)
   protected abstract class QueryExecution {
     def logical: LogicalPlan
 
-    def eagerlyProcess(plan: LogicalPlan): RDD[Row] = plan match {
-      case SetCommand(key, value) =>
-        // Only this case needs to be executed eagerly. The other cases will
-        // be taken care of when the actual results are being extracted.
-        // In the case of HiveContext, sqlConf is overridden to also pass the
-        // pair into its HiveConf.
-        if (key.isDefined && value.isDefined) {
-          set(key.get, value.get)
-        }
-        // It doesn't matter what we return here, since this is only used
-        // to force the evaluation to happen eagerly. To query the results,
-        // one must use SchemaRDD operations to extract them.
-        emptyResult
-      case _ => executedPlan.execute()
-    }
-
     lazy val analyzed = analyzer(logical)
     lazy val optimizedPlan = optimizer(analyzed)
     // TODO: Don't just pick the first one...
    lazy val sparkPlan = planner(optimizedPlan).next()
     lazy val executedPlan: SparkPlan = prepareForExecution(sparkPlan)
 
     /** Internal version of the RDD. Avoids copies and has no schema */
-    lazy val toRdd: RDD[Row] = {
-      logical match {
-        case s: SetCommand => eagerlyProcess(s)
-        case _ => executedPlan.execute()
-      }
-    }
+    lazy val toRdd: RDD[Row] = executedPlan.execute()
 
     protected def stringOrError[A](f: => A): String =
       try f.toString catch { case e: Throwable => e.toString }
@@ -330,7 +301,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
    * TODO: We only support primitive types, add support for nested types.
    */
   private[sql] def inferSchema(rdd: RDD[Map[String, _]]): SchemaRDD = {
-    val schema = rdd.first.map { case (fieldName, obj) =>
+    val schema = rdd.first().map { case (fieldName, obj) =>
       val dataType = obj.getClass match {
         case c: Class[_] if c == classOf[java.lang.String] => StringType
         case c: Class[_] if c == classOf[java.lang.Integer] => IntegerType
```

sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -97,7 +97,7 @@ import java.util.{Map => JMap}
 @AlphaComponent
 class SchemaRDD(
     @transient val sqlContext: SQLContext,
-    @transient protected[spark] val logicalPlan: LogicalPlan)
+    @transient val baseLogicalPlan: LogicalPlan)
   extends RDD[Row](sqlContext.sparkContext, Nil) with SchemaRDDLike {
 
   def baseSchemaRDD = this
```

sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala

Lines changed: 13 additions & 2 deletions
```diff
@@ -20,13 +20,14 @@ package org.apache.spark.sql
 import org.apache.spark.annotation.{DeveloperApi, Experimental}
 import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.execution.SparkLogicalPlan
 
 /**
  * Contains functions that are shared between all SchemaRDD types (i.e., Scala, Java)
  */
 private[sql] trait SchemaRDDLike {
   @transient val sqlContext: SQLContext
-  @transient protected[spark] val logicalPlan: LogicalPlan
+  @transient val baseLogicalPlan: LogicalPlan
 
   private[sql] def baseSchemaRDD: SchemaRDD
 
@@ -48,7 +49,17 @@ private[sql] trait SchemaRDDLike {
    */
   @transient
   @DeveloperApi
-  lazy val queryExecution = sqlContext.executePlan(logicalPlan)
+  lazy val queryExecution = sqlContext.executePlan(baseLogicalPlan)
+
+  @transient protected[spark] val logicalPlan: LogicalPlan = baseLogicalPlan match {
+    // For various commands (like DDL) and queries with side effects, we force query optimization to
+    // happen right away to let these side effects take place eagerly.
+    case _: Command | _: InsertIntoTable | _: InsertIntoCreatedTable | _: WriteToFile =>
+      queryExecution.toRdd
+      SparkLogicalPlan(queryExecution.executedPlan)
+    case _ =>
+      baseLogicalPlan
+  }
 
   override def toString =
     s"""${super.toString}
```

sql/core/src/main/scala/org/apache/spark/sql/api/java/JavaSchemaRDD.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -37,7 +37,7 @@ import org.apache.spark.storage.StorageLevel
  */
 class JavaSchemaRDD(
     @transient val sqlContext: SQLContext,
-    @transient protected[spark] val logicalPlan: LogicalPlan)
+    @transient val baseLogicalPlan: LogicalPlan)
   extends JavaRDDLike[Row, JavaRDD[Row]]
   with SchemaRDDLike {
```

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

Lines changed: 5 additions & 6 deletions
```diff
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.execution
 
-import org.apache.spark.sql.{SQLConf, SQLContext, execution}
+import org.apache.spark.sql.{SQLContext, execution}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.planning._
 import org.apache.spark.sql.catalyst.plans._
@@ -157,7 +157,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
         InsertIntoParquetTable(relation, planLater(child), overwrite=true)(sparkContext) :: Nil
       case logical.InsertIntoTable(table: ParquetRelation, partition, child, overwrite) =>
         InsertIntoParquetTable(table, planLater(child), overwrite)(sparkContext) :: Nil
-      case PhysicalOperation(projectList, filters: Seq[Expression], relation: ParquetRelation) => {
+      case PhysicalOperation(projectList, filters: Seq[Expression], relation: ParquetRelation) =>
         val prunePushedDownFilters =
           if (sparkContext.conf.getBoolean(ParquetFilters.PARQUET_FILTER_PUSHDOWN_ENABLED, true)) {
             (filters: Seq[Expression]) => {
@@ -186,7 +186,6 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
           filters,
           prunePushedDownFilters,
           ParquetTableScan(_, relation, filters)(sparkContext)) :: Nil
-      }
 
       case _ => Nil
     }
@@ -250,12 +249,12 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
   case class CommandStrategy(context: SQLContext) extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case logical.SetCommand(key, value) =>
-        Seq(execution.SetCommandPhysical(key, value, plan.output)(context))
+        Seq(execution.SetCommand(key, value, plan.output)(context))
       case logical.ExplainCommand(child) =>
        val executedPlan = context.executePlan(child).executedPlan
-        Seq(execution.ExplainCommandPhysical(executedPlan, plan.output)(context))
+        Seq(execution.ExplainCommand(executedPlan, plan.output)(context))
       case logical.CacheCommand(tableName, cache) =>
-        Seq(execution.CacheCommandPhysical(tableName, cache)(context))
+        Seq(execution.CacheCommand(tableName, cache)(context))
       case _ => Nil
     }
   }
```

sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala

Lines changed: 53 additions & 28 deletions
```diff
@@ -22,45 +22,69 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{SQLContext, Row}
 import org.apache.spark.sql.catalyst.expressions.{GenericRow, Attribute}
 
+trait Command {
+  /**
+   * A concrete command should override this lazy field to wrap up any side effects caused by the
+   * command or any other computation that should be evaluated exactly once. The value of this field
+   * can be used as the contents of the corresponding RDD generated from the physical plan of this
+   * command.
+   *
+   * The `execute()` method of all the physical command classes should reference `sideEffectResult`
+   * so that the command can be executed eagerly right after the command query is created.
+   */
+  protected[sql] lazy val sideEffectResult: Seq[Any] = Seq.empty[Any]
+}
+
 /**
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class SetCommandPhysical(key: Option[String], value: Option[String], output: Seq[Attribute])
-                             (@transient context: SQLContext) extends LeafNode {
-  def execute(): RDD[Row] = (key, value) match {
-    // Set value for key k; the action itself would
-    // have been performed in QueryExecution eagerly.
-    case (Some(k), Some(v)) => context.emptyResult
+case class SetCommand(
+    key: Option[String], value: Option[String], output: Seq[Attribute])(
+    @transient context: SQLContext)
+  extends LeafNode with Command {
+
+  override protected[sql] lazy val sideEffectResult: Seq[(String, String)] = (key, value) match {
+    // Set value for key k.
+    case (Some(k), Some(v)) =>
+      context.set(k, v)
+      Array(k -> v)
+
     // Query the value bound to key k.
-    case (Some(k), None) =>
-      val resultString = context.getOption(k) match {
-        case Some(v) => s"$k=$v"
-        case None => s"$k is undefined"
-      }
-      context.sparkContext.parallelize(Seq(new GenericRow(Array[Any](resultString))), 1)
+    case (Some(k), _) =>
+      Array(k -> context.getOption(k).getOrElse("<undefined>"))
+
     // Query all key-value pairs that are set in the SQLConf of the context.
     case (None, None) =>
-      val pairs = context.getAll
-      val rows = pairs.map { case (k, v) =>
-        new GenericRow(Array[Any](s"$k=$v"))
-      }.toSeq
-      // Assume config parameters can fit into one split (machine) ;)
-      context.sparkContext.parallelize(rows, 1)
-    // The only other case is invalid semantics and is impossible.
-    case _ => context.emptyResult
+      context.getAll
+
+    case _ =>
+      throw new IllegalArgumentException()
   }
+
+  def execute(): RDD[Row] = {
+    val rows = sideEffectResult.map { case (k, v) => new GenericRow(Array[Any](k, v)) }
+    context.sparkContext.parallelize(rows, 1)
+  }
+
+  override def otherCopyArgs = context :: Nil
 }
 
 /**
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute])
-                                 (@transient context: SQLContext) extends UnaryNode {
+case class ExplainCommand(
+    child: SparkPlan, output: Seq[Attribute])(
+    @transient context: SQLContext)
+  extends UnaryNode with Command {
+
+  // Actually "EXPLAIN" command doesn't cause any side effect.
+  override protected[sql] lazy val sideEffectResult: Seq[String] = this.toString.split("\n")
+
   def execute(): RDD[Row] = {
-    val planString = new GenericRow(Array[Any](child.toString))
-    context.sparkContext.parallelize(Seq(planString))
+    val explanation = sideEffectResult.mkString("\n")
+    context.sparkContext.parallelize(Seq(new GenericRow(Array[Any](explanation))), 1)
   }
 
   override def otherCopyArgs = context :: Nil
@@ -70,19 +94,20 @@ case class ExplainCommandPhysical(child: SparkPlan, output: Seq[Attribute])
  * :: DeveloperApi ::
  */
 @DeveloperApi
-case class CacheCommandPhysical(tableName: String, doCache: Boolean)(@transient context: SQLContext)
-  extends LeafNode {
+case class CacheCommand(tableName: String, doCache: Boolean)(@transient context: SQLContext)
+  extends LeafNode with Command {
 
-  lazy val commandSideEffect = {
+  override protected[sql] lazy val sideEffectResult = {
     if (doCache) {
       context.cacheTable(tableName)
     } else {
       context.uncacheTable(tableName)
     }
+    Seq.empty[Any]
  }
 
   override def execute(): RDD[Row] = {
-    commandSideEffect
+    sideEffectResult
     context.emptyResult
   }
 
```
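For illustration, a hypothetical additional physical command written against the `Command` trait from this diff might look like the sketch below. `ToyRefreshCommand`, its output rows, and its side effect are made up; only the `sideEffectResult`/`execute()` wiring mirrors the pattern above, and the snippet assumes the same imports and package as the surrounding file:

```scala
// Hypothetical example of a new physical command built on the Command trait above.
// The side effect is wrapped in sideEffectResult, so it runs exactly once; execute()
// merely replays the cached rows.
case class ToyRefreshCommand(tableName: String, output: Seq[Attribute])(
    @transient context: SQLContext)
  extends LeafNode with Command {

  override protected[sql] lazy val sideEffectResult: Seq[String] = {
    // Imagine some side effect here, e.g. invalidating cached metadata for tableName.
    Seq(s"refreshed $tableName")
  }

  def execute(): RDD[Row] = {
    val rows = sideEffectResult.map(msg => new GenericRow(Array[Any](msg)))
    context.sparkContext.parallelize(rows, 1)
  }

  override def otherCopyArgs = context :: Nil
}
```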