Skip to content

Commit 5f923c0

Browse files
committed
Merge pull request #15 from shivaram/sparkr-groupby-retrain
Revert workaround in SparkR to retain grouped cols
2 parents b8b87e1 + c1de670 commit 5f923c0

File tree

2 files changed

+1
-14
lines changed

2 files changed

+1
-14
lines changed

R/pkg/R/group.R

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change

@@ -103,9 +103,7 @@ setMethod("agg",
               }
             }
             jcols <- lapply(cols, function(c) { c@jc })
-            # the GroupedData.agg(col, cols*) API does not contain grouping Column
-            sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "aggWithGrouping",
-                               x@sgd, listToSeq(jcols))
+            sdf <- callJMethod(x@sgd, "agg", jcols[[1]], listToSeq(jcols[-1]))
           } else {
             stop("agg can only support Column or character")
           }

sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change

@@ -72,17 +72,6 @@ private[r] object SQLUtils {
     sqlContext.createDataFrame(rowRDD, schema)
   }

-  // A helper to include grouping columns in Agg()
-  def aggWithGrouping(gd: GroupedData, exprs: Column*): DataFrame = {
-    val aggExprs = exprs.map { col =>
-      col.expr match {
-        case expr: NamedExpression => expr
-        case expr: Expression => Alias(expr, expr.simpleString)()
-      }
-    }
-    gd.toDF(aggExprs)
-  }
-
   def dfToRowRDD(df: DataFrame): JavaRDD[Array[Byte]] = {
     df.map(r => rowToRBytes(r))
   }

0 commit comments

Comments (0)