
Commit 0b17e13
Author: aokolnychyi
Parent: 87a68bd

[SPARK-16046][DOCS] Aggregations in the Spark SQL programming guide. Improved consistency

File tree: 3 files changed (+12, -6 lines)


docs/sql-programming-guide.md

Lines changed: 2 additions & 2 deletions
Lines changed: 2 additions & 2 deletions

@@ -384,8 +384,8 @@ For example:
 
 ## Aggregations
 
-The [built-in DataFrames functions](api/scala/index.html#org.apache.spark.sql.functions$) mentioned
-before provide such common aggregations as `count()`, `countDistinct()`, `avg()`, `max()`, `min()`, etc.
+The [built-in DataFrames functions](api/scala/index.html#org.apache.spark.sql.functions$) provide common
+aggregations such as `count()`, `countDistinct()`, `avg()`, `max()`, `min()`, etc.
 While those functions are designed for DataFrames, Spark SQL also has type-safe versions for some of them in
 [Scala](api/scala/index.html#org.apache.spark.sql.expressions.scalalang.typed$) and
 [Java](api/java/org/apache/spark/sql/expressions/javalang/typed.html) to work with strongly typed Datasets.

examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedTypedAggregation.java

Lines changed: 5 additions & 3 deletions
Lines changed: 5 additions & 3 deletions

@@ -102,9 +102,11 @@ public Average reduce(Average buffer, Employee employee) {
   }
   // Merge two intermediate values
   public Average merge(Average b1, Average b2) {
-    long newSum = b1.getSum() + b2.getSum();
-    long newCount = b1.getCount() + b2.getCount();
-    return new Average(newSum, newCount);
+    long mergedSum = b1.getSum() + b2.getSum();
+    long mergedCount = b1.getCount() + b2.getCount();
+    b1.setSum(mergedSum);
+    b1.setCount(mergedCount);
+    return b1;
   }
   // Transform the output of the reduction
   public Double finish(Average reduction) {
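The change above switches `merge` from allocating a fresh `Average` to mutating and returning `b1`, the buffer Spark hands back to the aggregator. A minimal standalone sketch of that in-place merge, with `Average` reproduced as a plain bean after the example file's getters/setters (no Spark dependency; the field values below are illustrative, not from the source):

```java
// Intermediate buffer type, mirroring the Average bean used by the
// JavaUserDefinedTypedAggregation example (sum of salaries, row count).
class Average {
  private long sum;
  private long count;

  Average(long sum, long count) {
    this.sum = sum;
    this.count = count;
  }

  long getSum() { return sum; }
  long getCount() { return count; }
  void setSum(long sum) { this.sum = sum; }
  void setCount(long count) { this.count = count; }
}

class MergeSketch {
  // Fold b2's partial result into b1 in place and return the reused
  // buffer, instead of allocating a new Average per merge call.
  static Average merge(Average b1, Average b2) {
    b1.setSum(b1.getSum() + b2.getSum());
    b1.setCount(b1.getCount() + b2.getCount());
    return b1;
  }

  // Transform the final buffer into the aggregate value.
  static double finish(Average reduction) {
    return (double) reduction.getSum() / reduction.getCount();
  }

  public static void main(String[] args) {
    Average left = new Average(6000L, 2L);   // partial result, one partition
    Average right = new Average(4000L, 2L);  // partial result, another partition
    Average merged = merge(left, right);
    System.out.println(finish(merged));      // prints 2500.0
  }
}
```

Reusing `b1` is safe here because Spark owns the intermediate buffers during aggregation and does not read `b1` again after `merge` returns; the payoff is one fewer allocation per merge.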

examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedTypedAggregation.scala

Lines changed: 5 additions & 1 deletion
@@ -40,7 +40,11 @@ object UserDefinedTypedAggregation {
     buffer
   }
   // Merge two intermediate values
-  def merge(b1: Average, b2: Average): Average = Average(b1.sum + b2.sum, b1.count + b2.count)
+  def merge(b1: Average, b2: Average): Average = {
+    b1.sum += b2.sum
+    b1.count += b2.count
+    b1
+  }
   // Transform the output of the reduction
   def finish(reduction: Average): Double = reduction.sum.toDouble / reduction.count
   // Specifies the Encoder for the intermediate value type

0 commit comments