fix conflict in sparkr.md

Narine Kokhlikyan · Narine Kokhlikyan · commit 44ee864d1a78 · 2016-07-31T19:06:25.000-07:00
diff --git a/docs/sparkr.md b/docs/sparkr.md
@@ -328,23 +328,25 @@ The output of function should be a `data.frame`. Schema specifies the row format
 {% highlight r %}
 
 # Determine six waiting times with the largest eruption time in minutes.
-schema <- structType(structField("waiting", "double"), structField("max_eruption", "double"))
+schema <- structType(structField("max_eruption", "double"))
 result <- gapply(
     df,
     "waiting",
     function(key, x) {
-        y <- data.frame(key, max(x$eruptions))
+        y <- data.frame(max(x$eruptions))
     },
     schema)
+colnames(result) <- c("waiting", "max_eruption")
+
 head(collect(arrange(result, "max_eruption", decreasing = TRUE)))
 
 ##    waiting   max_eruption
-##1      64       5.100
-##2      69       5.067
-##3      71       5.033
-##4      87       5.000
-##5      63       4.933
-##6      89       4.900
+##1      96       5.100
+##2      76       5.067
+##3      77       5.033
+##4      88       5.000
+##5      86       4.933
+##6      82       4.900
 {% endhighlight %}
 </div>
 
@@ -359,19 +361,19 @@ result <- gapplyCollect(
     df,
     "waiting",
     function(key, x) {
-        y <- data.frame(key, max(x$eruptions))
-        colnames(y) <- c("waiting", "max_eruption")
-        y
+        y <- data.frame(max(x$eruptions))
     })
+colnames(result) <- c("waiting", "max_eruption")
+
 head(result[order(result$max_eruption, decreasing = TRUE), ])
 
 ##    waiting   max_eruption
-##1      64       5.100
-##2      69       5.067
-##3      71       5.033
-##4      87       5.000
-##5      63       4.933
-##6      89       4.900
+##1      96       5.100
+##2      76       5.067
+##3      77       5.033
+##4      88       5.000
+##5      86       4.933
+##6      82       4.900
 
 {% endhighlight %}
 </div>
@@ -445,61 +447,6 @@ head(result[order(result$max_eruption, decreasing = TRUE), ])
 </tr>
 </table>
 
-<<<<<<< HEAD
-<div data-lang="r"  markdown="1">
-{% highlight r %}
-
-# Determine six waiting times with the largest eruption time in minutes.
-schema <- structType(structField("max_eruption", "double"))
-result <- gapply(
-    df,
-    "waiting",
-    function(key, x) {
-        y <- data.frame(max(x$eruptions))
-    },
-    schema)
-colnames(result) <- c("waiting", "max_eruption")
-
-head(collect(arrange(result, "max_eruption", decreasing = TRUE)))
-
-##    waiting   max_eruption
-##1      96       5.100
-##2      76       5.067
-##3      77       5.033
-##4      88       5.000
-##5      86       4.933
-##6      82       4.900
-{% endhighlight %}
-</div>
-
-##### gapplyCollect
-Like `gapply`, applies a function to each partition of a `SparkDataFrame` and collect the result back to R data.frame. The output of the function should be a `data.frame`. But, the schema is not required to be passed. Note that `gapplyCollect` can fail if the output of UDF run on all the partition cannot be pulled to the driver and fit in driver memory.
-
-<div data-lang="r"  markdown="1">
-{% highlight r %}
-
-# Determine six waiting times with the largest eruption time in minutes.
-result <- gapplyCollect(
-    df,
-    "waiting",
-    function(key, x) {
-        y <- data.frame(max(x$eruptions))
-    })
-colnames(result) <- c("waiting", "max_eruption")
-
-head(result[order(result$max_eruption, decreasing = TRUE), ])
-
-##    waiting   max_eruption
-##1      96       5.100
-##2      76       5.067
-##3      77       5.033
-##4      88       5.000
-##5      86       4.933
-##6      82       4.900
-
-{% endhighlight %}
-</div>
-
 #### Run local R functions distributed using `spark.lapply`
 
 ##### spark.lapply