
Commit a3bee70

Merge remote-tracking branch 'origin/master' into useIsolatedClient

2 parents: a6f5df1 + c3eb441

File tree

228 files changed: +7685 / -2641 lines


.rat-excludes

Lines changed: 3 additions & 0 deletions
@@ -30,6 +30,9 @@ spark-env.sh.template
 log4j-defaults.properties
 bootstrap-tooltip.js
 jquery-1.11.1.min.js
+d3.min.js
+dagre-d3.min.js
+graphlib-dot.min.js
 sorttable.js
 vis.min.js
 vis.min.css

LICENSE

Lines changed: 30 additions & 0 deletions
@@ -643,6 +643,36 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.

+========================================================================
+For d3 (core/src/main/resources/org/apache/spark/ui/static/d3.min.js):
+========================================================================
+
+Copyright (c) 2010-2015, Michael Bostock
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* The name Michael Bostock may not be used to endorse or promote products
+  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 ========================================================================
 For Scala Interpreter classes (all .scala files in repl/src/main/scala

R/pkg/NAMESPACE

Lines changed: 14 additions & 93 deletions
@@ -1,117 +1,36 @@
-#exportPattern("^[[:alpha:]]+")
-exportClasses("RDD")
-exportClasses("Broadcast")
-exportMethods(
-              "aggregateByKey",
-              "aggregateRDD",
-              "cache",
-              "cartesian",
-              "checkpoint",
-              "coalesce",
-              "cogroup",
-              "collect",
-              "collectAsMap",
-              "collectPartition",
-              "combineByKey",
-              "count",
-              "countByKey",
-              "countByValue",
-              "distinct",
-              "Filter",
-              "filterRDD",
-              "first",
-              "flatMap",
-              "flatMapValues",
-              "fold",
-              "foldByKey",
-              "foreach",
-              "foreachPartition",
-              "fullOuterJoin",
-              "glom",
-              "groupByKey",
-              "intersection",
-              "join",
-              "keyBy",
-              "keys",
-              "length",
-              "lapply",
-              "lapplyPartition",
-              "lapplyPartitionsWithIndex",
-              "leftOuterJoin",
-              "lookup",
-              "map",
-              "mapPartitions",
-              "mapPartitionsWithIndex",
-              "mapValues",
-              "maximum",
-              "minimum",
-              "numPartitions",
-              "partitionBy",
-              "persist",
-              "pipeRDD",
-              "reduce",
-              "reduceByKey",
-              "reduceByKeyLocally",
-              "repartition",
-              "rightOuterJoin",
-              "sampleByKey",
-              "sampleRDD",
-              "saveAsTextFile",
-              "saveAsObjectFile",
-              "sortBy",
-              "sortByKey",
-              "subtract",
-              "subtractByKey",
-              "sumRDD",
-              "take",
-              "takeOrdered",
-              "takeSample",
-              "top",
-              "unionRDD",
-              "unpersist",
-              "value",
-              "values",
-              "zipPartitions",
-              "zipRDD",
-              "zipWithIndex",
-              "zipWithUniqueId"
-              )
+# Imports from base R
+importFrom(methods, setGeneric, setMethod, setOldClass)
+useDynLib(SparkR, stringHashCode)

 # S3 methods exported
-export(
-       "textFile",
-       "objectFile",
-       "parallelize",
-       "hashCode",
-       "includePackage",
-       "broadcast",
-       "setBroadcastValue",
-       "setCheckpointDir"
-)
 export("sparkR.init")
 export("sparkR.stop")
 export("print.jobj")
-useDynLib(SparkR, stringHashCode)
-importFrom(methods, setGeneric, setMethod, setOldClass)
-
-# SparkRSQL

 exportClasses("DataFrame")

-exportMethods("columns",
+exportMethods("cache",
+              "collect",
+              "columns",
+              "count",
+              "describe",
               "distinct",
               "dtypes",
               "except",
               "explain",
               "filter",
+              "first",
               "groupBy",
               "head",
               "insertInto",
               "intersect",
               "isLocal",
+              "join",
+              "length",
               "limit",
               "orderBy",
               "names",
+              "persist",
               "printSchema",
               "registerTempTable",
               "repartition",
@@ -125,9 +44,11 @@ exportMethods("columns",
               "show",
               "showDF",
               "sortDF",
+              "take",
               "toJSON",
               "toRDD",
               "unionAll",
+              "unpersist",
               "where",
               "withColumn",
               "withColumnRenamed")

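The net effect of this NAMESPACE rewrite is that the RDD-level API becomes internal to the package while the DataFrame methods remain the public surface. A minimal sketch of what that means at the console, assuming a local build of SparkR from this branch (the master URL is illustrative):

    library(SparkR)
    sc <- sparkR.init(master = "local")

    # RDD helpers such as parallelize and numPartitions are no longer exported,
    # so `::` fails on them while `:::` (internal access) still works:
    rdd <- SparkR:::parallelize(sc, 1:100, 2L)
    # SparkR::numPartitions(rdd)   # error: 'numPartitions' is not an exported object
    SparkR:::numPartitions(rdd)    # 2

    sparkR.stop()
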
R/pkg/R/DataFrame.R

Lines changed: 38 additions & 1 deletion
@@ -167,7 +167,7 @@ setMethod("isLocal",
 setMethod("showDF",
           signature(x = "DataFrame"),
           function(x, numRows = 20) {
-            cat(callJMethod(x@sdf, "showString", numToInt(numRows)), "\n")
+            callJMethod(x@sdf, "showString", numToInt(numRows))
           })

 #' show
@@ -1276,3 +1276,40 @@ setMethod("saveAsTable",
             callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, options)
           })

+#' describe
+#'
+#' Computes statistics for numeric columns.
+#' If no columns are given, this function computes statistics for all numerical columns.
+#'
+#' @param x A DataFrame to be computed.
+#' @param col A string of name
+#' @param ... Additional expressions
+#' @return A DataFrame
+#' @rdname describe
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlCtx <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlCtx, path)
+#' describe(df)
+#' describe(df, "col1")
+#' describe(df, "col1", "col2")
+#' }
+setMethod("describe",
+          signature(x = "DataFrame", col = "character"),
+          function(x, col, ...) {
+            colList <- list(col, ...)
+            sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
+            dataFrame(sdf)
+          })
+
+#' @rdname describe
+setMethod("describe",
+          signature(x = "DataFrame"),
+          function(x) {
+            colList <- as.list(c(columns(x)))
+            sdf <- callJMethod(x@sdf, "describe", listToSeq(colList))
+            dataFrame(sdf)
+          })
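For context, this is how the new method is meant to be called, mirroring the roxygen example above; the JSON path is a placeholder, and sc/sqlCtx come from the usual init calls:

    sc <- sparkR.init()
    sqlCtx <- sparkRSQL.init(sc)
    df <- jsonFile(sqlCtx, "path/to/file.json")   # placeholder path

    describe(df)                   # count/mean/stddev/min/max for all numeric columns
    describe(df, "age")            # statistics for a single named column
    collect(describe(df, "age"))   # bring the summary back as a local R data.frame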

R/pkg/R/RDD.R

Lines changed: 5 additions & 5 deletions
@@ -797,7 +797,7 @@ setMethod("first",
 #' @aliases distinct,RDD-method
 setMethod("distinct",
           signature(x = "RDD"),
-          function(x, numPartitions = SparkR::numPartitions(x)) {
+          function(x, numPartitions = SparkR:::numPartitions(x)) {
             identical.mapped <- lapply(x, function(x) { list(x, NULL) })
             reduced <- reduceByKey(identical.mapped,
                                    function(x, y) { x },
@@ -993,7 +993,7 @@ setMethod("coalesce",
           signature(x = "RDD", numPartitions = "numeric"),
           function(x, numPartitions, shuffle = FALSE) {
             numPartitions <- numToInt(numPartitions)
-            if (shuffle || numPartitions > SparkR::numPartitions(x)) {
+            if (shuffle || numPartitions > SparkR:::numPartitions(x)) {
               func <- function(partIndex, part) {
                 set.seed(partIndex)  # partIndex as seed
                 start <- as.integer(sample(numPartitions, 1) - 1)
@@ -1078,7 +1078,7 @@ setMethod("saveAsTextFile",
 #' @aliases sortBy,RDD,RDD-method
 setMethod("sortBy",
           signature(x = "RDD", func = "function"),
-          function(x, func, ascending = TRUE, numPartitions = SparkR::numPartitions(x)) {
+          function(x, func, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) {
             values(sortByKey(keyBy(x, func), ascending, numPartitions))
           })

@@ -1552,7 +1552,7 @@ setMethod("cartesian",
 #' @aliases subtract,RDD
 setMethod("subtract",
           signature(x = "RDD", other = "RDD"),
-          function(x, other, numPartitions = SparkR::numPartitions(x)) {
+          function(x, other, numPartitions = SparkR:::numPartitions(x)) {
             mapFunction <- function(e) { list(e, NA) }
             rdd1 <- map(x, mapFunction)
             rdd2 <- map(other, mapFunction)
@@ -1583,7 +1583,7 @@ setMethod("subtract",
 #' @aliases intersection,RDD
 setMethod("intersection",
           signature(x = "RDD", other = "RDD"),
-          function(x, other, numPartitions = SparkR::numPartitions(x)) {
+          function(x, other, numPartitions = SparkR:::numPartitions(x)) {
             rdd1 <- map(x, function(v) { list(v, NA) })
             rdd2 <- map(other, function(v) { list(v, NA) })


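All five hunks are the same fix: after the NAMESPACE change above, numPartitions is no longer exported, so default arguments written as SparkR::numPartitions(x) would fail at call time. In R, pkg::name resolves only exported objects, while pkg:::name also reaches a package's internals. A quick base-R illustration of the mechanism (format_perc is just one example of a non-exported helper):

    stats::sd(c(1, 5, 9))         # works: sd is an exported object of stats

    # format_perc() is an internal stats helper (used when printing quantiles):
    # stats::format_perc(0.95)    # error: 'format_perc' is not an exported object
    stats:::format_perc(0.95)     # "95%" -- `:::` reaches non-exported objects
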
R/pkg/R/generics.R

Lines changed: 4 additions & 0 deletions
@@ -384,6 +384,10 @@ setGeneric("value", function(bcast) { standardGeneric("value") })
 #' @export
 setGeneric("columns", function(x) {standardGeneric("columns") })

+#' @rdname describe
+#' @export
+setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })
+
 #' @rdname schema
 #' @export
 setGeneric("dtypes", function(x) { standardGeneric("dtypes") })
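The generic is declared with the widest formals, function(x, col, ...), so DataFrame.R can attach both a (DataFrame, character) method and a one-argument (DataFrame) method to it; S4 treats a missing col as class "missing", which falls through to the one-argument method. A self-contained sketch of the same dispatch pattern in plain R (toy class and generic names, no SparkR required):

    library(methods)

    setClass("Frame", representation(cols = "character"))
    setGeneric("summarize", function(x, col, ...) { standardGeneric("summarize") })

    # Method used when columns are named explicitly:
    setMethod("summarize", signature(x = "Frame", col = "character"),
              function(x, col, ...) { c(col, ...) })

    # Fallback when no column is given: use them all
    setMethod("summarize", signature(x = "Frame"),
              function(x) { x@cols })

    f <- new("Frame", cols = c("age", "name"))
    summarize(f)          # "age" "name"  (one-argument method)
    summarize(f, "age")   # "age"         (character method)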

R/pkg/R/pairRDD.R

Lines changed: 2 additions & 2 deletions
@@ -739,7 +739,7 @@ setMethod("cogroup",
 #' @aliases sortByKey,RDD,RDD-method
 setMethod("sortByKey",
           signature(x = "RDD"),
-          function(x, ascending = TRUE, numPartitions = SparkR::numPartitions(x)) {
+          function(x, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) {
             rangeBounds <- list()

             if (numPartitions > 1) {
@@ -806,7 +806,7 @@ setMethod("sortByKey",
 #' @aliases subtractByKey,RDD
 setMethod("subtractByKey",
           signature(x = "RDD", other = "RDD"),
-          function(x, other, numPartitions = SparkR::numPartitions(x)) {
+          function(x, other, numPartitions = SparkR:::numPartitions(x)) {
             filterFunction <- function(elem) {
               iters <- elem[[2]]
               (length(iters[[1]]) > 0) && (length(iters[[2]]) == 0)

R/pkg/inst/tests/test_broadcast.R

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ test_that("using broadcast variable", {
   randomMatBr <- broadcast(sc, randomMat)

   useBroadcast <- function(x) {
-    sum(value(randomMatBr) * x)
+    sum(SparkR:::value(randomMatBr) * x)
   }
   actual <- collect(lapply(rrdd, useBroadcast))
   expected <- list(sum(randomMat) * 1, sum(randomMat) * 2)

R/pkg/inst/tests/test_sparkSQL.R

Lines changed: 12 additions & 1 deletion
@@ -641,7 +641,7 @@ test_that("toJSON() returns an RDD of the correct values", {

 test_that("showDF()", {
   df <- jsonFile(sqlCtx, jsonPath)
-  expect_output(showDF(df), "age name \nnull Michael\n30 Andy \n19 Justin ")
+  expect_output(showDF(df), "+----+-------+\n| age| name|\n+----+-------+\n|null|Michael|\n| 30| Andy|\n| 19| Justin|\n+----+-------+\n")
 })

 test_that("isLocal()", {
@@ -705,5 +705,16 @@ test_that("parquetFile works with multiple input paths", {
   expect_true(count(parquetDF) == count(df)*2)
 })

+test_that("describe() on a DataFrame", {
+  df <- jsonFile(sqlCtx, jsonPath)
+  stats <- describe(df, "age")
+  expect_true(collect(stats)[1, "summary"] == "count")
+  expect_true(collect(stats)[2, "age"] == 24.5)
+  expect_true(collect(stats)[3, "age"] == 5.5)
+  stats <- describe(df)
+  expect_true(collect(stats)[4, "name"] == "Andy")
+  expect_true(collect(stats)[5, "age"] == 30.0)
+})
+
 unlink(parquetPath)
 unlink(jsonPath)
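The expected values follow from the two non-null ages in the test fixture, 30 and 19 (visible in the showDF() expectation above): count = 2, mean = (30 + 19) / 2 = 24.5, and the 5.5 in row 3 matches the population (not sample) standard deviation. A plain-R sanity check:

    ages <- c(30, 19)                    # the non-null ages in the fixture
    mean(ages)                           # 24.5 -> row 2 of describe(df, "age")
    sqrt(mean((ages - mean(ages))^2))    # 5.5  -> row 3 (population std. dev.)
    sd(ages)                             # ~7.78: R's sample std. dev., not what the test expects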

R/pkg/inst/tests/test_utils.R

Lines changed: 4 additions & 1 deletion
@@ -92,7 +92,10 @@ test_that("cleanClosure on R functions", {
   }
   newF <- cleanClosure(f)
   env <- environment(newF)
-  expect_equal(length(ls(env)), 3)  # Only "g", "l" and "f". No "base", "field" or "defUse".
+  # TODO(shivaram): length(ls(env)) is 4 here for some reason and `lapply` is included in `env`.
+  # Disabling this test till we debug this.
+  #
+  # expect_equal(length(ls(env)), 3)  # Only "g", "l" and "f". No "base", "field" or "defUse".
   expect_true("g" %in% ls(env))
   expect_true("l" %in% ls(env))
   expect_true("f" %in% ls(env))
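For readers unfamiliar with the test: SparkR's cleanClosure() rebuilds a function's environment so it holds only the names the function actually references. A plain-R sketch of the invariant being asserted (not SparkR's implementation):

    # Build an environment holding exactly the names the closure needs, then
    # create the closure inside it, as cleanClosure() effectively does:
    env <- new.env(parent = baseenv())
    env$g <- function() 1
    env$l <- list(2)
    env$f <- eval(quote(function() g() + l[[1]]), env)

    ls(environment(env$f))           # "f" "g" "l"
    length(ls(environment(env$f)))   # 3 -- the count the disabled expect_equal asserted
    env$f()                          # 3 (= g() + l[[1]])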
