Skip to content

Commit a409460

Browse files
authored
Merge pull request apache#91 from mesosphere/add-r-docker-tests
Add R to docker image and tests
2 parents 48b08bf + a1b7fd6 commit a409460

File tree

3 files changed

+76
-6
lines changed

3 files changed

+76
-6
lines changed

docker/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ RUN apt-get update && \
3535
RUN add-apt-repository ppa:openjdk-r/ppa
3636
RUN apt-get update && \
3737
apt-get install -y openjdk-8-jdk curl
38+
RUN apt-get install -y r-base
3839

3940
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
4041
ENV MESOS_NATIVE_JAVA_LIBRARY /usr/lib/libmesos.so

tests/jobs/dataframe.R

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

library(SparkR)

# Start a SparkSession for this example job
sparkR.session(appName = "SparkR-DataFrame-example")

# Build a small in-memory data.frame to work with
local_df <- data.frame(name = c("John", "Smith", "Sarah"), age = c(19, 23, 18))

# Distribute it to the cluster as a SparkDataFrame
spark_df <- createDataFrame(local_df)

# Inspect the inferred schema
printSchema(spark_df)
# root
# |-- name: string (nullable = true)
# |-- age: double (nullable = true)

# Load a second DataFrame from the JSON example data shipped with Spark
people_path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json")
people_df <- read.json(people_path)
printSchema(people_df)
# root
# |-- age: long (nullable = true)
# |-- name: string (nullable = true)

# Expose the DataFrame to SQL as a temporary view
createOrReplaceTempView(people_df, "people")

# Query the view with a SQL statement
teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")

# Bring the result back to the driver as a local data.frame
teenagers_local <- collect(teenagers)

# Show the teenagers found in the dataset
print(teenagers_local)

# Shut the SparkSession down
sparkR.session.stop()

tests/test.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,24 @@
1313
import shakedown
1414

1515

16-
def upload_file(file_path):
17-
conn = S3Connection(os.environ['AWS_ACCESS_KEY_ID'], os.environ['AWS_SECRET_ACCESS_KEY'])
18-
bucket = conn.get_bucket(os.environ['S3_BUCKET'])
19-
basename = os.path.basename(file_path)
20-
def get_content_type(basename):
    """Return the MIME type to attach to an S3 upload for ``basename``.

    Only the artifact types the test suite uploads are supported:
    .jar, .py, and .R files.

    Raises:
        ValueError: if ``basename`` has any other extension.
    """
    # Extension -> MIME type table; checked in insertion order, matching
    # the original .jar / .py / .R precedence.
    mime_by_extension = {
        '.jar': 'application/java-archive',
        '.py': 'application/x-python',
        '.R': 'application/R',
    }
    for extension, mime_type in mime_by_extension.items():
        if basename.endswith(extension):
            return mime_type
    raise ValueError("Unexpected file type: {}. Expected .jar, .py, or .R file.".format(basename))
26+
27+
28+
def upload_file(file_path):
29+
conn = S3Connection(os.environ['AWS_ACCESS_KEY_ID'], os.environ['AWS_SECRET_ACCESS_KEY'])
30+
bucket = conn.get_bucket(os.environ['S3_BUCKET'])
31+
basename = os.path.basename(file_path)
32+
33+
content_type = get_content_type(basename)
2734

2835
key = Key(bucket, '{}/{}'.format(os.environ['S3_PREFIX'], basename))
2936
key.metadata = {'Content-Type': content_type}
@@ -96,6 +103,12 @@ def main():
96103
"Pi is roughly 3",
97104
py_file_path=py_file_path)
98105

106+
# TODO: enable R test when R is enabled in Spark (2.1)
107+
#r_script_path = os.path.join(script_dir, 'jobs', 'dataframe.R')
108+
#run_tests(r_script_path,
109+
# '',
110+
# "1 Justin")
111+
99112

100113
if __name__ == '__main__':
101114
main()

0 commit comments

Comments
 (0)