Merge pull request apache#91 from mesosphere/add-r-docker-tests

susanxhuynh · web-flow · commit a409460c9d2c · 2016-11-09T13:12:23.000-08:00
Add R to docker image and tests
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -35,6 +35,10 @@ RUN apt-get update && \
 RUN add-apt-repository ppa:openjdk-r/ppa
 RUN apt-get update && \
     apt-get install -y openjdk-8-jdk curl
+# Refresh the package index in the same layer as the install so that a cached
+# "apt-get update" layer can never leave r-base resolving against stale lists.
+RUN apt-get update && \
+    apt-get install -y r-base
 
 ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
 ENV MESOS_NATIVE_JAVA_LIBRARY /usr/lib/libmesos.so
diff --git a/tests/jobs/dataframe.R b/tests/jobs/dataframe.R
@@ -0,0 +1,64 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+library(SparkR)
+
+# Resolve SPARK_HOME up front. Sys.getenv() returns "" for an unset variable,
+# and file.path("", ...) would then silently point at "/examples/...".
+sparkHome <- Sys.getenv("SPARK_HOME")
+if (!nzchar(sparkHome)) {
+  stop("SPARK_HOME is not set; cannot locate the people.json sample data",
+       call. = FALSE)
+}
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-DataFrame-example")
+
+# Create a simple local data.frame
+localDF <- data.frame(name = c("John", "Smith", "Sarah"), age = c(19, 23, 18))
+
+# Convert local data frame to a SparkDataFrame
+df <- createDataFrame(localDF)
+
+# Print its schema
+printSchema(df)
+# root
+#  |-- name: string (nullable = true)
+#  |-- age: double (nullable = true)
+
+# Create a DataFrame from the sample JSON file located under SPARK_HOME
+path <- file.path(sparkHome, "examples/src/main/resources/people.json")
+peopleDF <- read.json(path)
+printSchema(peopleDF)
+# root
+#  |-- age: long (nullable = true)
+#  |-- name: string (nullable = true)
+
+# Register this DataFrame as a temporary view so it can be queried by name.
+createOrReplaceTempView(peopleDF, "people")
+
+# SQL statements can be run by using the sql method
+teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+
+# Call collect to get a local data.frame
+teenagersLocalDF <- collect(teenagers)
+
+# Print the teenagers in our dataset
+print(teenagersLocalDF)
+
+# Stop the SparkSession now
+sparkR.session.stop()
diff --git a/tests/test.py b/tests/test.py
@@ -13,17 +13,24 @@
 import shakedown
 
 
-def upload_file(file_path):
-    conn = S3Connection(os.environ['AWS_ACCESS_KEY_ID'], os.environ['AWS_SECRET_ACCESS_KEY'])
-    bucket = conn.get_bucket(os.environ['S3_BUCKET'])
-    basename = os.path.basename(file_path)
-
+def get_content_type(basename):  # map a job file's extension to the Content-Type stored on its S3 key
     if basename.endswith('.jar'):
         content_type = 'application/java-archive'
     elif basename.endswith('.py'):
         content_type = 'application/x-python'
+    elif basename.endswith('.R'):  # case-sensitive: a lowercase '.r' falls through to the ValueError
+        content_type = 'application/R'
     else:
-        raise ValueError("Unexpected file type: {}. Expected .jar or .py file.".format(basename))
+        raise ValueError("Unexpected file type: {}. Expected .jar, .py, or .R file.".format(basename))
+    return content_type  # only reached for a recognized extension
+
+
+def upload_file(file_path):
+    conn = S3Connection(os.environ['AWS_ACCESS_KEY_ID'], os.environ['AWS_SECRET_ACCESS_KEY'])
+    bucket = conn.get_bucket(os.environ['S3_BUCKET'])
+    basename = os.path.basename(file_path)
+
+    content_type = get_content_type(basename)
 
     key = Key(bucket, '{}/{}'.format(os.environ['S3_PREFIX'], basename))
     key.metadata = {'Content-Type': content_type}
@@ -96,6 +103,12 @@ def main():
         "Pi is roughly 3",
         py_file_path=py_file_path)
 
+    # TODO: enable R test when R is enabled in Spark (2.1)
+    #r_script_path = os.path.join(script_dir, 'jobs', 'dataframe.R')
+    #run_tests(r_script_path,
+    #    '',
+    #    "1 Justin")
+
 
 if __name__ == '__main__':
     main()