diff --git a/circle.yml b/circle.yml
index 27265b7067c98..0800afbbff2fc 100644
--- a/circle.yml
+++ b/circle.yml
@@ -3,10 +3,24 @@ machine:
version: oraclejdk8
post:
- sudo apt-get --assume-yes install r-base r-base-dev
- - pyenv global 2.7.11 3.4.4 #pypy-4.0.1
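+ # Install Miniconda into ${CONDA_ROOT} (kept in cache_directories below) and use conda envs
+ # for the Python versions instead of pyenv-provided interpreters.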
+ - |
+ if [[ ! -d ${CONDA_ROOT} ]]; then
+ echo "Installing Miniconda...";
+ wget --quiet https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh &&
+ bash Miniconda-latest-Linux-x86_64.sh -b -p ${CONDA_ROOT};
+ else
+ echo "Using cached Miniconda install";
+ fi
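+ # Expose the Miniconda install to pyenv as "miniconda3-latest" so its conda envs can be
+ # selected with `pyenv global` below.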
+ - ln -s ~/miniconda $(pyenv root)/versions/miniconda3-latest
+ - 'pyenv versions | grep -q miniconda3-latest/envs/python2 || $CONDA_BIN create -y -n python2 python==2.7.11 numpy'
+ - 'pyenv versions | grep -q miniconda3-latest/envs/python3 || $CONDA_BIN create -y -n python3 python==3.4.4 numpy'
+ - pyenv global miniconda3-latest/envs/python2 miniconda3-latest/envs/python3 #pypy-4.0.1
+ - pyenv rehash
environment:
TERM: dumb
R_HOME: /usr/lib/R
+ CONDA_BIN: $HOME/miniconda/bin/conda
+ CONDA_ROOT: $HOME/miniconda
checkout:
post:
@@ -17,23 +31,24 @@ checkout:
- echo "host=api.bintray.com" >> .credentials
dependencies:
- pre:
- - PYENV_VERSION=2.7.11 pip install numpy
- - PYENV_VERSION=3.4.4 pip install numpy
- #- PYENV_VERSION=pypy-4.0.1 pip install numpy
override:
- |
if [[ -d build_classes ]]; then
# Copy contents into current build directory
- rsync -a build_classes/ .
+ rsync --info=stats2,misc1,flist0 -a build_classes/ .
fi
- ./build/mvn -DskipTests -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr install
# Copy all of */target/scala_2.11/classes to build_classes/
- >
- rsync --info=progress2 -a --delete-excluded --prune-empty-dirs --exclude build_classes/ --include '**/target/scala-2.1?/***'
- --include '**/target/analysis/***' --include '**/' --exclude '*' . build_classes/
+ rsync --info=stats2,misc1,flist0 -a --delete-excluded --prune-empty-dirs --exclude build_classes/ --exclude 'target/streams'
+ --include 'target/***' --include '**/' --exclude '*' . build_classes/
+ - |
+ # Make sbt fetch all the external deps to ~/.ivy2 so it gets cached
+ ./build/sbt -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr externalDependencyClasspath
cache_directories:
- "build_classes"
+ - "build"
+ - "~/miniconda"
general:
artifacts:
@@ -46,48 +61,15 @@ test:
parallel: true
timeout: 1800
max-runtime: 14400
- - ? |
- set -euo pipefail
- version=$(git describe --tags)
-
- publish_artifacts() {
- tmp_settings="tmp-settings.xml"
- echo "<settings><servers><server>" > $tmp_settings
- echo "<id>bintray-palantir-release</id><username>$BINTRAY_USERNAME</username>" >> $tmp_settings
- echo "<password>$BINTRAY_PASSWORD</password>" >> $tmp_settings
- echo "</server></servers></settings>" >> $tmp_settings
-
- ./build/mvn versions:set -DnewVersion=$version
- ./build/mvn --settings $tmp_settings -DskipTests -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr deploy
- }
-
- make_dist() {
- dist_name="$1"
- build_flags="$2"
- shift 2
- dist_version="${version}-${dist_name}"
- file_name="spark-dist-${dist_version}.tgz"
- ./dev/make-distribution.sh --name $dist_name --tgz "$@" $build_flags
- curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -T $file_name "https://api.bintray.com/content/palantir/releases/spark/${version}/org/apache/spark/spark-dist/${dist_version}/${file_name}"
- }
-
- case $CIRCLE_NODE_INDEX in
- 0)
- publish_artifacts
- make_dist hadoop-2.8.0-palantir2 "-Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr"
- make_dist without-hadoop "-Phadoop-provided -Pkubernetes -Pmesos -Pyarn -Psparkr" --clean
- ;;
- esac
- :
- parallel: true
- timeout: 1200
deployment:
release:
tag: /[0-9]+(?:\.[0-9]+)+-palantir[0-9]+(?:-kubernetes[0-9]+)?/
commands:
+ - dev/publish.sh
- curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST https://api.bintray.com/content/palantir/releases/spark/$(git describe --tags)/publish
snapshot:
branch: master
commands:
+ - dev/publish.sh
- curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST https://api.bintray.com/content/palantir/releases/spark/$(git describe --tags)/publish
diff --git a/dev/publish.sh b/dev/publish.sh
index 0d170d950b428..bc2262d9b2a85 100755
--- a/dev/publish.sh
+++ b/dev/publish.sh
@@ -1,17 +1,31 @@
#!/usr/bin/env bash
+
set -euo pipefail
+version=$(git describe --tags)
+
+PALANTIR_FLAGS=(-Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr)
+
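+# publish_artifacts: writes the Bintray credentials into a temporary Maven settings file,
+# stamps the version from `git describe --tags`, and deploys all Maven artifacts.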
+publish_artifacts() {
+ tmp_settings="tmp-settings.xml"
+ echo "<settings><servers><server>" > $tmp_settings
+ echo "<id>bintray-palantir-release</id><username>$BINTRAY_USERNAME</username>" >> $tmp_settings
+ echo "<password>$BINTRAY_PASSWORD</password>" >> $tmp_settings
+ echo "</server></servers></settings>" >> $tmp_settings
+
+ ./build/mvn versions:set -DnewVersion=$version
+ ./build/mvn --settings $tmp_settings -DskipTests "${PALANTIR_FLAGS[@]}" deploy
+}
+
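+# make_dist <dist-name> <build-flags> [extra args for make-distribution.sh]
+# Builds a binary distribution tarball and uploads it as a spark-dist artifact to Bintray.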
+make_dist() {
+ dist_name="$1"
+ build_flags="$2"
+ shift 2
+ dist_version="${version}-${dist_name}"
+ file_name="spark-dist-${dist_version}.tgz"
+ ./dev/make-distribution.sh --name $dist_name --tgz "$@" $build_flags
+ curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -T $file_name "https://api.bintray.com/content/palantir/releases/spark/${version}/org/apache/spark/spark-dist/${dist_version}/${file_name}"
+}
-FLAGS="-Psparkr -Phive -Phive-thriftserver -Pyarn -Pmesos"
-case $CIRCLE_NODE_INDEX in
-0)
- ./build/sbt -Phadoop-2.7 -Pmesos -Pkinesis-asl -Pyarn -Phive-thriftserver -Phive publish
- ;;
-1)
- ./dev/make-distribution.sh --name without-hadoop --tgz "-Psparkr -Phadoop-provided -Pyarn -Pmesos" \
- 2>&1 > binary-release-without-hadoop.log
- ;;
-2)
- ./dev/make-distribution.sh --name hadoop2.7 --tgz "-Phadoop2.7 $FLAGS" \
- 2>&1 > binary-release-hadoop2.7.log
- ;;
-esac
+publish_artifacts
+make_dist hadoop-2.8.0-palantir2 "${PALANTIR_FLAGS[*]}"
+make_dist without-hadoop "-Phadoop-provided -Pkubernetes -Pmesos -Pyarn -Psparkr" --clean
diff --git a/dev/run-tests.py b/dev/run-tests.py
index d754fb8371612..8ce30f54faace 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -270,7 +270,7 @@ def exec_maven(mvn_args=()):
kill_zinc_on_port(zinc_port)
-def exec_sbt(sbt_args=()):
+def exec_sbt(sbt_args=(), exit_on_failure=True):
"""Will call SBT in the current directory with the list of mvn_args passed
in and returns the subprocess for any further processing"""
@@ -295,7 +295,9 @@ def exec_sbt(sbt_args=()):
retcode = sbt_proc.wait()
if retcode != 0:
- exit_from_command_with_retcode(sbt_cmd, retcode)
+ if exit_on_failure:
+ exit_from_command_with_retcode(sbt_cmd, retcode)
+ return sbt_cmd, retcode
def get_hadoop_profiles(hadoop_version):
@@ -387,8 +389,14 @@ def run_scala_tests_maven(test_profiles):
def run_scala_tests_sbt(test_modules, test_profiles):
-
- sbt_test_goals = list(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))
+ if 'CIRCLE_TEST_REPORTS' in os.environ:
+ # The test task in the circle configuration runs only the appropriate tests for the current
+ # circle node, then copies the results to CIRCLE_TEST_REPORTS.
+ # We are not worried about running only the `test_modules`, since we always run the whole
+ # suite in circle anyway.
+ sbt_test_goals = ['circle:test']
+ else:
+ sbt_test_goals = list(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))
if not sbt_test_goals:
return
@@ -400,11 +408,6 @@ def run_scala_tests_sbt(test_modules, test_profiles):
exec_sbt(profiles_and_goals)
- if 'CIRCLE_TEST_REPORTS' in os.environ:
- copy_tests_cmd = test_profiles + ["copyTestReportsToCircle"]
- print("[info] Copying SBT test reports to Circle: ", " ".join(copy_tests_cmd))
- exec_sbt(copy_tests_cmd)
-
def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
"""Function to properly execute all tests passed in as a set from the
diff --git a/project/CirclePlugin.scala b/project/CirclePlugin.scala
new file mode 100644
index 0000000000000..c1858c7153276
--- /dev/null
+++ b/project/CirclePlugin.scala
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import scala.annotation.tailrec
+
+import sbt._
+import sbt.Keys._
+import sbt.plugins.JvmPlugin
+
+//noinspection ScalaStyle
+object CirclePlugin extends AutoPlugin {
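+ // An sbt configuration (invoked as `circle:test`) that extends Test but runs only the slice
+ // of tests assigned to the current CircleCI node.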
+ lazy val Circle = config("circle").extend(Test).hide
+
+ case class ProjectTests(project: ProjectRef, tests: Seq[TestDefinition])
+
+ val circleTestsByProject = taskKey[Option[Seq[ProjectTests]]]("The tests that should be run under this circle node, if circle is set up")
+ val copyTestReportsToCircle: TaskKey[Boolean] = taskKey("Copy the test reports to circle. Expects CIRCLE_TEST_REPORTS to be defined")
+
+ override def projectConfigurations: Seq[Configuration] = List(Circle)
+
+ override def requires: Plugins = JvmPlugin
+
+ override def trigger: PluginTrigger = allRequirements
+
+ private[this] lazy val testsByProject = Def.task {
+ // Defaults.detectTests is basically the value of Keys.definedTests, but since we're
+ // overriding the latter depending on the value of this task, we can't depend on it
+ ProjectTests(thisProjectRef.value, Defaults.detectTests.value)
+ }
+
+ override def globalSettings: Seq[Def.Setting[_]] = List(
+ circleTestsByProject := {
+ if (sys.env contains "CIRCLE_NODE_INDEX") {
+ val index = sys.env("CIRCLE_NODE_INDEX").toInt
+ val total = sys.env("CIRCLE_NODE_TOTAL").toInt
+ val byProject: Seq[ProjectTests] = testsByProject.all(ScopeFilter(inAnyProject, inConfigurations(Test))).value
+
+ // need a stable sort of projects
+ val sortedProjects = byProject.sortBy(_.project.project).toList
+
+ val totalTests = sortedProjects.iterator.map(_.tests.size).sum
+ val from = index * totalTests / total
+ val to = (index + 1) * totalTests / total
+
+ // We allow a slice of [from, to) from all tests across all projects (in the order of sortedProjects)
+ // We then filter out every test that falls outside this slice.
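+ // For example, with 10 tests in total and CIRCLE_NODE_TOTAL=4, node 0 runs tests [0, 2),
+ // node 1 runs [2, 5), node 2 runs [5, 7) and node 3 runs [7, 10).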
+
+ @tailrec
+ def process(projectsLeft: List[ProjectTests], testsSoFar: Int, acc: List[ProjectTests]): List[ProjectTests] = {
+ val from1 = from - testsSoFar
+ val to1 = to - testsSoFar
+ projectsLeft match {
+ case ProjectTests(proj, tests) :: rest =>
+ val out = ProjectTests(proj, tests.iterator.zipWithIndex.collect {
+ case (td, idx) if idx >= from1 && idx < to1 => td
+ }.toList)
+ process(rest, testsSoFar + tests.size, out :: acc)
+ case _ =>
+ acc
+ }
+ }
+ Some(process(sortedProjects, 0, Nil))
+ } else {
+ None
+ }
+ }
+ )
+
+ override def projectSettings: Seq[Def.Setting[_]] = inConfig(Circle)(Defaults.testSettings ++ List(
+ // Copy over important changes of the += kind from TestSettings.settings into the Circle config
+ envVars := (envVars in Test).value,
+ javaOptions := (javaOptions in Test).value,
+ testOptions := (testOptions in Test).value,
+ resourceGenerators := (resourceGenerators in Test).value,
+ // NOTE: this is because of dependencies like:
+ // org.apache.spark:spark-tags:2.2.0-SNAPSHOT:test->test
+ // that somehow don't get resolved properly in the 'circle' ivy configuration even though it extends test.
+ // To verify, compare:
+ // > show unsafe/test:fullClasspath
+ // > show unsafe/circle:fullClasspath
+ fullClasspath := (fullClasspath in Test).value,
+
+ copyTestReportsToCircle := {
+ val log = streams.value.log
+ val reportsDir = target.value / "test-reports"
+ val circleReports = sys.env.get("CIRCLE_TEST_REPORTS")
+ val projectName = thisProjectRef.value.project
+ val `project had tests for this circle node` = definedTests.value.nonEmpty
+
+ circleReports.map { circle =>
+ if (!reportsDir.exists()) {
+ if (`project had tests for this circle node`) {
+ sys.error(s"Found no test reports from $projectName to copy to circle, " +
+ "even though there were tests defined for this node.")
+ } else {
+ // There were no tests for this node, do nothing.
+ false
+ }
+ } else {
+ IO.copyDirectory(reportsDir, file(circle) / projectName)
+ log.info(s"Copied test reports from $projectName to circle.")
+ true
+ }
+ }.getOrElse(sys.error(s"Expected CIRCLE_TEST_REPORTS to be defined."))
+ },
+
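+ // In the Circle configuration, definedTests contains only the slice of this project's tests
+ // that circleTestsByProject assigned to the current node.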
+ definedTests := {
+ val testsByProject = (circleTestsByProject in Global).value
+ .getOrElse(sys.error("We are not running in circle."))
+ val thisProj = thisProjectRef.value
+
+ testsByProject.collectFirst {
+ case ProjectTests(`thisProj`, tests) => tests
+ }.getOrElse(sys.error(s"Didn't find any tests for $thisProj in the global circleTestsByProject. " +
+ s"Only projects found: ${testsByProject.map(_.project)}"))
+ },
+
+ test := (test, copyTestReportsToCircle) { (test, copy) =>
+ test.doFinally(copy.map(_ => ()))
+ }.value
+ ))
+}
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index f351f9cfb0e87..88efb147f1d1a 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -21,6 +21,7 @@ import java.nio.file.Files
import scala.io.Source
import scala.util.Properties
import scala.collection.JavaConverters._
+import scala.collection.mutable
import scala.collection.mutable.Stack
import sbt._
@@ -85,10 +86,10 @@ object SparkBuild extends PomBuild {
import BuildCommons._
import scala.collection.mutable.Map
- val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty
+ val projectsMap: mutable.Map[String, Seq[Setting[_]]] = mutable.Map.empty
override val profiles = {
- val profiles = Properties.envOrNone("SBT_MAVEN_PROFILES") match {
+ val profiles = Properties.propOrNone("sbt.maven.profiles") orElse Properties.envOrNone("SBT_MAVEN_PROFILES") match {
case None => Seq("sbt")
case Some(v) =>
v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq
@@ -410,6 +411,10 @@ object SparkBuild extends PomBuild {
else x.settings(Seq[Setting[_]](): _*)
} ++ Seq[Project](OldDeps.project)
}
+
+ override def settings: Seq[Def.Setting[_]] = super.settings ++ inScope(Global)(List(
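+ // Cache dependency resolution results so sbt does not re-resolve the full graph on every run.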
+ updateOptions := updateOptions.value.withCachedResolution(true)
+ ))
}
object Core {
@@ -757,8 +762,6 @@ object CopyDependencies {
object TestSettings {
import BuildCommons._
- val copyTestReportsToCircle: TaskKey[Boolean] = taskKey("Copy the test reports to circle if CIRCLE_TEST_REPORTS is defined")
-
private val scalaBinaryVersion =
if (System.getProperty("scala-2.10") == "true") {
"2.10"
@@ -843,15 +846,7 @@ object TestSettings {
"org.apache.spark.util.collection"
).mkString(":"),
"-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc"
- ),
- copyTestReportsToCircle := {
- val reportsDir = target.value / "test-reports"
- val circleReports = sys.env.get("CIRCLE_TEST_REPORTS")
- circleReports.filter(_ => reportsDir.exists).exists { circle =>
- IO.copyDirectory(reportsDir, file(circle) / thisProjectRef.value.project)
- true
- }
- }
+ )
)
}