diff --git a/circle.yml b/circle.yml
index 27265b7067c98..0800afbbff2fc 100644
--- a/circle.yml
+++ b/circle.yml
@@ -3,10 +3,24 @@ machine:
     version: oraclejdk8
   post:
     - sudo apt-get --assume-yes install r-base r-base-dev
-    - pyenv global 2.7.11 3.4.4 #pypy-4.0.1
+    - |
+      if [[ ! -d ${CONDA_ROOT} ]]; then
+        echo "Installing Miniconda...";
+        wget --quiet https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh &&
+          bash Miniconda-latest-Linux-x86_64.sh -b -p ${CONDA_ROOT};
+      else
+        echo "Using cached Miniconda install";
+      fi
+    - ln -s ~/miniconda $(pyenv root)/versions/miniconda3-latest
+    - 'pyenv versions | grep -q miniconda3-latest/envs/python2 || $CONDA_BIN create -y -n python2 python==2.7.11 numpy'
+    - 'pyenv versions | grep -q miniconda3-latest/envs/python3 || $CONDA_BIN create -y -n python3 python==3.4.4 numpy'
+    - pyenv global miniconda3-latest/envs/python2 miniconda3-latest/envs/python3 #pypy-4.0.1
+    - pyenv rehash
   environment:
     TERM: dumb
     R_HOME: /usr/lib/R
+    CONDA_BIN: $HOME/miniconda/bin/conda
+    CONDA_ROOT: $HOME/miniconda
 
 checkout:
   post:
@@ -17,23 +31,24 @@ checkout:
     - echo "host=api.bintray.com" >> .credentials
 
 dependencies:
-  pre:
-    - PYENV_VERSION=2.7.11 pip install numpy
-    - PYENV_VERSION=3.4.4 pip install numpy
-    #- PYENV_VERSION=pypy-4.0.1 pip install numpy
   override:
     - |
       if [[ -d build_classes ]]; then
         # Copy contents into current build directory
-        rsync -a build_classes/ .
+        rsync --info=stats2,misc1,flist0 -a build_classes/ .
       fi
     - ./build/mvn -DskipTests -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr install
     # Copy all of */target/scala_2.11/classes to build_classes/
     - >
-      rsync --info=progress2 -a --delete-excluded --prune-empty-dirs --exclude build_classes/ --include '**/target/scala-2.1?/***'
-      --include '**/target/analysis/***' --include '**/' --exclude '*' . build_classes/
+      rsync --info=stats2,misc1,flist0 -a --delete-excluded --prune-empty-dirs --exclude build_classes/ --exclude 'target/streams'
+      --include 'target/***' --include '**/' --exclude '*' . build_classes/
+    - |
+      # Make sbt fetch all the external deps to ~/.ivy2 so it gets cached
+      ./build/sbt -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr externalDependencyClasspath
   cache_directories:
     - "build_classes"
+    - "build"
+    - "~/miniconda"
 
 general:
   artifacts:
@@ -46,48 +61,15 @@ test:
         parallel: true
         timeout: 1800
         max-runtime: 14400
-    - ? |
-        set -euo pipefail
-        version=$(git describe --tags)
-
-        publish_artifacts() {
-          tmp_settings="tmp-settings.xml"
-          echo "<settings><servers><server>" > $tmp_settings
-          echo "<id>bintray-palantir-release</id><username>$BINTRAY_USERNAME</username>" >> $tmp_settings
-          echo "<password>$BINTRAY_PASSWORD</password>" >> $tmp_settings
-          echo "</server></servers></settings>" >> $tmp_settings
-
-          ./build/mvn versions:set -DnewVersion=$version
-          ./build/mvn --settings $tmp_settings -DskipTests -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr deploy
-        }
-
-        make_dist() {
-          dist_name="$1"
-          build_flags="$2"
-          shift 2
-          dist_version="${version}-${dist_name}"
-          file_name="spark-dist-${dist_version}.tgz"
-          ./dev/make-distribution.sh --name $dist_name --tgz "$@" $build_flags
-          curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -T $file_name "https://api.bintray.com/content/palantir/releases/spark/${version}/org/apache/spark/spark-dist/${dist_version}/${file_name}"
-        }
-
-        case $CIRCLE_NODE_INDEX in
-        0)
-          publish_artifacts
-          make_dist hadoop-2.8.0-palantir2 "-Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr"
-          make_dist without-hadoop "-Phadoop-provided -Pkubernetes -Pmesos -Pyarn -Psparkr" --clean
-          ;;
-        esac
-      :
-        parallel: true
-        timeout: 1200
 
 deployment:
   release:
     tag: /[0-9]+(?:\.[0-9]+)+-palantir[0-9]+(?:-kubernetes[0-9]+)?/
     commands:
+      - dev/publish.sh
       - curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST https://api.bintray.com/content/palantir/releases/spark/$(git describe --tags)/publish
   snapshot:
     branch: master
     commands:
+      - dev/publish.sh
       - curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST https://api.bintray.com/content/palantir/releases/spark/$(git describe --tags)/publish
diff --git a/dev/publish.sh b/dev/publish.sh
index 0d170d950b428..bc2262d9b2a85 100755
--- a/dev/publish.sh
+++ b/dev/publish.sh
@@ -1,17 +1,31 @@
 #!/usr/bin/env bash
+
 set -euo pipefail
+version=$(git describe --tags)
+
+PALANTIR_FLAGS=(-Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr)
+
+publish_artifacts() {
+  tmp_settings="tmp-settings.xml"
+  echo "<settings><servers><server>" > $tmp_settings
+  echo "<id>bintray-palantir-release</id><username>$BINTRAY_USERNAME</username>" >> $tmp_settings
+  echo "<password>$BINTRAY_PASSWORD</password>" >> $tmp_settings
+  echo "</server></servers></settings>" >> $tmp_settings
+
+  ./build/mvn versions:set -DnewVersion=$version
+  ./build/mvn --settings $tmp_settings -DskipTests "${PALANTIR_FLAGS[@]}" deploy
+}
+
+make_dist() {
+  dist_name="$1"
+  build_flags="$2"
+  shift 2
+  dist_version="${version}-${dist_name}"
+  file_name="spark-dist-${dist_version}.tgz"
+  ./dev/make-distribution.sh --name $dist_name --tgz "$@" $build_flags
+  curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -T $file_name "https://api.bintray.com/content/palantir/releases/spark/${version}/org/apache/spark/spark-dist/${dist_version}/${file_name}"
+}
 
-FLAGS="-Psparkr -Phive -Phive-thriftserver -Pyarn -Pmesos"
-case $CIRCLE_NODE_INDEX in
-0)
-  ./build/sbt -Phadoop-2.7 -Pmesos -Pkinesis-asl -Pyarn -Phive-thriftserver -Phive publish
-  ;;
-1)
-  ./dev/make-distribution.sh --name without-hadoop --tgz "-Psparkr -Phadoop-provided -Pyarn -Pmesos" \
-    2>&1 > binary-release-without-hadoop.log
-  ;;
-2)
-  ./dev/make-distribution.sh --name hadoop2.7 --tgz "-Phadoop2.7 $FLAGS" \
-    2>&1 > binary-release-hadoop2.7.log
-  ;;
-esac
+publish_artifacts
+make_dist hadoop-2.8.0-palantir2 "${PALANTIR_FLAGS[*]}"
+make_dist without-hadoop "-Phadoop-provided -Pkubernetes -Pmesos -Pyarn -Psparkr" --clean
diff --git a/dev/run-tests.py b/dev/run-tests.py
index d754fb8371612..8ce30f54faace 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -270,7 +270,7 @@ def exec_maven(mvn_args=()):
     kill_zinc_on_port(zinc_port)
 
 
-def exec_sbt(sbt_args=()):
+def exec_sbt(sbt_args=(), exit_on_failure=True):
     """Will call SBT in the current directory with the list of mvn_args passed
     in and returns the subprocess for any further processing"""
 
@@ -295,7 +295,9 @@ def exec_sbt(sbt_args=()):
     retcode = sbt_proc.wait()
 
     if retcode != 0:
-        exit_from_command_with_retcode(sbt_cmd, retcode)
+        if exit_on_failure:
+            exit_from_command_with_retcode(sbt_cmd, retcode)
+    return sbt_cmd, retcode
 
 
 def get_hadoop_profiles(hadoop_version):
@@ -387,8 +389,14 @@ def run_scala_tests_maven(test_profiles):
 
 
 def run_scala_tests_sbt(test_modules, test_profiles):
-
-    sbt_test_goals = list(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))
+    if 'CIRCLE_TEST_REPORTS' in os.environ:
+        # The test task in the circle configuration runs only the appropriate test for the current
+        # circle node, then copies the results to CIRCLE_TEST_REPORTS.
+        # We are not worried about running only the `test_modules`, since we always run the whole
+        # suite in circle anyway.
+        sbt_test_goals = ['circle:test']
+    else:
+        sbt_test_goals = list(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))
 
     if not sbt_test_goals:
         return
@@ -400,11 +408,6 @@ def run_scala_tests_sbt(test_modules, test_profiles):
 
     exec_sbt(profiles_and_goals)
 
-    if 'CIRCLE_TEST_REPORTS' in os.environ:
-        copy_tests_cmd = test_profiles + ["copyTestReportsToCircle"]
-        print("[info] Copying SBT test reports to Circle: ", " ".join(copy_tests_cmd))
-        exec_sbt(copy_tests_cmd)
-
 
 def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
     """Function to properly execute all tests passed in as a set from the
diff --git a/project/CirclePlugin.scala b/project/CirclePlugin.scala
new file mode 100644
index 0000000000000..c1858c7153276
--- /dev/null
+++ b/project/CirclePlugin.scala
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import scala.annotation.tailrec
+
+import sbt._
+import sbt.Keys._
+import sbt.plugins.JvmPlugin
+
+//noinspection ScalaStyle
+object CirclePlugin extends AutoPlugin {
+  lazy val Circle = config("circle").extend(Test).hide
+
+  case class ProjectTests(project: ProjectRef, tests: Seq[TestDefinition])
+
+  val circleTestsByProject = taskKey[Option[Seq[ProjectTests]]]("The tests that should be run under this circle node, if circle is set up")
+  val copyTestReportsToCircle: TaskKey[Boolean] = taskKey("Copy the test reports to circle. Expects CIRCLE_TEST_REPORTS to be defined")
+
+  override def projectConfigurations: Seq[Configuration] = List(Circle)
+
+  override def requires: Plugins = JvmPlugin
+
+  override def trigger: PluginTrigger = allRequirements
+
+  private[this] lazy val testsByProject = Def.task {
+    // Defaults.detectTests is basically the value of Keys.definedTests, but since we're
+    // overriding the latter depending on the value of this task, we can't depend on it
+    ProjectTests(thisProjectRef.value, Defaults.detectTests.value)
+  }
+
+  override def globalSettings: Seq[Def.Setting[_]] = List(
+    circleTestsByProject := {
+      if (sys.env contains "CIRCLE_NODE_INDEX") {
+        val index = sys.env("CIRCLE_NODE_INDEX").toInt
+        val total = sys.env("CIRCLE_NODE_TOTAL").toInt
+        val byProject: Seq[ProjectTests] = testsByProject.all(ScopeFilter(inAnyProject, inConfigurations(Test))).value
+
+        // need a stable sort of projects
+        val sortedProjects = byProject.sortBy(_.project.project).toList
+
+        val totalTests = sortedProjects.iterator.map(_.tests.size).sum
+        val from = index * totalTests / total
+        val to = (index + 1) * totalTests / total
+
+        // We allow a slice of [from, to) from all tests across all projects (in the order of sortedProjects)
+        // We then filter out every other
+
+        @tailrec
+        def process(projectsLeft: List[ProjectTests], testsSoFar: Int, acc: List[ProjectTests]): List[ProjectTests] = {
+          val from1 = from - testsSoFar
+          val to1 = to - testsSoFar
+          projectsLeft match {
+            case ProjectTests(proj, tests) :: rest =>
+              val out = ProjectTests(proj, tests.iterator.zipWithIndex.collect {
+                case (td, idx) if idx >= from1 && idx < to1 => td
+              }.toList)
+              process(rest, testsSoFar + tests.size, out :: acc)
+            case _ =>
+              acc
+          }
+        }
+        Some(process(sortedProjects, 0, Nil))
+      } else {
+        None
+      }
+    }
+  )
+
+  override def projectSettings: Seq[Def.Setting[_]] = inConfig(Circle)(Defaults.testSettings ++ List(
+    // Copy over important changes of the += kind from TestSettings.settings into the Circle config
+    envVars := (envVars in Test).value,
+    javaOptions := (javaOptions in Test).value,
+    testOptions := (testOptions in Test).value,
+    resourceGenerators := (resourceGenerators in Test).value,
+    // NOTE: this is because of dependencies like:
+    // org.apache.spark:spark-tags:2.2.0-SNAPSHOT:test->test
+    // That somehow don't get resolved properly in the 'circle' ivy configuration even though it extends test
+    // To test, compare:
+    // > show unsafe/test:fullClasspath
+    // > show unsafe/circle:fullClasspath
+    fullClasspath := (fullClasspath in Test).value,
+
+    copyTestReportsToCircle := {
+      val log = streams.value.log
+      val reportsDir = target.value / "test-reports"
+      val circleReports = sys.env.get("CIRCLE_TEST_REPORTS")
+      val projectName = thisProjectRef.value.project
+      val `project had tests for this circle node` = definedTests.value.nonEmpty
+
+      circleReports.map { circle =>
+        if (!reportsDir.exists()) {
+          if (`project had tests for this circle node`) {
+            sys.error(s"Found no test reports from $projectName to circle, " +
+              "though there were tests defined for this node.")
+          } else {
+            // There were no tests for this node, do nothing.
+            false
+          }
+        } else {
+          IO.copyDirectory(reportsDir, file(circle) / projectName)
+          log.info(s"Copied test reports from $projectName to circle.")
+          true
+        }
+      }.getOrElse(sys.error(s"Expected CIRCLE_TEST_REPORTS to be defined."))
+    },
+
+    definedTests := {
+      val testsByProject = (circleTestsByProject in Global).value
+        .getOrElse(sys.error("We are not running in circle."))
+      val thisProj = thisProjectRef.value
+
+      testsByProject.collectFirst {
+        case ProjectTests(`thisProj`, tests) => tests
+      }.getOrElse(sys.error(s"Didn't find any tests for $thisProj in the global circleTestsByProject. " +
+        s"Only projects found: ${testsByProject.map(_.project)}"))
+    },
+
+    test := (test, copyTestReportsToCircle) { (test, copy) =>
+      test.doFinally(copy.map(_ => ()))
+    }.value
+  ))
+}
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index f351f9cfb0e87..88efb147f1d1a 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -21,6 +21,7 @@ import java.nio.file.Files
 import scala.io.Source
 import scala.util.Properties
 import scala.collection.JavaConverters._
+import scala.collection.mutable
 import scala.collection.mutable.Stack
 
 import sbt._
@@ -85,10 +86,10 @@ object SparkBuild extends PomBuild {
   import BuildCommons._
   import scala.collection.mutable.Map
 
-  val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty
+  val projectsMap: mutable.Map[String, Seq[Setting[_]]] = mutable.Map.empty
 
   override val profiles = {
-    val profiles = Properties.envOrNone("SBT_MAVEN_PROFILES") match {
+    val profiles = Properties.propOrNone("sbt.maven.profiles") orElse Properties.envOrNone("SBT_MAVEN_PROFILES") match {
       case None => Seq("sbt")
       case Some(v) =>
         v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq
@@ -410,6 +411,10 @@ object SparkBuild extends PomBuild {
       else x.settings(Seq[Setting[_]](): _*)
     } ++ Seq[Project](OldDeps.project)
   }
+
+  override def settings: Seq[Def.Setting[_]] = super.settings ++ inScope(Global)(List(
+    updateOptions := updateOptions.value.withCachedResolution(true)
+  ))
 }
 
 object Core {
@@ -757,8 +762,6 @@ object CopyDependencies {
 object TestSettings {
   import BuildCommons._
 
-  val copyTestReportsToCircle: TaskKey[Boolean] = taskKey("Copy the test reports to circle if CIRCLE_TEST_REPORTS is defined")
-
   private val scalaBinaryVersion =
     if (System.getProperty("scala-2.10") == "true") {
       "2.10"
@@ -843,15 +846,7 @@ object TestSettings {
         "org.apache.spark.util.collection"
       ).mkString(":"),
       "-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc"
-    ),
-    copyTestReportsToCircle := {
-      val reportsDir = target.value / "test-reports"
-      val circleReports = sys.env.get("CIRCLE_TEST_REPORTS")
-      circleReports.filter(_ => reportsDir.exists).exists { circle =>
-        IO.copyDirectory(reportsDir, file(circle) / thisProjectRef.value.project)
-        true
-      }
-    }
+    )
   )
 }