Merged
Changes from all 39 commits:
0d232ad - only one container (Mar 7, 2017)
effd212 - Only clean in the case of container 2, because the flags have changed (dansanduleac, Mar 9, 2017)
a2c080d - Cache all built classes after the install, restore cache before calli… (dansanduleac, Mar 9, 2017)
baf7d82 - Cache the target/analysis dirs as well (dansanduleac, Mar 10, 2017)
62922a2 - Fix invocation of container 2 (dansanduleac, Mar 10, 2017)
f32081f - Just do rsync -v (dansanduleac, Mar 10, 2017)
c23dba4 - Make sbt fetch all the external deps to ~/.ivy2 so it gets cached (dansanduleac, Mar 10, 2017)
306f60b - Actually do this instead (dansanduleac, Mar 10, 2017)
b45e683 - More rsync stats as to what was copied (dansanduleac, Mar 10, 2017)
f031d39 - Include target/maven-status in the cache (dansanduleac, Mar 11, 2017)
61bb45f - Improve target/ caching I think (dansanduleac, Mar 13, 2017)
1b25ab3 - Merge remote-tracking branch 'origin/rk/only-one-container' into ds/b… (dansanduleac, Mar 14, 2017)
601c7ac - Use circle parallelism to split SBT test goals by goals (dansanduleac, Mar 14, 2017)
1557a78 - Fix run-tests.py prematurely exiting before copying the reports to ci… (dansanduleac, Mar 11, 2017)
0f7cd19 - Download miniconda through pyenv, and create envs for the tests rathe… (dansanduleac, Mar 12, 2017)
e7d716f - Revert "Use circle parallelism to split SBT test goals by goals" (dansanduleac, Mar 14, 2017)
f45fd21 - Don't do the publish until the tests have actually run (dansanduleac, Mar 14, 2017)
1430462 - Move publishing to deployment which just calls dev/publish (dansanduleac, Mar 14, 2017)
3294221 - Implement test-level parallelism across the entire set of tests run f… (dansanduleac, Mar 14, 2017)
1fbc191 - Use SBT cached resolution for faster update, hopefully (dansanduleac, Mar 14, 2017)
2e4c6c5 - Cache compilation results too in the dependencies stage (dansanduleac, Mar 14, 2017)
cb2cee7 - fix style, add apache copyright (dansanduleac, Mar 14, 2017)
ad05d01 - Set sbt's target to something else, so it doesn't clash with maven's … (dansanduleac, Mar 14, 2017)
da42682 - Stop grep -v-ing for info (dansanduleac, Mar 15, 2017)
8167a57 - Exclude target_sbt/ from stuff too (dansanduleac, Mar 15, 2017)
1bf9ec1 - exclude target_sbt from rat tests (dansanduleac, Mar 15, 2017)
4b7ba2b - Revert "Stop grep -v-ing for info" (dansanduleac, Mar 15, 2017)
ec936ff - oops that has to be a list! (dansanduleac, Mar 15, 2017)
df47910 - Fix cached resolution, it must be applied to the root project too whi… (dansanduleac, Mar 15, 2017)
015cbe4 - Revert "Set sbt's target to something else, so it doesn't clash with … (dansanduleac, Mar 15, 2017)
64e29bf - Revert to caching just after mvn install, then only get deps with SBT (dansanduleac, Mar 15, 2017)
35e801f - Make it an error if circle test reports were expected but not found (dansanduleac, Mar 15, 2017)
82f4fc3 - Fix always copying circle test reports (dansanduleac, Mar 15, 2017)
42a645f - Clean up the scopes (dansanduleac, Mar 16, 2017)
5c588c2 - Make sure to copy over some settings that are only getting defined in… (dansanduleac, Mar 16, 2017)
28bd519 - Fix classpath in the 'circle' configuration not being quite the one i… (dansanduleac, Mar 16, 2017)
965b7d5 - Attempt to cache miniconda again, else we get throttled on many conta… (dansanduleac, Mar 16, 2017)
048e4d5 - Merge branch 'master' into ds/build-perf (robert3005, Mar 16, 2017)
d429cb0 - Should actually cache ~/minconda, and cache build/ too while you're a… (dansanduleac, Mar 16, 2017)
68 changes: 25 additions & 43 deletions circle.yml
@@ -3,10 +3,24 @@ machine:
version: oraclejdk8
post:
- sudo apt-get --assume-yes install r-base r-base-dev
- pyenv global 2.7.11 3.4.4 #pypy-4.0.1
- |
if [[ ! -d ${CONDA_ROOT} ]]; then
echo "Installing Miniconda...";
wget --quiet https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh &&
bash Miniconda-latest-Linux-x86_64.sh -b -p ${CONDA_ROOT};
else
echo "Using cached Miniconda install";
fi
- ln -s ~/miniconda $(pyenv root)/versions/miniconda3-latest
- 'pyenv versions | grep -q miniconda3-latest/envs/python2 || $CONDA_BIN create -y -n python2 python==2.7.11 numpy'
- 'pyenv versions | grep -q miniconda3-latest/envs/python3 || $CONDA_BIN create -y -n python3 python==3.4.4 numpy'
- pyenv global miniconda3-latest/envs/python2 miniconda3-latest/envs/python3 #pypy-4.0.1
- pyenv rehash
environment:
TERM: dumb
R_HOME: /usr/lib/R
CONDA_BIN: $HOME/miniconda/bin/conda
CONDA_ROOT: $HOME/miniconda

checkout:
post:
@@ -17,23 +31,24 @@
- echo "host=api.bintray.com" >> .credentials

dependencies:
pre:
- PYENV_VERSION=2.7.11 pip install numpy
- PYENV_VERSION=3.4.4 pip install numpy
#- PYENV_VERSION=pypy-4.0.1 pip install numpy
override:
- |
if [[ -d build_classes ]]; then
# Copy contents into current build directory
rsync -a build_classes/ .
rsync --info=stats2,misc1,flist0 -a build_classes/ .
fi
- ./build/mvn -DskipTests -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr install
# Copy all of */target/scala_2.11/classes to build_classes/
- >
rsync --info=progress2 -a --delete-excluded --prune-empty-dirs --exclude build_classes/ --include '**/target/scala-2.1?/***'
--include '**/target/analysis/***' --include '**/' --exclude '*' . build_classes/
rsync --info=stats2,misc1,flist0 -a --delete-excluded --prune-empty-dirs --exclude build_classes/ --exclude 'target/streams'
--include 'target/***' --include '**/' --exclude '*' . build_classes/
- |
# Make sbt fetch all the external deps to ~/.ivy2 so it gets cached
./build/sbt -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr externalDependencyClasspath
cache_directories:
- "build_classes"
- "build"
- "~/miniconda"

general:
artifacts:
@@ -46,48 +61,15 @@ test:
parallel: true
timeout: 1800
max-runtime: 14400
- ? |
set -euo pipefail
version=$(git describe --tags)

publish_artifacts() {
tmp_settings="tmp-settings.xml"
echo "<settings><servers><server>" > $tmp_settings
echo "<id>bintray-palantir-release</id><username>$BINTRAY_USERNAME</username>" >> $tmp_settings
echo "<password>$BINTRAY_PASSWORD</password>" >> $tmp_settings
echo "</server></servers></settings>" >> $tmp_settings

./build/mvn versions:set -DnewVersion=$version
./build/mvn --settings $tmp_settings -DskipTests -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr deploy
}

make_dist() {
dist_name="$1"
build_flags="$2"
shift 2
dist_version="${version}-${dist_name}"
file_name="spark-dist-${dist_version}.tgz"
./dev/make-distribution.sh --name $dist_name --tgz "$@" $build_flags
curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -T $file_name "https://api.bintray.com/content/palantir/releases/spark/${version}/org/apache/spark/spark-dist/${dist_version}/${file_name}"
}

case $CIRCLE_NODE_INDEX in
0)
publish_artifacts
make_dist hadoop-2.8.0-palantir2 "-Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr"
make_dist without-hadoop "-Phadoop-provided -Pkubernetes -Pmesos -Pyarn -Psparkr" --clean
;;
esac
:
parallel: true
timeout: 1200

deployment:
release:
tag: /[0-9]+(?:\.[0-9]+)+-palantir[0-9]+(?:-kubernetes[0-9]+)?/
commands:
- dev/publish.sh
- curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST https://api.bintray.com/content/palantir/releases/spark/$(git describe --tags)/publish
snapshot:
branch: master
commands:
- dev/publish.sh
- curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST https://api.bintray.com/content/palantir/releases/spark/$(git describe --tags)/publish
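
For context, the release deployment above is keyed off the tag regex. Below is a minimal sketch, not part of the PR, showing how that pattern behaves; the tag names are hypothetical, and it assumes CircleCI applies tag filters as a full, anchored match:

// Sketch: the release tag pattern from the deployment section above,
// exercised against hypothetical tag names.
object ReleaseTagCheck extends App {
  val releaseTag = "[0-9]+(?:\\.[0-9]+)+-palantir[0-9]+(?:-kubernetes[0-9]+)?".r

  def isRelease(tag: String): Boolean = releaseTag.pattern.matcher(tag).matches()

  assert(isRelease("2.1.0-palantir4"))             // plain palantir release tag
  assert(isRelease("2.1.0-palantir4-kubernetes2")) // kubernetes-suffixed release tag
  assert(!isRelease("branch-2.1"))                 // non-release refs don't match
}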
42 changes: 28 additions & 14 deletions dev/publish.sh
@@ -1,17 +1,31 @@
#!/usr/bin/env bash

set -euo pipefail
version=$(git describe --tags)

PALANTIR_FLAGS=(-Phadoop-palantir -Pkinesis-asl -Pkubernetes -Pmesos -Pyarn -Phive-thriftserver -Phive -Psparkr)

publish_artifacts() {
tmp_settings="tmp-settings.xml"
echo "<settings><servers><server>" > $tmp_settings
echo "<id>bintray-palantir-release</id><username>$BINTRAY_USERNAME</username>" >> $tmp_settings
echo "<password>$BINTRAY_PASSWORD</password>" >> $tmp_settings
echo "</server></servers></settings>" >> $tmp_settings

./build/mvn versions:set -DnewVersion=$version
./build/mvn --settings $tmp_settings -DskipTests "${PALANTIR_FLAGS[@]}" deploy
}

make_dist() {
dist_name="$1"
build_flags="$2"
shift 2
dist_version="${version}-${dist_name}"
file_name="spark-dist-${dist_version}.tgz"
./dev/make-distribution.sh --name $dist_name --tgz "$@" $build_flags
curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -T $file_name "https://api.bintray.com/content/palantir/releases/spark/${version}/org/apache/spark/spark-dist/${dist_version}/${file_name}"
}

FLAGS="-Psparkr -Phive -Phive-thriftserver -Pyarn -Pmesos"
case $CIRCLE_NODE_INDEX in
0)
./build/sbt -Phadoop-2.7 -Pmesos -Pkinesis-asl -Pyarn -Phive-thriftserver -Phive publish
;;
1)
./dev/make-distribution.sh --name without-hadoop --tgz "-Psparkr -Phadoop-provided -Pyarn -Pmesos" \
2>&1 > binary-release-without-hadoop.log
;;
2)
./dev/make-distribution.sh --name hadoop2.7 --tgz "-Phadoop2.7 $FLAGS" \
2>&1 > binary-release-hadoop2.7.log
;;
esac
publish_artifacts
make_dist hadoop-2.8.0-palantir2 "${PALANTIR_FLAGS[*]}"
make_dist without-hadoop "-Phadoop-provided -Pkubernetes -Pmesos -Pyarn -Psparkr" --clean
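
A note on the flag handling above: publish_artifacts passes the flags to mvn individually via "${PALANTIR_FLAGS[@]}", while make_dist receives them joined into a single word via "${PALANTIR_FLAGS[*]}", because build_flags is consumed as one positional argument and word-split again inside the function. For reference, a standalone sketch of the Bintray upload path that make_dist constructs; the version and dist name below are hypothetical examples:

// Sketch mirroring make_dist's URL construction in dev/publish.sh above;
// the example version and dist name are hypothetical.
object BintrayDistPath extends App {
  def distUrl(version: String, distName: String): String = {
    val distVersion = s"$version-$distName"
    val fileName = s"spark-dist-$distVersion.tgz"
    s"https://api.bintray.com/content/palantir/releases/spark/$version" +
      s"/org/apache/spark/spark-dist/$distVersion/$fileName"
  }
  println(distUrl("2.1.0-palantir4", "hadoop-2.8.0-palantir2"))
}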
21 changes: 12 additions & 9 deletions dev/run-tests.py
@@ -270,7 +270,7 @@ def exec_maven(mvn_args=()):
kill_zinc_on_port(zinc_port)


def exec_sbt(sbt_args=()):
def exec_sbt(sbt_args=(), exit_on_failure=True):
"""Will call SBT in the current directory with the list of mvn_args passed
in and returns the subprocess for any further processing"""

@@ -295,7 +295,9 @@ def exec_sbt(sbt_args=()):
retcode = sbt_proc.wait()

if retcode != 0:
exit_from_command_with_retcode(sbt_cmd, retcode)
if exit_on_failure:
exit_from_command_with_retcode(sbt_cmd, retcode)
return sbt_cmd, retcode


def get_hadoop_profiles(hadoop_version):
@@ -387,8 +389,14 @@ def run_scala_tests_maven(test_profiles):


def run_scala_tests_sbt(test_modules, test_profiles):

sbt_test_goals = list(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))
if 'CIRCLE_TEST_REPORTS' in os.environ:
# The test task in the circle configuration runs only the appropriate test for the current
# circle node, then copies the results to CIRCLE_TEST_REPORTS.
# We are not worried about running only the `test_modules`, since we always run the whole
# suite in circle anyway.
sbt_test_goals = ['circle:test']
else:
sbt_test_goals = list(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))

if not sbt_test_goals:
return
@@ -400,11 +408,6 @@ def run_scala_tests_sbt(test_modules, test_profiles):

exec_sbt(profiles_and_goals)

if 'CIRCLE_TEST_REPORTS' in os.environ:
copy_tests_cmd = test_profiles + ["copyTestReportsToCircle"]
print("[info] Copying SBT test reports to Circle: ", " ".join(copy_tests_cmd))
exec_sbt(copy_tests_cmd)


def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
"""Function to properly execute all tests passed in as a set from the
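
The net effect of this change: under CircleCI the per-module sbt test goals collapse into a single circle:test goal, and the per-node test slicing plus report copying happen inside the build (see CirclePlugin.scala below). A minimal Scala mirror of the goal selection, with hypothetical module goals:

// Sketch (not part of the PR) of run_scala_tests_sbt's goal selection:
// on CircleCI the aggregated circle:test goal replaces per-module goals.
object GoalSelection extends App {
  def sbtTestGoals(onCircle: Boolean, moduleGoals: Seq[String]): Seq[String] =
    if (onCircle) Seq("circle:test") // CirclePlugin slices tests per node
    else moduleGoals                 // e.g. the goals declared per module
  println(sbtTestGoals(onCircle = true, Seq("core/test", "catalyst/test")))
}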
136 changes: 136 additions & 0 deletions project/CirclePlugin.scala
@@ -0,0 +1,136 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import scala.annotation.tailrec

import sbt._
import sbt.Keys._
import sbt.plugins.JvmPlugin

//noinspection ScalaStyle
object CirclePlugin extends AutoPlugin {
lazy val Circle = config("circle").extend(Test).hide

case class ProjectTests(project: ProjectRef, tests: Seq[TestDefinition])

val circleTestsByProject = taskKey[Option[Seq[ProjectTests]]]("The tests that should be run under this circle node, if circle is set up")
val copyTestReportsToCircle: TaskKey[Boolean] = taskKey("Copy the test reports to circle. Expects CIRCLE_TEST_REPORTS to be defined")

override def projectConfigurations: Seq[Configuration] = List(Circle)

override def requires: Plugins = JvmPlugin

override def trigger: PluginTrigger = allRequirements

private[this] lazy val testsByProject = Def.task {
// Defaults.detectTests is basically the value of Keys.definedTests, but since we're
// overriding the latter depending on the value of this task, we can't depend on it
ProjectTests(thisProjectRef.value, Defaults.detectTests.value)
}

override def globalSettings: Seq[Def.Setting[_]] = List(
circleTestsByProject := {
if (sys.env contains "CIRCLE_NODE_INDEX") {
val index = sys.env("CIRCLE_NODE_INDEX").toInt
val total = sys.env("CIRCLE_NODE_TOTAL").toInt
val byProject: Seq[ProjectTests] = testsByProject.all(ScopeFilter(inAnyProject, inConfigurations(Test))).value

// need a stable sort of projects
val sortedProjects = byProject.sortBy(_.project.project).toList

val totalTests = sortedProjects.iterator.map(_.tests.size).sum
val from = index * totalTests / total
val to = (index + 1) * totalTests / total

// We allow a slice of [from, to) from all tests across all projects (in the order of sortedProjects)
// We then filter each project's test list down to its portion of that slice

@tailrec
def process(projectsLeft: List[ProjectTests], testsSoFar: Int, acc: List[ProjectTests]): List[ProjectTests] = {
val from1 = from - testsSoFar
val to1 = to - testsSoFar
projectsLeft match {
case ProjectTests(proj, tests) :: rest =>
val out = ProjectTests(proj, tests.iterator.zipWithIndex.collect {
case (td, idx) if idx >= from1 && idx < to1 => td
}.toList)
process(rest, testsSoFar + tests.size, out :: acc)
case _ =>
acc
}
}
Some(process(sortedProjects, 0, Nil))
} else {
None
}
}
)

override def projectSettings: Seq[Def.Setting[_]] = inConfig(Circle)(Defaults.testSettings ++ List(
// Copy over important changes of the += kind from TestSettings.settings into the Circle config
envVars := (envVars in Test).value,
javaOptions := (javaOptions in Test).value,
testOptions := (testOptions in Test).value,
resourceGenerators := (resourceGenerators in Test).value,
// NOTE: this is because of dependencies like:
// org.apache.spark:spark-tags:2.2.0-SNAPSHOT:test->test
// That somehow don't get resolved properly in the 'circle' ivy configuration even though it extends test
// To test, compare:
// > show unsafe/test:fullClasspath
// > show unsafe/circle:fullClasspath
fullClasspath := (fullClasspath in Test).value,

copyTestReportsToCircle := {
val log = streams.value.log
val reportsDir = target.value / "test-reports"
val circleReports = sys.env.get("CIRCLE_TEST_REPORTS")
val projectName = thisProjectRef.value.project
val `project had tests for this circle node` = definedTests.value.nonEmpty

circleReports.map { circle =>
if (!reportsDir.exists()) {
if (`project had tests for this circle node`) {
sys.error(s"Found no test reports from $projectName to circle, " +
"though there were tests defined for this node.")
} else {
// There were no tests for this node, do nothing.
false
}
} else {
IO.copyDirectory(reportsDir, file(circle) / projectName)
log.info(s"Copied test reports from $projectName to circle.")
true
}
}.getOrElse(sys.error(s"Expected CIRCLE_TEST_REPORTS to be defined."))
},

definedTests := {
val testsByProject = (circleTestsByProject in Global).value
.getOrElse(sys.error("We are not running in circle."))
val thisProj = thisProjectRef.value

testsByProject.collectFirst {
case ProjectTests(`thisProj`, tests) => tests
}.getOrElse(sys.error(s"Didn't find any tests for $thisProj in the global circleTestsByProject. " +
s"Only projects found: ${testsByProject.map(_.project)}"))
},

test := (test, copyTestReportsToCircle) { (test, copy) =>
test.doFinally(copy.map(_ => ()))
}.value
))
}
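
The core of the plugin is the windowing arithmetic in circleTestsByProject: each node takes the contiguous global index range [index*totalTests/total, (index+1)*totalTests/total) over a stably sorted list of every test in every project. A standalone sketch with hypothetical test counts, showing that the windows tile the whole range:

// Same integer arithmetic as CirclePlugin above, run on hypothetical counts.
object SliceDemo extends App {
  val testsPerProject = List("core" -> 5, "sql" -> 3, "unsafe" -> 4) // hypothetical
  val totalTests = testsPerProject.map(_._2).sum // 12 tests overall
  val totalNodes = 4
  for (index <- 0 until totalNodes) {
    val from = index * totalTests / totalNodes
    val to = (index + 1) * totalTests / totalNodes
    println(s"node $index runs global test indices [$from, $to)")
  }
  // Integer division makes the windows partition [0, totalTests) exactly,
  // with per-node counts differing by at most one.
}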
21 changes: 8 additions & 13 deletions project/SparkBuild.scala
@@ -21,6 +21,7 @@ import java.nio.file.Files
import scala.io.Source
import scala.util.Properties
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.Stack

import sbt._
@@ -85,10 +86,10 @@ object SparkBuild extends PomBuild {
import BuildCommons._
import scala.collection.mutable.Map

val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty
val projectsMap: mutable.Map[String, Seq[Setting[_]]] = mutable.Map.empty

override val profiles = {
val profiles = Properties.envOrNone("SBT_MAVEN_PROFILES") match {
val profiles = Properties.propOrNone("sbt.maven.profiles") orElse Properties.envOrNone("SBT_MAVEN_PROFILES") match {
case None => Seq("sbt")
case Some(v) =>
v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq
@@ -410,6 +411,10 @@ object SparkBuild extends PomBuild {
else x.settings(Seq[Setting[_]](): _*)
} ++ Seq[Project](OldDeps.project)
}

override def settings: Seq[Def.Setting[_]] = super.settings ++ inScope(Global)(List(
updateOptions := updateOptions.value.withCachedResolution(true)
))
}

object Core {
@@ -757,8 +762,6 @@ object CopyDependencies {
object TestSettings {
import BuildCommons._

val copyTestReportsToCircle: TaskKey[Boolean] = taskKey("Copy the test reports to circle if CIRCLE_TEST_REPORTS is defined")

private val scalaBinaryVersion =
if (System.getProperty("scala-2.10") == "true") {
"2.10"
@@ -843,15 +846,7 @@ object TestSettings {
"org.apache.spark.util.collection"
).mkString(":"),
"-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc"
),
copyTestReportsToCircle := {
val reportsDir = target.value / "test-reports"
val circleReports = sys.env.get("CIRCLE_TEST_REPORTS")
circleReports.filter(_ => reportsDir.exists).exists { circle =>
IO.copyDirectory(reportsDir, file(circle) / thisProjectRef.value.project)
true
}
}
)
)

}
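
The inScope(Global) override above is what the "Fix cached resolution, it must be applied to the root project too" commit refers to: per-project settings would miss the root aggregator. An equivalent minimal auto-plugin sketch against the sbt 0.13 API (assumed from the build's vintage):

import sbt._
import sbt.Keys._

// Sketch: enable cached dependency resolution for every project, including
// the root, by scoping the setting globally (as SparkBuild does above).
object CachedResolutionSketch extends AutoPlugin {
  override def trigger: PluginTrigger = allRequirements
  override def globalSettings: Seq[Def.Setting[_]] =
    List(updateOptions := updateOptions.value.withCachedResolution(true))
}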