Commit 16e1db2

Merge remote-tracking branch 'oss/master' into SPARK-33399-outputpartitioning
2 parents c66874a + d4cf148

File tree

144 files changed: +3014 −1395 lines


.github/workflows/build_and_test.yml

Lines changed: 24 additions & 2 deletions
@@ -14,6 +14,28 @@ on:
         required: true
 
 jobs:
+  # This is on the top to give the most visibility in case of failures
+  hadoop-2:
+    name: Hadoop 2 build
+    runs-on: ubuntu-20.04
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    - name: Cache Coursier local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.cache/coursier
+        key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+        restore-keys: |
+          hadoop-2-coursier-
+    - name: Install Java 8
+      uses: actions/setup-java@v1
+      with:
+        java-version: 1.8
+    - name: Build with SBT
+      run: |
+        ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile
+
   # Build: build Spark and run the tests for specified modules.
   build:
     name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
@@ -139,8 +161,8 @@ jobs:
     # Run the tests.
     - name: Run tests
       run: |
-        # Hive tests become flaky when running in parallel as it's too intensive.
-        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
+        # Hive and SQL tests become flaky when running in parallel as it's too intensive.
+        if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
         mkdir -p ~/.m2
         ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
         rm -rf ~/.m2/repository/org/apache/spark

common/network-yarn/src/test/java/org/apache/spark/network/yarn/YarnShuffleServiceSuite.java

Lines changed: 0 additions & 61 deletions
This file was deleted.

core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala

Lines changed: 123 additions & 10 deletions
@@ -29,6 +29,7 @@ import org.apache.spark.annotation.{Evolving, Since}
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
 import org.apache.spark.internal.config.Python.PYSPARK_EXECUTOR_MEMORY
+import org.apache.spark.util.Utils
 
 /**
  * Resource profile to associate with an RDD. A ResourceProfile allows the user to
@@ -256,13 +257,16 @@ object ResourceProfile extends Logging {
   val UNKNOWN_RESOURCE_PROFILE_ID = -1
   val DEFAULT_RESOURCE_PROFILE_ID = 0
 
+  private[spark] val MEMORY_OVERHEAD_MIN_MIB = 384L
+
   private lazy val nextProfileId = new AtomicInteger(0)
   private val DEFAULT_PROFILE_LOCK = new Object()
 
   // The default resource profile uses the application level configs.
   // var so that it can be reset for testing purposes.
   @GuardedBy("DEFAULT_PROFILE_LOCK")
   private var defaultProfile: Option[ResourceProfile] = None
+  private var defaultProfileExecutorResources: Option[DefaultProfileExecutorResources] = None
 
   private[spark] def getNextProfileId: Int = nextProfileId.getAndIncrement()
 
@@ -284,6 +288,14 @@
     }
   }
 
+  private[spark] def getDefaultProfileExecutorResources(
+      conf: SparkConf): DefaultProfileExecutorResources = {
+    defaultProfileExecutorResources.getOrElse {
+      getOrCreateDefaultProfile(conf)
+      defaultProfileExecutorResources.get
+    }
+  }
+
   private def getDefaultTaskResources(conf: SparkConf): Map[String, TaskResourceRequest] = {
     val cpusPerTask = conf.get(CPUS_PER_TASK)
     val treqs = new TaskResourceRequests().cpus(cpusPerTask)
@@ -293,20 +305,26 @@
 
   private def getDefaultExecutorResources(conf: SparkConf): Map[String, ExecutorResourceRequest] = {
     val ereqs = new ExecutorResourceRequests()
-    ereqs.cores(conf.get(EXECUTOR_CORES))
-    ereqs.memory(conf.get(EXECUTOR_MEMORY).toString)
-    conf.get(EXECUTOR_MEMORY_OVERHEAD).map(mem => ereqs.memoryOverhead(mem.toString))
-    conf.get(PYSPARK_EXECUTOR_MEMORY).map(mem => ereqs.pysparkMemory(mem.toString))
-    if (conf.get(MEMORY_OFFHEAP_ENABLED)) {
-      // Explicitly add suffix b as default unit of offHeapMemory is Mib
-      ereqs.offHeapMemory(conf.get(MEMORY_OFFHEAP_SIZE).toString + "b")
-    }
+    val cores = conf.get(EXECUTOR_CORES)
+    ereqs.cores(cores)
+    val memory = conf.get(EXECUTOR_MEMORY)
+    ereqs.memory(memory.toString)
+    val overheadMem = conf.get(EXECUTOR_MEMORY_OVERHEAD)
+    overheadMem.map(mem => ereqs.memoryOverhead(mem.toString))
+    val pysparkMem = conf.get(PYSPARK_EXECUTOR_MEMORY)
+    pysparkMem.map(mem => ereqs.pysparkMemory(mem.toString))
+    val offheapMem = Utils.executorOffHeapMemorySizeAsMb(conf)
+    ereqs.offHeapMemory(offheapMem.toString)
     val execReq = ResourceUtils.parseAllResourceRequests(conf, SPARK_EXECUTOR_PREFIX)
     execReq.foreach { req =>
-      val name = req.id.resourceName
-      ereqs.resource(name, req.amount, req.discoveryScript.orElse(""),
+      ereqs.resource(req.id.resourceName, req.amount, req.discoveryScript.orElse(""),
         req.vendor.orElse(""))
     }
+    val customResourceNames = execReq.map(_.id.resourceName).toSet
+    val customResources = ereqs.requests.filter(v => customResourceNames.contains(v._1))
+    defaultProfileExecutorResources =
+      Some(DefaultProfileExecutorResources(cores, memory, offheapMem, pysparkMem,
+        overheadMem, customResources))
     ereqs.requests
   }
 
@@ -320,6 +338,7 @@
   private[spark] def clearDefaultProfile(): Unit = {
     DEFAULT_PROFILE_LOCK.synchronized {
       defaultProfile = None
+      defaultProfileExecutorResources = None
     }
   }
 
@@ -342,6 +361,100 @@
     rp.getTaskCpus.getOrElse(conf.get(CPUS_PER_TASK))
   }
 
+  /**
+   * Get offHeap memory size from [[ExecutorResourceRequest]]
+   * return 0 if MEMORY_OFFHEAP_ENABLED is false.
+   */
+  private[spark] def executorOffHeapMemorySizeAsMb(sparkConf: SparkConf,
+      execRequest: ExecutorResourceRequest): Long = {
+    Utils.checkOffHeapEnabled(sparkConf, execRequest.amount)
+  }
+
+  private[spark] case class ExecutorResourcesOrDefaults(
+      cores: Int,
+      executorMemoryMiB: Long,
+      memoryOffHeapMiB: Long,
+      pysparkMemoryMiB: Long,
+      memoryOverheadMiB: Long,
+      totalMemMiB: Long,
+      customResources: Map[String, ExecutorResourceRequest])
+
+  private[spark] case class DefaultProfileExecutorResources(
+      cores: Int,
+      executorMemoryMiB: Long,
+      memoryOffHeapMiB: Long,
+      pysparkMemoryMiB: Option[Long],
+      memoryOverheadMiB: Option[Long],
+      customResources: Map[String, ExecutorResourceRequest])
+
+  private[spark] def calculateOverHeadMemory(
+      overHeadMemFromConf: Option[Long],
+      executorMemoryMiB: Long,
+      overheadFactor: Double): Long = {
+    overHeadMemFromConf.getOrElse(math.max((overheadFactor * executorMemoryMiB).toInt,
+      ResourceProfile.MEMORY_OVERHEAD_MIN_MIB))
+  }
+
+  /**
+   * Gets the full list of resources to allow a cluster manager to request the appropriate
+   * container. If the resource profile is not the default one we either get the resources
+   * specified in the profile or fall back to the default profile resource size for everything
+   * except for custom resources.
+   */
+  private[spark] def getResourcesForClusterManager(
+      rpId: Int,
+      execResources: Map[String, ExecutorResourceRequest],
+      overheadFactor: Double,
+      conf: SparkConf,
+      isPythonApp: Boolean,
+      resourceMappings: Map[String, String]): ExecutorResourcesOrDefaults = {
+    val defaultResources = getDefaultProfileExecutorResources(conf)
+    // set all the default values, which may change for custom ResourceProfiles
+    var cores = defaultResources.cores
+    var executorMemoryMiB = defaultResources.executorMemoryMiB
+    var memoryOffHeapMiB = defaultResources.memoryOffHeapMiB
+    var pysparkMemoryMiB = defaultResources.pysparkMemoryMiB.getOrElse(0L)
+    var memoryOverheadMiB = calculateOverHeadMemory(defaultResources.memoryOverheadMiB,
+      executorMemoryMiB, overheadFactor)
+
+    val finalCustomResources = if (rpId != DEFAULT_RESOURCE_PROFILE_ID) {
+      val customResources = new mutable.HashMap[String, ExecutorResourceRequest]
+      execResources.foreach { case (r, execReq) =>
+        r match {
+          case ResourceProfile.MEMORY =>
+            executorMemoryMiB = execReq.amount
+          case ResourceProfile.OVERHEAD_MEM =>
+            memoryOverheadMiB = execReq.amount
+          case ResourceProfile.PYSPARK_MEM =>
+            pysparkMemoryMiB = execReq.amount
+          case ResourceProfile.OFFHEAP_MEM =>
+            memoryOffHeapMiB = executorOffHeapMemorySizeAsMb(conf, execReq)
+          case ResourceProfile.CORES =>
+            cores = execReq.amount.toInt
+          case rName =>
+            val nameToUse = resourceMappings.get(rName).getOrElse(rName)
+            customResources(nameToUse) = execReq
+        }
+      }
+      customResources.toMap
+    } else {
+      defaultResources.customResources.map { case (rName, execReq) =>
+        val nameToUse = resourceMappings.get(rName).getOrElse(rName)
+        (nameToUse, execReq)
+      }
+    }
+    // only add in pyspark memory if actually a python application
+    val pysparkMemToUseMiB = if (isPythonApp) {
+      pysparkMemoryMiB
+    } else {
+      0L
+    }
+    val totalMemMiB =
+      (executorMemoryMiB + memoryOverheadMiB + memoryOffHeapMiB + pysparkMemToUseMiB)
+    ExecutorResourcesOrDefaults(cores, executorMemoryMiB, memoryOffHeapMiB,
+      pysparkMemToUseMiB, memoryOverheadMiB, totalMemMiB, finalCustomResources)
+  }
+
   private[spark] val PYSPARK_MEMORY_LOCAL_PROPERTY = "resource.pyspark.memory"
   private[spark] val EXECUTOR_CORES_LOCAL_PROPERTY = "resource.executor.cores"
 }
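
To make the new helper concrete, here is a minimal usage sketch of getResourcesForClusterManager as a cluster manager backend might call it. This is not from the commit itself: the profile contents, the 0.1 overhead factor, and the conf values are illustrative assumptions, and the method is private[spark], so it is only callable from Spark-internal code.

import org.apache.spark.SparkConf
import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfile, ResourceProfileBuilder}

// Illustrative values only (assumed, not from this commit).
val conf = new SparkConf().set("spark.executor.memory", "2g")
val profile = new ResourceProfileBuilder()
  .require(new ExecutorResourceRequests().cores(4).memory("8g"))
  .build()

// Memory and cores come from the custom profile; anything it does not
// override (overhead, off-heap, pyspark memory) falls back to the defaults.
val res = ResourceProfile.getResourcesForClusterManager(
  profile.id, profile.executorResources, 0.1, conf, isPythonApp = false, Map.empty)
// res.totalMemMiB = executorMemoryMiB + memoryOverheadMiB + memoryOffHeapMiB,
// with overhead defaulting to max(0.1 * 8192, MEMORY_OVERHEAD_MIN_MIB) = 819 MiB here.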

core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala

Lines changed: 14 additions & 7 deletions
@@ -52,18 +52,25 @@ private[spark] class ResourceProfileManager(sparkConf: SparkConf,
 
   private val dynamicEnabled = Utils.isDynamicAllocationEnabled(sparkConf)
   private val master = sparkConf.getOption("spark.master")
-  private val isNotYarn = master.isDefined && !master.get.equals("yarn")
-  private val errorForTesting = !isTesting || sparkConf.get(RESOURCE_PROFILE_MANAGER_TESTING)
+  private val isYarn = master.isDefined && master.get.equals("yarn")
+  private val isK8s = master.isDefined && master.get.startsWith("k8s://")
+  private val notRunningUnitTests = !isTesting
+  private val testExceptionThrown = sparkConf.get(RESOURCE_PROFILE_MANAGER_TESTING)
 
   // If we use anything except the default profile, its only supported on YARN right now.
   // Throw an exception if not supported.
   private[spark] def isSupported(rp: ResourceProfile): Boolean = {
     val isNotDefaultProfile = rp.id != ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID
-    val notYarnAndNotDefaultProfile = isNotDefaultProfile && isNotYarn
-    val YarnNotDynAllocAndNotDefaultProfile = isNotDefaultProfile && !isNotYarn && !dynamicEnabled
-    if (errorForTesting && (notYarnAndNotDefaultProfile || YarnNotDynAllocAndNotDefaultProfile)) {
-      throw new SparkException("ResourceProfiles are only supported on YARN with dynamic " +
-        "allocation enabled.")
+    val notYarnOrK8sAndNotDefaultProfile = isNotDefaultProfile && !(isYarn || isK8s)
+    val YarnOrK8sNotDynAllocAndNotDefaultProfile =
+      isNotDefaultProfile && (isYarn || isK8s) && !dynamicEnabled
+    // We want the exception to be thrown only when we are specifically testing for the
+    // exception or in a real application. Otherwise in all other testing scenarios we want
+    // to skip throwing the exception so that we can test in other modes to make testing easier.
+    if ((notRunningUnitTests || testExceptionThrown) &&
+        (notYarnOrK8sAndNotDefaultProfile || YarnOrK8sNotDynAllocAndNotDefaultProfile)) {
+      throw new SparkException("ResourceProfiles are only supported on YARN and Kubernetes " +
+        "with dynamic allocation enabled.")
     }
     true
   }
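
As a brief sketch of what the relaxed check allows (the master URL and settings below are illustrative assumptions, not taken from this commit):

import org.apache.spark.SparkConf

// Assumed example configuration.
val conf = new SparkConf()
  .setMaster("k8s://https://example.com:6443") // now accepted alongside "yarn"
  .set("spark.dynamicAllocation.enabled", "true")
// With this change, isSupported(someCustomProfile) returns true for the setup
// above; on any other master, or with dynamic allocation disabled, it still
// throws SparkException.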

core/src/main/scala/org/apache/spark/util/Utils.scala

Lines changed: 21 additions & 0 deletions
@@ -2971,6 +2971,27 @@ private[spark] object Utils extends Logging {
     metadata.append("]")
     metadata.toString
   }
+
+  /**
+   * Convert MEMORY_OFFHEAP_SIZE to MB Unit, return 0 if MEMORY_OFFHEAP_ENABLED is false.
+   */
+  def executorOffHeapMemorySizeAsMb(sparkConf: SparkConf): Int = {
+    val sizeInMB = Utils.memoryStringToMb(sparkConf.get(MEMORY_OFFHEAP_SIZE).toString)
+    checkOffHeapEnabled(sparkConf, sizeInMB).toInt
+  }
+
+  /**
+   * return 0 if MEMORY_OFFHEAP_ENABLED is false.
+   */
+  def checkOffHeapEnabled(sparkConf: SparkConf, offHeapSize: Long): Long = {
+    if (sparkConf.get(MEMORY_OFFHEAP_ENABLED)) {
+      require(offHeapSize > 0,
+        s"${MEMORY_OFFHEAP_SIZE.key} must be > 0 when ${MEMORY_OFFHEAP_ENABLED.key} == true")
+      offHeapSize
+    } else {
+      0
+    }
+  }
 }
 
 private[util] object CallerContext extends Logging {
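
A rough usage sketch of the new helpers under assumed config values (the keys are the standard off-heap settings behind MEMORY_OFFHEAP_ENABLED and MEMORY_OFFHEAP_SIZE; Utils is private[spark], so this runs only inside Spark-internal code):

import org.apache.spark.SparkConf
import org.apache.spark.util.Utils

val conf = new SparkConf()
  .set("spark.memory.offHeap.enabled", "true")
  .set("spark.memory.offHeap.size", "2g")
Utils.executorOffHeapMemorySizeAsMb(conf)  // 2048 (MiB)

// Disabled off-heap short-circuits to 0 regardless of the configured size.
conf.set("spark.memory.offHeap.enabled", "false")
Utils.executorOffHeapMemorySizeAsMb(conf)  // 0

// Enabled with size 0 fails the require() with:
// "spark.memory.offHeap.size must be > 0 when spark.memory.offHeap.enabled == true"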
