Skip to content

Commit 6eef78d

Browse files
committed
Merge pull request apache#345 from colorant/yarn
Support distributing extra files to workers in yarn-client mode, so that users don't need to package all dependencies into a single assembly jar used as the Spark app jar.
2 parents bb6a39a + 67af803 commit 6eef78d

File tree

4 files changed

+7
-3
lines changed

4 files changed

+7
-3
lines changed

docs/running-on-yarn.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ For example:
114114
SPARK_YARN_APP_JAR=examples/target/scala-{{site.SCALA_VERSION}}/spark-examples-assembly-{{site.SPARK_VERSION}}.jar \
115115
MASTER=yarn-client ./bin/spark-shell
116116

117+
You can also send extra files to the YARN cluster for the workers to use by exporting a comma-separated list of files, e.g. SPARK_YARN_DIST_FILES=file1,file2.
118+
117119
# Building Spark for Hadoop/YARN 2.2.x
118120

119121
See [Building Spark with Maven](building-with-maven.html) for instructions on how to build Spark using the Maven process.

yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ class Client(args: ClientArguments, conf: Configuration, sparkConf: SparkConf)
102102
(System.getenv("SPARK_JAR") == null) -> "Error: You must set SPARK_JAR environment variable!",
103103
(args.userJar == null) -> "Error: You must specify a user jar!",
104104
(args.userClass == null) -> "Error: You must specify a user class!",
105-
(args.numWorkers <= 0) -> "Error: You must specify atleast 1 worker!",
105+
(args.numWorkers <= 0) -> "Error: You must specify at least 1 worker!",
106106
(args.amMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: AM memory size must be " +
107107
"greater than: " + YarnAllocationHandler.MEMORY_OVERHEAD),
108108
(args.workerMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: Worker memory size " +

yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ private[spark] class YarnClientSchedulerBackend(
3939
val defaultWorkerNumber = "1"
4040

4141
val userJar = System.getenv("SPARK_YARN_APP_JAR")
42+
val distFiles = System.getenv("SPARK_YARN_DIST_FILES")
4243
var workerCores = System.getenv("SPARK_WORKER_CORES")
4344
var workerMemory = System.getenv("SPARK_WORKER_MEMORY")
4445
var workerNumber = System.getenv("SPARK_WORKER_INSTANCES")
@@ -64,7 +65,8 @@ private[spark] class YarnClientSchedulerBackend(
6465
"--worker-memory", workerMemory,
6566
"--worker-cores", workerCores,
6667
"--num-workers", workerNumber,
67-
"--master-class", "org.apache.spark.deploy.yarn.WorkerLauncher"
68+
"--master-class", "org.apache.spark.deploy.yarn.WorkerLauncher",
69+
"--files", distFiles
6870
)
6971

7072
val args = new ClientArguments(argsArray, conf)

yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ class Client(args: ClientArguments, conf: Configuration, sparkConf: SparkConf)
122122
(System.getenv("SPARK_JAR") == null) -> "Error: You must set SPARK_JAR environment variable!",
123123
(args.userJar == null) -> "Error: You must specify a user jar!",
124124
(args.userClass == null) -> "Error: You must specify a user class!",
125-
(args.numWorkers <= 0) -> "Error: You must specify atleast 1 worker!",
125+
(args.numWorkers <= 0) -> "Error: You must specify at least 1 worker!",
126126
(args.amMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: AM memory size must be" +
127127
"greater than: " + YarnAllocationHandler.MEMORY_OVERHEAD),
128128
(args.workerMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: Worker memory size" +

0 commit comments

Comments
 (0)