-
Couldn't load subscription status.
- Fork 117
Download remotely-located resources on driver startup. #240
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -365,7 +365,7 @@ package object config extends Logging { | |
| " resource staging server to download jars.") | ||
| .internal() | ||
| .stringConf | ||
| .createWithDefault(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH) | ||
| .createWithDefault(INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_JARS_SECRET_PATH) | ||
|
|
||
| private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER = | ||
| ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadFilesResourceIdentifier") | ||
|
|
@@ -380,30 +380,62 @@ package object config extends Logging { | |
| " resource staging server to download files.") | ||
| .internal() | ||
| .stringConf | ||
| .createWithDefault(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH) | ||
| .createWithDefault(INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_FILES_SECRET_PATH) | ||
|
|
||
| private[spark] val INIT_CONTAINER_REMOTE_JARS = | ||
| ConfigBuilder("spark.kubernetes.driver.initcontainer.remoteJars") | ||
| .doc("Comma-separated list of jar URIs to download in the init-container. This is inferred" + | ||
| " from spark.jars.") | ||
| .internal() | ||
| .stringConf | ||
| .createOptional | ||
|
|
||
| private[spark] val INIT_CONTAINER_REMOTE_FILES = | ||
| ConfigBuilder("spark.kubernetes.driver.initcontainer.remoteFiles") | ||
| .doc("Comma-separated list of file URIs to download in the init-container. This is inferred" + | ||
| " from spark.files.") | ||
| .internal() | ||
| .stringConf | ||
| .createOptional | ||
|
|
||
| private[spark] val INIT_CONTAINER_DOCKER_IMAGE = | ||
| ConfigBuilder("spark.kubernetes.driver.initcontainer.docker.image") | ||
| .doc("Image for the driver's init-container that downloads mounted dependencies.") | ||
| .stringConf | ||
| .createWithDefault(s"spark-driver-init:$sparkVersion") | ||
|
|
||
| private[spark] val DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION = | ||
| ConfigBuilder("spark.kubernetes.driver.mountdependencies.jarsDownloadDir") | ||
| private[spark] val DRIVER_SUBMITTED_JARS_DOWNLOAD_LOCATION = | ||
| ConfigBuilder("spark.kubernetes.driver.mountdependencies.submittedJars.downloadDir") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The config string is getting long and unwieldy. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We probably want to indicate that this is where the jars are being downloaded to. |
||
| .doc("Location to download local jars to in the driver. When using spark-submit, this" + | ||
| " directory must be empty and will be mounted as an empty directory volume on the" + | ||
| " driver pod.") | ||
| .stringConf | ||
| .createWithDefault("/var/spark-data/spark-local-jars") | ||
|
|
||
| private[spark] val DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION = | ||
| ConfigBuilder("spark.kubernetes.driver.mountdependencies.filesDownloadDir") | ||
| private[spark] val DRIVER_SUBMITTED_FILES_DOWNLOAD_LOCATION = | ||
| ConfigBuilder("spark.kubernetes.driver.mountdependencies.submittedFiles.downloadDir") | ||
| .doc("Location to download local files to in the driver. When using spark-submit, this" + | ||
| " directory must be empty and will be mounted as an empty directory volume on the" + | ||
| " driver pod.") | ||
| .stringConf | ||
| .createWithDefault("/var/spark-data/spark-local-files") | ||
|
|
||
| private[spark] val DRIVER_REMOTE_JARS_DOWNLOAD_LOCATION = | ||
| ConfigBuilder("spark.kubernetes.driver.mountdependencies.remoteJars.downloadDir") | ||
| .doc("Location to download remotely-located (e.g. HDFS) jars to in the driver. When" + | ||
| " using spark-submit, this directory must be empty and will be mounted as an empty" + | ||
| " directory volume on the driver pod.") | ||
| .stringConf | ||
| .createWithDefault("/var/spark-data/spark-remote-jars") | ||
|
|
||
| private[spark] val DRIVER_REMOTE_FILES_DOWNLOAD_LOCATION = | ||
| ConfigBuilder("spark.kubernetes.driver.mountdependencies.remoteFiles.downloadDir") | ||
| .doc("Location to download remotely-located (e.g. HDFS) files to in the driver. When" + | ||
| " using spark-submit, this directory must be empty and will be mounted as an empty" + | ||
| " directory volume on the driver pod.") | ||
| .stringConf | ||
| .createWithDefault("/var/spark-data/spark-remote-files") | ||
|
|
||
| private[spark] val DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT = | ||
| ConfigBuilder("spark.kubernetes.mountdependencies.mountTimeout") | ||
| .doc("Timeout before aborting the attempt to download and unpack local dependencies from" + | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,7 +70,6 @@ package object constants { | |
| private[spark] val ENV_EXECUTOR_ID = "SPARK_EXECUTOR_ID" | ||
| private[spark] val ENV_EXECUTOR_POD_IP = "SPARK_EXECUTOR_POD_IP" | ||
| private[spark] val ENV_DRIVER_MEMORY = "SPARK_DRIVER_MEMORY" | ||
| private[spark] val ENV_UPLOADED_JARS_DIR = "SPARK_UPLOADED_JARS_DIR" | ||
| private[spark] val ENV_SUBMIT_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" | ||
| private[spark] val ENV_MOUNTED_CLASSPATH = "SPARK_MOUNTED_CLASSPATH" | ||
| private[spark] val ENV_DRIVER_MAIN_CLASS = "SPARK_DRIVER_CLASS" | ||
|
|
@@ -92,25 +91,59 @@ package object constants { | |
|
|
||
| // V2 submission init container | ||
| private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers" | ||
| private[spark] val INIT_CONTAINER_SECRETS_VOLUME_NAME = "dependency-secret" | ||
| private[spark] val INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH = "/mnt/secrets/spark-init" | ||
| private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY = "downloadJarsSecret" | ||
| private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY = "downloadFilesSecret" | ||
| private[spark] val INIT_CONTAINER_TRUSTSTORE_SECRET_KEY = "trustStore" | ||
| private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH = | ||
| s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY" | ||
| private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH = | ||
| s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY" | ||
| private[spark] val INIT_CONTAINER_TRUSTSTORE_PATH = | ||
| s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_TRUSTSTORE_SECRET_KEY" | ||
| private[spark] val INIT_CONTAINER_DOWNLOAD_CREDENTIALS_PATH = | ||
| "/mnt/secrets/kubernetes-credentials" | ||
| private[spark] val INIT_CONTAINER_CONFIG_MAP_KEY = "init-driver" | ||
| private[spark] val INIT_CONTAINER_PROPERTIES_FILE_VOLUME = "init-container-properties" | ||
| private[spark] val INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH = "/etc/spark-init/" | ||
| private[spark] val INIT_CONTAINER_PROPERTIES_FILE_NAME = "init-driver.properties" | ||
| private[spark] val INIT_CONTAINER_PROPERTIES_FILE_PATH = | ||
| s"$INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH/$INIT_CONTAINER_PROPERTIES_FILE_NAME" | ||
| private[spark] val DOWNLOAD_JARS_VOLUME_NAME = "download-jars" | ||
| private[spark] val DOWNLOAD_FILES_VOLUME_NAME = "download-files" | ||
|
|
||
| // Init container for downloading submitted files from the staging server. | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_CONTAINER_NAME = | ||
| "spark-driver-download-submitted-files" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_SECRETS_VOLUME_NAME = | ||
| "resource-staging-server-secret" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_SECRETS_VOLUME_MOUNT_PATH = | ||
| "/mnt/secrets/spark-init" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_JARS_SECRET_KEY = | ||
| "downloadSubmittedJarsSecret" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_FILES_SECRET_KEY = | ||
| "downloadSubmittedFilesSecret" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_TRUSTSTORE_SECRET_KEY = "trustStore" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_JARS_SECRET_PATH = | ||
| s"$INIT_CONTAINER_SUBMITTED_FILES_SECRETS_VOLUME_MOUNT_PATH/" + | ||
| s"$INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_JARS_SECRET_KEY" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_FILES_SECRET_PATH = | ||
| s"$INIT_CONTAINER_SUBMITTED_FILES_SECRETS_VOLUME_MOUNT_PATH/" + | ||
| s"$INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_FILES_SECRET_KEY" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_TRUSTSTORE_PATH = | ||
| s"$INIT_CONTAINER_SUBMITTED_FILES_SECRETS_VOLUME_MOUNT_PATH/" + | ||
| s"$INIT_CONTAINER_SUBMITTED_FILES_TRUSTSTORE_SECRET_KEY" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_CONFIG_MAP_KEY = | ||
| "download-submitted-files" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_PROPERTIES_FILE_VOLUME = | ||
| "download-submitted-files-properties" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_PROPERTIES_FILE_MOUNT_PATH = | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's a lot of growth in complexity here in terms of mount paths and other parameters. Is it possible for us to use fewer init-containers, or group these better? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could download all of the jars from all locations to the same directory (resource staging server and remote) as well as the files, but I'm concerned about file name conflicts and how to deduplicate those. Perhaps we should explicitly forbid that multiple URIs end with the same file name. @aash for thoughts. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Having identically named jars coming from different places sounds like an anti-pattern. I don't think prohibiting that is all that bad. There's already the contract that file names must be unique as they're all downloaded into the cwd |
||
| "/etc/spark-init/download-submitted-files" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_PROPERTIES_FILE_NAME = | ||
| "init-driver-download-submitted-files.properties" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_PROPERTIES_FILE_PATH = | ||
| s"$INIT_CONTAINER_SUBMITTED_FILES_PROPERTIES_FILE_MOUNT_PATH/" + | ||
| s"$INIT_CONTAINER_SUBMITTED_FILES_PROPERTIES_FILE_NAME" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_JARS_VOLUME_NAME = | ||
| "download-submitted-jars" | ||
| private[spark] val INIT_CONTAINER_SUBMITTED_FILES_DOWNLOAD_FILES_VOLUME_NAME = | ||
| "download-submitted-files" | ||
|
|
||
| // Init container for fetching remote dependencies. | ||
| private[spark] val INIT_CONTAINER_REMOTE_FILES_CONTAINER_NAME = | ||
| "spark-driver-download-remote-files" | ||
| private[spark] val INIT_CONTAINER_REMOTE_FILES_CONFIG_MAP_KEY = | ||
| "download-remote-files" | ||
| private[spark] val INIT_CONTAINER_REMOTE_FILES_PROPERTIES_FILE_VOLUME = | ||
| "download-remote-files-properties" | ||
| private[spark] val INIT_CONTAINER_REMOTE_FILES_PROPERTIES_FILE_MOUNT_PATH = | ||
| "/etc/spark-init/download-remote-files" | ||
| private[spark] val INIT_CONTAINER_REMOTE_FILES_PROPERTIES_FILE_NAME = | ||
| "init-driver-download-remote-files.properties" | ||
| private[spark] val INIT_CONTAINER_REMOTE_FILES_PROPERTIES_FILE_PATH = | ||
| s"$INIT_CONTAINER_REMOTE_FILES_PROPERTIES_FILE_MOUNT_PATH/" + | ||
| s"$INIT_CONTAINER_REMOTE_FILES_PROPERTIES_FILE_NAME" | ||
| private[spark] val INIT_CONTAINER_REMOTE_FILES_DOWNLOAD_JARS_VOLUME_NAME = "download-remote-jars" | ||
| private[spark] val INIT_CONTAINER_REMOTE_FILES_DOWNLOAD_FILES_VOLUME_NAME = | ||
| "download-remote-files" | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
nit: inferred -> calculated