diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2
new file mode 100644
index 0000000000000..24e9d0197ec65
--- /dev/null
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2
@@ -0,0 +1,210 @@
+JLargeArrays/1.5//JLargeArrays-1.5.jar
+JTransforms/3.1//JTransforms-3.1.jar
+JavaEWAH/0.3.2//JavaEWAH-0.3.2.jar
+RoaringBitmap/0.7.45//RoaringBitmap-0.7.45.jar
+ST4/4.0.4//ST4-4.0.4.jar
+activation/1.1.1//activation-1.1.1.jar
+aircompressor/0.10//aircompressor-0.10.jar
+algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
+antlr-runtime/3.4//antlr-runtime-3.4.jar
+antlr/2.7.7//antlr-2.7.7.jar
+antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar
+aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar
+aopalliance/1.0//aopalliance-1.0.jar
+apache-log4j-extras/1.2.17//apache-log4j-extras-1.2.17.jar
+apacheds-i18n/2.0.0-M15//apacheds-i18n-2.0.0-M15.jar
+apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar
+api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar
+api-util/1.0.0-M20//api-util-1.0.0-M20.jar
+arpack_combined_all/0.1//arpack_combined_all-0.1.jar
+arrow-format/0.15.1//arrow-format-0.15.1.jar
+arrow-memory/0.15.1//arrow-memory-0.15.1.jar
+arrow-vector/0.15.1//arrow-vector-0.15.1.jar
+audience-annotations/0.5.0//audience-annotations-0.5.0.jar
+automaton/1.11-8//automaton-1.11-8.jar
+avro-ipc/1.8.2//avro-ipc-1.8.2.jar
+avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar
+avro/1.8.2//avro-1.8.2.jar
+bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar
+breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar
+breeze_2.12/1.0//breeze_2.12-1.0.jar
+cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar
+chill-java/0.9.5//chill-java-0.9.5.jar
+chill_2.12/0.9.5//chill_2.12-0.9.5.jar
+commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar
+commons-cli/1.2//commons-cli-1.2.jar
+commons-codec/1.10//commons-codec-1.10.jar
+commons-collections/3.2.2//commons-collections-3.2.2.jar
+commons-compiler/3.0.15//commons-compiler-3.0.15.jar
+commons-compress/1.8.1//commons-compress-1.8.1.jar
+commons-configuration/1.6//commons-configuration-1.6.jar
+commons-crypto/1.0.0//commons-crypto-1.0.0.jar
+commons-dbcp/1.4//commons-dbcp-1.4.jar
+commons-digester/1.8//commons-digester-1.8.jar
+commons-httpclient/3.1//commons-httpclient-3.1.jar
+commons-io/2.4//commons-io-2.4.jar
+commons-lang/2.6//commons-lang-2.6.jar
+commons-lang3/3.9//commons-lang3-3.9.jar
+commons-logging/1.1.3//commons-logging-1.1.3.jar
+commons-math3/3.4.1//commons-math3-3.4.1.jar
+commons-net/3.1//commons-net-3.1.jar
+commons-pool/1.5.4//commons-pool-1.5.4.jar
+commons-text/1.6//commons-text-1.6.jar
+compress-lzf/1.0.3//compress-lzf-1.0.3.jar
+core/1.1.2//core-1.1.2.jar
+curator-client/2.7.1//curator-client-2.7.1.jar
+curator-framework/2.7.1//curator-framework-2.7.1.jar
+curator-recipes/2.7.1//curator-recipes-2.7.1.jar
+datanucleus-api-jdo/3.2.6//datanucleus-api-jdo-3.2.6.jar
+datanucleus-core/3.2.10//datanucleus-core-3.2.10.jar
+datanucleus-rdbms/3.2.9//datanucleus-rdbms-3.2.9.jar
+derby/10.12.1.1//derby-10.12.1.1.jar
+flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar
+generex/1.0.2//generex-1.0.2.jar
+gson/2.2.4//gson-2.2.4.jar
+guava/14.0.1//guava-14.0.1.jar
+guice-servlet/3.0//guice-servlet-3.0.jar
+guice/3.0//guice-3.0.jar
+hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar
+hadoop-auth/2.7.4//hadoop-auth-2.7.4.jar
+hadoop-client/2.7.4//hadoop-client-2.7.4.jar
+hadoop-common/2.7.4//hadoop-common-2.7.4.jar
+hadoop-hdfs/2.7.4//hadoop-hdfs-2.7.4.jar
+hadoop-mapreduce-client-app/2.7.4//hadoop-mapreduce-client-app-2.7.4.jar
+hadoop-mapreduce-client-common/2.7.4//hadoop-mapreduce-client-common-2.7.4.jar
+hadoop-mapreduce-client-core/2.7.4//hadoop-mapreduce-client-core-2.7.4.jar
+hadoop-mapreduce-client-jobclient/2.7.4//hadoop-mapreduce-client-jobclient-2.7.4.jar
+hadoop-mapreduce-client-shuffle/2.7.4//hadoop-mapreduce-client-shuffle-2.7.4.jar
+hadoop-yarn-api/2.7.4//hadoop-yarn-api-2.7.4.jar
+hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar
+hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar
+hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar
+hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar
+hk2-api/2.6.1//hk2-api-2.6.1.jar
+hk2-locator/2.6.1//hk2-locator-2.6.1.jar
+hk2-utils/2.6.1//hk2-utils-2.6.1.jar
+htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar
+httpclient/4.5.6//httpclient-4.5.6.jar
+httpcore/4.4.12//httpcore-4.4.12.jar
+istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar
+ivy/2.4.0//ivy-2.4.0.jar
+jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar
+jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar
+jackson-core/2.10.0//jackson-core-2.10.0.jar
+jackson-databind/2.10.0//jackson-databind-2.10.0.jar
+jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar
+jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar
+jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar
+jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar
+jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar
+jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar
+jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar
+jackson-xc/1.9.13//jackson-xc-1.9.13.jar
+jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar
+jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar
+jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar
+jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar
+jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar
+jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar
+janino/3.0.15//janino-3.0.15.jar
+javassist/3.25.0-GA//javassist-3.25.0-GA.jar
+javax.inject/1//javax.inject-1.jar
+javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar
+javolution/5.5.1//javolution-5.5.1.jar
+jaxb-api/2.2.2//jaxb-api-2.2.2.jar
+jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar
+jcl-over-slf4j/1.7.16//jcl-over-slf4j-1.7.16.jar
+jdo-api/3.0.1//jdo-api-3.0.1.jar
+jersey-client/2.30//jersey-client-2.30.jar
+jersey-common/2.30//jersey-common-2.30.jar
+jersey-container-servlet-core/2.30//jersey-container-servlet-core-2.30.jar
+jersey-container-servlet/2.30//jersey-container-servlet-2.30.jar
+jersey-hk2/2.30//jersey-hk2-2.30.jar
+jersey-media-jaxb/2.30//jersey-media-jaxb-2.30.jar
+jersey-server/2.30//jersey-server-2.30.jar
+jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar
+jetty-util/6.1.26//jetty-util-6.1.26.jar
+jetty/6.1.26//jetty-6.1.26.jar
+jline/2.14.6//jline-2.14.6.jar
+joda-time/2.10.5//joda-time-2.10.5.jar
+jodd-core/3.5.2//jodd-core-3.5.2.jar
+jpam/1.1//jpam-1.1.jar
+json4s-ast_2.12/3.6.6//json4s-ast_2.12-3.6.6.jar
+json4s-core_2.12/3.6.6//json4s-core_2.12-3.6.6.jar
+json4s-jackson_2.12/3.6.6//json4s-jackson_2.12-3.6.6.jar
+json4s-scalap_2.12/3.6.6//json4s-scalap_2.12-3.6.6.jar
+jsp-api/2.1//jsp-api-2.1.jar
+jsr305/3.0.0//jsr305-3.0.0.jar
+jta/1.1//jta-1.1.jar
+jul-to-slf4j/1.7.16//jul-to-slf4j-1.7.16.jar
+kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar
+kubernetes-client/4.9.2//kubernetes-client-4.9.2.jar
+kubernetes-model-common/4.9.2//kubernetes-model-common-4.9.2.jar
+kubernetes-model/4.9.2//kubernetes-model-4.9.2.jar
+leveldbjni-all/1.8//leveldbjni-all-1.8.jar
+libfb303/0.9.3//libfb303-0.9.3.jar
+libthrift/0.12.0//libthrift-0.12.0.jar
+log4j/1.2.17//log4j-1.2.17.jar
+logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar
+lz4-java/1.7.1//lz4-java-1.7.1.jar
+machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar
+macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar
+mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar
+metrics-core/4.1.1//metrics-core-4.1.1.jar
+metrics-graphite/4.1.1//metrics-graphite-4.1.1.jar
+metrics-jmx/4.1.1//metrics-jmx-4.1.1.jar
+metrics-json/4.1.1//metrics-json-4.1.1.jar
+metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar
+minlog/1.3.0//minlog-1.3.0.jar
+netty-all/4.1.42.Final//netty-all-4.1.42.Final.jar
+objenesis/2.5.1//objenesis-2.5.1.jar
+okhttp/3.12.6//okhttp-3.12.6.jar
+okio/1.15.0//okio-1.15.0.jar
+opencsv/2.3//opencsv-2.3.jar
+orc-core/1.5.9/nohive/orc-core-1.5.9-nohive.jar
+orc-mapreduce/1.5.9/nohive/orc-mapreduce-1.5.9-nohive.jar
+orc-shims/1.5.9//orc-shims-1.5.9.jar
+oro/2.0.8//oro-2.0.8.jar
+osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
+paranamer/2.8//paranamer-2.8.jar
+parquet-column/1.10.1//parquet-column-1.10.1.jar
+parquet-common/1.10.1//parquet-common-1.10.1.jar
+parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar
+parquet-format/2.4.0//parquet-format-2.4.0.jar
+parquet-hadoop-bundle/1.6.0//parquet-hadoop-bundle-1.6.0.jar
+parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
+parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
+protobuf-java/2.5.0//protobuf-java-2.5.0.jar
+py4j/0.10.8.1//py4j-0.10.8.1.jar
+pyrolite/4.30//pyrolite-4.30.jar
+scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar
+scala-compiler/2.12.10//scala-compiler-2.12.10.jar
+scala-library/2.12.10//scala-library-2.12.10.jar
+scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar
+scala-reflect/2.12.10//scala-reflect-2.12.10.jar
+scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar
+shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar
+shims/0.7.45//shims-0.7.45.jar
+slf4j-api/1.7.16//slf4j-api-1.7.16.jar
+slf4j-log4j12/1.7.16//slf4j-log4j12-1.7.16.jar
+snakeyaml/1.24//snakeyaml-1.24.jar
+snappy-java/1.1.7.3//snappy-java-1.1.7.3.jar
+snappy/0.2//snappy-0.2.jar
+spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar
+spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
+spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
+spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
+stax-api/1.0-2//stax-api-1.0-2.jar
+stax-api/1.0.1//stax-api-1.0.1.jar
+stream/2.9.6//stream-2.9.6.jar
+stringtemplate/3.2.1//stringtemplate-3.2.1.jar
+super-csv/2.2.0//super-csv-2.2.0.jar
+threeten-extra/1.5.0//threeten-extra-1.5.0.jar
+univocity-parsers/2.8.3//univocity-parsers-2.8.3.jar
+xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar
+xercesImpl/2.9.1//xercesImpl-2.9.1.jar
+xmlenc/0.52//xmlenc-0.52.jar
+xz/1.5//xz-1.5.jar
+zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar
+zookeeper/3.4.14//zookeeper-3.4.14.jar
+zstd-jni/1.4.4-3//zstd-jni-1.4.4-3.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
new file mode 100644
index 0000000000000..24b20d5535f78
--- /dev/null
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
@@ -0,0 +1,225 @@
+HikariCP/2.5.1//HikariCP-2.5.1.jar
+JLargeArrays/1.5//JLargeArrays-1.5.jar
+JTransforms/3.1//JTransforms-3.1.jar
+RoaringBitmap/0.7.45//RoaringBitmap-0.7.45.jar
+ST4/4.0.4//ST4-4.0.4.jar
+activation/1.1.1//activation-1.1.1.jar
+aircompressor/0.10//aircompressor-0.10.jar
+algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
+antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
+antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar
+aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar
+aopalliance/1.0//aopalliance-1.0.jar
+apacheds-i18n/2.0.0-M15//apacheds-i18n-2.0.0-M15.jar
+apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar
+api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar
+api-util/1.0.0-M20//api-util-1.0.0-M20.jar
+arpack_combined_all/0.1//arpack_combined_all-0.1.jar
+arrow-format/0.15.1//arrow-format-0.15.1.jar
+arrow-memory/0.15.1//arrow-memory-0.15.1.jar
+arrow-vector/0.15.1//arrow-vector-0.15.1.jar
+audience-annotations/0.5.0//audience-annotations-0.5.0.jar
+automaton/1.11-8//automaton-1.11-8.jar
+avro-ipc/1.8.2//avro-ipc-1.8.2.jar
+avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar
+avro/1.8.2//avro-1.8.2.jar
+bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar
+breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar
+breeze_2.12/1.0//breeze_2.12-1.0.jar
+cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar
+chill-java/0.9.5//chill-java-0.9.5.jar
+chill_2.12/0.9.5//chill_2.12-0.9.5.jar
+commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar
+commons-cli/1.2//commons-cli-1.2.jar
+commons-codec/1.10//commons-codec-1.10.jar
+commons-collections/3.2.2//commons-collections-3.2.2.jar
+commons-compiler/3.0.15//commons-compiler-3.0.15.jar
+commons-compress/1.8.1//commons-compress-1.8.1.jar
+commons-configuration/1.6//commons-configuration-1.6.jar
+commons-crypto/1.0.0//commons-crypto-1.0.0.jar
+commons-dbcp/1.4//commons-dbcp-1.4.jar
+commons-digester/1.8//commons-digester-1.8.jar
+commons-httpclient/3.1//commons-httpclient-3.1.jar
+commons-io/2.4//commons-io-2.4.jar
+commons-lang/2.6//commons-lang-2.6.jar
+commons-lang3/3.9//commons-lang3-3.9.jar
+commons-logging/1.1.3//commons-logging-1.1.3.jar
+commons-math3/3.4.1//commons-math3-3.4.1.jar
+commons-net/3.1//commons-net-3.1.jar
+commons-pool/1.5.4//commons-pool-1.5.4.jar
+commons-text/1.6//commons-text-1.6.jar
+compress-lzf/1.0.3//compress-lzf-1.0.3.jar
+core/1.1.2//core-1.1.2.jar
+curator-client/2.7.1//curator-client-2.7.1.jar
+curator-framework/2.7.1//curator-framework-2.7.1.jar
+curator-recipes/2.7.1//curator-recipes-2.7.1.jar
+datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar
+datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar
+datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar
+derby/10.12.1.1//derby-10.12.1.1.jar
+dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar
+flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar
+generex/1.0.2//generex-1.0.2.jar
+gson/2.2.4//gson-2.2.4.jar
+guava/14.0.1//guava-14.0.1.jar
+guice-servlet/3.0//guice-servlet-3.0.jar
+guice/3.0//guice-3.0.jar
+hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar
+hadoop-auth/2.7.4//hadoop-auth-2.7.4.jar
+hadoop-client/2.7.4//hadoop-client-2.7.4.jar
+hadoop-common/2.7.4//hadoop-common-2.7.4.jar
+hadoop-hdfs/2.7.4//hadoop-hdfs-2.7.4.jar
+hadoop-mapreduce-client-app/2.7.4//hadoop-mapreduce-client-app-2.7.4.jar
+hadoop-mapreduce-client-common/2.7.4//hadoop-mapreduce-client-common-2.7.4.jar
+hadoop-mapreduce-client-core/2.7.4//hadoop-mapreduce-client-core-2.7.4.jar
+hadoop-mapreduce-client-jobclient/2.7.4//hadoop-mapreduce-client-jobclient-2.7.4.jar
+hadoop-mapreduce-client-shuffle/2.7.4//hadoop-mapreduce-client-shuffle-2.7.4.jar
+hadoop-yarn-api/2.7.4//hadoop-yarn-api-2.7.4.jar
+hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar
+hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar
+hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar
+hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar
+hive-beeline/2.3.6//hive-beeline-2.3.6.jar
+hive-cli/2.3.6//hive-cli-2.3.6.jar
+hive-common/2.3.6//hive-common-2.3.6.jar
+hive-exec/2.3.6/core/hive-exec-2.3.6-core.jar
+hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar
+hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar
+hive-metastore/2.3.6//hive-metastore-2.3.6.jar
+hive-serde/2.3.6//hive-serde-2.3.6.jar
+hive-service-rpc/2.3.6//hive-service-rpc-2.3.6.jar
+hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar
+hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar
+hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar
+hive-shims/2.3.6//hive-shims-2.3.6.jar
+hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar
+hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar
+hk2-api/2.6.1//hk2-api-2.6.1.jar
+hk2-locator/2.6.1//hk2-locator-2.6.1.jar
+hk2-utils/2.6.1//hk2-utils-2.6.1.jar
+htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar
+httpclient/4.5.6//httpclient-4.5.6.jar
+httpcore/4.4.12//httpcore-4.4.12.jar
+istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar
+ivy/2.4.0//ivy-2.4.0.jar
+jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar
+jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar
+jackson-core/2.10.0//jackson-core-2.10.0.jar
+jackson-databind/2.10.0//jackson-databind-2.10.0.jar
+jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar
+jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar
+jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar
+jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar
+jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar
+jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar
+jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar
+jackson-xc/1.9.13//jackson-xc-1.9.13.jar
+jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar
+jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar
+jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar
+jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar
+jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar
+jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar
+janino/3.0.15//janino-3.0.15.jar
+javassist/3.25.0-GA//javassist-3.25.0-GA.jar
+javax.inject/1//javax.inject-1.jar
+javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
+javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar
+javolution/5.5.1//javolution-5.5.1.jar
+jaxb-api/2.2.2//jaxb-api-2.2.2.jar
+jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar
+jcl-over-slf4j/1.7.16//jcl-over-slf4j-1.7.16.jar
+jdo-api/3.0.1//jdo-api-3.0.1.jar
+jersey-client/2.30//jersey-client-2.30.jar
+jersey-common/2.30//jersey-common-2.30.jar
+jersey-container-servlet-core/2.30//jersey-container-servlet-core-2.30.jar
+jersey-container-servlet/2.30//jersey-container-servlet-2.30.jar
+jersey-hk2/2.30//jersey-hk2-2.30.jar
+jersey-media-jaxb/2.30//jersey-media-jaxb-2.30.jar
+jersey-server/2.30//jersey-server-2.30.jar
+jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar
+jetty-util/6.1.26//jetty-util-6.1.26.jar
+jetty/6.1.26//jetty-6.1.26.jar
+jline/2.14.6//jline-2.14.6.jar
+joda-time/2.10.5//joda-time-2.10.5.jar
+jodd-core/3.5.2//jodd-core-3.5.2.jar
+jpam/1.1//jpam-1.1.jar
+json/1.8//json-1.8.jar
+json4s-ast_2.12/3.6.6//json4s-ast_2.12-3.6.6.jar
+json4s-core_2.12/3.6.6//json4s-core_2.12-3.6.6.jar
+json4s-jackson_2.12/3.6.6//json4s-jackson_2.12-3.6.6.jar
+json4s-scalap_2.12/3.6.6//json4s-scalap_2.12-3.6.6.jar
+jsp-api/2.1//jsp-api-2.1.jar
+jsr305/3.0.0//jsr305-3.0.0.jar
+jta/1.1//jta-1.1.jar
+jul-to-slf4j/1.7.16//jul-to-slf4j-1.7.16.jar
+kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar
+kubernetes-client/4.9.2//kubernetes-client-4.9.2.jar
+kubernetes-model-common/4.9.2//kubernetes-model-common-4.9.2.jar
+kubernetes-model/4.9.2//kubernetes-model-4.9.2.jar
+leveldbjni-all/1.8//leveldbjni-all-1.8.jar
+libfb303/0.9.3//libfb303-0.9.3.jar
+libthrift/0.12.0//libthrift-0.12.0.jar
+log4j/1.2.17//log4j-1.2.17.jar
+logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar
+lz4-java/1.7.1//lz4-java-1.7.1.jar
+machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar
+macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar
+mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar
+metrics-core/4.1.1//metrics-core-4.1.1.jar
+metrics-graphite/4.1.1//metrics-graphite-4.1.1.jar
+metrics-jmx/4.1.1//metrics-jmx-4.1.1.jar
+metrics-json/4.1.1//metrics-json-4.1.1.jar
+metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar
+minlog/1.3.0//minlog-1.3.0.jar
+netty-all/4.1.42.Final//netty-all-4.1.42.Final.jar
+objenesis/2.5.1//objenesis-2.5.1.jar
+okhttp/3.12.6//okhttp-3.12.6.jar
+okio/1.15.0//okio-1.15.0.jar
+opencsv/2.3//opencsv-2.3.jar
+orc-core/1.5.9//orc-core-1.5.9.jar
+orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar
+orc-shims/1.5.9//orc-shims-1.5.9.jar
+oro/2.0.8//oro-2.0.8.jar
+osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
+paranamer/2.8//paranamer-2.8.jar
+parquet-column/1.10.1//parquet-column-1.10.1.jar
+parquet-common/1.10.1//parquet-common-1.10.1.jar
+parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar
+parquet-format/2.4.0//parquet-format-2.4.0.jar
+parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
+parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
+protobuf-java/2.5.0//protobuf-java-2.5.0.jar
+py4j/0.10.8.1//py4j-0.10.8.1.jar
+pyrolite/4.30//pyrolite-4.30.jar
+scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar
+scala-compiler/2.12.10//scala-compiler-2.12.10.jar
+scala-library/2.12.10//scala-library-2.12.10.jar
+scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar
+scala-reflect/2.12.10//scala-reflect-2.12.10.jar
+scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar
+shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar
+shims/0.7.45//shims-0.7.45.jar
+slf4j-api/1.7.16//slf4j-api-1.7.16.jar
+slf4j-log4j12/1.7.16//slf4j-log4j12-1.7.16.jar
+snakeyaml/1.24//snakeyaml-1.24.jar
+snappy-java/1.1.7.3//snappy-java-1.1.7.3.jar
+spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar
+spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
+spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
+spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
+stax-api/1.0-2//stax-api-1.0-2.jar
+stax-api/1.0.1//stax-api-1.0.1.jar
+stream/2.9.6//stream-2.9.6.jar
+super-csv/2.2.0//super-csv-2.2.0.jar
+threeten-extra/1.5.0//threeten-extra-1.5.0.jar
+transaction-api/1.1//transaction-api-1.1.jar
+univocity-parsers/2.8.3//univocity-parsers-2.8.3.jar
+velocity/1.5//velocity-1.5.jar
+xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar
+xercesImpl/2.9.1//xercesImpl-2.9.1.jar
+xmlenc/0.52//xmlenc-0.52.jar
+xz/1.5//xz-1.5.jar
+zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar
+zookeeper/3.4.14//zookeeper-3.4.14.jar
+zstd-jni/1.4.4-3//zstd-jni-1.4.4-3.jar
diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
new file mode 100644
index 0000000000000..fe9699183c1a5
--- /dev/null
+++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
@@ -0,0 +1,241 @@
+HikariCP/2.5.1//HikariCP-2.5.1.jar
+JLargeArrays/1.5//JLargeArrays-1.5.jar
+JTransforms/3.1//JTransforms-3.1.jar
+RoaringBitmap/0.7.45//RoaringBitmap-0.7.45.jar
+ST4/4.0.4//ST4-4.0.4.jar
+accessors-smart/1.2//accessors-smart-1.2.jar
+activation/1.1.1//activation-1.1.1.jar
+aircompressor/0.10//aircompressor-0.10.jar
+algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
+antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
+antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar
+aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar
+aopalliance/1.0//aopalliance-1.0.jar
+arpack_combined_all/0.1//arpack_combined_all-0.1.jar
+arrow-format/0.15.1//arrow-format-0.15.1.jar
+arrow-memory/0.15.1//arrow-memory-0.15.1.jar
+arrow-vector/0.15.1//arrow-vector-0.15.1.jar
+audience-annotations/0.5.0//audience-annotations-0.5.0.jar
+automaton/1.11-8//automaton-1.11-8.jar
+avro-ipc/1.8.2//avro-ipc-1.8.2.jar
+avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar
+avro/1.8.2//avro-1.8.2.jar
+bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar
+breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar
+breeze_2.12/1.0//breeze_2.12-1.0.jar
+cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar
+chill-java/0.9.5//chill-java-0.9.5.jar
+chill_2.12/0.9.5//chill_2.12-0.9.5.jar
+commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar
+commons-cli/1.2//commons-cli-1.2.jar
+commons-codec/1.10//commons-codec-1.10.jar
+commons-collections/3.2.2//commons-collections-3.2.2.jar
+commons-compiler/3.0.15//commons-compiler-3.0.15.jar
+commons-compress/1.8.1//commons-compress-1.8.1.jar
+commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar
+commons-crypto/1.0.0//commons-crypto-1.0.0.jar
+commons-daemon/1.0.13//commons-daemon-1.0.13.jar
+commons-dbcp/1.4//commons-dbcp-1.4.jar
+commons-httpclient/3.1//commons-httpclient-3.1.jar
+commons-io/2.4//commons-io-2.4.jar
+commons-lang/2.6//commons-lang-2.6.jar
+commons-lang3/3.9//commons-lang3-3.9.jar
+commons-logging/1.1.3//commons-logging-1.1.3.jar
+commons-math3/3.4.1//commons-math3-3.4.1.jar
+commons-net/3.1//commons-net-3.1.jar
+commons-pool/1.5.4//commons-pool-1.5.4.jar
+commons-text/1.6//commons-text-1.6.jar
+compress-lzf/1.0.3//compress-lzf-1.0.3.jar
+core/1.1.2//core-1.1.2.jar
+curator-client/2.13.0//curator-client-2.13.0.jar
+curator-framework/2.13.0//curator-framework-2.13.0.jar
+curator-recipes/2.13.0//curator-recipes-2.13.0.jar
+datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar
+datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar
+datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar
+derby/10.12.1.1//derby-10.12.1.1.jar
+dnsjava/2.1.7//dnsjava-2.1.7.jar
+dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar
+ehcache/3.3.1//ehcache-3.3.1.jar
+flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar
+generex/1.0.2//generex-1.0.2.jar
+geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar
+gson/2.2.4//gson-2.2.4.jar
+guava/14.0.1//guava-14.0.1.jar
+guice-servlet/4.0//guice-servlet-4.0.jar
+guice/4.0//guice-4.0.jar
+hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar
+hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar
+hadoop-client/3.2.0//hadoop-client-3.2.0.jar
+hadoop-common/3.2.0//hadoop-common-3.2.0.jar
+hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar
+hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar
+hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar
+hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar
+hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar
+hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar
+hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar
+hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar
+hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar
+hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar
+hive-beeline/2.3.6//hive-beeline-2.3.6.jar
+hive-cli/2.3.6//hive-cli-2.3.6.jar
+hive-common/2.3.6//hive-common-2.3.6.jar
+hive-exec/2.3.6/core/hive-exec-2.3.6-core.jar
+hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar
+hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar
+hive-metastore/2.3.6//hive-metastore-2.3.6.jar
+hive-serde/2.3.6//hive-serde-2.3.6.jar
+hive-service-rpc/2.3.6//hive-service-rpc-2.3.6.jar
+hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar
+hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar
+hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar
+hive-shims/2.3.6//hive-shims-2.3.6.jar
+hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar
+hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar
+hk2-api/2.6.1//hk2-api-2.6.1.jar
+hk2-locator/2.6.1//hk2-locator-2.6.1.jar
+hk2-utils/2.6.1//hk2-utils-2.6.1.jar
+htrace-core4/4.1.0-incubating//htrace-core4-4.1.0-incubating.jar
+httpclient/4.5.6//httpclient-4.5.6.jar
+httpcore/4.4.12//httpcore-4.4.12.jar
+istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar
+ivy/2.4.0//ivy-2.4.0.jar
+jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar
+jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar
+jackson-core/2.10.0//jackson-core-2.10.0.jar
+jackson-databind/2.10.0//jackson-databind-2.10.0.jar
+jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar
+jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar
+jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar
+jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar
+jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar
+jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar
+jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar
+jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar
+jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar
+jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar
+jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar
+jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar
+jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar
+jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar
+janino/3.0.15//janino-3.0.15.jar
+javassist/3.25.0-GA//javassist-3.25.0-GA.jar
+javax.inject/1//javax.inject-1.jar
+javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
+javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar
+javolution/5.5.1//javolution-5.5.1.jar
+jaxb-api/2.2.11//jaxb-api-2.2.11.jar
+jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar
+jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar
+jcl-over-slf4j/1.7.16//jcl-over-slf4j-1.7.16.jar
+jdo-api/3.0.1//jdo-api-3.0.1.jar
+jersey-client/2.30//jersey-client-2.30.jar
+jersey-common/2.30//jersey-common-2.30.jar
+jersey-container-servlet-core/2.30//jersey-container-servlet-core-2.30.jar
+jersey-container-servlet/2.30//jersey-container-servlet-2.30.jar
+jersey-hk2/2.30//jersey-hk2-2.30.jar
+jersey-media-jaxb/2.30//jersey-media-jaxb-2.30.jar
+jersey-server/2.30//jersey-server-2.30.jar
+jline/2.14.6//jline-2.14.6.jar
+joda-time/2.10.5//joda-time-2.10.5.jar
+jodd-core/3.5.2//jodd-core-3.5.2.jar
+jpam/1.1//jpam-1.1.jar
+json-smart/2.3//json-smart-2.3.jar
+json/1.8//json-1.8.jar
+json4s-ast_2.12/3.6.6//json4s-ast_2.12-3.6.6.jar
+json4s-core_2.12/3.6.6//json4s-core_2.12-3.6.6.jar
+json4s-jackson_2.12/3.6.6//json4s-jackson_2.12-3.6.6.jar
+json4s-scalap_2.12/3.6.6//json4s-scalap_2.12-3.6.6.jar
+jsp-api/2.1//jsp-api-2.1.jar
+jsr305/3.0.0//jsr305-3.0.0.jar
+jta/1.1//jta-1.1.jar
+jul-to-slf4j/1.7.16//jul-to-slf4j-1.7.16.jar
+kerb-admin/1.0.1//kerb-admin-1.0.1.jar
+kerb-client/1.0.1//kerb-client-1.0.1.jar
+kerb-common/1.0.1//kerb-common-1.0.1.jar
+kerb-core/1.0.1//kerb-core-1.0.1.jar
+kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar
+kerb-identity/1.0.1//kerb-identity-1.0.1.jar
+kerb-server/1.0.1//kerb-server-1.0.1.jar
+kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar
+kerb-util/1.0.1//kerb-util-1.0.1.jar
+kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar
+kerby-config/1.0.1//kerby-config-1.0.1.jar
+kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar
+kerby-util/1.0.1//kerby-util-1.0.1.jar
+kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar
+kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar
+kubernetes-client/4.9.2//kubernetes-client-4.9.2.jar
+kubernetes-model-common/4.9.2//kubernetes-model-common-4.9.2.jar
+kubernetes-model/4.9.2//kubernetes-model-4.9.2.jar
+leveldbjni-all/1.8//leveldbjni-all-1.8.jar
+libfb303/0.9.3//libfb303-0.9.3.jar
+libthrift/0.12.0//libthrift-0.12.0.jar
+log4j/1.2.17//log4j-1.2.17.jar
+logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar
+lz4-java/1.7.1//lz4-java-1.7.1.jar
+machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar
+macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar
+mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar
+metrics-core/4.1.1//metrics-core-4.1.1.jar
+metrics-graphite/4.1.1//metrics-graphite-4.1.1.jar
+metrics-jmx/4.1.1//metrics-jmx-4.1.1.jar
+metrics-json/4.1.1//metrics-json-4.1.1.jar
+metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar
+minlog/1.3.0//minlog-1.3.0.jar
+mssql-jdbc/6.2.1.jre7//mssql-jdbc-6.2.1.jre7.jar
+netty-all/4.1.42.Final//netty-all-4.1.42.Final.jar
+nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar
+objenesis/2.5.1//objenesis-2.5.1.jar
+okhttp/2.7.5//okhttp-2.7.5.jar
+okhttp/3.12.6//okhttp-3.12.6.jar
+okio/1.15.0//okio-1.15.0.jar
+opencsv/2.3//opencsv-2.3.jar
+orc-core/1.5.9//orc-core-1.5.9.jar
+orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar
+orc-shims/1.5.9//orc-shims-1.5.9.jar
+oro/2.0.8//oro-2.0.8.jar
+osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
+paranamer/2.8//paranamer-2.8.jar
+parquet-column/1.10.1//parquet-column-1.10.1.jar
+parquet-common/1.10.1//parquet-common-1.10.1.jar
+parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar
+parquet-format/2.4.0//parquet-format-2.4.0.jar
+parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
+parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
+protobuf-java/2.5.0//protobuf-java-2.5.0.jar
+py4j/0.10.8.1//py4j-0.10.8.1.jar
+pyrolite/4.30//pyrolite-4.30.jar
+re2j/1.1//re2j-1.1.jar
+scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar
+scala-compiler/2.12.10//scala-compiler-2.12.10.jar
+scala-library/2.12.10//scala-library-2.12.10.jar
+scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar
+scala-reflect/2.12.10//scala-reflect-2.12.10.jar
+scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar
+shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar
+shims/0.7.45//shims-0.7.45.jar
+slf4j-api/1.7.16//slf4j-api-1.7.16.jar
+slf4j-log4j12/1.7.16//slf4j-log4j12-1.7.16.jar
+snakeyaml/1.24//snakeyaml-1.24.jar
+snappy-java/1.1.7.3//snappy-java-1.1.7.3.jar
+spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar
+spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
+spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
+spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
+stax-api/1.0.1//stax-api-1.0.1.jar
+stax2-api/3.1.4//stax2-api-3.1.4.jar
+stream/2.9.6//stream-2.9.6.jar
+super-csv/2.2.0//super-csv-2.2.0.jar
+threeten-extra/1.5.0//threeten-extra-1.5.0.jar
+token-provider/1.0.1//token-provider-1.0.1.jar
+transaction-api/1.1//transaction-api-1.1.jar
+univocity-parsers/2.8.3//univocity-parsers-2.8.3.jar
+velocity/1.5//velocity-1.5.jar
+woodstox-core/5.0.3//woodstox-core-5.0.3.jar
+xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar
+xz/1.5//xz-1.5.jar
+zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar
+zookeeper/3.4.14//zookeeper-3.4.14.jar
+zstd-jni/1.4.4-3//zstd-jni-1.4.4-3.jar
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index c9850ca5124fc..578d09c112bfa 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -29,7 +29,8 @@
   <name>Spark Project Kubernetes</name>
   <sbt.project.name>kubernetes</sbt.project.name>
-  <kubernetes.client.version>4.6.1</kubernetes.client.version>
+
+  <kubernetes.client.version>4.9.2</kubernetes.client.version>
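With this bump, fabric8's Quantity no longer exposes the full "6G"-style string through getAmount(); the numeric part and the unit suffix are reported separately as amount and format, which is why the test updates further down compare both pieces. A minimal illustrative sketch, assuming kubernetes-client 4.9.x behavior:

```scala
import io.fabric8.kubernetes.api.model.Quantity

// Assumed 4.9.x behavior: the single-argument constructor splits the numeric
// amount from the unit suffix ("format").
val sizeLimit = new Quantity("6G")
assert(sizeLimit.getAmount == "6")
assert(sizeLimit.getFormat == "G")

// Hence the helper used by the updated suites below:
def amountAndFormat(q: Quantity): String = q.getAmount + q.getFormat
assert(amountAndFormat(new Quantity("456Mi")) == "456Mi")
```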
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala
index 588cd9d40f9a0..7d88a9f7145ea 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala
@@ -19,7 +19,26 @@ package org.apache.spark.deploy.k8s
import org.apache.spark.SparkConf
import org.apache.spark.util.Utils
-private[spark] object KubernetesUtils {
+import java.security.SecureRandom
+
+import scala.collection.JavaConverters._
+
+import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, ContainerStateRunning, ContainerStateTerminated, ContainerStateWaiting, ContainerStatus, Pod, PodBuilder, Quantity}
+import io.fabric8.kubernetes.client.KubernetesClient
+import org.apache.commons.codec.binary.Hex
+import org.apache.hadoop.fs.{FileSystem, Path}
+
+import org.apache.spark.{SparkConf, SparkException}
+import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.deploy.k8s.Config.KUBERNETES_FILE_UPLOAD_PATH
+import org.apache.spark.internal.Logging
+import org.apache.spark.launcher.SparkLauncher
+import org.apache.spark.resource.ResourceUtils
+import org.apache.spark.util.{Clock, SystemClock, Utils}
+import org.apache.spark.util.Utils.getHadoopFileSystem
+
+private[spark] object KubernetesUtils extends Logging {
+
+ private val systemClock = new SystemClock()
+ private lazy val RNG = new SecureRandom()
/**
* Extract and parse Spark configuration properties with a given name prefix and
@@ -44,7 +63,43 @@ private[spark] object KubernetesUtils {
* - File URIs with scheme local:// resolve to just the path of the URI.
* - Otherwise, the URIs are returned as-is.
*/
- def resolveFileUrisAndPath(fileUris: Iterable[String]): Iterable[String] = {
+ def uniqueID(clock: Clock = systemClock): String = {
+ val random = new Array[Byte](3)
+ synchronized {
+ RNG.nextBytes(random)
+ }
+
+ val time = java.lang.Long.toHexString(clock.getTimeMillis() & 0xFFFFFFFFFFL)
+ Hex.encodeHexString(random) + time
+ }
+
+ /**
+ * This function builds the Quantity objects for each resource in the Spark resource
+ * configs, based on the component name (spark.driver.resource or spark.executor.resource).
+ * It assumes we can use the Kubernetes device plugin format: vendor-domain/resource.
+ * It returns a map from vendor-domain/resource to the Quantity for each resource.
+ */
+ def buildResourcesQuantities(
+ componentName: String,
+ sparkConf: SparkConf): Map[String, Quantity] = {
+ val requests = ResourceUtils.parseAllResourceRequests(sparkConf, componentName)
+ requests.map { request =>
+ val vendorDomain = if (request.vendor.isPresent()) {
+ request.vendor.get()
+ } else {
+ throw new SparkException(s"Resource: ${request.id.resourceName} was requested, " +
+ "but vendor was not specified.")
+ }
+ val quantity = new Quantity(request.amount.toString)
+ (KubernetesConf.buildKubernetesResourceName(vendorDomain, request.id.resourceName), quantity)
+ }.toMap
+ }
+
+ /**
+ * Upload files and return their modified URIs.
+ */
+ def uploadAndTransformFileUris(fileUris: Iterable[String], conf: Option[SparkConf] = None)
+ : Iterable[String] = {
fileUris.map { uri =>
resolveFileUri(uri)
}
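The new buildResourcesQuantities helper translates Spark resource configs into device-plugin-style Kubernetes limits. A hypothetical usage sketch; the config keys follow the standard spark.executor.resource.* scheme, and the expected output is inferred from the scaladoc above rather than taken from this diff:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.deploy.k8s.KubernetesUtils

// Request two NVIDIA GPUs per executor via the standard resource configs.
val conf = new SparkConf(false)
  .set("spark.executor.resource.gpu.amount", "2")
  .set("spark.executor.resource.gpu.vendor", "nvidia.com")

// Expected to yield Map("nvidia.com/gpu" -> Quantity("2")); a missing vendor
// raises SparkException, as exercised by the executor feature step tests below.
val quantities = KubernetesUtils.buildResourcesQuantities("spark.executor", conf)
```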
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala
index 575bc54ffe2bb..dda1b5e868102 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala
@@ -63,14 +63,10 @@ private[spark] class BasicDriverFeatureStep(
.build()
}
- val driverCpuQuantity = new QuantityBuilder(false)
- .withAmount(driverCpuCores)
- .build()
- val driverMemoryQuantity = new QuantityBuilder(false)
- .withAmount(s"${driverMemoryWithOverheadMiB}Mi")
- .build()
+ val driverCpuQuantity = new Quantity(driverCoresRequest)
+ val driverMemoryQuantity = new Quantity(s"${driverMemoryWithOverheadMiB}Mi")
val maybeCpuLimitQuantity = driverLimitCores.map { limitCores =>
- ("cpu", new QuantityBuilder(false).withAmount(limitCores).build())
+ ("cpu", new Quantity(limitCores))
}
val driverPort = conf.sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT)
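For context on the driverCoresRequest value used above: the request-cores test added later in this patch pins down the precedence, which can be sketched roughly as follows (illustrative only, not the step's actual code):

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf(false)
  .set("spark.driver.cores", "10")
  .set("spark.kubernetes.driver.request.cores", "100m")

// spark.kubernetes.driver.request.cores wins over spark.driver.cores,
// and the default is "1" when neither is set.
val driverCoresRequest = conf
  .getOption("spark.kubernetes.driver.request.cores")
  .orElse(conf.getOption("spark.driver.cores"))
  .getOrElse("1")
// driverCoresRequest == "100m" -> new Quantity("100m") in the pod's CPU request.
```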
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala
index d89995ba5e4f4..0a820cec97c28 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala
@@ -85,27 +85,32 @@ private[spark] class BasicExecutorFeatureStep(
// name as the hostname. This preserves uniqueness since the end of name contains
// executorId
val hostname = name.substring(Math.max(0, name.length - 63))
- val executorMemoryQuantity = new QuantityBuilder(false)
- .withAmount(s"${executorMemoryTotal}Mi")
- .build()
- val executorCpuQuantity = new QuantityBuilder(false)
- .withAmount(executorCoresRequest)
- .build()
- val executorExtraClasspathEnv = executorExtraClasspath.map { cp =>
- new EnvVarBuilder()
- .withName(ENV_CLASSPATH)
- .withValue(cp)
- .build()
- }
- val executorExtraJavaOptionsEnv = kubernetesConf
- .get(EXECUTOR_JAVA_OPTIONS)
- .map { opts =>
- val subsOpts = Utils.substituteAppNExecIds(opts, kubernetesConf.appId,
- kubernetesConf.roleSpecificConf.executorId)
- val delimitedOpts = Utils.splitCommandString(subsOpts)
- delimitedOpts.zipWithIndex.map {
- case (opt, index) =>
- new EnvVarBuilder().withName(s"$ENV_JAVA_OPT_PREFIX$index").withValue(opt).build()
+ // Remove non-word characters from the start of the hostname
+ .replaceAll("^[^\\w]+", "")
+ // Replace dangerous characters in the remaining string with a safe alternative.
+ .replaceAll("[^\\w-]+", "_")
+
+ val executorMemoryQuantity = new Quantity(s"${executorMemoryTotal}Mi")
+ val executorCpuQuantity = new Quantity(executorCoresRequest)
+
+ val executorResourceQuantities =
+ KubernetesUtils.buildResourcesQuantities(SPARK_EXECUTOR_PREFIX,
+ kubernetesConf.sparkConf)
+
+ val executorEnv: Seq[EnvVar] = {
+ (Seq(
+ (ENV_DRIVER_URL, driverUrl),
+ (ENV_EXECUTOR_CORES, executorCores.toString),
+ (ENV_EXECUTOR_MEMORY, executorMemoryString),
+ (ENV_APPLICATION_ID, kubernetesConf.appId),
+ // This is to set the SPARK_CONF_DIR to be /opt/spark/conf
+ (ENV_SPARK_CONF_DIR, SPARK_CONF_DIR_INTERNAL),
+ (ENV_EXECUTOR_ID, kubernetesConf.executorId)
+ ) ++ kubernetesConf.environment).map { case (k, v) =>
+ new EnvVarBuilder()
+ .withName(k)
+ .withValue(v)
+ .build()
}
}.getOrElse(Seq.empty[EnvVar])
val executorEnv = (Seq(
@@ -152,9 +157,7 @@ private[spark] class BasicExecutorFeatureStep(
.addToArgs("executor")
.build()
val containerWithLimitCores = executorLimitCores.map { limitCores =>
- val executorCpuLimitQuantity = new QuantityBuilder(false)
- .withAmount(limitCores)
- .build()
+ val executorCpuLimitQuantity = new Quantity(limitCores)
new ContainerBuilder(executorContainer)
.editResources()
.addToLimits("cpu", executorCpuLimitQuantity)
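The hostname sanitization added in this file amounts to: keep at most the last 63 characters, strip leading non-word characters, then replace anything outside [A-Za-z0-9_-] with an underscore. A small sketch of the same regexes:

```scala
// Mirrors the replaceAll calls introduced in BasicExecutorFeatureStep.
def sanitizeHostname(name: String): String =
  name.substring(Math.max(0, name.length - 63))
    .replaceAll("^[^\\w]+", "")   // drop leading non-word characters
    .replaceAll("[^\\w-]+", "_")  // replace remaining unsafe characters

// e.g. "--spark.example/exec-1" becomes "spark_example_exec-1"
```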
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala
index 026b7eb774c15..bac75f3ff59e0 100644
--- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala
+++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala
@@ -70,7 +70,7 @@ private[spark] class MountVolumesFeatureStep(
new VolumeBuilder()
.withEmptyDir(
new EmptyDirVolumeSource(medium.getOrElse(""),
- new Quantity(sizeLimit.orNull)))
+ sizeLimit.map(new Quantity(_)).orNull))
}
val volume = volumeBuilder.withName(spec.volumeName).build()
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala
new file mode 100644
index 0000000000000..26bd317de8ec6
--- /dev/null
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.deploy.k8s
+
+import java.io.File
+
+import io.fabric8.kubernetes.api.model.{Config => _, _}
+import io.fabric8.kubernetes.client.KubernetesClient
+import io.fabric8.kubernetes.client.dsl.{MixedOperation, PodResource}
+import org.mockito.ArgumentMatchers.any
+import org.mockito.Mockito.{mock, never, verify, when}
+import scala.collection.JavaConverters._
+
+import org.apache.spark.{SparkConf, SparkException, SparkFunSuite}
+import org.apache.spark.deploy.k8s._
+import org.apache.spark.internal.config.ConfigEntry
+
+abstract class PodBuilderSuite extends SparkFunSuite {
+
+ protected def templateFileConf: ConfigEntry[_]
+
+ protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod
+
+ private val baseConf = new SparkConf(false)
+ .set(Config.CONTAINER_IMAGE, "spark-executor:latest")
+
+ test("use empty initial pod if template is not specified") {
+ val client = mock(classOf[KubernetesClient])
+ buildPod(baseConf.clone(), client)
+ verify(client, never()).pods()
+ }
+
+ test("load pod template if specified") {
+ val client = mockKubernetesClient()
+ val sparkConf = baseConf.clone().set(templateFileConf.key, "template-file.yaml")
+ val pod = buildPod(sparkConf, client)
+ verifyPod(pod)
+ }
+
+ test("complain about misconfigured pod template") {
+ val client = mockKubernetesClient(
+ new PodBuilder()
+ .withNewMetadata()
+ .addToLabels("test-label-key", "test-label-value")
+ .endMetadata()
+ .build())
+ val sparkConf = baseConf.clone().set(templateFileConf.key, "template-file.yaml")
+ val exception = intercept[SparkException] {
+ buildPod(sparkConf, client)
+ }
+ assert(exception.getMessage.contains("Could not load pod from template file."))
+ }
+
+ private def mockKubernetesClient(pod: Pod = podWithSupportedFeatures()): KubernetesClient = {
+ val kubernetesClient = mock(classOf[KubernetesClient])
+ val pods =
+ mock(classOf[MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]]])
+ val podResource = mock(classOf[PodResource[Pod, DoneablePod]])
+ when(kubernetesClient.pods()).thenReturn(pods)
+ when(pods.load(any(classOf[File]))).thenReturn(podResource)
+ when(podResource.get()).thenReturn(pod)
+ kubernetesClient
+ }
+
+ private def verifyPod(pod: SparkPod): Unit = {
+ val metadata = pod.pod.getMetadata
+ assert(metadata.getLabels.containsKey("test-label-key"))
+ assert(metadata.getAnnotations.containsKey("test-annotation-key"))
+ assert(metadata.getNamespace === "namespace")
+ assert(metadata.getOwnerReferences.asScala.exists(_.getName == "owner-reference"))
+ val spec = pod.pod.getSpec
+ assert(!spec.getContainers.asScala.exists(_.getName == "executor-container"))
+ assert(spec.getDnsPolicy === "dns-policy")
+ assert(spec.getHostAliases.asScala.exists(_.getHostnames.asScala.exists(_ == "hostname")))
+ assert(spec.getImagePullSecrets.asScala.exists(_.getName == "local-reference"))
+ assert(spec.getInitContainers.asScala.exists(_.getName == "init-container"))
+ assert(spec.getNodeName == "node-name")
+ assert(spec.getNodeSelector.get("node-selector-key") === "node-selector-value")
+ assert(spec.getSchedulerName === "scheduler")
+ assert(spec.getSecurityContext.getRunAsUser === 1000L)
+ assert(spec.getServiceAccount === "service-account")
+ assert(spec.getSubdomain === "subdomain")
+ assert(spec.getTolerations.asScala.exists(_.getKey == "toleration-key"))
+ assert(spec.getVolumes.asScala.exists(_.getName == "test-volume"))
+ val container = pod.container
+ assert(container.getName === "executor-container")
+ assert(container.getArgs.contains("arg"))
+ assert(container.getCommand.equals(List("command").asJava))
+ assert(container.getEnv.asScala.exists(_.getName == "env-key"))
+ assert(container.getResources.getLimits.get("gpu") === new Quantity("1"))
+ assert(container.getSecurityContext.getRunAsNonRoot)
+ assert(container.getStdin)
+ assert(container.getTerminationMessagePath === "termination-message-path")
+ assert(container.getTerminationMessagePolicy === "termination-message-policy")
+ assert(pod.container.getVolumeMounts.asScala.exists(_.getName == "test-volume"))
+ }
+
+ private def podWithSupportedFeatures(): Pod = {
+ new PodBuilder()
+ .withNewMetadata()
+ .addToLabels("test-label-key", "test-label-value")
+ .addToAnnotations("test-annotation-key", "test-annotation-value")
+ .withNamespace("namespace")
+ .addNewOwnerReference()
+ .withController(true)
+ .withName("owner-reference")
+ .endOwnerReference()
+ .endMetadata()
+ .withNewSpec()
+ .withDnsPolicy("dns-policy")
+ .withHostAliases(new HostAliasBuilder().withHostnames("hostname").build())
+ .withImagePullSecrets(
+ new LocalObjectReferenceBuilder().withName("local-reference").build())
+ .withInitContainers(new ContainerBuilder().withName("init-container").build())
+ .withNodeName("node-name")
+ .withNodeSelector(Map("node-selector-key" -> "node-selector-value").asJava)
+ .withSchedulerName("scheduler")
+ .withNewSecurityContext()
+ .withRunAsUser(1000L)
+ .endSecurityContext()
+ .withServiceAccount("service-account")
+ .withSubdomain("subdomain")
+ .withTolerations(new TolerationBuilder()
+ .withKey("toleration-key")
+ .withOperator("Equal")
+ .withEffect("NoSchedule")
+ .build())
+ .addNewVolume()
+ .withNewHostPath()
+ .withPath("/test")
+ .endHostPath()
+ .withName("test-volume")
+ .endVolume()
+ .addNewContainer()
+ .withArgs("arg")
+ .withCommand("command")
+ .addNewEnv()
+ .withName("env-key")
+ .withValue("env-value")
+ .endEnv()
+ .withImagePullPolicy("Always")
+ .withName("executor-container")
+ .withNewResources()
+ .withLimits(Map("gpu" -> new Quantity("1")).asJava)
+ .endResources()
+ .withNewSecurityContext()
+ .withRunAsNonRoot(true)
+ .endSecurityContext()
+ .withStdin(true)
+ .withTerminationMessagePath("termination-message-path")
+ .withTerminationMessagePolicy("termination-message-policy")
+ .addToVolumeMounts(
+ new VolumeMountBuilder()
+ .withName("test-volume")
+ .withMountPath("/test")
+ .build())
+ .endContainer()
+ .endSpec()
+ .build()
+ }
+
+}
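PodBuilderSuite is abstract; concrete driver and executor suites are expected to supply the template-file config entry and the pod-building call. A hypothetical subclass for orientation, with the builder API names assumed rather than taken from this diff:

```scala
import io.fabric8.kubernetes.client.KubernetesClient

import org.apache.spark.SparkConf
import org.apache.spark.deploy.k8s._
import org.apache.spark.internal.config.ConfigEntry

class ExampleDriverPodBuilderSuite extends PodBuilderSuite {

  override protected def templateFileConf: ConfigEntry[_] =
    Config.KUBERNETES_DRIVER_PODTEMPLATE_FILE  // assumed config entry name

  override protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod = {
    val conf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf)
    // assumed builder entry point; the real suites wire in their own builder
    new KubernetesDriverBuilder().buildFromFeatures(conf, client).pod
  }
}
```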
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala
index 0968cce971c31..20413a40240dc 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala
@@ -18,7 +18,7 @@ package org.apache.spark.deploy.k8s.features
import scala.collection.JavaConverters._
-import io.fabric8.kubernetes.api.model.{ContainerPort, ContainerPortBuilder, LocalObjectReferenceBuilder}
+import io.fabric8.kubernetes.api.model.{ContainerPort, ContainerPortBuilder, LocalObjectReferenceBuilder, Quantity}
import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverSpecificConf, SparkPod}
@@ -114,11 +114,14 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite {
val resourceRequirements = configuredPod.container.getResources
val requests = resourceRequirements.getRequests.asScala
- assert(requests("cpu").getAmount === "2")
- assert(requests("memory").getAmount === "456Mi")
+ assert(amountAndFormat(requests("cpu")) === "2")
+ assert(amountAndFormat(requests("memory")) === "456Mi")
val limits = resourceRequirements.getLimits.asScala
- assert(limits("memory").getAmount === "456Mi")
- assert(limits("cpu").getAmount === "4")
+ assert(amountAndFormat(limits("memory")) === "456Mi")
+ assert(amountAndFormat(limits("cpu")) === "4")
+ resources.foreach { case (k8sName, testRInfo) =>
+ assert(amountAndFormat(limits(k8sName)) === testRInfo.count)
+ }
val driverPodMetadata = configuredPod.pod.getMetadata
assert(driverPodMetadata.getName === "spark-driver-pod")
@@ -133,6 +136,38 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite {
assert(featureStep.getAdditionalPodSystemProperties() === expectedSparkConf)
}
+ test("Check driver pod respects kubernetes driver request cores") {
+ val sparkConf = new SparkConf()
+ .set(KUBERNETES_DRIVER_POD_NAME, "spark-driver-pod")
+ .set(CONTAINER_IMAGE, "spark-driver:latest")
+
+ val basePod = SparkPod.initialPod()
+ // if spark.driver.cores is not set, the default is 1
+ val requests1 = new BasicDriverFeatureStep(KubernetesTestConf.createDriverConf(sparkConf))
+ .configurePod(basePod)
+ .container.getResources
+ .getRequests.asScala
+ assert(amountAndFormat(requests1("cpu")) === "1")
+
+ // if spark.driver.cores is set, it should be used
+ sparkConf.set(DRIVER_CORES, 10)
+ val requests2 = new BasicDriverFeatureStep(KubernetesTestConf.createDriverConf(sparkConf))
+ .configurePod(basePod)
+ .container.getResources
+ .getRequests.asScala
+ assert(amountAndFormat(requests2("cpu")) === "10")
+
+ // spark.kubernetes.driver.request.cores should be preferred over spark.driver.cores
+ Seq("0.1", "100m").foreach { value =>
+ sparkConf.set(KUBERNETES_DRIVER_REQUEST_CORES, value)
+ val requests3 = new BasicDriverFeatureStep(KubernetesTestConf.createDriverConf(sparkConf))
+ .configurePod(basePod)
+ .container.getResources
+ .getRequests.asScala
+ assert(amountAndFormat(requests3("cpu")) === value)
+ }
+ }
+
test("Check appropriate entrypoint rerouting for various bindings") {
val javaSparkConf = new SparkConf()
.set(org.apache.spark.internal.config.DRIVER_MEMORY.key, "4g")
@@ -179,37 +214,35 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite {
val configuredPythonPod = pythonFeatureStep.configurePod(basePod)
}
- test("Additional system properties resolve jars and set cluster-mode confs.") {
- val allJars = Seq("local:///opt/spark/jar1.jar", "hdfs:///opt/spark/jar2.jar")
- val allFiles = Seq("https://localhost:9000/file1.txt", "local:///opt/spark/file2.txt")
- val sparkConf = new SparkConf()
- .set(KUBERNETES_DRIVER_POD_NAME, "spark-driver-pod")
- .setJars(allJars)
- .set("spark.files", allFiles.mkString(","))
- .set(CONTAINER_IMAGE, "spark-driver:latest")
- val kubernetesConf = KubernetesConf(
- sparkConf,
- emptyDriverSpecificConf,
- RESOURCE_NAME_PREFIX,
- APP_ID,
- DRIVER_LABELS,
- DRIVER_ANNOTATIONS,
- Map.empty,
- Map.empty,
- DRIVER_ENVS,
- Nil,
- allFiles)
+ // Memory overhead tests. Tuples are:
+ // test name, main resource, overhead factor, expected factor
+ Seq(
+ ("java", JavaMainAppResource(None), None, MEMORY_OVERHEAD_FACTOR.defaultValue.get),
+ ("python default", PythonMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR),
+ ("python w/ override", PythonMainAppResource(null), Some(0.9d), 0.9d),
+ ("r default", RMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR)
+ ).foreach { case (name, resource, factor, expectedFactor) =>
+ test(s"memory overhead factor: $name") {
+ // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB
+ val driverMem = MEMORY_OVERHEAD_MIN_MIB / MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2
- val step = new BasicDriverFeatureStep(kubernetesConf)
- val additionalProperties = step.getAdditionalPodSystemProperties()
- val expectedSparkConf = Map(
- KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod",
- "spark.app.id" -> APP_ID,
- KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> RESOURCE_NAME_PREFIX,
- "spark.kubernetes.submitInDriver" -> "true",
- "spark.jars" -> "/opt/spark/jar1.jar,hdfs:///opt/spark/jar2.jar",
- "spark.files" -> "https://localhost:9000/file1.txt,/opt/spark/file2.txt")
- assert(additionalProperties === expectedSparkConf)
+ // main app resource, overhead factor
+ val sparkConf = new SparkConf(false)
+ .set(CONTAINER_IMAGE, "spark-driver:latest")
+ .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m")
+ factor.foreach { value => sparkConf.set(MEMORY_OVERHEAD_FACTOR, value) }
+ val conf = KubernetesTestConf.createDriverConf(
+ sparkConf = sparkConf,
+ mainAppResource = resource)
+ val step = new BasicDriverFeatureStep(conf)
+ val pod = step.configurePod(SparkPod.initialPod())
+ val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory"))
+ val expected = (driverMem + driverMem * expectedFactor).toInt
+ assert(mem === s"${expected}Mi")
+
+ val systemProperties = step.getAdditionalPodSystemProperties()
+ assert(systemProperties(MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString)
+ }
}
def containerPort(name: String, portNumber: Int): ContainerPort =
@@ -218,4 +251,6 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite {
.withContainerPort(portNumber)
.withProtocol("TCP")
.build()
+
+ private def amountAndFormat(quantity: Quantity): String = quantity.getAmount + quantity.getFormat
}
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala
index 63b237b9dfe46..c7db3dfd51135 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala
@@ -78,6 +78,56 @@ class BasicExecutorFeatureStepSuite
.set("spark.kubernetes.resource.type", "java")
}
+ private def newExecutorConf(
+ environment: Map[String, String] = Map.empty): KubernetesExecutorConf = {
+ KubernetesTestConf.createExecutorConf(
+ sparkConf = baseConf,
+ driverPod = Some(DRIVER_POD),
+ labels = LABELS,
+ environment = environment)
+ }
+
+ test("test spark resource missing vendor") {
+ baseConf.set(EXECUTOR_GPU_ID.amountConf, "2")
+ val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf))
+ val error = intercept[SparkException] {
+ val executor = step.configurePod(SparkPod.initialPod())
+ }.getMessage()
+ assert(error.contains("Resource: gpu was requested, but vendor was not specified"))
+ }
+
+ test("test spark resource missing amount") {
+ baseConf.set(EXECUTOR_GPU_ID.vendorConf, "nvidia.com")
+
+ val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf))
+ val error = intercept[SparkException] {
+ val executor = step.configurePod(SparkPod.initialPod())
+ }.getMessage()
+ assert(error.contains("You must specify an amount for gpu"))
+ }
+
+ test("basic executor pod with resources") {
+ val fpgaResourceID = new ResourceID(SPARK_EXECUTOR_PREFIX, FPGA)
+ val gpuExecutorResourceID = new ResourceID(SPARK_EXECUTOR_PREFIX, GPU)
+ val gpuResources =
+ Map(("nvidia.com/gpu" -> TestResourceInformation(gpuExecutorResourceID, "2", "nvidia.com")),
+ ("foo.com/fpga" -> TestResourceInformation(fpgaResourceID, "1", "foo.com")))
+ gpuResources.foreach { case (_, testRInfo) =>
+ baseConf.set(testRInfo.rId.amountConf, testRInfo.count)
+ baseConf.set(testRInfo.rId.vendorConf, testRInfo.vendor)
+ }
+ val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf))
+ val executor = step.configurePod(SparkPod.initialPod())
+
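+ // Expect limits for memory plus the two custom device resources (nvidia.com/gpu and foo.com/fpga).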
+ assert(executor.container.getResources.getLimits.size() === 3)
+ assert(amountAndFormat(executor.container.getResources
+ .getLimits.get("memory")) === "1408Mi")
+ gpuResources.foreach { case (k8sName, testRInfo) =>
+ assert(amountAndFormat(
+ executor.container.getResources.getLimits.get(k8sName)) === testRInfo.count)
+ }
+ }
+
test("basic executor pod has reasonable defaults") {
val step = new BasicExecutorFeatureStep(
KubernetesConf(
@@ -104,8 +154,8 @@ class BasicExecutorFeatureStepSuite
assert(executor.container.getImage === EXECUTOR_IMAGE)
assert(executor.container.getVolumeMounts.isEmpty)
assert(executor.container.getResources.getLimits.size() === 1)
- assert(executor.container.getResources
- .getLimits.get("memory").getAmount === "1408Mi")
+ assert(amountAndFormat(executor.container.getResources
+ .getLimits.get("memory")) === "1408Mi")
// The pod has no node selector, volumes.
assert(executor.pod.getSpec.getNodeSelector.isEmpty)
@@ -182,7 +232,7 @@ class BasicExecutorFeatureStepSuite
Seq.empty[String]))
val executor = step.configurePod(SparkPod.initialPod())
// This is checking that basic executor + executorMemory = 1408 + 42 = 1450
- assert(executor.container.getResources.getRequests.get("memory").getAmount === "1450Mi")
+ assert(amountAndFormat(executor.container.getResources.getRequests.get("memory")) === "1450Mi")
}
// There is always exactly one controller reference, and it points to the driver pod.
@@ -209,4 +259,6 @@ class BasicExecutorFeatureStepSuite
}.toMap
assert(defaultEnvs === mapEnvs)
}
+
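+ // Quantity.getAmount no longer carries the unit suffix with the newer kubernetes-client, so rebuild the full string from amount and format.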
+ private def amountAndFormat(quantity: Quantity): String = quantity.getAmount + quantity.getFormat
}
diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala
index d309aa94ec115..94fab451a31d3 100644
--- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala
+++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala
@@ -92,7 +92,8 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite {
assert(configuredPod.pod.getSpec.getVolumes.size() === 1)
val emptyDir = configuredPod.pod.getSpec.getVolumes.get(0).getEmptyDir
assert(emptyDir.getMedium === "Memory")
- assert(emptyDir.getSizeLimit.getAmount === "6G")
+ assert(emptyDir.getSizeLimit.getAmount === "6")
+ assert(emptyDir.getSizeLimit.getFormat === "G")
assert(configuredPod.container.getVolumeMounts.size() === 1)
assert(configuredPod.container.getVolumeMounts.get(0).getMountPath === "/tmp")
assert(configuredPod.container.getVolumeMounts.get(0).getName === "testVolume")
@@ -113,7 +114,7 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite {
assert(configuredPod.pod.getSpec.getVolumes.size() === 1)
val emptyDir = configuredPod.pod.getSpec.getVolumes.get(0).getEmptyDir
assert(emptyDir.getMedium === "")
- assert(emptyDir.getSizeLimit.getAmount === null)
+ assert(emptyDir.getSizeLimit === null)
assert(configuredPod.container.getVolumeMounts.size() === 1)
assert(configuredPod.container.getVolumeMounts.get(0).getMountPath === "/tmp")
assert(configuredPod.container.getVolumeMounts.get(0).getName === "testVolume")
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index eac80423e36cc..14288d1b38a0f 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -29,7 +29,7 @@
1.3.0
1.4.0
- <kubernetes-client.version>4.6.1</kubernetes-client.version>
+ <kubernetes-client.version>4.9.2</kubernetes-client.version>
3.2.2
1.0
kubernetes-integration-tests
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala
new file mode 100644
index 0000000000000..289fb9641295d
--- /dev/null
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.deploy.k8s.integrationtest
+
+import java.net.URL
+
+import scala.collection.JavaConverters._
+
+import com.amazonaws.auth.BasicAWSCredentials
+import com.amazonaws.services.s3.AmazonS3Client
+import io.fabric8.kubernetes.api.model._
+import io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder
+import org.scalatest.concurrent.{Eventually, PatienceConfiguration}
+import org.scalatest.time.{Minutes, Span}
+
+import org.apache.spark.SparkException
+import org.apache.spark.deploy.k8s.integrationtest.DepsTestsSuite.{DEPS_TIMEOUT, FILE_CONTENTS, HOST_PATH}
+import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, MinikubeTag, TIMEOUT}
+import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.Minikube
+
+private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite =>
+ import KubernetesSuite.k8sTestTag
+
+ val cName = "ceph-nano"
+ val svcName = s"$cName-s3"
+ val bucket = "spark"
+
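+ // A single ceph/daemon container in demo mode; the RGW frontend serves an S3-compatible endpoint on port 8000.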
+ private def getCephContainer(): Container = {
+ val envVars = Map ( "NETWORK_AUTO_DETECT" -> "4",
+ "RGW_FRONTEND_PORT" -> "8000",
+ "SREE_PORT" -> "5001",
+ "CEPH_DEMO_UID" -> "nano",
+ "CEPH_DAEMON" -> "demo",
+ "DEBUG" -> "verbose"
+ ).map( envV =>
+ new EnvVarBuilder()
+ .withName(envV._1)
+ .withValue(envV._2)
+ .build()
+ ).toArray
+
+ val resources = Map(
+ "cpu" -> new Quantity("1"),
+ "memory" -> new Quantity("512M")
+ ).asJava
+
+ new ContainerBuilder()
+ .withImage("ceph/daemon:latest")
+ .withImagePullPolicy("Always")
+ .withName(cName)
+ .withPorts(new ContainerPortBuilder()
+ .withName(svcName)
+ .withProtocol("TCP")
+ .withContainerPort(8000)
+ .build()
+ )
+ .withResources(new ResourceRequirementsBuilder()
+ .withLimits(resources)
+ .withRequests(resources)
+ .build()
+ )
+ .withEnv(envVars: _*)
+ .build()
+ }
+
+ // Based on https://github.com/ceph/cn
+ private def setupCephStorage(): Unit = {
+ val labels = Map("app" -> "ceph", "daemon" -> "nano").asJava
+ val cephService = new ServiceBuilder()
+ .withNewMetadata()
+ .withName(svcName)
+ .withLabels(labels)
+ .endMetadata()
+ .withNewSpec()
+ .withPorts(new ServicePortBuilder()
+ .withName("https")
+ .withPort(8000)
+ .withProtocol("TCP")
+ .withTargetPort(new IntOrString(8000))
+ .build()
+ )
+ .withType("NodePort")
+ .withSelector(labels)
+ .endSpec()
+ .build()
+
+ val cephStatefulSet = new StatefulSetBuilder()
+ .withNewMetadata()
+ .withName(cName)
+ .withLabels(labels)
+ .endMetadata()
+ .withNewSpec()
+ .withReplicas(1)
+ .withNewSelector()
+ .withMatchLabels(Map("app" -> "ceph").asJava)
+ .endSelector()
+ .withServiceName(cName)
+ .withNewTemplate()
+ .withNewMetadata()
+ .withName(cName)
+ .withLabels(labels)
+ .endMetadata()
+ .withNewSpec()
+ .withContainers(getCephContainer())
+ .endSpec()
+ .endTemplate()
+ .endSpec()
+ .build()
+
+ kubernetesTestComponents
+ .kubernetesClient
+ .services()
+ .create(cephService)
+
+ kubernetesTestComponents
+ .kubernetesClient
+ .apps()
+ .statefulSets()
+ .create(cephStatefulSet)
+ }
+
+ private def deleteCephStorage(): Unit = {
+ kubernetesTestComponents
+ .kubernetesClient
+ .apps()
+ .statefulSets()
+ .withName(cName)
+ .delete()
+
+ kubernetesTestComponents
+ .kubernetesClient
+ .services()
+ .withName(svcName)
+ .delete()
+ }
+
+ test("Launcher client dependencies", k8sTestTag, MinikubeTag) {
+ val fileName = Utils.createTempFile(FILE_CONTENTS, HOST_PATH)
+ try {
+ setupCephStorage()
+ val cephUrlStr = getServiceUrl(svcName)
+ val cephUrl = new URL(cephUrlStr)
+ val cephHost = cephUrl.getHost
+ val cephPort = cephUrl.getPort
+ val examplesJar = Utils.getExamplesJarAbsolutePath(sparkHomeDir)
+ val (accessKey, secretKey) = getCephCredentials()
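+ // Point s3a at the in-cluster Ceph gateway and let the submission client stage the local file into the bucket.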
+ sparkAppConf
+ .set("spark.hadoop.fs.s3a.access.key", accessKey)
+ .set("spark.hadoop.fs.s3a.secret.key", secretKey)
+ .set("spark.hadoop.fs.s3a.connection.ssl.enabled", "false")
+ .set("spark.hadoop.fs.s3a.endpoint", s"$cephHost:$cephPort")
+ .set("spark.kubernetes.file.upload.path", s"s3a://$bucket")
+ .set("spark.files", s"$HOST_PATH/$fileName")
+ .set("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
+ .set("spark.jars.packages", "com.amazonaws:aws-java-sdk:" +
+ "1.7.4,org.apache.hadoop:hadoop-aws:2.7.6")
+ .set("spark.driver.extraJavaOptions", "-Divy.cache.dir=/tmp -Divy.home=/tmp")
+ createS3Bucket(accessKey, secretKey, cephUrlStr)
+ runSparkRemoteCheckAndVerifyCompletion(appResource = examplesJar,
+ appArgs = Array(fileName),
+ timeout = Option(DEPS_TIMEOUT))
+ } finally {
+ // make sure this always runs
+ deleteCephStorage()
+ }
+ }
+
+ // There isn't a cleaner way to get the credentials
+ // when ceph-nano runs on k8s
+ private def getCephCredentials(): (String, String) = {
+ Eventually.eventually(TIMEOUT, INTERVAL) {
+ val cephPod = kubernetesTestComponents
+ .kubernetesClient
+ .pods()
+ .withName(s"$cName-0")
+ .get()
+ implicit val podName: String = cephPod.getMetadata.getName
+ implicit val components: KubernetesTestComponents = kubernetesTestComponents
+ val contents = Utils.executeCommand("cat", "/nano_user_details")
+ (extractS3Key(contents, "access_key"), extractS3Key(contents, "secret_key"))
+ }
+ }
+
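+ // The credentials file is JSON-like; pick the line containing the key and strip quotes and commas.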
+ private def extractS3Key(data: String, key: String): String = {
+ data.split("\n")
+ .filter(_.contains(key))
+ .head
+ .split(":")
+ .last
+ .trim
+ .replaceAll("[,|\"]", "")
+ }
+
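+ // Retried because the gateway may not be ready to accept requests right after the stateful set starts.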
+ private def createS3Bucket(accessKey: String, secretKey: String, endPoint: String): Unit = {
+ Eventually.eventually(TIMEOUT, INTERVAL) {
+ try {
+ val credentials = new BasicAWSCredentials(accessKey, secretKey)
+ val s3client = new AmazonS3Client(credentials)
+ s3client.setEndpoint(endPoint)
+ s3client.createBucket(bucket)
+ } catch {
+ case e: Exception =>
+ throw new SparkException(s"Failed to create bucket $bucket.", e)
+ }
+ }
+ }
+
+ private def getServiceUrl(serviceName: String): String = {
+ val fuzzyUrlMatcher = """^(.*?)([a-zA-Z]+://.*?)(\s*)$""".r
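+ // minikube may prefix the URL with informational output; keep only the scheme://host:port portion.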
+ Eventually.eventually(TIMEOUT, INTERVAL) {
+ // The namespace is always available, whether randomly generated or provided by the user
+ val rawUrl = Minikube.minikubeServiceAction(
+ serviceName, "-n", kubernetesTestComponents.namespace, "--url")
+ val url = rawUrl match {
+ case fuzzyUrlMatcher(junk, url, extra) =>
+ logDebug(s"Service url matched junk ${junk} - url ${url} - extra ${extra}")
+ url
+ case _ =>
+ logWarning(s"Response from minikube ${rawUrl} did not match URL regex")
+ rawUrl
+ }
+ url
+ }
+ }
+}
+
+private[spark] object DepsTestsSuite {
+ val HOST_PATH = "/tmp"
+ val FILE_CONTENTS = "test deps"
+ // increase the default because jar resolution takes time in the container
+ val DEPS_TIMEOUT = PatienceConfiguration.Timeout(Span(4, Minutes))
+}
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala
new file mode 100644
index 0000000000000..4b4dff93f5742
--- /dev/null
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.deploy.k8s.integrationtest
+
+import scala.collection.JavaConverters._
+
+import io.fabric8.kubernetes.api.model._
+import io.fabric8.kubernetes.api.model.storage.StorageClassBuilder
+import org.scalatest.concurrent.{Eventually, PatienceConfiguration}
+import org.scalatest.time.{Milliseconds, Span}
+
+import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite._
+
+private[spark] trait PVTestsSuite { k8sSuite: KubernetesSuite =>
+ import PVTestsSuite._
+
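+ // Creates a WaitForFirstConsumer storage class, a local PV restricted to the minikube/docker-desktop node, and a claim that binds to it.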
+ private def setupLocalStorage(): Unit = {
+ val scBuilder = new StorageClassBuilder()
+ .withKind("StorageClass")
+ .withApiVersion("storage.k8s.io/v1")
+ .withNewMetadata()
+ .withName(STORAGE_NAME)
+ .endMetadata()
+ .withProvisioner("kubernetes.io/no-provisioner")
+ .withVolumeBindingMode("WaitForFirstConsumer")
+
+ val pvBuilder = new PersistentVolumeBuilder()
+ .withKind("PersistentVolume")
+ .withApiVersion("v1")
+ .withNewMetadata()
+ .withName("test-local-pv")
+ .endMetadata()
+ .withNewSpec()
+ .withCapacity(Map("storage" -> new Quantity("1Gi")).asJava)
+ .withAccessModes("ReadWriteOnce")
+ .withPersistentVolumeReclaimPolicy("Retain")
+ .withStorageClassName("test-local-storage")
+ .withLocal(new LocalVolumeSourceBuilder().withPath(VM_PATH).build())
+ .withNewNodeAffinity()
+ .withNewRequired()
+ .withNodeSelectorTerms(new NodeSelectorTermBuilder()
+ .withMatchExpressions(new NodeSelectorRequirementBuilder()
+ .withKey("kubernetes.io/hostname")
+ .withOperator("In")
+ .withValues("minikube", "docker-for-desktop", "docker-desktop").build()).build())
+ .endRequired()
+ .endNodeAffinity()
+ .endSpec()
+
+ val pvcBuilder = new PersistentVolumeClaimBuilder()
+ .withKind("PersistentVolumeClaim")
+ .withApiVersion("v1")
+ .withNewMetadata()
+ .withName(PVC_NAME)
+ .endMetadata()
+ .withNewSpec()
+ .withAccessModes("ReadWriteOnce")
+ .withStorageClassName("test-local-storage")
+ .withResources(new ResourceRequirementsBuilder()
+ .withRequests(Map("storage" -> new Quantity("1Gi")).asJava).build())
+ .endSpec()
+
+ kubernetesTestComponents
+ .kubernetesClient
+ .storage()
+ .storageClasses()
+ .create(scBuilder.build())
+
+ kubernetesTestComponents
+ .kubernetesClient
+ .persistentVolumes()
+ .create(pvBuilder.build())
+
+ kubernetesTestComponents
+ .kubernetesClient
+ .persistentVolumeClaims()
+ .create(pvcBuilder.build())
+ }
+
+ private def deleteLocalStorage(): Unit = {
+ kubernetesTestComponents
+ .kubernetesClient
+ .persistentVolumeClaims()
+ .withName(PVC_NAME)
+ .delete()
+
+ kubernetesTestComponents
+ .kubernetesClient
+ .persistentVolumes()
+ .withName(PV_NAME)
+ .delete()
+
+ kubernetesTestComponents
+ .kubernetesClient
+ .storage()
+ .storageClasses()
+ .withName(STORAGE_NAME)
+ .delete()
+ }
+
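+ // Reads the test file back from the mounted path inside the pod to confirm the volume was attached.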
+ private def checkPVs(pod: Pod, file: String) = {
+ Eventually.eventually(TIMEOUT, INTERVAL) {
+ implicit val podName: String = pod.getMetadata.getName
+ implicit val components: KubernetesTestComponents = kubernetesTestComponents
+ val contents = Utils.executeCommand("cat", s"$CONTAINER_MOUNT_PATH/$file")
+ assert(contents.toString.trim.equals(FILE_CONTENTS))
+ }
+ }
+
+ test("PVs with local storage", k8sTestTag, MinikubeTag) {
+ sparkAppConf
+ .set(s"spark.kubernetes.driver.volumes.persistentVolumeClaim.data.mount.path",
+ CONTAINER_MOUNT_PATH)
+ .set(s"spark.kubernetes.driver.volumes.persistentVolumeClaim.data.options.claimName",
+ PVC_NAME)
+ .set(s"spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.path",
+ CONTAINER_MOUNT_PATH)
+ .set(s"spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.claimName",
+ PVC_NAME)
+ val file = Utils.createTempFile(FILE_CONTENTS, HOST_PATH)
+ try {
+ setupLocalStorage()
+ runDFSReadWriteAndVerifyCompletion(
+ FILE_CONTENTS.split(" ").length,
+ driverPodChecker = (driverPod: Pod) => {
+ doBasicDriverPodCheck(driverPod)
+ checkPVs(driverPod, file)
+ },
+ executorPodChecker = (executorPod: Pod) => {
+ doBasicExecutorPodCheck(executorPod)
+ checkPVs(executorPod, file)
+ },
+ appArgs = Array(s"$CONTAINER_MOUNT_PATH/$file", s"$CONTAINER_MOUNT_PATH"),
+ interval = Some(PV_TESTS_INTERVAL)
+ )
+ } finally {
+ // make sure this always runs
+ deleteLocalStorage()
+ }
+ }
+}
+
+private[spark] object PVTestsSuite {
+ val STORAGE_NAME = "test-local-storage"
+ val PV_NAME = "test-local-pv"
+ val PVC_NAME = "test-local-pvc"
+ val CONTAINER_MOUNT_PATH = "/opt/spark/pv-tests"
+ val HOST_PATH = sys.env.getOrElse("PVC_TESTS_HOST_PATH", "/tmp")
+ val VM_PATH = sys.env.getOrElse("PVC_TESTS_VM_PATH", "/tmp")
+ val FILE_CONTENTS = "test PVs"
+ val PV_TESTS_INTERVAL = PatienceConfiguration.Interval(Span(10, Milliseconds))
+}