@@ -30,21 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"

-# Support for interacting with Hive. Since hive pulls in a lot of dependencies that might break
-# existing Spark applications, it is not included in the standard spark assembly. Instead, we only
-# include it in the classpath if the user has explicitly requested it by running "sbt hive/assembly"
-# Hopefully we will find a way to avoid uber-jars entirely and deploy only the needed packages in
-# the future.
-if [ -f "$FWDIR"/sql/hive/target/scala-$SCALA_VERSION/spark-hive-assembly-*.jar ]; then
-
-  # Datanucleus jars do not work if only included in the uberjar as plugin.xml metadata is lost.
-  DATANUCLEUSJARS=$(JARS=("$FWDIR/lib_managed/jars"/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
-  CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
-
-  ASSEMBLY_DIR="$FWDIR/sql/hive/target/scala-$SCALA_VERSION/"
-else
-  ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION/"
-fi
+ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"

 # First check if we have a dependencies jar. If so, include binary classes with the deps jar
 if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
@@ -59,7 +45,7 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"

-  DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*-deps.jar`
+  DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar`
   CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
 else
   # Else use spark-assembly jar from either RELEASE or assembly directory
@@ -71,6 +57,23 @@
   CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 fi

+# When Hive support is needed, Datanucleus jars must be included on the classpath.
+# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
+# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
+# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
+num_datanucleus_jars=$(ls "$FWDIR"/lib_managed/jars/ | grep "datanucleus-.*\\.jar" | wc -l)
+if [ $num_datanucleus_jars -gt 0 ]; then
+  AN_ASSEMBLY_JAR=${ASSEMBLY_JAR:-$DEPS_ASSEMBLY_JAR}
+  num_hive_files=$(jar tvf "$AN_ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null | wc -l)
+  if [ $num_hive_files -gt 0 ]; then
+    echo "Spark assembly has been built with Hive, including Datanucleus jars on classpath" 1>&2
+    DATANUCLEUSJARS=$(echo "$FWDIR/lib_managed/jars"/datanucleus-*.jar | tr " " :)
+    CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
+  fi
+fi
+
 # Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
 if [[ $SPARK_TESTING == 1 ]]; then
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes"
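
One way a reviewer might sanity-check this branch locally is to run the script and look for the Datanucleus entries in its output. This is only a sketch, not part of the change: it assumes an assembly built with Hive support already exists (so lib_managed/jars/ contains the datanucleus jars) and that bin/compute-classpath.sh prints the final CLASSPATH on stdout, which the tail of the file (not shown in this diff) is expected to do.

# Hypothetical reviewer check, assuming a Hive-enabled build is in place:
./bin/compute-classpath.sh | tr ':' '\n' | grep datanucleus
# With a Hive build, each datanucleus-*.jar from lib_managed/jars/ should be listed and the
# "built with Hive" notice should appear on stderr; with a non-Hive build, grep prints nothing
# and the notice is suppressed.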