diff --git a/assembly/pom.xml b/assembly/pom.xml index abd8935339992..9f8caf4cf2da1 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -16,7 +16,8 @@ ~ limitations under the License. --> - + 4.0.0 org.apache.spark @@ -80,51 +81,21 @@ - org.apache.maven.plugins - maven-shade-plugin - - false - ${spark.jar} - - - *:* - - - - - *:* - - org.datanucleus:* - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - + maven-assembly-plugin + dist package - shade + single - - - - META-INF/services/org.apache.hadoop.fs.FileSystem - - - reference.conf - - - log4j.properties - - - - + ${project.artifactId}-${project.version} + + src/main/assembly/spark-dist.xml + @@ -153,16 +124,6 @@ - - hive - - - org.apache.spark - spark-hive_${scala.binary.version} - ${project.version} - - - spark-ganglia-lgpl diff --git a/assembly/src/main/assembly/spark-dist.xml b/assembly/src/main/assembly/spark-dist.xml new file mode 100644 index 0000000000000..3bab17f66db51 --- /dev/null +++ b/assembly/src/main/assembly/spark-dist.xml @@ -0,0 +1,97 @@ + + + spark-dist + + dir + + false + + + + + README + + + + + ${project.parent.basedir}/sbin/ + + /sbin + + **/* + + + + + ${project.parent.basedir}/ec2/ + + /ec2 + + **/* + + + + + ${project.parent.basedir}/bin/ + + /bin + + **/* + + + + + ${project.parent.basedir}/conf/ + + /conf + + **/*.template + **/slaves + + + + + ${project.parent.basedir}/python/ + + /python + + **/* + + + + + + + /share/spark/core + + org.apache.spark:*:jar + + + + /share/spark/core/lib + true + false + runtime + false + + org.apache.spark:*:jar + + + + + diff --git a/assembly/src/main/assembly/spark-examples-dist.xml b/assembly/src/main/assembly/spark-examples-dist.xml new file mode 100644 index 0000000000000..d31ad4ac00424 --- /dev/null +++ b/assembly/src/main/assembly/spark-examples-dist.xml @@ -0,0 +1,48 @@ + + + spark-examples-dist + + dir + + false + + + + /share/spark/examples + + org.apache.spark:spark-examples*:jar + + + + /share/spark/examples/lib + true + false + 
runtime + false + + org.apache.spark:*:jar + org.apache.hadoop:*:jar + org.slf4j:slf4j-api + org.slf4j:slf4j-log4j12 + log4j:log4j + org.scala-lang:*:jar + + + + + diff --git a/assembly/src/main/assembly/spark-hive-dist.xml b/assembly/src/main/assembly/spark-hive-dist.xml new file mode 100644 index 0000000000000..b6f23600e4c69 --- /dev/null +++ b/assembly/src/main/assembly/spark-hive-dist.xml @@ -0,0 +1,46 @@ + + + spark-hive-dist + + dir + + false + + + /share/spark/hive + + org.apache.spark:spark-hive*:jar + + + + /share/spark/hive/lib + true + false + runtime + false + + org.apache.spark:*:jar + org.apache.hadoop:*:jar + org.slf4j:slf4j-api + org.slf4j:slf4j-log4j12 + log4j:log4j + org.scala-lang:*:jar + + + + diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 7df43a555d562..65db73cba34f6 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -54,11 +54,27 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar 2>/dev/null) else - # Else use spark-assembly jar from either RELEASE or assembly directory if [ -f "$FWDIR/RELEASE" ]; then - ASSEMBLY_JAR=$(ls "$FWDIR"/lib/spark-assembly*hadoop*.jar 2>/dev/null) + CLASSPATH="$CLASSPATH:$FWDIR/share/spark/core/*:$FWDIR/share/spark/core/lib/*" + if [ -d "$FWDIR/share/spark/hive" ]; then + CLASSPATH="$CLASSPATH:$FWDIR/share/spark/hive/*:$FWDIR/share/spark/hive/lib/*" + fi + if [ -d "$FWDIR/share/spark/examples" ]; then + CLASSPATH="$CLASSPATH:$FWDIR/share/spark/examples/*:$FWDIR/share/spark/examples/lib/*" + fi else - ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar 2>/dev/null) + SPARK_DIST=`echo "$FWDIR"/assembly/target/*spark-dist` + if [ -d "$SPARK_DIST" ]; then + CLASSPATH="$CLASSPATH:$SPARK_DIST/share/spark/core/*:$SPARK_DIST/share/spark/core/lib/*" + fi + HIVE_DIST=`echo "$FWDIR"/sql/hive/target/*spark-hive-dist` + if [ -d "$HIVE_DIST" ]; then + 
CLASSPATH="$CLASSPATH:$HIVE_DIST/share/spark/hive/*:$HIVE_DIST/share/spark/hive/lib/*" + fi + ASSEMBLY_JAR=`echo "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar` + if [ -f "$ASSEMBLY_JAR" ]; then + CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR" + fi fi fi diff --git a/bin/spark-class b/bin/spark-class index 6480ccb58d6aa..302b6a89ac02c 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -110,18 +110,20 @@ export JAVA_OPTS if [ ! -f "$FWDIR/RELEASE" ]; then # Exit if the user hasn't compiled Spark - num_jars=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar" | wc -l) - jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar") - if [ "$num_jars" -eq "0" ]; then - echo "Failed to find Spark assembly in $FWDIR/assembly/target/scala-$SCALA_VERSION/" >&2 - echo "You need to build Spark before running this program." >&2 - exit 1 - fi - if [ "$num_jars" -gt "1" ]; then - echo "Found multiple Spark assembly jars in $FWDIR/assembly/target/scala-$SCALA_VERSION:" >&2 - echo "$jars_list" - echo "Please remove all but one jar." - exit 1 + if [ ! -d "$FWDIR"/assembly/target/*spark-dist/ ]; then + num_jars=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar" | wc -l) + jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar") + if [ "$num_jars" -eq "0" ]; then + echo "Failed to find Spark assembly in $FWDIR/assembly/target/" >&2 + echo "You need to build Spark with './make-distribution.sh' before running this program." >&2 + exit 1 + fi + if [ "$num_jars" -gt "1" ]; then + echo "Found multiple Spark assembly jars in $FWDIR/assembly/target/scala-$SCALA_VERSION:" >&2 + echo "$jars_list" + echo "Please remove all but one jar." + exit 1 + fi fi fi diff --git a/examples/pom.xml b/examples/pom.xml index 874bcd7916f35..ba4403f096ca1 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -16,7 +16,8 @@ ~ limitations under the License. 
--> - + 4.0.0 org.apache.spark @@ -183,42 +184,19 @@ org.apache.maven.plugins - maven-shade-plugin - - false - ${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar - - - *:* - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - + maven-assembly-plugin + dist package - shade + single - - - - reference.conf - - - log4j.properties - - + ${project.artifactId}-${project.version} + + ../assembly/src/main/assembly/spark-examples-dist.xml + diff --git a/make-distribution.sh b/make-distribution.sh index 7a08d6b9151c4..7b78a2b10a3b7 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -164,19 +164,19 @@ ${BUILD_COMMAND} # Make directories rm -rf "$DISTDIR" -mkdir -p "$DISTDIR/lib" +mkdir -p "$DISTDIR" echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE" # Copy jars -cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/" -cp $FWDIR/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/" +cp -r $FWDIR/assembly/target/*spark-dist/* "$DISTDIR/" +cp -r $FWDIR/examples/target/*spark-examples-dist/* "$DISTDIR/" # Copy example sources (needed for python and SQL) -mkdir -p "$DISTDIR/examples/src/main" -cp -r $FWDIR/examples/src/main "$DISTDIR/examples/src/" +mkdir -p "$DISTDIR/share/spark/examples/src/main" +cp -r $FWDIR/examples/src/main "$DISTDIR/share/spark/examples/src/" if [ "$SPARK_HIVE" == "true" ]; then - cp $FWDIR/lib_managed/jars/datanucleus*.jar "$DISTDIR/lib/" + cp -r $FWDIR/sql/hive/target/*spark-hive-dist/* "$DISTDIR/" fi # Copy license and ASF files @@ -187,15 +187,6 @@ if [ -e $FWDIR/CHANGES.txt ]; then cp "$FWDIR/CHANGES.txt" "$DISTDIR" fi -# Copy other things -mkdir "$DISTDIR"/conf -cp "$FWDIR"/conf/*.template "$DISTDIR"/conf -cp "$FWDIR"/conf/slaves "$DISTDIR"/conf -cp -r "$FWDIR/bin" "$DISTDIR" -cp -r "$FWDIR/python" "$DISTDIR" -cp -r "$FWDIR/sbin" "$DISTDIR" -cp -r "$FWDIR/ec2" "$DISTDIR" - # Download and copy in 
tachyon, if requested if [ "$SPARK_TACHYON" == "true" ]; then TACHYON_VERSION="0.4.1" diff --git a/pom.xml b/pom.xml index 786b6d4984d86..85c6636203f6b 100644 --- a/pom.xml +++ b/pom.xml @@ -94,7 +94,6 @@ streaming sql/catalyst sql/core - sql/hive repl assembly external/twitter @@ -978,6 +977,12 @@ yarn + + hive + + sql/hive + + diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 4d0b2fa1452a2..7844cdf1f4bf5 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -37,11 +37,13 @@ org.apache.spark spark-core_${scala.binary.version} ${project.version} + provided org.apache.spark spark-sql_${scala.binary.version} ${project.version} + provided org.spark-project.hive @@ -66,6 +68,7 @@ org.codehaus.jackson jackson-mapper-asl + provided org.spark-project.hive @@ -110,26 +113,21 @@ org.scalatest scalatest-maven-plugin - - org.apache.maven.plugins - maven-dependency-plugin - 2.4 + maven-assembly-plugin - copy-dependencies + dist package - copy-dependencies + single - - ${basedir}/../../lib_managed/jars - false - false - true - org.datanucleus + ${project.artifactId}-${project.version} + + ../../assembly/src/main/assembly/spark-hive-dist.xml +