From 3d175194f27c86a605a9b65bbef2e51a551178e7 Mon Sep 17 00:00:00 2001 From: witgo Date: Wed, 30 Apr 2014 16:32:23 +0800 Subject: [PATCH 1/4] Improved build configuration III --- assembly/pom.xml | 62 ++----------- assembly/src/main/assembly/spark-dist.xml | 88 +++++++++++++++++++ .../src/main/assembly/spark-examples-dist.xml | 48 ++++++++++ assembly/src/main/assembly/sprk-hive-dist.xml | 46 ++++++++++ bin/compute-classpath.sh | 23 ++++- bin/spark-class | 26 +++--- examples/pom.xml | 40 ++------- make-distribution.sh | 18 ++-- pom.xml | 7 +- sql/hive/pom.xml | 19 ++-- 10 files changed, 252 insertions(+), 125 deletions(-) create mode 100644 assembly/src/main/assembly/spark-dist.xml create mode 100644 assembly/src/main/assembly/spark-examples-dist.xml create mode 100644 assembly/src/main/assembly/sprk-hive-dist.xml diff --git a/assembly/pom.xml b/assembly/pom.xml index bdb38806492a6..7cbdfaee00731 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -16,7 +16,8 @@ ~ limitations under the License. --> - + 4.0.0 org.apache.spark @@ -40,14 +41,6 @@ root - - - - lib - file://${project.basedir}/lib - - - org.apache.spark @@ -93,48 +86,21 @@ - org.apache.maven.plugins - maven-shade-plugin - - false - ${spark.jar} - - - *:* - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - + maven-assembly-plugin + dist package - shade + single - - - - META-INF/services/org.apache.hadoop.fs.FileSystem - - - reference.conf - - - log4j.properties - - + ${project.artifactId}-${project.version} + + src/main/assembly/spark-dist.xml + @@ -163,16 +129,6 @@ - - hive - - - org.apache.spark - spark-hive_${scala.binary.version} - ${project.version} - - - spark-ganglia-lgpl diff --git a/assembly/src/main/assembly/spark-dist.xml b/assembly/src/main/assembly/spark-dist.xml new file mode 100644 index 0000000000000..ecdc1c5a989b4 --- /dev/null +++ b/assembly/src/main/assembly/spark-dist.xml @@ -0,0 +1,88 @@ + + + spark-dist + + dir + + false + + + + + README + + + + + ${project.parent.basedir}/sbin/ + + /sbin + + **/* + + + + + ${project.parent.basedir}/bin/ + + /bin + + **/* + + + + + ${project.parent.basedir}/conf/ + + /conf + + **/*.template + **/slaves + + + + + ${project.parent.basedir}/python/ + + /python + + **/* + + + + + + + /share/spark/core + + org.apache.spark:*:jar + + + + /share/spark/core/lib + true + false + runtime + false + + org.apache.spark:*:jar + + + + + diff --git a/assembly/src/main/assembly/spark-examples-dist.xml b/assembly/src/main/assembly/spark-examples-dist.xml new file mode 100644 index 0000000000000..d31ad4ac00424 --- /dev/null +++ b/assembly/src/main/assembly/spark-examples-dist.xml @@ -0,0 +1,48 @@ + + + spark-examples-dist + + dir + + false + + + + /share/spark/examples + + org.apache.spark:spark-examples*:jar + + + + /share/spark/examples/lib + true + false + runtime + false + + org.apache.spark:*:jar + org.apache.hadoop:*:jar + org.slf4j:slf4j-api + org.slf4j:slf4j-log4j12 + log4j:log4j + org.scala-lang:*:jar + + + + + diff --git a/assembly/src/main/assembly/sprk-hive-dist.xml b/assembly/src/main/assembly/sprk-hive-dist.xml new file mode 100644 index 0000000000000..b6f23600e4c69 --- /dev/null +++ b/assembly/src/main/assembly/sprk-hive-dist.xml @@ -0,0 +1,46 @@ + + + spark-hive-dist + + dir + + false + + + /share/spark/hive + + org.apache.spark:spark-hive*:jar + + + + /share/spark/hive/lib + true + false + runtime + false + + org.apache.spark:*:jar + org.apache.hadoop:*:jar + org.slf4j:slf4j-api + org.slf4j:slf4j-log4j12 + log4j:log4j + org.scala-lang:*:jar + + + + diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index b0218531e9eb8..9235130d846d6 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -48,13 +48,28 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar` CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR" else - # Else use spark-assembly jar from either RELEASE or assembly directory if [ -f "$FWDIR/RELEASE" ]; then - ASSEMBLY_JAR=`ls "$FWDIR"/lib/spark-assembly*hadoop*.jar` + CLASSPATH="$CLASSPATH:$FWDIR/share/spark/core/*:$FWDIR/share/spark/core/lib/*" + if [ -d "$FWDIR/share/spark/hive" ]; then + CLASSPATH="$CLASSPATH:$FWDIR/share/spark/hive/*:$FWDIR/share/spark/hive/lib/*" + fi + if [ -d "$FWDIR/share/spark/examples" ]; then + CLASSPATH="$CLASSPATH:$FWDIR/share/spark/examples/*:$FWDIR/share/spark/examples/lib/*" + fi else - ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar` + SPARK_DIST=`echo "$FWDIR"/assembly/target/*spark-dist` + if [ -d "$SPARK_DIST" ]; then + CLASSPATH="$CLASSPATH:$SPARK_DIST/share/spark/core/*:$SPARK_DIST/share/spark/core/lib/*" + fi + if [ -d "$FWDIR"/sql/hive/target/*spark-hive-dist/ ]; then + HVIE_DIST=`echo "$FWDIR"/sql/hive/target/*spark-hive-dist` + CLASSPATH="$CLASSPATH:$HVIE_DIST/share/spark/hive/*:$HVIE_DIST/share/spark/hive/lib/*" + fi + ASSEMBLY_JAR=`echo "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar` + if [ -f "$ASSEMBLY_JAR" ]; then + CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR" + fi fi - CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR" fi # When Hive support is needed, Datanucleus jars must be included on the classpath. diff --git a/bin/spark-class b/bin/spark-class index e8160c8af64c1..7ce99f7820c57 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -110,18 +110,20 @@ export JAVA_OPTS if [ ! -f "$FWDIR/RELEASE" ]; then # Exit if the user hasn't compiled Spark - num_jars=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar" | wc -l) - jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar") - if [ "$num_jars" -eq "0" ]; then - echo "Failed to find Spark assembly in $FWDIR/assembly/target/scala-$SCALA_VERSION/" >&2 - echo "You need to build Spark with 'sbt/sbt assembly' before running this program." >&2 - exit 1 - fi - if [ "$num_jars" -gt "1" ]; then - echo "Found multiple Spark assembly jars in $FWDIR/assembly/target/scala-$SCALA_VERSION:" >&2 - echo "$jars_list" - echo "Please remove all but one jar." - exit 1 + if [ ! -d "$FWDIR"/assembly/target/*spark-dist/ ]; then + num_jars=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar" | wc -l) + jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar") + if [ "$num_jars" -eq "0" ]; then + echo "Failed to find Spark assembly in $FWDIR/assembly/target/" >&2 + echo "You need to build Spark with './make-distribution.sh' before running this program." >&2 + exit 1 + fi + if [ "$num_jars" -gt "1" ]; then + echo "Found multiple Spark assembly jars in $FWDIR/assembly/target/scala-$SCALA_VERSION:" >&2 + echo "$jars_list" + echo "Please remove all but one jar." + exit 1 + fi fi fi diff --git a/examples/pom.xml b/examples/pom.xml index e1fc149d87f17..a14bc2b112904 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -16,7 +16,8 @@ ~ limitations under the License. --> - + 4.0.0 org.apache.spark @@ -179,42 +180,19 @@ org.apache.maven.plugins - maven-shade-plugin - - false - ${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar - - - *:* - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - + maven-assembly-plugin + dist package - shade + single - - - - reference.conf - - - log4j.properties - - + ${project.artifactId}-${project.version} + + ../assembly/src/main/assembly/spark-examples-dist.xml + diff --git a/make-distribution.sh b/make-distribution.sh index c05dcd89d90a7..99aebae327fa0 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -140,21 +140,15 @@ fi # Make directories rm -rf "$DISTDIR" -mkdir -p "$DISTDIR/lib" +mkdir -p "$DISTDIR" echo "Spark $VERSION built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE" # Copy jars -cp $FWDIR/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/" -cp $FWDIR/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/" - -# Copy other things -mkdir "$DISTDIR"/conf -cp "$FWDIR"/conf/*.template "$DISTDIR"/conf -cp "$FWDIR"/conf/slaves "$DISTDIR"/conf -cp -r "$FWDIR/bin" "$DISTDIR" -cp -r "$FWDIR/python" "$DISTDIR" -cp -r "$FWDIR/sbin" "$DISTDIR" - +cp -r $FWDIR/assembly/target/*spark-dist/* "$DISTDIR/" +cp -r $FWDIR/examples/target/*spark-examples-dist/* "$DISTDIR/" +if [ "$SPARK_HIVE" == "true" ]; then + cp -r $FWDIR/sql/hive/target/*spark-hive-dist/* "$DISTDIR/" +fi # Download and copy in tachyon, if requested if [ "$SPARK_TACHYON" == "true" ]; then diff --git a/pom.xml b/pom.xml index 646753fe30301..54e5a8cc28e00 100644 --- a/pom.xml +++ b/pom.xml @@ -93,7 +93,6 @@ streaming sql/catalyst sql/core - sql/hive repl assembly external/twitter @@ -903,6 +902,12 @@ + + hive + + sql/hive + + diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 889d249146b8c..55ed87fdeb4d7 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -81,26 +81,21 @@ org.scalatest scalatest-maven-plugin - - org.apache.maven.plugins - maven-dependency-plugin - 2.4 + maven-assembly-plugin - copy-dependencies + dist package - copy-dependencies + single - - ${basedir}/../../lib_managed/jars - false - false - true - org.datanucleus + ${project.artifactId}-${project.version} + + ../../assembly/src/main/assembly/sprk-hive-dist.xml + From 3aae189f684df1cfaeeb95e6125acad89f4cd21a Mon Sep 17 00:00:00 2001 From: witgo Date: Sat, 3 May 2014 10:04:27 +0800 Subject: [PATCH 2/4] park-core provided --- sql/hive/pom.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 55ed87fdeb4d7..175ece9efc8e1 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -37,11 +37,13 @@ org.apache.spark spark-core_${scala.binary.version} ${project.version} + provided org.apache.spark spark-sql_${scala.binary.version} ${project.version} + provided org.apache.hive @@ -56,6 +58,7 @@ org.codehaus.jackson jackson-mapper-asl + provided org.apache.hive From b1c0a66f698b3b784e8f9211ba47b303fbbbf036 Mon Sep 17 00:00:00 2001 From: witgo Date: Sat, 3 May 2014 22:51:20 +0800 Subject: [PATCH 3/4] sprk-hive-dist.xml => sprak-hive-dist.xml --- .../main/assembly/{sprk-hive-dist.xml => sprak-hive-dist.xml} | 0 sql/hive/pom.xml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename assembly/src/main/assembly/{sprk-hive-dist.xml => sprak-hive-dist.xml} (100%) diff --git a/assembly/src/main/assembly/sprk-hive-dist.xml b/assembly/src/main/assembly/sprak-hive-dist.xml similarity index 100% rename from assembly/src/main/assembly/sprk-hive-dist.xml rename to assembly/src/main/assembly/sprak-hive-dist.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 175ece9efc8e1..b74cc253afb55 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -97,7 +97,7 @@ ${project.artifactId}-${project.version} - ../../assembly/src/main/assembly/sprk-hive-dist.xml + ../../assembly/src/main/assembly/spark-hive-dist.xml From bc090bd00b34beff0e06481ab6352c5b6e1bb1df Mon Sep 17 00:00:00 2001 From: witgo Date: Mon, 5 May 2014 12:04:00 +0800 Subject: [PATCH 4/4] sprak-hive-dist.xml => spark-hive-dist.xml --- .../main/assembly/{sprak-hive-dist.xml => spark-hive-dist.xml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename assembly/src/main/assembly/{sprak-hive-dist.xml => spark-hive-dist.xml} (100%) diff --git a/assembly/src/main/assembly/sprak-hive-dist.xml b/assembly/src/main/assembly/spark-hive-dist.xml similarity index 100% rename from assembly/src/main/assembly/sprak-hive-dist.xml rename to assembly/src/main/assembly/spark-hive-dist.xml