From a1dddebfe25bbb207e877c5d20b329270f75dd2f Mon Sep 17 00:00:00 2001
From: Victor Jouffrey
Date: Wed, 17 Sep 2025 12:11:12 +0200
Subject: [PATCH 1/9] Add build with hopsfs workflow

---
Review fixes folded into this patch change the workflow blob, so the
abbreviated blob ids on the "index" lines of this series are stale.

 .../workflows/build_spark_with_hopsfs.yaml | 209 ++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 .github/workflows/build_spark_with_hopsfs.yaml

diff --git a/.github/workflows/build_spark_with_hopsfs.yaml b/.github/workflows/build_spark_with_hopsfs.yaml
new file mode 100644
index 0000000000000..5fd71c2290d72
--- /dev/null
+++ b/.github/workflows/build_spark_with_hopsfs.yaml
@@ -0,0 +1,209 @@
+name: Build Spark with hopsfs
+on:
+  workflow_call:
+    inputs:
+      ref:
+        description: 'The ref to checkout for the spark repo, default is branch-3.5'
+        required: false
+        type: string
+        default: 'branch-3.5'
+      jira_tag:
+        description: 'The tag to use for the jira release, default is NOJIRA'
+        required: false
+        type: string
+        default: 'NOJIRA'
+      runner:
+        description: 'The type of runner to use, default is ghrunner-ee8'
+        required: false
+        type: string
+        default: 'ghrunner-ee8'
+      build:
+        description: 'Whether to build spark or not, default is true. If this is false then the workflow will only prepare the versioning related outputs.'
+        required: false
+        type: boolean
+        default: true
+    secrets:
+      NEXUS_HARBOR_PASSWORD:
+        required: true
+    outputs:
+      pom_version_no_jira:
+        value: ${{ jobs.build-spark.outputs.pom_version_no_jira }}
+        description: 'The pom version without the jira tag'
+      pom_version:
+        value: ${{ jobs.build-spark.outputs.pom_version }}
+        description: 'The pom version with the jira tag'
+      commit_hash:
+        value: ${{ jobs.build-spark.outputs.commit_hash }}
+        description: 'The commit hash of the spark repo'
+      jira_tag:
+        value: ${{ jobs.build-spark.outputs.jira_tag }}
+        description: 'The jira tag used for the build'
+      spark_tar_name:
+        value: ${{ jobs.build-spark.outputs.spark_tar_name }}
+        description: 'The name of the spark tar file'
+      spark_tar_url:
+        value: ${{ jobs.build-spark.outputs.spark_tar_url }}
+        description: 'The url of the spark tar file'
+      hopsfs_version:
+        value: ${{ jobs.build-spark.outputs.hopsfs_version }}
+        description: 'The version of hopsfs used in the build'
+  workflow_dispatch:
+    inputs:
+      ref:
+        description: 'The ref to checkout for the spark repo, default is branch-3.5'
+        required: false
+        type: string
+        default: 'branch-3.5'
+      jira_tag:
+        description: 'The tag to use for the jira release, default is NOJIRA'
+        required: false
+        type: string
+        default: 'NOJIRA'
+      runner:
+        description: 'The type of runner to use, default is ghrunner-ee8'
+        required: false
+        type: string
+        default: 'ghrunner-ee8'
+      build:
+        description: 'Whether to build spark or not, default is true. If this is false then the workflow will only prepare the versioning related outputs.'
+        required: false
+        type: boolean
+        default: true
+  # pull_request:
+concurrency:
+  group: build-spark-${{ github.workflow }}-${{ github.job }}-${{ inputs.jira_tag || 'NOJIRA' }}
+  cancel-in-progress: true
+
+# Used to avoid error on PRs
+env:
+  SPARK_REF: ${{ inputs.ref || 'branch-3.5' }}
+  JIRA_TAG: ${{ inputs.jira_tag || 'NOJIRA' }}
+
+jobs:
+  build-spark:
+    runs-on: ${{ inputs.runner }}
+    outputs:
+      pom_version_no_jira: ${{ steps.prep_version.outputs.pom_version_no_jira }}
+      pom_version: ${{ steps.prep_version.outputs.pom_version }}
+      commit_hash: ${{ steps.prep_version.outputs.commit_hash }}
+      jira_tag: ${{ env.JIRA_TAG }}
+      spark_tar_name: ${{ steps.prep_version.outputs.spark_tar_name }}
+      spark_tar_url: ${{ steps.prep_version.outputs.spark_tar_url }}
+      hopsfs_version: ${{ steps.prep_version.outputs.hopsfs_version }}
+    steps:
+      - name: Checkout spark repo
+        uses: actions/checkout@v4
+        with:
+          repository: logicalclocks/spark
+          ref: ${{ env.SPARK_REF }}
+          path: ${{ github.workspace }}/spark
+
+      - name: To build or not to build
+        id: to_build_or_not_to_build
+        shell: bash
+        env:
+          # Label names are free-form text: hand them to bash via env, never by templating them into the script.
+          BUILD_SPARK: ${{ (github.event_name == 'pull_request' && contains(join(github.event.pull_request.labels.*.name, ','), 'build-spark')) || inputs.build }}
+          PR_LABELS: ${{ join(github.event.pull_request.labels.*.name, ', ') }}
+        run: |
+          if [[ "${BUILD_SPARK}" != "true" ]]; then
+            echo "# :recycle: Not building Spark" >> $GITHUB_STEP_SUMMARY
+            if [[ "${{ github.event_name }}" == "pull_request" ]]; then
+              echo "This is a pull request and the 'build-spark' label is not present." >> $GITHUB_STEP_SUMMARY
+              echo "pull_request_labels=${PR_LABELS}" >> $GITHUB_STEP_SUMMARY
+            elif [[ "${{ inputs.build || 'false'}}" != "true" ]]; then
+              echo "The input 'build' is set to false." >> $GITHUB_STEP_SUMMARY
+            fi
+            echo "BUILD_SPARK=$BUILD_SPARK" >> $GITHUB_OUTPUT
+          else
+            echo "# :white_check_mark: Building Spark" >> $GITHUB_STEP_SUMMARY
+            echo "BUILD_SPARK=$BUILD_SPARK" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Prep step version
+        shell: bash
+        id: prep_version
+        working-directory: ${{ github.workspace }}/spark
+        run: |
+          COMMIT_HASH=$(git rev-parse --short HEAD)
+          POM_VERSION_NO_JIRA=$(mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive exec:exec)
+          find . -name "pom.xml" -exec sed -i "s|${POM_VERSION_NO_JIRA}|${POM_VERSION_NO_JIRA%-SNAPSHOT}-${JIRA_TAG}-SNAPSHOT|g" {} \;
+          POM_VERSION=$(mvn -q -Dexec.executable="echo" -Dexec.args='${project.version}' --non-recursive exec:exec)
+          SPARK_TAR_NAME=spark-${POM_VERSION}-bin-without-hadoop-with-hive.tgz
+          SPARK_TAR_URL="${{ vars.NEXUS_DEV_SPARK_URL }}/${JIRA_TAG}/${SPARK_TAR_NAME}"
+          HOPSFS_VERSION=$(mvn -q -Dexec.executable="echo" -Dexec.args='${hadoop.version}' --non-recursive exec:exec)
+
+          echo "POM_VERSION_NO_JIRA=${POM_VERSION_NO_JIRA}" >> $GITHUB_ENV
+          echo "POM_VERSION=${POM_VERSION}" >> $GITHUB_ENV
+          echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
+          echo "SPARK_TAR_NAME=${SPARK_TAR_NAME}" >> $GITHUB_ENV
+          echo "SPARK_TAR_URL=${SPARK_TAR_URL}" >> $GITHUB_ENV
+          echo "HOPSFS_VERSION=${HOPSFS_VERSION}" >> $GITHUB_ENV
+
+          echo "POM_VERSION_NO_JIRA=${POM_VERSION_NO_JIRA}" >> $GITHUB_STEP_SUMMARY
+          echo "POM_VERSION=${POM_VERSION}" >> $GITHUB_STEP_SUMMARY
+          echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_STEP_SUMMARY
+          echo "SPARK_TAR_NAME=${SPARK_TAR_NAME}" >> $GITHUB_STEP_SUMMARY
+          echo "SPARK_TAR_URL=${SPARK_TAR_URL}" >> $GITHUB_STEP_SUMMARY
+          echo "HOPSFS_VERSION=${HOPSFS_VERSION}" >> $GITHUB_STEP_SUMMARY
+
+          echo "POM_VERSION=${POM_VERSION}" >> $GITHUB_OUTPUT
+          echo "POM_VERSION_NO_JIRA=${POM_VERSION_NO_JIRA}" >> $GITHUB_OUTPUT
+          echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_OUTPUT
+          echo "SPARK_TAR_NAME=${SPARK_TAR_NAME}" >> $GITHUB_OUTPUT
+          echo "SPARK_TAR_URL=${SPARK_TAR_URL}" >> $GITHUB_OUTPUT
+          echo "HOPSFS_VERSION=${HOPSFS_VERSION}" >> $GITHUB_OUTPUT
+
+      - name: Set up .m2 settings.xml
+        shell: bash
+        if: steps.to_build_or_not_to_build.outputs.BUILD_SPARK == 'true'
+        env:
+          M2_HOME: ~/.m2
+          # Credentials reach bash through env so shell metacharacters in them cannot alter the script.
+          NEXUS_HARBOR_USER: ${{ vars.NEXUS_HARBOR_USER }}
+          NEXUS_HARBOR_PASSWORD: ${{ secrets.NEXUS_HARBOR_PASSWORD }}
+        run: |
+          echo "M2_HOME var is $M2_HOME" >> $GITHUB_STEP_SUMMARY
+          mkdir -p ~/.m2
+          echo "<settings><servers>" > ~/.m2/settings.xml
+          echo "<server><id>HopsEE</id><username>${NEXUS_HARBOR_USER}</username><password>${NEXUS_HARBOR_PASSWORD}</password></server>" >> ~/.m2/settings.xml
+          echo "<server><id>HiveEE</id><username>${NEXUS_HARBOR_USER}</username><password>${NEXUS_HARBOR_PASSWORD}</password></server>" >> ~/.m2/settings.xml
+          echo "</servers></settings>" >> ~/.m2/settings.xml
+
+
+      - name: Cache maven
+        id: cache-maven
+        if: steps.to_build_or_not_to_build.outputs.BUILD_SPARK == 'true'
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.m2
+            !~/.m2/settings.xml
+          key: ${{ runner.os }}-maven-spark-${{ hashFiles('spark/**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-maven-spark-
+
+
+      - name: Build spark and spark-packaging
+        shell: bash
+        if: steps.to_build_or_not_to_build.outputs.BUILD_SPARK == 'true'
+        working-directory: ${{ github.workspace }}/spark
+        env:
+          POM_VERSION: ${{ env.POM_VERSION }}
+          M2_HOME: ~/.m2
+        run: |
+          ./dev/make-distribution.sh --name without-hadoop-with-hive --tgz "-Pkubernetes,hadoop-provided,parquet-provided,hive,hadoop-cloud,bigtop-dist"
+
+      - name: Upload spark-packaging artifact to Nexus
+        shell: bash
+        if: steps.to_build_or_not_to_build.outputs.BUILD_SPARK == 'true'
+        working-directory: ${{ github.workspace }}/spark
+        env:
+          M2_HOME: ~/.m2
+          NEXUS_HARBOR_USER: ${{ vars.NEXUS_HARBOR_USER }}
+          NEXUS_HARBOR_PASSWORD: ${{ secrets.NEXUS_HARBOR_PASSWORD }}
+        run: |
+          # --fail turns an HTTP 4xx/5xx answer from Nexus into a non-zero exit so a rejected upload fails the step.
+          curl --fail -u "${NEXUS_HARBOR_USER}:${NEXUS_HARBOR_PASSWORD}" --upload-file "spark-${POM_VERSION}-bin-without-hadoop-with-hive.tgz" "${SPARK_TAR_URL}"
+          export MAVEN_OPTS="${MAVEN_OPTS:--Xss128m -Xmx4g -XX:ReservedCodeCacheSize=128m}"
+          ./build/mvn deploy -DskipTests -Dmaven.javadoc.skip=true -Dmaven.scaladoc.skip=true -Dmaven.source.skip -Dcyclonedx.skip=true -Pkubernetes,hadoop-provided,parquet-provided,hive,hadoop-cloud
+From
d510746b4ae0d3af2906b0dc2b8d8e30c1d3d6db Mon Sep 17 00:00:00 2001 From: Victor Jouffrey Date: Wed, 17 Sep 2025 12:16:51 +0200 Subject: [PATCH 2/9] Add repository to pom.xml --- pom.xml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pom.xml b/pom.xml index 4f912329beed6..3235ba0a85134 100644 --- a/pom.xml +++ b/pom.xml @@ -348,6 +348,28 @@ false + + HopsEE + Hops Release Repository + https://nexus.hops.works/repository/hops-artifacts + + true + + + true + + + + HiveEE + Hive Release Repository + https://nexus.hops.works/repository/hive-artifacts + + true + + + true + + From c62c0a03d438b9f6fbb57380fcb47122a0966a50 Mon Sep 17 00:00:00 2001 From: Victor Jouffrey Date: Wed, 17 Sep 2025 12:27:34 +0200 Subject: [PATCH 3/9] Use ubuntu latest if running on the spark repo itself --- .github/workflows/build_spark_with_hopsfs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_spark_with_hopsfs.yaml b/.github/workflows/build_spark_with_hopsfs.yaml index 5fd71c2290d72..c7a591cba1bc7 100644 --- a/.github/workflows/build_spark_with_hopsfs.yaml +++ b/.github/workflows/build_spark_with_hopsfs.yaml @@ -81,7 +81,7 @@ env: jobs: build-spark: - runs-on: ${{ inputs.runner }} + runs-on: ${{ inputs.runner || 'ubuntu-latest' }} outputs: pom_version_no_jira: ${{ steps.prep_version.outputs.pom_version_no_jira }} pom_version: ${{ steps.prep_version.outputs.pom_version }} From b555386f81c7865e9f355f1046fc0f221d06125e Mon Sep 17 00:00:00 2001 From: Victor Jouffrey Date: Fri, 19 Sep 2025 10:04:15 +0200 Subject: [PATCH 4/9] Update hive version and hopsfs to 17 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 8d1d7e0c7abf0..d3e388998a16b 100644 --- a/pom.xml +++ b/pom.xml @@ -122,7 +122,7 @@ 2.0.17 2.24.3 - 3.2.0.16-EE-SNAPSHOT + 3.2.0.17-EE-RC1 io.hops 3.23.4 @@ -133,7 +133,7 @@ io.hops.hive core - 3.0.0.13.5 + 3.0.0.13.10-SNAPSHOT 3.0.0.13.5 3.0 From 
028a5b19e78900f6563dc4c6cbb92ca8e4b26b6c Mon Sep 17 00:00:00 2001 From: Victor Jouffrey Date: Fri, 19 Sep 2025 11:35:15 +0200 Subject: [PATCH 5/9] Add snapshot repo --- pom.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pom.xml b/pom.xml index d3e388998a16b..c927ee6300b2e 100644 --- a/pom.xml +++ b/pom.xml @@ -4039,6 +4039,9 @@ HopsEE Hops Release Repository https://nexus.hops.works/repository/hops-artifacts + + true + From 0b614406af235d444f7c0810f6896e95a9d0652b Mon Sep 17 00:00:00 2001 From: Victor Jouffrey Date: Fri, 19 Sep 2025 13:33:58 +0200 Subject: [PATCH 6/9] Test pom fixing --- pom.xml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index c927ee6300b2e..33587967beb10 100644 --- a/pom.xml +++ b/pom.xml @@ -4039,9 +4039,11 @@ HopsEE Hops Release Repository https://nexus.hops.works/repository/hops-artifacts - - true - + + HopsEE + Hops Repo + https://nexus.hops.works/repository/hops-artifacts/ + From b70f15d01cd5c632472b4b1b7c76e572fc7a5c6a Mon Sep 17 00:00:00 2001 From: Victor Jouffrey Date: Mon, 22 Sep 2025 20:38:52 +0200 Subject: [PATCH 7/9] Hive23 version used for hive-llap which is messing up --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 33587967beb10..c33c30b0368df 100644 --- a/pom.xml +++ b/pom.xml @@ -134,7 +134,7 @@ core 3.0.0.13.10-SNAPSHOT - 3.0.0.13.5 + 3.0.0.13.10-SNAPSHOT 3.0 From d6150761ca8b61ae49fb32451952faa1da0a5346 Mon Sep 17 00:00:00 2001 From: Victor Jouffrey Date: Tue, 23 Sep 2025 07:16:21 +0200 Subject: [PATCH 8/9] Use hive IS-153 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index c33c30b0368df..9db8caa41bcb4 100644 --- a/pom.xml +++ b/pom.xml @@ -133,8 +133,8 @@ io.hops.hive core - 3.0.0.13.10-SNAPSHOT - 3.0.0.13.10-SNAPSHOT + 3.0.0.13.10-IS-153-SNAPSHOT + 3.0.0.13.10-IS-153-SNAPSHOT 3.0 From 038aab528c3a18b1ab85cb7e5bc7be1d4f9a8fb2 Mon Sep 17 00:00:00 2001 From: Victor 
Jouffrey Date: Tue, 23 Sep 2025 11:46:00 +0200 Subject: [PATCH 9/9] Force snapshot update --- dev/make-distribution.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 6758a8aee0322..73945012fcd1f 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -166,7 +166,7 @@ export MAVEN_OPTS="${MAVEN_OPTS:--Xss128m -Xmx4g -XX:ReservedCodeCacheSize=128m} # Store the command as an array because $MVN variable might have spaces in it. # Normal quoting tricks don't work. # See: http://mywiki.wooledge.org/BashFAQ/050 -BUILD_COMMAND=("$MVN" clean package \ +BUILD_COMMAND=("$MVN" clean package -U \ -DskipTests \ -Dmaven.javadoc.skip=true \ -Dmaven.scaladoc.skip=true \