From f1cc61c2ede5ce99ffc768c80e9684ed85f71df8 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 17 Sep 2025 14:45:45 +0100 Subject: [PATCH 01/10] HADOOP-19696. hadoop binary distribution to move cloud connectors to hadoop common/lib * new assembly for hadoop cloud storage * hadoop-cloud-storage does the assembly on -Pdist * layout stitching to move into share/hadoop/common/lib * remove connectors from hadoop-tools-dist * cut old jackson version from huawei cloud dependency -even though it was being upgraded by our own artifacts, it was a complication. --- dev-support/bin/dist-layout-stitching | 4 ++ .../assemblies/hadoop-cloud-storage.xml | 55 ++++++++++++++++ .../main/resources/assemblies/hadoop-src.xml | 1 + .../hadoop-cloud-storage/pom.xml | 63 +++++++++++++++++++ .../hadoop-huaweicloud/pom.xml | 4 ++ hadoop-tools/hadoop-tools-dist/pom.xml | 40 ------------ 6 files changed, 127 insertions(+), 40 deletions(-) create mode 100644 hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml diff --git a/dev-support/bin/dist-layout-stitching b/dev-support/bin/dist-layout-stitching index d4bfd8aaada3b..fee6b92219d4f 100755 --- a/dev-support/bin/dist-layout-stitching +++ b/dev-support/bin/dist-layout-stitching @@ -130,6 +130,10 @@ run cp -p "${ROOT}/README.txt" . run copy "${ROOT}/hadoop-common-project/hadoop-common/target/hadoop-common-${VERSION}" . run copy "${ROOT}/hadoop-common-project/hadoop-nfs/target/hadoop-nfs-${VERSION}" . run copy "${ROOT}/hadoop-common-project/hadoop-registry/target/hadoop-registry-${VERSION}" . + +# cloud connectors go into common +run copy "${ROOT}/hadoop-cloud-storage-project/hadoop-cloud-storage/target/hadoop-cloud-storage-${VERSION}" . + run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${VERSION}" . run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-nfs/target/hadoop-hdfs-nfs-${VERSION}" . run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-${VERSION}" . 
diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml new file mode 100644 index 0000000000000..de668d1516a4a --- /dev/null +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml @@ -0,0 +1,55 @@ + + + hadoop-cloud-storage + + dir + + false + + + + ../../hadoop-tools/hadoop-aws/src/main/bin + /bin + 0755 + + + ./../hadoop-tools/hadoop-aws/src/main/shellprofile.d + + * + + /libexec/shellprofile.d + 0755 + + + + + + /share/hadoop/common/lib + false + runtime + false + + + org.apache.hadoop:hadoop-annotations + org.apache.hadoop.thirdparty:hadoop-shaded-guava + + + + diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml index 871694209393f..7895f4e57142f 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-src.xml @@ -57,6 +57,7 @@ **/file:/** **/SecurityAuth.audit* patchprocess/** + **/auth-keys.xml diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml index 2df2cd3d9242d..10eda1d4b6814 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml @@ -124,6 +124,12 @@ org.apache.hadoop hadoop-huaweicloud compile + + + com.fasterxml.jackson + * + + org.apache.hadoop @@ -146,4 +152,61 @@ + + + + + maven-deploy-plugin + + true + + + + org.apache.rat + apache-rat-plugin + + + + + + + + dist + + false + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + org.apache.hadoop + hadoop-assemblies + ${project.version} + + + + + dist + prepare-package + + single + + + false + false + ${project.artifactId}-${project.version} + + hadoop-cloud-storage + + + + + + + + + diff --git 
a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml index 641ad3afb1b0e..d00dc1f22c704 100755 --- a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml @@ -169,6 +169,10 @@ log4j-api org.apache.logging.log4j + + com.fasterxml.jackson.core + * + diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml index 9fa02a0ac0027..803caf8fa0648 100644 --- a/hadoop-tools/hadoop-tools-dist/pom.xml +++ b/hadoop-tools/hadoop-tools-dist/pom.xml @@ -92,30 +92,12 @@ pom ${project.version} - - org.apache.hadoop - hadoop-aws - compile - ${project.version} - org.apache.hadoop hadoop-kafka compile ${project.version} - - org.apache.hadoop - hadoop-azure - compile - ${project.version} - - - org.apache.hadoop - hadoop-aliyun - compile - ${project.version} - org.apache.hadoop hadoop-sls @@ -127,34 +109,12 @@ ${project.version} compile - - org.apache.hadoop - hadoop-azure-datalake - compile - ${project.version} - org.apache.hadoop hadoop-fs2img compile ${project.version} - - org.apache.hadoop - hadoop-gcp - compile - ${project.version} - - - - * - * - - - From 321d5b6d8a703f574474ebb3368254f57df60229 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 23 Sep 2025 18:35:28 +0100 Subject: [PATCH 02/10] HADOOP-19696. cos, huawei, aliyun cloud dependencies * add the artifacts found with the relevant hadoop-* modules to the binary license * leave all three with cloud-storage dependencies such that they don't include these in a pull of hadoop-cloud-storage (regression?) * unless specific profiles cos, huawei and aliyun are declared, at which point they're exported by hadoop-cloud-storage and put into the assembly. This avoids dealing with complex dependencies we don't want (okio, more xml parsers,...), while making it straightforward to build a distro with it if you want. bundle.jar is always getting in. 
Do I do it here iff -Paws is set or do I delay it until the copy to the final distro artifact tree takes place. delay: keeps it as an export of hadoop-cloud-storage pom early: consistent with the rest --- LICENSE-binary | 21 +++++- .../hadoop-cloud-storage/pom.xml | 68 ++++++++++++++++++- licenses-binary/LICENSE-dom4j.txt | 39 +++++++++++ 3 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 licenses-binary/LICENSE-dom4j.txt diff --git a/LICENSE-binary b/LICENSE-binary index b61b7f3166733..714ebb7f7612c 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -203,7 +203,10 @@ -------------------------------------------------------------------------------- This project bundles some components that are also licensed under the Apache -License Version 2.0: +License Version 2.0. +Note: some of the listed artifacts may not be included in a given build of the binary +distribution; it depends on the build options. This list intends +to be inclusive of all which may be included: hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/nvd3-1.8.5.* (css and js files) @@ -215,6 +218,7 @@ com.aliyun:aliyun-java-sdk-core:4.5.10 com.aliyun:aliyun-java-sdk-kms:2.11.0 com.aliyun:aliyun-java-sdk-ram:3.1.0 com.aliyun:aliyun-java-sdk-sts:3.0.0 +com.aliyun:java-trace-api:0.2.11-beta.jar com.aliyun.oss:aliyun-sdk-oss:3.13.2 com.cedarsoftware:java-util:1.9.0 com.cedarsoftware:json-io:2.5.1 @@ -268,8 +272,13 @@ com.google.http-client:google-http-client-jackson2:1.46.3 com.google.http-client:google-http-client:1.46.3 com.google.j2objc:j2objc-annotations:3.0.0 com.google.oauth-client:google-oauth-client:1.37.0 +com.huaweicloud:esdk-obs-java:3.20.4.2 +com.jamesmurty.utils:java-xmlbuilder-1.2.jar com.microsoft.azure:azure-storage:7.0.0 com.nimbusds:nimbus-jose-jwt:10.4 +com.squareup.okhttp3:okhttp:jar:3.14.2 +com.squareup.okio:okio:jar:1.17.2 +com.volcengine:ve-tos-java-sdk-hadoop:2.8.9.jar com.zaxxer:HikariCP:4.0.3 commons-beanutils:commons-beanutils:1.9.4 
commons-cli:commons-cli:1.9.0 @@ -346,6 +355,9 @@ io.opentelemetry:opentelemetry-sdk-logs:1.47.0 io.opentelemetry:opentelemetry-sdk-metrics:1.47.0 io.opentelemetry:opentelemetry-sdk-trace:1.47.0 io.opentelemetry.semconv:opentelemetry-semconv:1.29.0-alpha +io.opentracing:opentracing-api:0.33.0.jar +io.opentracing:opentracing-noop:0.33.0.jar +io.opentracing:opentracing-util:0.33.0.jar io.reactivex:rxjava:1.3.8 io.reactivex:rxjava-string:1.1.1 io.reactivex:rxnetty:0.4.20 @@ -496,6 +508,7 @@ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanage bootstrap v3.3.6 broccoli-asset-rev v2.4.2 broccoli-funnel v1.0.1 +cos_api-bundle-5.6.19.jar datatables v1.11.5 em-helpers v0.5.13 em-table v0.1.6 @@ -552,6 +565,7 @@ org.codehaus.mojo:animal-sniffer-annotations:1.24 org.jruby.jcodings:jcodings:1.0.13 org.jruby.joni:joni:2.1.2 org.ojalgo:ojalgo:43.0 +org.reactivestreams:reactive-streams:1.0.3.jar org.slf4j:jul-to-slf4j:1.7.36 org.slf4j:slf4j-api:1.7.36 org.slf4j:slf4j-reload4j:1.7.36 @@ -622,3 +636,8 @@ Public Domain ------------- aopalliance:aopalliance:1.0 + +Dom4J license +------------- + +org.dom4j:dom4j:2.1.4.jar \ No newline at end of file diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml index 10eda1d4b6814..69fccb32434f9 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml @@ -99,6 +99,12 @@ org.apache.hadoop hadoop-aliyun compile + + + * + * + + org.apache.hadoop @@ -119,6 +125,12 @@ org.apache.hadoop hadoop-cos compile + + + * + * + + org.apache.hadoop @@ -126,7 +138,7 @@ compile - com.fasterxml.jackson + * * @@ -169,7 +181,10 @@ + + + dist @@ -208,5 +223,56 @@ + + + + huaweicloud + + false + + + + org.apache.hadoop + hadoop-huaweicloud + compile + + + com.fasterxml.jackson + * + + + + + + + + + cos + + false + + + + org.apache.hadoop + hadoop-cos + compile + + + + + + aliyun + + 
false + + + + org.apache.hadoop + hadoop-aliyun + compile + + + + diff --git a/licenses-binary/LICENSE-dom4j.txt b/licenses-binary/LICENSE-dom4j.txt new file mode 100644 index 0000000000000..1a02acb149474 --- /dev/null +++ b/licenses-binary/LICENSE-dom4j.txt @@ -0,0 +1,39 @@ +Copyright 2001-2023 © MetaStuff, Ltd. and DOM4J contributors. All Rights Reserved. + +Redistribution and use of this software and associated documentation +("Software"), with or without modification, are permitted provided +that the following conditions are met: + +1. Redistributions of source code must retain copyright + statements and notices. Redistributions must also contain a + copy of this document. + +2. Redistributions in binary form must reproduce the + above copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. + +3. The name "DOM4J" must not be used to endorse or promote + products derived from this Software without prior written + permission of MetaStuff, Ltd. For written permission, + please contact dom4j-info@metastuff.com. + +4. Products derived from this Software may not be called "DOM4J" + nor may "DOM4J" appear in their names without prior written + permission of MetaStuff, Ltd. DOM4J is a registered + trademark of MetaStuff, Ltd. + +5. Due credit should be given to the DOM4J Project - https://dom4j.github.io/ + +THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT +NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +METASTUFF, LTD. 
OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. From fcdcbd725c318f69f16c6a15c3b5a5885198c1c5 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 26 Sep 2025 10:40:31 +0100 Subject: [PATCH 03/10] HADOOP-19708 volcano tos: remove shading * Unshade tos * explicit declaration of apache http dependencies, with excludes as needed * updated LICENSE-binary --- LICENSE-binary | 2 + .../hadoop-cloud-storage/pom.xml | 48 ++++++++--- .../hadoop-tos/pom.xml | 82 +++++++++++++------ hadoop-project/pom.xml | 24 ++++++ 4 files changed, 117 insertions(+), 39 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 714ebb7f7612c..15febb07c48da 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -385,6 +385,8 @@ org.apache.htrace:htrace-core:3.1.0-incubating org.apache.htrace:htrace-core4:4.1.0-incubating org.apache.httpcomponents:httpclient:4.5.13 org.apache.httpcomponents:httpcore:4.4.13 +org.apache.httpcomponents.client5:httpclient5:5.5 +org.apache.httpcomponents.core5:httpcore5:5.5 org.apache.kafka:kafka-clients:3.9.0 org.apache.kerby:kerb-admin:2.0.3 org.apache.kerby:kerb-client:2.0.3 diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml index 69fccb32434f9..1cd932d2dd879 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml @@ -95,17 +95,7 @@ - - org.apache.hadoop - hadoop-aliyun - compile - - - * - * - - - + org.apache.hadoop hadoop-aws @@ -147,6 +137,12 @@ org.apache.hadoop 
hadoop-tos compile + + + * + * + + org.apache.hadoop @@ -259,7 +255,7 @@ - + aliyun @@ -273,6 +269,34 @@ + + + adls + + false + + + + org.apache.hadoop + hadoop-azure-datalake + compile + + + + + + tos + + false + + + + org.apache.hadoop + hadoop-tos + compile + + + diff --git a/hadoop-cloud-storage-project/hadoop-tos/pom.xml b/hadoop-cloud-storage-project/hadoop-tos/pom.xml index cea293dd7869a..63f63653da758 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-tos/pom.xml @@ -48,11 +48,27 @@ hadoop-mapreduce-client-core provided + + org.apache.httpcomponents.client5 + httpclient5 + + + org.apache.httpcomponents.core5 + httpcore5 + com.volcengine ve-tos-java-sdk-hadoop ${ve-tos-java-sdk.version} + + org.apache.httpcomponents.client5 + httpclient5 + + + org.apache.httpcomponents.core5 + httpcore5 + org.slf4j slf4j-api @@ -153,32 +169,6 @@ - - org.apache.maven.plugins - maven-shade-plugin - - - package - - shade - - - true - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - com.github.spotbugs spotbugs-maven-plugin @@ -238,5 +228,43 @@ - + + + + shade-tos + + false + + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + true + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + mozilla/public-suffix-list.txt + + + + + + + + + + + diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 9967f3d79c9cb..b2538f48d78e2 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -77,6 +77,8 @@ 4.5.13 4.4.13 + 5.5 + 5.3.6 1.7.36 @@ -857,6 +859,28 @@ httpcore ${httpcore.version} + + org.apache.httpcomponents.client5 + httpclient5 + ${httpclient5.version} + + + org.slf4j + * + + + + + org.apache.httpcomponents.core5 + httpcore5 + ${httpcore5.version} + + + org.apache.logging.log4j + * + + + commons-codec commons-codec From f8c3d7e1ebdb17a7be6513691e8f9408476da1f6 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 9 Oct 2025 16:54:14 +0100 
Subject: [PATCH 04/10] HADOOP-19696. Moving cloud storage dist into its own module This ensures that anything done dependency-wise for packaging doesn't impact the hadoop-cloud-storage module and any downstream uses. --- .../assemblies/hadoop-cloud-storage.xml | 8 +- .../hadoop-cloud-storage-dist/pom.xml | 303 ++++++++++++++++++ .../hadoop-cloud-storage/pom.xml | 163 +--------- hadoop-cloud-storage-project/pom.xml | 1 + 4 files changed, 315 insertions(+), 160 deletions(-) create mode 100644 hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml index de668d1516a4a..3482edd3c8bc7 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-cloud-storage.xml @@ -6,7 +6,7 @@ (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the Li2cense is distributed on an "AS IS" BASIS, @@ -16,13 +16,17 @@ --> + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 https://maven.apache.org/xsd/assembly-1.1.3.xsd"> hadoop-cloud-storage dir false + ../../hadoop-tools/hadoop-aws/src/main/bin diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml new file mode 100644 index 0000000000000..7f641ec0cedff --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml @@ -0,0 +1,303 @@ + + + + 4.0.0 + + org.apache.hadoop + hadoop-project + 3.5.0-SNAPSHOT + ../../hadoop-project + + hadoop-cloud-storage-dist + 3.5.0-SNAPSHOT + jar + + Apache Hadoop Cloud Storage Distribution + Apache Hadoop Cloud Storage Distribution + + + + cloud-storage + + + + + org.apache.hadoop + hadoop-annotations + provided + + + org.apache.hadoop + hadoop-common + provided + + + + + org.apache.hadoop + hadoop-aws + compile + + + software.amazon.awssdk + * + + + + + + org.apache.hadoop + hadoop-azure + compile + + + org.apache.hadoop + hadoop-azure-datalake + compile + + + * + * + + + + + org.apache.hadoop + hadoop-cos + compile + + + * + * + + + + + org.apache.hadoop + hadoop-huaweicloud + compile + + + * + * + + + + + org.apache.hadoop + hadoop-tos + compile + + + * + * + + + + + org.apache.hadoop + hadoop-gcp + compile + + + + * + * + + + + + + + + + maven-deploy-plugin + + true + + + + org.apache.rat + apache-rat-plugin + + + + + + + + + + + dist + + false + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + org.apache.hadoop + hadoop-assemblies + ${project.version} + + + + + dist + prepare-package + + single + + + false + false + 
${project.artifactId}-${project.version} + + hadoop-cloud-storage + + + + + + + + + + + + + hadoop-aliyun-dependencies + + false + + + + org.apache.hadoop + hadoop-aliyun + compile + + + + + + + hadoop-aws-dependencies + + false + + + + org.apache.hadoop + hadoop-aws + compile + + + + + + + hadoop-cos-dependencies + + false + + + + org.apache.hadoop + hadoop-cos + compile + + + + + + + hadoop-azure-datalake-dependencies + + false + + + + org.apache.hadoop + hadoop-azure-datalake + compile + + + + + + + hadoop-huaweicloud-dependencies + + false + + + + org.apache.hadoop + hadoop-huaweicloud + compile + + + com.fasterxml.jackson + * + + + + + + + + + hadoop-tos-dependencies + + false + + + + org.apache.hadoop + hadoop-tos + compile + + + + + + diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml index 1cd932d2dd879..2df2cd3d9242d 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml @@ -95,7 +95,11 @@ - + + org.apache.hadoop + hadoop-aliyun + compile + org.apache.hadoop hadoop-aws @@ -115,34 +119,16 @@ org.apache.hadoop hadoop-cos compile - - - * - * - - org.apache.hadoop hadoop-huaweicloud compile - - - * - * - - org.apache.hadoop hadoop-tos compile - - - * - * - - org.apache.hadoop @@ -160,143 +146,4 @@ - - - - - maven-deploy-plugin - - true - - - - org.apache.rat - apache-rat-plugin - - - - - - - - - - - dist - - false - - - - - org.apache.maven.plugins - maven-assembly-plugin - - - org.apache.hadoop - hadoop-assemblies - ${project.version} - - - - - dist - prepare-package - - single - - - false - false - ${project.artifactId}-${project.version} - - hadoop-cloud-storage - - - - - - - - - - - - huaweicloud - - false - - - - org.apache.hadoop - hadoop-huaweicloud - compile - - - com.fasterxml.jackson - * - - - - - - - - - cos - - false - - - - org.apache.hadoop - hadoop-cos - compile - - - - - - aliyun - - 
false - - - - org.apache.hadoop - hadoop-aliyun - compile - - - - - - adls - - false - - - - org.apache.hadoop - hadoop-azure-datalake - compile - - - - - - tos - - false - - - - org.apache.hadoop - hadoop-tos - compile - - - - - diff --git a/hadoop-cloud-storage-project/pom.xml b/hadoop-cloud-storage-project/pom.xml index 036dfa3a699a9..93da918fb9bc4 100644 --- a/hadoop-cloud-storage-project/pom.xml +++ b/hadoop-cloud-storage-project/pom.xml @@ -34,6 +34,7 @@ hadoop-cos hadoop-huaweicloud hadoop-tos + hadoop-cloud-storage-dist From dadaafdf419c26f592724a91e318b9834a7faf60 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 9 Oct 2025 18:38:09 +0100 Subject: [PATCH 05/10] HADOOP-19696. module hadoop-cloud-storage-dist for the distribution This ensures that anything done dependency-wise for packaging doesn't impact the hadoop-cloud-storage module and any downstream uses. - separate profile for each component to pull in all dependencies - hadoop-azure is always included, hadoop-aws *except* bundle.jar - hadoop-gcp and hadoop-tos are complete iff shaded - updated BUILDING.txt This is enough to let anyone cut a release with their choice of functional cloud connectors. --- BUILDING.txt | 45 ++++++++++ LICENSE-binary | 7 +- dev-support/bin/dist-layout-stitching | 2 +- .../hadoop-cloud-storage-dist/pom.xml | 83 +++++++++++++------ .../hadoop-tos/pom.xml | 4 +- hadoop-project/pom.xml | 6 +- 6 files changed, 111 insertions(+), 36 deletions(-) diff --git a/BUILDING.txt b/BUILDING.txt index f87cc8d11ead2..6eb32e50b8c64 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -388,6 +388,51 @@ Create a local staging version of the website (in /tmp/hadoop-site) Note that the site needs to be built in a second pass after other artifacts. 
+---------------------------------------------------------------------------------- +Including Cloud Connector Dependencies in Distributions: + +Hadoop distributions include the hadoop modules to work with data and services +on cloud infrastructure + +However, dependencies are omitted for all cloud connectors except hadoop-azure +(abfs:// and wasb://) and possibly hadoop-gcp (gs://) and hadoop-tos (tos://). +For the latter two modules, it depends on shading options. + +For hadoop-aws the AWS SDK bundle.jar omitted, but everything else is included. + +* This keeps binary release size below the limit of apache distributions +* Reduces download and size overhead in docker usage. +* Reduces the CVE attack surface +* Reduces the risk of classpath conflict. + +To produce a build with the specific desired dependencies, the build must be executed +with the relevant profile of ${module}-package. + +For example, a build with the hadoop-aws and hadoop-azure-datalake dependencies, +run with + + mvn package -Pdist -DskipTests -Dhadoop-aws-package -Dhadoop-azure-datalake-package + +Available package profiles: + hadoop-aliyun-package + hadoop-aws-package + hadoop-azure-datalake-package + hadoop-cos-package + hadoop-gcp-package + hadoop-huaweicloud-package + hadoop-tos-package + +To build a complete distribution then with all cloud dependencies included: + +mvn package -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true \ + -Phadoop-aliyun-package,hadoop-aws-package,hadoop-azure-datalake-package\ + -Phadoop-cos-package,hadoop-gcp-package,hadoop-tos-package + +The resulting tar file will be too large to be distributable through ASF infrastructure. + +The hadoop-gcp and hadoop-tos artifacts include their dependencies unless the distribution +is built with -DskipShade. 
+ ---------------------------------------------------------------------------------- Installing Hadoop diff --git a/LICENSE-binary b/LICENSE-binary index 15febb07c48da..d0ef57d57f7b0 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -214,11 +214,12 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/data hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/checker/TimeoutFuture.java ch.qos.reload4j:reload4j:1.2.22 +com.aliyun:aliyun-java-core:0.2.11-beta com.aliyun:aliyun-java-sdk-core:4.5.10 com.aliyun:aliyun-java-sdk-kms:2.11.0 com.aliyun:aliyun-java-sdk-ram:3.1.0 com.aliyun:aliyun-java-sdk-sts:3.0.0 -com.aliyun:java-trace-api:0.2.11-beta.jar +com.aliyun:java-trace-api:0.2.11-beta com.aliyun.oss:aliyun-sdk-oss:3.13.2 com.cedarsoftware:java-util:1.9.0 com.cedarsoftware:json-io:2.5.1 @@ -274,7 +275,7 @@ com.google.j2objc:j2objc-annotations:3.0.0 com.google.oauth-client:google-oauth-client:1.37.0 com.huaweicloud:esdk-obs-java:3.20.4.2 com.jamesmurty.utils:java-xmlbuilder-1.2.jar -com.microsoft.azure:azure-storage:7.0.0 +com.microsoft.azure:azure-storage:7.0.1 com.nimbusds:nimbus-jose-jwt:10.4 com.squareup.okhttp3:okhttp:jar:3.14.2 com.squareup.okio:okio:jar:1.17.2 @@ -556,7 +557,7 @@ com.microsoft.azure:azure-cosmosdb:2.4.5 com.microsoft.azure:azure-cosmosdb-commons:2.4.5 com.microsoft.azure:azure-cosmosdb-direct:2.4.5 com.microsoft.azure:azure-cosmosdb-gateway:2.4.5 -com.microsoft.azure:azure-data-lake-store-sdk:2.3.3 +com.microsoft.azure:azure-data-lake-store-sdk:2.3.9 com.microsoft.azure:azure-keyvault-core:1.0.0 com.microsoft.sqlserver:mssql-jdbc:6.2.1.jre7 org.bouncycastle:bcpkix-jdk18on:1.82 diff --git a/dev-support/bin/dist-layout-stitching b/dev-support/bin/dist-layout-stitching index fee6b92219d4f..e19b2e4f1d621 100755 --- a/dev-support/bin/dist-layout-stitching +++ b/dev-support/bin/dist-layout-stitching @@ -132,7 +132,7 @@ run copy 
"${ROOT}/hadoop-common-project/hadoop-nfs/target/hadoop-nfs-${VERSION}" run copy "${ROOT}/hadoop-common-project/hadoop-registry/target/hadoop-registry-${VERSION}" . # cloud connectors go into common -run copy "${ROOT}/hadoop-cloud-storage-project/hadoop-cloud-storage/target/hadoop-cloud-storage-${VERSION}" . +run copy "${ROOT}/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/target/hadoop-cloud-storage-dist-${VERSION}" . run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${VERSION}" . run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-nfs/target/hadoop-hdfs-nfs-${VERSION}" . diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml index 7f641ec0cedff..74a282e4b887f 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml @@ -37,15 +37,34 @@ the Jars. By default, while hadoop-* artifacts are all included, dependencies - are omitted for nearly everything. - * keeps size down - * keeps CVE attack surface down + are omitted for all cloud connectors except hadoop-azure and + possibly hadoop-gcs and hadoop-tos modules. + For hadoop-aws the AWS SDK bundle.jar omitted, but everything else is included. + + * This keeps binary release size below the limit of apache distributions + * Reduces download and size overhead in docker usage. + * Reduces the CVE attack surface + * Reduces the risk of classpath conflict. + To produce a build with the specific desired dependencies, the build must be executed - with the relevant profile of ${module}-dependencies. + with the relevant profile of ${module}-package. 
For example, a build with the hadoop-aws and hadoop-azure-datalake dependencies, - run with - -Phadoop-aws-dependencies -Phadoop-azure-datalake-dependencies + build with -Dhadoop-aws-package -Dhadoop-azure-datalake-package + + Available package profiles: + hadoop-aliyun-package + hadoop-aws-package + hadoop-azure-datalake-package + hadoop-cos-package + hadoop-gcp-package + hadoop-huaweicloud-package + hadoop-tos-package + + To build a complete distribution then + mvn package -Pdist -DskipTests -Phadoop-aliyun-package,hadoop-aws-package,hadoop-azure-datalake-package\ + -Phadoop-cos-package,hadoop-gcp-package,hadoop-tos-package + --> cloud-storage @@ -130,10 +149,6 @@ org.apache.hadoop hadoop-gcp compile - * @@ -202,12 +217,11 @@ - - hadoop-aliyun-dependencies + hadoop-aliyun-package - false + hadoop-aliyun-package @@ -220,9 +234,9 @@ - hadoop-aws-dependencies + hadoop-aws-package - false + hadoop-aws-package @@ -233,31 +247,31 @@ - + - hadoop-cos-dependencies + hadoop-azure-datalake-package - false + hadoop-azure-datalake-package org.apache.hadoop - hadoop-cos + hadoop-azure-datalake compile - + - hadoop-azure-datalake-dependencies + hadoop-cos-package - false + hadoop-cos-package org.apache.hadoop - hadoop-azure-datalake + hadoop-cos compile @@ -265,9 +279,9 @@ - hadoop-huaweicloud-dependencies + hadoop-huaweicloud-package - false + hadoop-huaweicloud-package @@ -284,11 +298,26 @@ + + + hadoop-gcp-package + + hadoop-gcp-package + + + + org.apache.hadoop + hadoop-gcp + compile + + + + - hadoop-tos-dependencies + hadoop-tos-package - false + hadoop-tos-package diff --git a/hadoop-cloud-storage-project/hadoop-tos/pom.xml b/hadoop-cloud-storage-project/hadoop-tos/pom.xml index 63f63653da758..4495189da8fba 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-tos/pom.xml @@ -231,9 +231,9 @@ - shade-tos + shade - false + !skipShade diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 
b2538f48d78e2..a53f1ed880798 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -2181,9 +2181,9 @@ 2.4.4 - com.google.cloud - google-cloud-storage - 2.52.0 + com.google.cloud + google-cloud-storage + 2.52.0 From 5cc9a3d5b8470cd1f59c979ba197e246ac29ee94 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 9 Oct 2025 20:10:28 +0100 Subject: [PATCH 06/10] HADOOP-19696. change docs to use -D over -P --- BUILDING.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BUILDING.txt b/BUILDING.txt index 6eb32e50b8c64..cdc3b15dd6929 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -425,8 +425,8 @@ Available package profiles: To build a complete distribution then with all cloud dependencies included: mvn package -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true \ - -Phadoop-aliyun-package,hadoop-aws-package,hadoop-azure-datalake-package\ - -Phadoop-cos-package,hadoop-gcp-package,hadoop-tos-package + -Dhadoop-aliyun-package -Dhadoop-aws-package -Dhadoop-azure-datalake-package\ + -Dhadoop-cos-package -Dhadoop-gcp-package -Dhadoop-tos-package The resulting tar file will be too large to be distributable through ASF infrastructure. 
From 705064155d9f7584803104116eea0ec62b13528e Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 14 Oct 2025 11:59:33 +0100 Subject: [PATCH 07/10] HADOOP-19696 remove hadoop-gcp service declaration; core-default.xml works --- .../services/org.apache.hadoop.fs.FileSystem | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 hadoop-tools/hadoop-gcp/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem diff --git a/hadoop-tools/hadoop-gcp/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/hadoop-tools/hadoop-gcp/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem deleted file mode 100644 index a727523db3c7a..0000000000000 --- a/hadoop-tools/hadoop-gcp/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -org.apache.hadoop.fs.gs.GoogleHadoopFileSystem From 6a62615ec2591df01c8af75bd0cc48721cc3907d Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 14 Oct 2025 12:05:05 +0100 Subject: [PATCH 08/10] HADOOP-19696 remove hadoop-huaweicloud service declaration; use core-default.xml --- .../services/org.apache.hadoop.fs.FileSystem | 16 ---------------- .../src/main/resources/core-default.xml | 6 ++++++ .../conf/TestCommonConfigurationFields.java | 2 ++ 3 files changed, 8 insertions(+), 16 deletions(-) delete mode 100644 hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem deleted file mode 100644 index e77425ab52989..0000000000000 --- a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -org.apache.hadoop.fs.obs.OBSFileSystem diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 76664ea72d277..3c14cda56844f 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -4695,4 +4695,10 @@ The switch to turn S3A auditing on or off. + + fs.obs.impl + org.apache.hadoop.fs.obs.OBSFileSystem + The FileSystem for obs: uris. + + diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java index 5dd4d4702a760..bdbaa2438979a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java @@ -257,5 +257,7 @@ public void initializeMemberVariables() { xmlPropsToSkipCompare.add("io.seqfile.local.dir"); xmlPropsToSkipCompare.add("hadoop.http.sni.host.check.enabled"); + + xmlPrefixToSkipCompare.add("fs.obs."); } } From 3a0cecb493d2105d5e12b0ebf80cabc2c1142325 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 14 Oct 2025 16:54:44 +0100 Subject: [PATCH 09/10] HADOOP-19696. improve docs. --- BUILDING.txt | 21 ++++++++++++------- .../hadoop-cloud-storage-dist/pom.xml | 13 +++++++++--- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/BUILDING.txt b/BUILDING.txt index cdc3b15dd6929..674bf01bf2a21 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -391,16 +391,18 @@ Note that the site needs to be built in a second pass after other artifacts. 
---------------------------------------------------------------------------------- Including Cloud Connector Dependencies in Distributions: -Hadoop distributions include the hadoop modules to work with data and services +Hadoop distributions include the hadoop modules need to work with data and services on cloud infrastructure However, dependencies are omitted for all cloud connectors except hadoop-azure (abfs:// and wasb://) and possibly hadoop-gcp (gs://) and hadoop-tos (tos://). For the latter two modules, it depends on shading options. -For hadoop-aws the AWS SDK bundle.jar omitted, but everything else is included. +For hadoop-aws the AWS SDK bundle.jar is omitted, but everything else is included. -* This keeps binary release size below the limit of apache distributions + +Excluding the extra binaries: +* Keeps release artifact size below the limit of the ASF distribution network * Reduces download and size overhead in docker usage. * Reduces the CVE attack surface * Reduces the risk of classpath conflict. @@ -425,13 +427,18 @@ Available package profiles: To build a complete distribution then with all cloud dependencies included: mvn package -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true \ - -Dhadoop-aliyun-package -Dhadoop-aws-package -Dhadoop-azure-datalake-package\ - -Dhadoop-cos-package -Dhadoop-gcp-package -Dhadoop-tos-package + -Dhadoop-aliyun-package \ + -Dhadoop-aws-package \ + -Dhadoop-azure-datalake-package \ + -Dhadoop-cos-package \ + -Dhadoop-gcp-package \ + -Dhadoop-huaweicloud-package \ + -Dhadoop-tos-package The resulting tar file will be too large to be distributable through ASF infrastructure. -The hadoop-gcp and hadoop-tos artifacts include their dependencies unless the distribution -is built with -DskipShade. +The hadoop-gcp and hadoop-tos artifacts include their dependencies as shaded +artifacts unless the distribution is built with -DskipShade. 
---------------------------------------------------------------------------------- Installing Hadoop diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml index 74a282e4b887f..46e7595b4215e 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage-dist/pom.xml @@ -61,9 +61,16 @@ hadoop-huaweicloud-package hadoop-tos-package - To build a complete distribution then - mvn package -Pdist -DskipTests -Phadoop-aliyun-package,hadoop-aws-package,hadoop-azure-datalake-package\ - -Phadoop-cos-package,hadoop-gcp-package,hadoop-tos-package + To build a complete distribution then: + +mvn package -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true \ + -Dhadoop-aliyun-package \ + -Dhadoop-aws-package \ + -Dhadoop-azure-datalake-package \ + -Dhadoop-cos-package \ + -Dhadoop-gcp-package \ + -Dhadoop-huaweicloud-package \ + -Dhadoop-tos-package --> From e39cd338876b40caf949638150c9e2a34bda2d68 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 21 Oct 2025 11:55:06 +0100 Subject: [PATCH 10/10] HADOOP-19696. PR Feedback The hadoop-cloud-storage-dist module is now only executed when the dist profile is set. --- BUILDING.txt | 9 ++++----- hadoop-cloud-storage-project/pom.xml | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/BUILDING.txt b/BUILDING.txt index 674bf01bf2a21..4aa4f89553b25 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -391,7 +391,7 @@ Note that the site needs to be built in a second pass after other artifacts. 
---------------------------------------------------------------------------------- Including Cloud Connector Dependencies in Distributions: -Hadoop distributions include the hadoop modules need to work with data and services +Hadoop distributions include the hadoop modules needed to work with data and services on cloud infrastructure However, dependencies are omitted for all cloud connectors except hadoop-azure @@ -400,15 +400,14 @@ For the latter two modules, it depends on shading options. For hadoop-aws the AWS SDK bundle.jar is omitted, but everything else is included. - Excluding the extra binaries: -* Keeps release artifact size below the limit of the ASF distribution network +* Keeps release artifact size below the limit of the ASF distribution network. * Reduces download and size overhead in docker usage. -* Reduces the CVE attack surface +* Reduces the CVE attack surface and audit-related complaints about those same CVEs. * Reduces the risk of classpath conflict. To produce a build with the specific desired dependencies, the build must be executed -with the relevant profile of ${module}-package. +with the relevant profile of ${module}-package alongside the -Pdist profile. For example, a build with the hadoop-aws and hadoop-azure-datalake dependencies, run with diff --git a/hadoop-cloud-storage-project/pom.xml b/hadoop-cloud-storage-project/pom.xml index 93da918fb9bc4..19dade2f0ffde 100644 --- a/hadoop-cloud-storage-project/pom.xml +++ b/hadoop-cloud-storage-project/pom.xml @@ -34,7 +34,6 @@ hadoop-cos hadoop-huaweicloud hadoop-tos - hadoop-cloud-storage-dist @@ -53,5 +52,18 @@ - + + + dist + + false + + tar + + + + hadoop-cloud-storage-dist + + +