diff --git a/3.2.0/Dockerfile b/3.2.0/Dockerfile
new file mode 100644
index 0000000..59c4cc8
--- /dev/null
+++ b/3.2.0/Dockerfile
@@ -0,0 +1,105 @@
+# Hadoop 3.2.0 image providing both HDFS and YARN. The daemon a container runs
+# is selected by its command; see docker-compose.yml.
+#
+# NOTE(review): ubuntu:23.10 looks to be past end-of-life, in which case
+# "apt-get update" no longer works against the default mirrors. Confirm, and if
+# so move to a supported release that still provides openjdk-8-jdk and libzip4.
+FROM ubuntu:23.10
+
+
+####################
+# JAVA
+####################
+
+ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
+
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends openjdk-8-jdk && \
+    rm -rf /var/lib/apt/lists/*
+
+
+
+####################
+# HADOOP
+####################
+
+ENV HADOOP_VERSION=3.2.0 \
+    HADOOP_HOME=/usr/local/hadoop \
+    HADOOP_OPTS=-Djava.library.path=/usr/local/hadoop/lib/native
+# PATH gets its own instruction: ENV only expands variables that were defined
+# by an earlier instruction, so HADOOP_HOME must already be set here.
+ENV PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
+
+# Users the YARN daemons run as. NOTE(review): presumably required because the
+# Hadoop 3 launcher scripts refuse to start daemons as root otherwise - confirm.
+ENV YARN_RESOURCEMANAGER_USER=root \
+    YARN_NODEMANAGER_USER=root
+#ENV HADOOP_SECURE_DN_USER root
+
+# Download and unpack the release in a single layer so that neither the tarball
+# nor wget ends up in the image. Absolute paths keep the step independent of
+# the working directory.
+# NOTE(review): the tarball is not integrity-checked; consider pinning and
+# verifying its published SHA-512.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        ca-certificates \
+        libsnappy1v5 \
+        libssl-dev \
+        libzip4 \
+        wget && \
+    wget -nv -O /tmp/hadoop.tar.gz \
+        "https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" && \
+    tar -xzf /tmp/hadoop.tar.gz -C /usr/local && \
+    rm /tmp/hadoop.tar.gz && \
+    mv "/usr/local/hadoop-$HADOOP_VERSION" "$HADOOP_HOME" && \
+    mkdir -p "$HADOOP_HOME/logs" && \
+    apt-get purge -y --auto-remove wget && \
+    rm -rf /var/lib/apt/lists/*
+
+
+# Overwrite default HADOOP configuration files with our config files
+COPY conf $HADOOP_HOME/etc/hadoop/
+
+# Format HDFS at build time so the image ships with an initialised NameNode
+# directory. VOLUME is declared only afterwards: anything written to /data by
+# a later build step would be discarded.
+RUN mkdir -p /data/dfs/data /data/dfs/name /data/dfs/namesecondary && \
+    hdfs namenode -format
+VOLUME /data
+
+
+# Helper script for starting YARN. The mode is set explicitly so the script is
+# executable whatever permissions it carries in the build context.
+COPY start-yarn.sh /usr/local/bin/start-yarn.sh
+RUN chmod 0755 /usr/local/bin/start-yarn.sh
+
+
+
+####################
+# PORTS
+####################
+#
+# http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_HDP_Reference_Guide/content/reference_chap2.html
+# http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cdh_ig_ports_cdh5.html
+# http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml
+# http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml
+#
+# HDFS: NameNode (NN):
+#    9000 = fs.defaultFS (IPC / File system metadata operations),
+#           as configured in conf/core-site.xml
+#    9870 = dfs.namenode.http-address (HTTP / Web UI)
+#    9871 = dfs.namenode.https-address (HTTPS / Secure UI; not exposed below)
+# HDFS: DataNode (DN):
+#    9866 = dfs.datanode.address (Data transfer)
+#    9867 = dfs.datanode.ipc.address (IPC / metadata operations)
+#    9864 = dfs.datanode.http.address (HTTP / Web UI)
+# HDFS: Secondary NameNode (SNN)
+#    9868 = dfs.namenode.secondary.http-address (HTTP / Checkpoint for NameNode metadata)
+# YARN: ResourceManager (RM):
+#    8088 = yarn.resourcemanager.webapp.address (HTTP / Web UI)
+EXPOSE 9000 9870 9866 9867 9864 9868 8088
+
+
+
+CMD ["hdfs"]
diff --git a/3.2.0/conf/core-site.xml b/3.2.0/conf/core-site.xml
new file mode 100644
index 0000000..e39c33b
--- /dev/null
+++ b/3.2.0/conf/core-site.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+  <!-- Default file system: the NameNode RPC endpoint. "fs.defaultFS" replaces
+       the deprecated "fs.default.name" key. -->
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://hdfs-namenode:9000</value>
+  </property>
+</configuration>
diff --git a/3.2.0/conf/hdfs-site.xml b/3.2.0/conf/hdfs-site.xml
new file mode 100644
index 0000000..f23376d
--- /dev/null
+++ b/3.2.0/conf/hdfs-site.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+  <property>
+    <name>dfs.replication</name>
+    <value>2</value>
+  </property>
+  <!-- Storage directories; all live under the /data volume of the image. -->
+  <property>
+    <name>dfs.datanode.data.dir</name>
+    <value>file:///data/dfs/data</value>
+  </property>
+  <property>
+    <name>dfs.namenode.name.dir</name>
+    <value>file:///data/dfs/name</value>
+  </property>
+  <property>
+    <name>dfs.namenode.checkpoint.dir</name>
+    <value>file:///data/dfs/namesecondary</value>
+  </property>
+  <property>
+    <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
+    <value>false</value>
+  </property>
+</configuration>
diff --git a/3.2.0/conf/mapred-site.xml b/3.2.0/conf/mapred-site.xml
new file mode 100644
index 0000000..85078af
--- /dev/null
+++ b/3.2.0/conf/mapred-site.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+  <property>
+    <name>mapreduce.framework.name</name>
+    <value>yarn</value>
+  </property>
+</configuration>
diff --git a/3.2.0/conf/yarn-site.xml b/3.2.0/conf/yarn-site.xml
new file mode 100644
index 0000000..14757ee
--- /dev/null
+++ b/3.2.0/conf/yarn-site.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<configuration>
+  <property>
+    <name>yarn.nodemanager.aux-services</name>
+    <value>mapreduce_shuffle</value>
+  </property>
+  <!-- The key must embed the aux-service name exactly as listed above
+       ("mapreduce_shuffle"), otherwise the setting is not picked up. -->
+  <property>
+    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
+    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.hostname</name>
+    <value>hdfs-namenode</value>
+  </property>
+</configuration>
diff --git a/3.2.0/docker-compose.yml b/3.2.0/docker-compose.yml
new file mode 100644
index 0000000..6a03880
--- /dev/null
+++ b/3.2.0/docker-compose.yml
@@ -0,0 +1,33 @@
+# One NameNode (+ ResourceManager), scalable DataNodes (+ NodeManager) and a
+# SecondaryNameNode, all started from the same image with different commands.
+version: "3.7"
+services:
+  namenode:
+    image: cjj2010/hadoop:3.2.0
+    ports:
+      - "9870:9870"
+      - "8088:8088"
+    command: >
+      bash -c "hdfs namenode & yarn resourcemanager "
+    hostname: hdfs-namenode
+
+  datanode:
+    image: cjj2010/hadoop:3.2.0
+    depends_on:
+      - namenode
+    command: >
+      bash -c "hdfs datanode & yarn nodemanager "
+    ports:
+      # The host port is randomly assigned by Docker, to allow scaling to multiple DataNodes on the same host
+      - "9864"
+    links:
+      - namenode:hdfs-namenode
+
+
+  secondarynamenode:
+    image: cjj2010/hadoop:3.2.0
+    command: hdfs secondarynamenode
+    ports:
+      - "9868:9868"
+    links:
+      - namenode:hdfs-namenode
diff --git a/3.2.0/start-yarn.sh b/3.2.0/start-yarn.sh
new file mode 100755
index 0000000..b17aa8b
--- /dev/null
+++ b/3.2.0/start-yarn.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Runs both YARN daemons in one container: the ResourceManager in the
+# background, the NodeManager in the foreground.
+
+yarn resourcemanager &
+# exec replaces the shell so signals sent to this script's process reach the
+# NodeManager directly.
+exec yarn nodemanager