Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions 3.2.0/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Ubuntu 22.04 LTS is used instead of the interim 23.10 release: 23.10 is past
# end of life, so `apt-get update` against its archive no longer works. 22.04
# provides the same package names installed below (openjdk-8-jdk, libzip4,
# libsnappy1v5, libssl-dev).
FROM ubuntu:22.04


####################
# JAVA
####################

# Location of the OpenJDK 8 installation provided by the package installed below.
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

# Install the JDK and drop the apt package lists in the same layer so they do
# not end up in the image. DEBIAN_FRONTEND is set inline only, so it is not
# baked into the runtime environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        openjdk-8-jdk && \
    rm -rf /var/lib/apt/lists/*



####################
# HADOOP
####################

# Hadoop release to download and the directory it is installed into.
ENV HADOOP_VERSION=3.2.0
ENV HADOOP_HOME=/usr/local/hadoop
# JVM option pointing at the native libraries inside the Hadoop install.
# Declared in its own ENV instruction so ${HADOOP_HOME} is already defined.
ENV HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib/native"
# Make the launchers in bin/ and sbin/ callable without a full path.
ENV PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# User names for the YARN daemons (the image runs everything as root).
# NOTE(review): presumably read by Hadoop's launcher scripts -- confirm.
ENV YARN_RESOURCEMANAGER_USER=root
#ENV HADOOP_SECURE_DN_USER root
ENV YARN_NODEMANAGER_USER=root

# Download the Hadoop release tarball and unpack it to $HADOOP_HOME.
# - The archive is fetched over HTTPS (ca-certificates lets wget validate the
#   server certificate) rather than plaintext HTTP.
# - All paths are absolute, so the step does not depend on the working directory.
# - wget is only needed for the download and is removed again in the same layer;
#   libsnappy1v5, libssl-dev and libzip4 stay in the image.
# NOTE(review): the tarball is not checked against a published SHA-512 digest;
# consider pinning one for reproducible, tamper-evident builds.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        libsnappy1v5 \
        libssl-dev \
        libzip4 \
        wget && \
    wget -O /tmp/hadoop.tar.gz \
        "https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" && \
    apt-get purge -y wget && \
    rm -rf /var/lib/apt/lists/* && \
    tar -zxf /tmp/hadoop.tar.gz -C /usr/local && \
    rm /tmp/hadoop.tar.gz && \
    mv "/usr/local/hadoop-$HADOOP_VERSION" "$HADOOP_HOME" && \
    mkdir -p "$HADOOP_HOME/logs"


# Replace the stock Hadoop configuration with the files from ./conf
COPY conf/ $HADOOP_HOME/etc/hadoop/

# Create the HDFS storage directories and format the NameNode at build time.
# VOLUME is declared only afterwards, so the directories written here are kept.
RUN mkdir -p \
        /data/dfs/data \
        /data/dfs/name \
        /data/dfs/namesecondary && \
    hdfs namenode -format
VOLUME ["/data"]


# Helper script for starting YARN.
# COPY is used instead of ADD: this is a plain local file, so none of ADD's
# extra behaviour (archive extraction, URL fetching) is wanted.
COPY start-yarn.sh /usr/local/bin/start-yarn.sh



####################
# PORTS
####################
#
# http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_HDP_Reference_Guide/content/reference_chap2.html
# http://www.cloudera.com/content/cloudera/en/documentation/core/latest/topics/cdh_ig_ports_cdh5.html
# http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml
# http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml

# HDFS: NameNode (NN):
#   9000 = port of the NameNode URI set in conf/core-site.xml (IPC / file system metadata operations)
#          (9820 appears in some Hadoop 3 references as an alternative; it is not used or exposed here)
#   9870 = dfs.namenode.http-address (HTTP / Web UI)
#   9871 = dfs.namenode.https-address (HTTPS / Secure UI) -- not exposed
# HDFS: DataNode (DN):
#   9866 = dfs.datanode.address (Data transfer)
#   9867 = dfs.datanode.ipc.address (IPC / metadata operations)
#   9864 = dfs.datanode.http.address (HTTP / Web UI)
# HDFS: Secondary NameNode (SNN)
#   9868 = dfs.namenode.secondary.http-address (HTTP / Checkpoint for NameNode metadata)
# YARN: ResourceManager (RM)
#   8088 = ResourceManager web UI (published by the namenode service in docker-compose.yml)
EXPOSE 9000 9870 \
       9866 9867 9864 \
       9868 \
       8088



# Default command: the bare `hdfs` launcher with no sub-command.
# Every service in docker-compose.yml overrides this with an explicit command.
CMD ["hdfs"]
24 changes: 24 additions & 0 deletions 3.2.0/conf/core-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <!--
    URI of the default file system: the NameNode, reachable under the host
    name "hdfs-namenode" on port 9000.
    Uses the current key fs.defaultFS; fs.default.name is its deprecated alias.
  -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hdfs-namenode:9000</value>
  </property>
</configuration>
40 changes: 40 additions & 0 deletions 3.2.0/conf/hdfs-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <!-- Number of replicas kept for each block. -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>

  <!-- Storage locations; all live under /data, which the image declares as a volume. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///data/dfs/data</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///data/dfs/name</value>
  </property>
  <!-- No whitespace inside the value: it is a URI and should be written exactly. -->
  <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>file:///data/dfs/namesecondary</value>
  </property>

  <!--
    Do not require that a registering DataNode's address resolves to a host name.
    NOTE(review): presumably needed because container IPs have no reverse DNS; confirm.
  -->
  <property>
    <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
    <value>false</value>
  </property>
</configuration>
24 changes: 24 additions & 0 deletions 3.2.0/conf/mapred-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <!-- Run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
29 changes: 29 additions & 0 deletions 3.2.0/conf/yarn-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
  <!-- Auxiliary service run by each NodeManager: the MapReduce shuffle. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!--
    Implementation class of that service. The key must embed the service name
    exactly as declared above ("mapreduce_shuffle"); the previous spelling
    "mapreduce.shuffle" named a service that is not declared.
  -->
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>

  <!-- Host running the ResourceManager (the same container as the NameNode). -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hdfs-namenode</value>
  </property>
</configuration>
31 changes: 31 additions & 0 deletions 3.2.0/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
version: "3.7"

services:
  # HDFS NameNode and YARN ResourceManager, started together in one container.
  namenode:
    image: cjj2010/hadoop:3.2.0
    hostname: hdfs-namenode
    command: >
      bash -c "hdfs namenode & yarn resourcemanager "
    ports:
      - "9870:9870"
      - "8088:8088"

  # HDFS DataNode and YARN NodeManager, started together in one container.
  datanode:
    image: cjj2010/hadoop:3.2.0
    depends_on:
      - namenode
    links:
      - namenode:hdfs-namenode
    command: >
      bash -c "hdfs datanode & yarn nodemanager "
    ports:
      # The host port is randomly assigned by Docker, to allow scaling to multiple DataNodes on the same host
      - "9864"

  # HDFS Secondary NameNode.
  secondarynamenode:
    image: cjj2010/hadoop:3.2.0
    links:
      - namenode:hdfs-namenode
    command: hdfs secondarynamenode
    ports:
      - "9868:9868"
4 changes: 4 additions & 0 deletions 3.2.0/start-yarn.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
#
# Starts both YARN daemons in one container: the ResourceManager in the
# background and the NodeManager in the foreground.

yarn resourcemanager &

# exec replaces this shell with the NodeManager launcher, so stop signals sent
# to the script's process reach the daemon instead of an idle wrapper shell.
# The script's exit status is still the NodeManager's exit status.
exec yarn nodemanager