1. 依赖

该教程依赖 Zookeeper 集群,请先查看:

zookeeper 集群搭建

2. 下载与安装

下载 hadoop,这里选择 3.3.4

点击前往下载

上传文件到目录 /opt/software 后解压

# Extract the uploaded archive into /opt/module
tar -zxvf /opt/software/hadoop-3.3.4.tar.gz -C /opt/module

3. HDFS 高可用

3.1. 配置

配置文件所在路径:/opt/module/hadoop-3.3.4/etc/hadoop

3.1.1. hadoop-env.sh

在文件末尾加入以下内容,这里由于是测试环境,内存给的比较小

# JVM heap size for the Hadoop daemons (equivalent to -Xmx / -Xms).
# The unit defaults to MB. Kept small here because this is a test environment.
export HADOOP_HEAPSIZE_MAX=64
export HADOOP_HEAPSIZE_MIN=64

# Run every daemon as root. Only needed when the cluster is started by root;
# these can be omitted when starting as a regular user.
# Exported like the heap-size settings above so child processes see them too,
# matching the commented examples shipped in the stock hadoop-env.sh.
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_ZKFC_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root

3.1.2. core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

    <!-- 指定 zookeeper 的连接地址 -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>mitchell-101:2181,mitchell-102:2181,mitchell-103:2181</value>
    </property> 

    <!-- NameNode 集群路径 -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://mitchell</value>
    </property>
    
    <!-- 数据储存路径 -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/module/hadoop-3.3.4/data</value>
    </property>

    <!-- 配置 HDFS 网页登录使用的静态用户 -->
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>mitchell</value>
    </property>
    
    <!-- 最大连接重试次数,值太小启动集群时 namenode 可能会因为连接不上而掉线 -->
    <property>
        <name>ipc.client.connect.max.retries</name>
        <value>100</value>
    </property>
    
    <!-- 每次连接的等待时间(毫秒) -->
    <property>
        <name>ipc.client.connect.retry.interval</name>
        <value>10000</value>
    </property>

</configuration>

3.1.3. hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    
    <!-- 分布式集群名称 -->
    <property>
        <name>dfs.nameservices</name>
        <value>mitchell</value>
    </property>
    
    <!-- NameNode 集群节点 -->
    <property>
        <name>dfs.ha.namenodes.mitchell</name>
        <value>nn1,nn2,nn3</value>
    </property>
    
    <!-- NameNode 集群通信地址 -->
    <property>
        <name>dfs.namenode.rpc-address.mitchell.nn1</name>
        <value>mitchell-101:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mitchell.nn2</name>
        <value>mitchell-102:8020</value>
    </property>    
    <property>
        <name>dfs.namenode.rpc-address.mitchell.nn3</name>
        <value>mitchell-103:8020</value>
    </property>
    
     <!-- NameNode HTTP 通信地址 -->
    <property>
        <name>dfs.namenode.http-address.mitchell.nn1</name>
        <value>mitchell-101:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mitchell.nn2</name>
        <value>mitchell-102:9870</value>
    </property>    
    <property>
        <name>dfs.namenode.http-address.mitchell.nn3</name>
        <value>mitchell-103:9870</value>
    </property>
    
    <!-- JournalNode 地址 -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://mitchell-101:8485;mitchell-102:8485;mitchell-103:8485/cluster</value>
    </property>
    
    <!-- NameNode 存储名称空间和事务日志的本地文件系统上的路径 -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>${hadoop.tmp.dir}/namenode</value>
    </property>
    
    <!-- DataNode 存储数据块的本地文件系统上的路径 -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>${hadoop.tmp.dir}/datanode</value>
    </property>
    
    <!-- JournalNode 在本地磁盘存放数据的位置 -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>${hadoop.tmp.dir}/journalnode</value>
    </property>    
    
    <!-- 开启NameNode失败自动切换 -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    
    <!-- Class that HDFS clients use to locate the active NameNode and fail over.
         The property-name suffix must be the nameservice ID declared in
         dfs.nameservices ("mitchell"); with any other suffix clients find no
         failover provider for hdfs://mitchell. -->
    <property>
        <name>dfs.client.failover.proxy.provider.mitchell</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

    <!-- 配置隔离机制方法-->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    
    <!-- 使用 sshfence 隔离机制时需要 ssh 免密登录;注意:私钥应属于实际运行 ZKFC 的用户(若按上文以 root 启动,请确认该用户能读取此私钥并可凭它免密登录其他 NameNode 节点) -->
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/mitchell/.ssh/id_rsa</value>
    </property>
    
    <!-- 配置 sshfence 隔离机制超时时间 -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
    
</configuration>

3.1.4. workers

mitchell-101
mitchell-102
mitchell-103

3.2. 启动和关闭

# First-time HDFS HA bootstrap, step by step. The bracketed numbers name the
# host(s) each step runs on. Relative paths assume the working directory is
# /opt/module/hadoop-3.3.4 (assumption - adjust if you run from elsewhere).

# [101] Distribute Hadoop
xsync.sh /opt/module/hadoop-3.3.4

# [101, 102, 103] Start the JournalNodes
./bin/hdfs --daemon start journalnode

# [101] Format the NameNode
./bin/hdfs namenode -format

# [101] Start the NameNode
./bin/hdfs --daemon start namenode

# [102, 103] Pull the metadata from the running NameNode
./bin/hdfs namenode -bootstrapStandby

# [101] Initialise ZKFC state; the ZooKeeper cluster must already be running
./bin/hdfs zkfc -formatZK

# [101] Start / stop the cluster.
# The cluster-wide scripts live in sbin/, not bin/.
./sbin/start-dfs.sh
./sbin/stop-dfs.sh

# [101] Check whether the cluster started successfully
jpsall.sh

启停脚本:vim /opt/module/bin/mhadoop-hdfs.sh

#!/bin/bash
#
# Start or stop the HDFS cluster through Hadoop's sbin scripts.
#
# Usage: mhadoop-hdfs.sh start|stop
# Exit status: non-zero when the argument is missing or not recognised;
# otherwise the status of the invoked Hadoop script.

# Usage errors go to stderr and exit non-zero so callers can detect misuse.
if [ $# -lt 1 ]; then
    echo "请输入参数 start 或者 stop" >&2
    exit 1
fi

case "$1" in
"start")
    echo "=============================== 启动 hdfs 集群 ==============================="
    /opt/module/hadoop-3.3.4/sbin/start-dfs.sh
    ;;
"stop")
    echo "=============================== 关闭 hdfs 集群 ==============================="
    /opt/module/hadoop-3.3.4/sbin/stop-dfs.sh
    ;;
*)
    echo "请输入参数 start 或 stop" >&2
    exit 1
    ;;
esac

3.3. 浏览器访问

其中会有一个节点为 active 状态,两个节点处于 standby 不启用状态

http://mitchell-101:9870

http://mitchell-102:9870

http://mitchell-103:9870

4. YARN 高可用

4.1. 配置

4.1.1. yarn-site.xml

<?xml version="1.0"?>
<configuration>

    <!-- 指定 zookeeper 的连接地址 -->
    <property>
        <name>hadoop.zk.address</name>
        <value>mitchell-101:2181,mitchell-102:2181,mitchell-103:2181</value>
    </property> 

    <!-- NodeManager 上运行的辅助服务(MapReduce 的 shuffle 依赖 mapreduce_shuffle) -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    
     <!-- 启动 resource manager ha -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    
    <!-- resource manager 集群 ID -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>cluster-yarn</value>
    </property>
    
    <!-- resource manager 逻辑列表 -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2,rm3</value>
    </property>

    <!-- resource manager 主机名 -->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>mitchell-101</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>mitchell-102</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm3</name>
        <value>mitchell-103</value>
    </property>

    <!-- resource manager 内部通讯地址 -->
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>mitchell-101:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>mitchell-102:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm3</name>
        <value>mitchell-103:8032</value>
    </property>    

    <!-- resource manager web 地址 -->
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>mitchell-101:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>mitchell-102:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm3</name>
        <value>mitchell-103:8088</value>
    </property>    

    <!-- AM 申请资源的地址 -->
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>mitchell-101:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>mitchell-102:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm3</name>
        <value>mitchell-103:8030</value>
    </property>
    
    <!-- NM 连接的地址 -->
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>mitchell-101:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>mitchell-102:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm3</name>
        <value>mitchell-103:8031</value>
    </property>
    
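    <!-- 启用 ResourceManager 状态恢复(RM 重启或主备切换后从状态存储中恢复);自动故障转移由 yarn.resourcemanager.ha.automatic-failover.enabled 控制,开启 HA 后默认启用 -->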
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    
    <!-- 配置 RM 的状态信息存储方式,有 MemStore 和 ZKStore -->
    <property>
        <name>yarn.resourcemanager.store.class</name>                            
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>

</configuration>

4.2. 启动和关闭

# Distribute the configuration files
# (same xsync.sh helper used to distribute Hadoop for HDFS).
xsync.sh /opt/module/hadoop-3.3.4/etc

# Start / stop YARN. The scripts live in sbin/; the relative path assumes the
# working directory is /opt/module/hadoop-3.3.4 (assumption - adjust if needed).
./sbin/start-yarn.sh
./sbin/stop-yarn.sh

启停脚本:vim /opt/module/bin/mhadoop-yarn.sh

#!/bin/bash
#
# Start or stop the YARN cluster through Hadoop's sbin scripts.
#
# Usage: mhadoop-yarn.sh start|stop
# Exit status: non-zero when the argument is missing or not recognised;
# otherwise the status of the invoked Hadoop script.

# Usage errors go to stderr and exit non-zero so callers can detect misuse.
if [ $# -lt 1 ]; then
    echo "请输入参数 start 或者 stop" >&2
    exit 1
fi

case "$1" in
"start")
    echo "=============================== 启动 yarn 集群 ==============================="
    /opt/module/hadoop-3.3.4/sbin/start-yarn.sh
    ;;
"stop")
    echo "=============================== 关闭 yarn 集群 ==============================="
    /opt/module/hadoop-3.3.4/sbin/stop-yarn.sh
    ;;
*)
    echo "请输入参数 start 或 stop" >&2
    exit 1
    ;;
esac

4.3. 浏览器访问

http://mitchell-101:8088/

http://mitchell-102:8088/

http://mitchell-103:8088/

Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐