一、Hadoop2.7.7集群搭建

1.普通集群

一般生产环境下不会允许在root下搭建,这里我模拟在testuser下安装hadoop的过程(一些需要root操作的步骤还是需要使用root权限,如防火墙,/etc/hosts文件修改,搭建NTP服务器等)

1.1搭建前准备

以下操作为root权限操作

##防火墙停止
systemctl stop firewalld
systemctl disable firewalld

##设置主机名
hostnamectl set-hostname master1 ###(192.168.56.101)
hostnamectl set-hostname node1  ###(192.168.56.102)
hostnamectl set-hostname node2  ###(192.168.56.103)

#修改/etc/hosts文件(三台机)
cat >> /etc/hosts << EOF
192.168.56.101 master1
192.168.56.102 node1
192.168.56.103 node2
EOF

下载文件

wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz

1.2搭建NTP服务器

以下操作为root权限操作
集群最好能将三台机的时间同步先做好,避免因时间不同步导致一些问题出现,因此这里先把三台机的NTP同步环境弄好。

主ntp机器(192.168.56.101):

##主ntp机器(192.168.56.101)
yum install ntp ntpdate -y
systemctl status ntpd

#注释配置
sed -i 's/^server/#&/' /etc/ntp.conf 

cat >> /etc/ntp.conf << EOF
server 127.127.1.0 iburst
EOF

systemctl start ntpd
systemctl status ntpd
ntpq -p
systemctl enable ntpd

#注:如果已按1.1节关闭并禁用了firewalld,以下两条命令无需执行(firewalld未运行时执行会报错);仅在保留防火墙时才需要放行NTP端口
firewall-cmd --permanent --add-port=123/udp
firewall-cmd --reload

从ntp机器(192.168.56.102,192.168.56.103):

##客户端机器:
yum install ntp ntpdate -y
systemctl status ntpd

#注释配置
sed -i 's/^server/#&/' /etc/ntp.conf 

cat >> /etc/ntp.conf << EOF
server 192.168.56.101
restrict 192.168.56.101 nomodify notrap noquery
EOF

ntpdate -u 192.168.56.101

systemctl start ntpd
systemctl enable ntpd
ntpq -p

1.3ssh互信(免密码登录)

使用testuser进行安装,建立testuser账号的ssh互信,以下操作为在testuser下操作

1.##在节点(192.168.56.101)执行下面的命令:
ssh-keygen -t rsa -P '' #一路回车直到生成公钥

#从master1节点拷贝id_rsa.pub到node1主机上,并且改名为id_rsa.pub.master1,如果node1上.ssh目录不存在,就手工建一个
scp /home/testuser/.ssh/id_rsa.pub testuser@node1:/home/testuser/.ssh/id_rsa.pub.master1

#从master1节点拷贝id_rsa.pub到node2主机上,并且改名为id_rsa.pub.master1,如果node2上.ssh目录不存在,就手工建一个
scp /home/testuser/.ssh/id_rsa.pub testuser@node2:/home/testuser/.ssh/id_rsa.pub.master1

2.###在对应的主机下执行如下命令:
cat /home/testuser/.ssh/id_rsa.pub >> /home/testuser/.ssh/authorized_keys #master1主机
cat /home/testuser/.ssh/id_rsa.pub.master1 >> /home/testuser/.ssh/authorized_keys #node1,node2

3.###注意authorized_keys的权限问题
cd .ssh
chmod 600 authorized_keys

4.##测试
在master1上执行测试
ssh node1
ssh node2

1.4jdk安装

使用testuser进行安装

tar -xvf jdk-8u301-linux-x64.tar.gz -C /home/testuser

cat >> /home/testuser/.bashrc << EOF

export JAVA_HOME=/home/testuser/jdk1.8.0_301/
export PATH=\$JAVA_HOME/bin:\$PATH
export CLASSPATH=.:\$JAVA_HOME/lib/dt.jar:\$JAVA_HOME/lib/tools.jar
EOF

source /home/testuser/.bashrc

1.5Hadoop搭建(master1)

  1. 解压hadoop安装文件
tar -xvf hadoop-2.7.7.tar.gz -C /home/testuser
  2. 配置环境变量
cat >> /home/testuser/.bashrc << EOF

#hadoop environment
export HADOOP_HOME=/home/testuser/hadoop-2.7.7/
export PATH="\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH"
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF

source /home/testuser/.bashrc
  3. 修改相应的配置文件
sed -i 's/^export JAVA_HOME=${JAVA_HOME}/export JAVA_HOME=\/home\/testuser\/jdk1.8.0_301\//' /home/testuser/hadoop-2.7.7/etc/hadoop/hadoop-env.sh

cat > /home/testuser/hadoop-2.7.7/etc/hadoop/slaves << EOF
node1
node2
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/core-site.xml << EOF
<configuration>
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://master1:9000</value>
        </property>
        <property>
         <name>io.file.buffer.size</name>
         <value>131072</value>
       </property>
        <property>
                <name>hadoop.tmp.dir</name>
                <value>/home/testuser/hadoop-2.7.7/tmp</value>
        </property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/hdfs-site.xml << EOF
<configuration>
    <property>
      <name>dfs.namenode.secondary.http-address</name>
      <value>master1:50090</value>
    </property>
    <property>
      <name>dfs.replication</name>
      <value>2</value>
    </property>
    <property>
      <name>dfs.namenode.name.dir</name>
      <value>file:/home/testuser/hadoop-2.7.7/hdfs/name</value>
    </property>
    <property>
      <name>dfs.datanode.data.dir</name>
      <value>file:/home/testuser/hadoop-2.7.7/hdfs/data</value>
    </property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/mapred-site.xml << EOF
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
          <name>mapreduce.jobhistory.address</name>
          <value>master1:10020</value>
  </property>
  <property>
          <name>mapreduce.jobhistory.webapp.address</name>
          <value>master1:19888</value>
  </property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/yarn-site.xml << EOF
<?xml version="1.0"?>
<configuration>

<!-- Site specific YARN configuration properties -->
     <property>
          <name>yarn.nodemanager.aux-services</name>
          <value>mapreduce_shuffle</value>
     </property>
     <property>
           <name>yarn.resourcemanager.address</name>
           <value>master1:8032</value>
     </property>
     <property>
          <name>yarn.resourcemanager.scheduler.address</name>
          <value>master1:8030</value>
      </property>
     <property>
         <name>yarn.resourcemanager.resource-tracker.address</name>
         <value>master1:8031</value>
     </property>
     <property>
         <name>yarn.resourcemanager.admin.address</name>
         <value>master1:8033</value>
     </property>
     <property>
         <name>yarn.resourcemanager.webapp.address</name>
         <value>master1:8088</value>
     </property>

</configuration>
EOF

  4. 格式化一下namenode
####格式化一下namenode
hdfs namenode -format

5.###将文件传送至node1,node2节点

scp -r /home/testuser/hadoop-2.7.7 testuser@node1:/home/testuser
scp -r /home/testuser/hadoop-2.7.7 testuser@node2:/home/testuser

1.6Hadoop搭建(node1,node2)

修改环境变量

cat >> /home/testuser/.bashrc << EOF

#hadoop environment
export HADOOP_HOME=/home/testuser/hadoop-2.7.7/
export PATH="\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH"
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF

source /home/testuser/.bashrc

1.7Hadoop集群启动

####5.启动集群的脚本
/home/testuser/hadoop-2.7.7/sbin/start-all.sh

或
/home/testuser/hadoop-2.7.7/sbin/start-dfs.sh
/home/testuser/hadoop-2.7.7/sbin/start-yarn.sh

进程情况如下:

[testuser@master1 ~]$ jps
4257 SecondaryNameNode
4657 Jps
4406 ResourceManager
4105 NameNode

[testuser@node1 ~]$ jps
4176 NodeManager
4240 Jps
4085 DataNode

[testuser@node2 ~]$ jps
4113 Jps
4024 NodeManager
3932 DataNode
####测试(master1结点上)
cat >> wordcount.txt << EOF
Hello hadoop
hello spark
hello bigdata
EOF

hadoop fs -mkdir -p /Hadoop/Input
hadoop fs -put wordcount.txt /Hadoop/Input
hadoop jar /home/testuser/hadoop-2.7.7/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar wordcount /Hadoop/Input /Hadoop/Output
hadoop fs -cat /Hadoop/Output/*

####测试结果
[testuser@master1 ~]$ hadoop fs -cat /Hadoop/Output/*
Hello   1
bigdata 1
hadoop  1
hello   2
spark   1

hadoop集群搭建成功!

2.高可用HA集群

角色规划

master1:192.168.56.101
node1:192.168.56.102
node2:192.168.56.103

在这里插入图片描述

一般生产环境下不会允许在root下搭建,这里我模拟在testuser下安装hadoop的过程(一些需要root操作的步骤还是需要使用root权限,如防火墙,/etc/hosts文件修改,搭建NTP服务器等)

2.1搭建前准备

以下操作为root权限操作

##防火墙停止
systemctl stop firewalld
systemctl disable firewalld

##设置主机名
hostnamectl set-hostname master1 ###(192.168.56.101)
hostnamectl set-hostname node1 ###(192.168.56.102)
hostnamectl set-hostname node2 ###(192.168.56.103)

#修改/etc/hosts文件(三台机)
cat >> /etc/hosts << EOF
192.168.56.101 master1
192.168.56.102 node1
192.168.56.103 node2
EOF

下载文件

wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz

2.2搭建NTP服务器

以下操作为root权限操作

集群最好能将三台机的时间同步先做好,避免因时间不同步导致一些问题出现,因此这里先把三台机的NTP同步环境弄好。

主ntp机器(192.168.56.101):

##主ntp机器(192.168.56.101)
yum install ntp ntpdate -y
systemctl status ntpd

#注释配置
sed -i 's/^server/#&/' /etc/ntp.conf 

cat >> /etc/ntp.conf << EOF
server 127.127.1.0 iburst
EOF

systemctl start ntpd
systemctl status ntpd
ntpq -p
systemctl enable ntpd

#注:如果已按2.1节关闭并禁用了firewalld,以下两条命令无需执行(firewalld未运行时执行会报错);仅在保留防火墙时才需要放行NTP端口
firewall-cmd --permanent --add-port=123/udp
firewall-cmd --reload

从ntp机器(192.168.56.102,192.168.56.103):

##客户端机器:
yum install ntp ntpdate -y
systemctl status ntpd

#注释配置
sed -i 's/^server/#&/' /etc/ntp.conf 

cat >> /etc/ntp.conf << EOF
server 192.168.56.101
restrict 192.168.56.101 nomodify notrap noquery
EOF

ntpdate -u 192.168.56.101

systemctl start ntpd
systemctl enable ntpd
ntpq -p

2.3ssh互信(免密码登录)

使用testuser进行安装,建立testuser账号的ssh互信,以下操作为在testuser下操作

1.##在节点(192.168.56.101)执行下面的命令:
ssh-keygen -t rsa -P '' #一路回车直到生成公钥

#从master1节点拷贝id_rsa.pub到node1主机上,并且改名为id_rsa.pub.master1,如果node1上.ssh目录不存在,就手工建一个
scp /home/testuser/.ssh/id_rsa.pub testuser@node1:/home/testuser/.ssh/id_rsa.pub.master1

#从master1节点拷贝id_rsa.pub到node2主机上,并且改名为id_rsa.pub.master1,如果node2上.ssh目录不存在,就手工建一个
scp /home/testuser/.ssh/id_rsa.pub testuser@node2:/home/testuser/.ssh/id_rsa.pub.master1

2.###在对应的主机下执行如下命令:
cat /home/testuser/.ssh/id_rsa.pub >> /home/testuser/.ssh/authorized_keys #master1主机
cat /home/testuser/.ssh/id_rsa.pub.master1 >> /home/testuser/.ssh/authorized_keys #node1,node2

3.###注意authorized_keys的权限问题
cd .ssh
chmod 600 authorized_keys

4.##测试
在master1上执行测试
ssh node1
ssh node2

2.4jdk安装

tar -xvf jdk-8u301-linux-x64.tar.gz -C /home/testuser

cat >> /home/testuser/.bashrc << EOF

export JAVA_HOME=/home/testuser/jdk1.8.0_301/
export PATH=\$JAVA_HOME/bin:\$PATH
export CLASSPATH=.:\$JAVA_HOME/lib/dt.jar:\$JAVA_HOME/lib/tools.jar
EOF

source /home/testuser/.bashrc

2.5zookeeper集群搭建

wget https://archive.apache.org/dist/zookeeper/zookeeper-3.4.10/zookeeper-3.4.10.tar.gz

tar xvf zookeeper-3.4.10.tar.gz -C /home/testuser/
mkdir /home/testuser/zookeeper-3.4.10/data
mkdir /home/testuser/zookeeper-3.4.10/logs

cat >> /home/testuser/zookeeper-3.4.10/conf/zoo.cfg << EOF
tickTime=2000
dataDir=/home/testuser/zookeeper-3.4.10/data
dataLogDir=/home/testuser/zookeeper-3.4.10/logs
clientPort=2181
initLimit=5
syncLimit=2
server.1=192.168.56.101:2888:3888
server.2=192.168.56.102:2888:3888
server.3=192.168.56.103:2888:3888
EOF

echo 1 > /home/testuser/zookeeper-3.4.10/data/myid  ##master1
echo 2 > /home/testuser/zookeeper-3.4.10/data/myid  ##node1
echo 3 > /home/testuser/zookeeper-3.4.10/data/myid  ##node2

启动---->>> /home/testuser/zookeeper-3.4.10/bin/zkServer.sh start
查看状态>>> /home/testuser/zookeeper-3.4.10/bin/zkServer.sh status

2.6Hadoop搭建(master1)

  1. 解压hadoop安装文件
tar -xvf hadoop-2.7.7.tar.gz -C /home/testuser
  2. 配置环境变量
cat >> /home/testuser/.bashrc << EOF

#hadoop environment
export HADOOP_HOME=/home/testuser/hadoop-2.7.7/
export PATH="\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH"
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF

source /home/testuser/.bashrc 
  3. 修改相应的配置文件
sed -i 's/^export JAVA_HOME=${JAVA_HOME}/export JAVA_HOME=\/home\/testuser\/jdk1.8.0_301\//' /home/testuser/hadoop-2.7.7/etc/hadoop/hadoop-env.sh
sed -i 's/^# export JAVA_HOME=.*/export JAVA_HOME=\/home\/testuser\/jdk1.8.0_301\//' /home/testuser/hadoop-2.7.7/etc/hadoop/mapred-env.sh
sed -i 's/^# export JAVA_HOME=.*/export JAVA_HOME=\/home\/testuser\/jdk1.8.0_301\//' /home/testuser/hadoop-2.7.7/etc/hadoop/yarn-env.sh


cat > /home/testuser/hadoop-2.7.7/etc/hadoop/slaves << EOF
node1
node2
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/core-site.xml << EOF
<?xml version="1.0" encoding="utf-8"?>

<configuration> 
  <!--默认的HDFS路径-->  
  <property> 
    <name>fs.defaultFS</name>  
    <value>hdfs://mycluster</value> 
  </property>
  <property> 
    <name>io.file.buffer.size</name>  
    <value>131072</value> 
  </property>
  <!--默认的临时目录-->  
  <property> 
    <name>hadoop.tmp.dir</name>  
    <value>/home/testuser/hadoop-2.7.7/data/tmp</value> 
  </property>
  <property> 
    <name>ha.zookeeper.quorum</name>  
    <value>master1:2181,node1:2181,node2:2181</value> 
  </property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/hdfs-site.xml << EOF
<?xml version="1.0" encoding="utf-8"?>

<configuration> 
  <property> 
    <name>dfs.replication</name>  
    <value>2</value>
  </property>
  <property> 
    <name>dfs.permissions</name>  
    <value>false</value> 
  </property>
  <property> 
    <name>dfs.permissions.enabled</name>  
    <value>false</value> 
  </property>
  <property> 
    <name>dfs.nameservices</name>  
    <value>mycluster</value> 
  </property>  
  <!--指定nameservices是mycluster时的namenode有哪些,这里的值也是逻辑名称,名字随便取,相互不重复即可-->  
  <property> 
    <name>dfs.ha.namenodes.mycluster</name>  
    <value>nn1,nn2</value> 
  </property>  
  <property> 
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>  
    <value>master1:9000</value> 
  </property>  
  <property> 
    <name>dfs.namenode.http-address.mycluster.nn1</name>  
    <value>master1:50070</value> 
  </property>  
  <property> 
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>  
    <value>node1:9000</value> 
  </property>  
  <property> 
    <name>dfs.namenode.http-address.mycluster.nn2</name>  
    <value>node1:50070</value> 
  </property>  
  <!--启动故障自动恢复-->  
  <property> 
    <name>dfs.ha.automatic-failover.enabled</name>  
    <value>true</value> 
  </property>  
  <!--指定mycluster出故障时,哪个实现类负责执行故障切换-->  
  <property> 
    <name>dfs.client.failover.proxy.provider.mycluster</name>  
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value> 
  </property>  
  <!-- 指定NameNode元数据在JournalNode上的存放位置 -->  
  <property> 
    <name>dfs.namenode.shared.edits.dir</name>  
    <value>qjournal://master1:8485;node1:8485;node2:8485/mycluster</value> 
  </property>  
  <!-- 声明journalnode服务器存储目录-->  
  <property> 
    <name>dfs.journalnode.edits.dir</name>  
    <value>/home/testuser/hadoop-2.7.7/data/journalnode/jn</value> 
  </property>  
  <!-- 配置隔离机制,即同一时刻只能有一台服务器对外响应 -->  
  <property> 
    <name>dfs.ha.fencing.methods</name>  
    <value>shell(/bin/true)</value> 
  </property>  
  <!-- 使用隔离机制时需要ssh无秘钥登录-->  
  <property> 
    <name>dfs.ha.fencing.ssh.private-key-files</name>  
    <value>/home/testuser/.ssh/id_rsa</value> 
  </property>  
  <property> 
    <name>dfs.ha.fencing.ssh.connect-timeout</name>  
    <value>10000</value> 
  </property>  
  <property> 
    <name>dfs.namenode.handler.count</name>  
    <value>100</value> 
  </property> 
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/mapred-site.xml << EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
   
<!--MapReduce以Yarn方式运行-->  
<property> 
<name>mapreduce.framework.name</name>  
<value>yarn</value> 
</property> 

</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/yarn-site.xml << EOF
<?xml version="1.0" encoding="utf-8"?>

<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration> 
  <!-- Site specific YARN configuration properties --> 
  <property> 
    <name>yarn.resourcemanager.connect.retry-interval.ms</name>  
    <value>2000</value> 
  </property>

  <!--是否开启RM ha,默认是开启的-->  
  <property> 
    <name>yarn.resourcemanager.ha.enabled</name>  
    <value>true</value> 
  </property>  
  
  <!--启动自动恢复-->  
  <property> 
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value> 
  </property>  

  <!--rm启动内置选举active-->  
  <property> 
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>  
    <value>true</value> 
  </property>  

  <!--声明两台resourcemanager的地址--> 
  <property> 
    <name>yarn.resourcemanager.cluster-id</name>  
    <value>rmcluster</value> 
  </property> 
  
  <property> 
    <name>yarn.resourcemanager.ha.rm-ids</name>  
    <value>rm1,rm2</value> 
  </property> 
  <property> 
    <name>yarn.resourcemanager.hostname.rm1</name>  
    <value>master1</value> 
  </property>  
  <property> 
    <name>yarn.resourcemanager.hostname.rm2</name>  
    <value>node1</value> 
  </property> 
  
  <!--启用自动恢复,当任务进行一半,rm坏掉,就要启动自动恢复,默认是false-->  
  <property> 
    <name>yarn.resourcemanager.recovery.enabled</name>  
    <value>true</value> 
  </property>

  <!--状态存储地址-->  
  <property> 
    <name>yarn.resourcemanager.zk.state-store.address</name>  
    <value>master1:2181,node1:2181,node2:2181</value> 
  </property> 
  
  <!--指定zookeeper集群的地址-->  
  <property> 
    <name>yarn.resourcemanager.zk-address</name>  
    <value>master1:2181,node1:2181,node2:2181</value> 
  </property> 
     
  <!--rm1端口号-->  
  <property> 
    <name>yarn.resourcemanager.address.rm1</name>  
    <value>master1:8032</value> 
  </property> 
  
  <!--rm1调度的端口号-->  
  <property> 
    <name>yarn.resourcemanager.scheduler.address.rm1</name>  
    <value>master1:8034</value> 
  </property>   
  
  <!--rm1 webapp端口号-->  
  <property> 
    <name>yarn.resourcemanager.webapp.address.rm1</name>  
    <value>master1:8088</value> 
  </property>   

  <!--rm2端口号-->  
  <property> 
    <name>yarn.resourcemanager.address.rm2</name>  
    <value>node1:8032</value> 
  </property> 
  
  <!--rm2调度的端口号-->  
  <property> 
    <name>yarn.resourcemanager.scheduler.address.rm2</name>  
    <value>node1:8034</value> 
  </property>   
  
  <!--rm2 webapp端口号-->  
  <property> 
    <name>yarn.resourcemanager.webapp.address.rm2</name>  
    <value>node1:8088</value> 
  </property> 
    
  <property> 
    <name>yarn.nodemanager.aux-services</name>  
    <value>mapreduce_shuffle</value> 
  </property>
  
  <!--执行mapreduce需要配置的shffle过程-->  
  <property> 
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>  
    <value>org.apache.hadoop.mapred.ShuffleHandler</value> 
  </property> 

</configuration>
EOF
  4. 把hadoop安装包分发给其他节点
[testuser@master1 ~]$ scp -r /home/testuser/hadoop-2.7.7/ node1:/home/testuser/
[testuser@master1 ~]$ scp -r /home/testuser/hadoop-2.7.7/ node2:/home/testuser/

2.7Hadoop搭建(node1,node2)

node1,node2复制好master1上的安装目录后,也需要设置一下HADOOP_HOME等环境变量

cat >> /home/testuser/.bashrc << EOF

#hadoop environment
export HADOOP_HOME=/home/testuser/hadoop-2.7.7/
export PATH="\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH"
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF

source /home/testuser/.bashrc 

2.8分别在每个journalnode节点上启动journalnode进程


cd /home/testuser/hadoop-2.7.7

[testuser@master1 hadoop-2.7.7]$ hadoop-daemon.sh start journalnode
[testuser@node1 hadoop-2.7.7]$ hadoop-daemon.sh start journalnode
[testuser@node2 hadoop-2.7.7]$ hadoop-daemon.sh start journalnode

2.9在第一个nn1节点(master1)上

####nn1节点格式化一下namenode
hdfs namenode -format

####nn1节点格式化一下zkfc
hdfs zkfc -formatZK

####启动namenode,会以控制台的方式启动,会在2.10步骤中通过ctrl+c关闭
hdfs namenode

2.10在nn2结点上(node1)

####nn2同步nn1节点元数据信息
hdfs namenode -bootstrapStandby
同步完成后关闭nn1的进程ctrl+C

2.11关闭所有结点的journalNode

####关闭所有结点的journalNode
sbin/hadoop-daemon.sh stop journalnode

2.12一键启动hdfs和yarn

#########################
###一键启动hdfs(master1)
sbin/start-dfs.sh

###一键启动yarn(master1)
sbin/start-yarn.sh 

启动备用结点RM(node1)
sbin/yarn-daemon.sh start resourcemanager

####查看状态
bin/hdfs haadmin -getServiceState nn1
bin/hdfs haadmin -getServiceState nn2

查看RM状态
bin/yarn rmadmin -getServiceState rm1
bin/yarn rmadmin -getServiceState rm2

#########################
web界面查看yarn
http://192.168.56.101:8088

#########################
故障转换测试

##hdfs
sbin/hadoop-daemon.sh stop namenode
sbin/hadoop-daemon.sh start namenode

##yarn
sbin/yarn-daemon.sh stop resourcemanager
sbin/yarn-daemon.sh start resourcemanager

进程情况:

[testuser@master1 ~]$ jps
3954 QuorumPeerMain
12712 ResourceManager
13385 Jps
12154 NameNode
12603 DFSZKFailoverController
12479 JournalNode

[testuser@node1 hadoop-2.7.7]$ jps
4321 DataNode
3890 QuorumPeerMain
5058 ResourceManager
4583 NameNode
4488 DFSZKFailoverController
5288 Jps
4905 NodeManager
4397 JournalNode

[testuser@node2 hadoop-2.7.7]$ jps
4161 DataNode
4883 Jps
4420 NodeManager
3831 QuorumPeerMain
4236 JournalNode

2.13测试集群

cat >> wordcount.txt << EOF
Hello hadoop
hello spark
hello bigdata
EOF


hadoop fs -mkdir -p /Hadoop/Input
hadoop fs -put wordcount.txt /Hadoop/Input
hadoop jar /home/testuser/hadoop-2.7.7/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar wordcount /Hadoop/Input /Hadoop/Output

hadoop fs -cat /Hadoop/Output/*

[testuser@master1 ~]$ hadoop fs -cat /Hadoop/Output/*
Hello   1
bigdata 1
hadoop  1
hello   2
spark   1

hadoop集群搭建测试成功!

二、Hive搭建

1.下载hive

http://archive.apache.org/dist/hive/
wget http://archive.apache.org/dist/hive/hive-2.3.6/apache-hive-2.3.6-bin.tar.gz

2.安装

###1.解压
tar zxvf apache-hive-2.3.6-bin.tar.gz -C /home/testuser
###2.添加环境变量 
cat >> /home/testuser/.bashrc << EOF

#hive environment
export HIVE_HOME=/home/testuser/apache-hive-2.3.6-bin 
export PATH=\$PATH:\$HIVE_HOME/bin
EOF

source /home/testuser/.bashrc
######3.配置 Hive
cd apache-hive-2.3.6-bin/conf/
cp hive-env.sh.template hive-env.sh 
cp hive-default.xml.template hive-site.xml 
cp hive-log4j2.properties.template hive-log4j2.properties 
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties

cat >> hive-env.sh << EOF
export JAVA_HOME=/home/testuser/jdk1.8.0_301
export HADOOP_HOME=/home/testuser/hadoop-2.7.7
export HIVE_HOME=/home/testuser/apache-hive-2.3.6-bin
export HIVE_CONF_DIR=\$HIVE_HOME/conf
EOF

安装一个mysql提供给hive使用

略,这里我的mysql,账号为root密码123456,接下来的hive-site.xml中配置要用到mysql的信息

配置hive-site.xml,主要改以下配置

####4.配置hive-site.xml,主要改以下配置

  <property>
    <name>hive.exec.local.scratchdir</name>
    <value>/tmp/scratchdir</value>
    <description>Local scratch space for Hive jobs</description>
  </property>
  <property>
    <name>hive.downloaded.resources.dir</name>
    <value>/tmp/hive_resources</value>
    <description>Temporary local directory for added resources in the remote file system.</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
    <description>password to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.56.1:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;characterEncoding=UTF-8</value>
    <description>
      JDBC connect string for a JDBC metastore.
      To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
      For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
    </description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>Username to use against metastore database</description>
  </property>
  <property>
    <name>hive.querylog.location</name>
    <value>/tmp/${system:user.name}</value>
    <description>Location of Hive run time structured log file</description>
  </property>
  <property>
    <name>hive.server2.logging.operation.log.location</name>
    <value>/tmp/${system:user.name}/operation_logs</value>
    <description>Top level directory where operation logs are stored if logging functionality is enabled</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>

####5.其他准备
hdfs dfs -mkdir -p /tmp
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -chmod g+w /tmp
hdfs dfs -chmod g+w /user/hive/warehouse

####需要初始化hive数据库
schematool -dbType mysql -initSchema

####启动hive(注:需要先将mysql的jdbc驱动jar包放置在hive安装目录的lib目录下,上一步schematool初始化时同样需要该驱动)
hive

3.hive测试

####hive测试
create table t_source(id int,tel string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE;
cat >> /home/testuser/test_db.txt << EOF
1|hello
2|world
3|liujinghua
EOF
load data local inpath '/home/testuser/test_db.txt' into table t_source;

select * from t_source;
hive> select * from t_source;
OK
1       hello
2       world
3       liujinghua
Time taken: 2.089 seconds, Fetched: 3 row(s)

三、Hbase搭建

1.下载hbase

http://archive.apache.org/dist/hbase/
curl -O http://archive.apache.org/dist/hbase/2.3.7/hbase-2.3.7-bin.tar.gz

2.搭建zookeeper集群

参考hadoop安装高可用HA集群中的2.5节zookeeper集群搭建即可

3.Hbase搭建

###1.解压
tar -zxvf hbase-2.3.7-bin.tar.gz -C /home/testuser

###2.环境变量
cd /home/testuser/hbase-2.3.7
cat >> /home/testuser/.bashrc << EOF

#hbase enviroment 
export HBASE_HOME=/home/testuser/hbase-2.3.7
export PATH=\$PATH:\$HBASE_HOME/bin
EOF
source /home/testuser/.bashrc

##hbase-site.xml修改

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-->
<configuration>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>master1,node1,node2</value>
    <!-- 指定zookeeper集群节点 -->
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/testuser/zookeeper-3.4.10/data</value>
    <!-- 指定zookeeper存储目录 -->
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
    <!-- 指定zookeeper端口号 -->
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://master1:9000/hbase</value>
    <!-- 指定HBase在HDFS上的根目录(注:若HDFS为高可用HA集群,建议改为nameservice地址,如hdfs://mycluster/hbase) -->
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
    <!-- 指定true为分布式集群部署 -->
  </property>
  <property>
    <name>hbase.tmp.dir</name>
    <value>/home/testuser/hbase-2.3.7/tmp</value>
  </property>
  <property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
  </property>
</configuration>

##hbase-env.sh修改以下配置

export JAVA_HOME=/home/testuser/jdk1.8.0_301
export HBASE_LOG_DIR=/home/testuser/hbase-2.3.7/logs
export HBASE_PID_DIR=/home/testuser/hbase-2.3.7/pids
export HBASE_MANAGES_ZK=false

##其他配置

####
cat > /home/testuser/hbase-2.3.7/conf/regionservers << EOF
master1
node1
node2
EOF

####备用节点
cat > /home/testuser/hbase-2.3.7/conf/backup-masters << EOF
node1
EOF

####因HBase启动依赖hdfs配置信息,需要将hdfs配置文件拷贝到主节点hbase的conf目录下
cp /home/testuser/hadoop-2.7.7/etc/hadoop/core-site.xml /home/testuser/hbase-2.3.7/conf/
cp /home/testuser/hadoop-2.7.7/etc/hadoop/hdfs-site.xml /home/testuser/hbase-2.3.7/conf/

####同步安装目录
scp -r /home/testuser/hbase-2.3.7 node1:/home/testuser/
scp -r /home/testuser/hbase-2.3.7 node2:/home/testuser/

####启动与停止
/home/testuser/hbase-2.3.7/bin/start-hbase.sh
/home/testuser/hbase-2.3.7/bin/stop-hbase.sh

####WEB UI访问
http://192.168.56.101:16010
http://192.168.56.102:16010

####客户端
hbase shell

####hbase基本操作

####hbase基本操作
https://blog.csdn.net/zhouleilei/article/details/7355848

##示例如:
create 't_test',{NAME => 'c1',VERSIONS => 1}
put 't_test','1','c1:username','youym'
put 't_test','1','c1:pwd','youym'
put 't_test','2','c1:username','liujh'
put 't_test','2','c1:pwd','1234youym'
scan 't_test'
disable 't_test'
drop 't_test'

4.Hive与Hbase集成

####编辑hive-site.xml配置文件, 添加hive.zookeeper.quorum, hbase.zookeeper.quorum属性
  <property>
    <name>hive.zookeeper.quorum</name>
    <value>master1,node1,node2</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>master1,node1,node2</value>
  </property>
####编辑hive-env.sh文件,添加HADOOP_HOME, HBASE_HOME属性
export HADOOP_HOME=/home/testuser/hadoop-2.7.7
export HBASE_HOME=/home/testuser/hbase-2.3.7
####命令启动hive,如不报错表示hive与hbase整合成功
hive
####hbase中创建表
create 't_user', 'info'
put 't_user','1','info:name','youym'
put 't_user','1','info:sex','man'
####在hive中创建一张hbase与hive的映射表, 建表语句如下
create external table t_user (
        id string,
        name string,
        sex string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:name,info:sex")
TBLPROPERTIES("hbase.table.name" = "t_user");
####通过hive客户端查询该表的数据
select * from t_user;

hive> select * from t_user;
OK
1       youym   man
Time taken: 2.172 seconds, Fetched: 1 row(s)

####hbase中加一条数据
put 't_user','2','info:name','test'
put 't_user','2','info:sex','123456'

###hive再次查询
select * from t_user;

hive> select * from t_user;
OK
1       youym   man
2       test    123456
Time taken: 0.408 seconds, Fetched: 2 row(s)

至此,hive与hbase集成成功.

Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐