hadoop2.7.7+hive2.3.6+hbase2.3.7搭建集群过程
hadoop2.10.1+hive+hbase搭建集群过程一、Haddop2.10.1集群搭建1.普通集群1.1搭建前准备1.2搭建NTP服务器1.3ssh互信(免密码登录)2.高可用Ha集群二、Hive搭建三、Hbase搭建一、Haddop2.10.1集群搭建1.普通集群1.1搭建前准备##防火墙停止systemctl stop firewalldsystemctl disable firewa
hadoop2.7.7+hive+hbase搭建集群过程
一、Hadoop2.7.7集群搭建
1.普通集群
一般生产环境下不会让在root下搭建,这里我模拟在testuser下安装hadoop的过程(一些需要root操作的步骤还是需要使用root权限,如防火墙,/etc/hosts文件修改,搭建NTP服务器等)
1.1搭建前准备
以下操作为root权限操作
##防火墙停止
systemctl stop firewalld
systemctl disable firewalld
##设置主机名
hostnamectl set-hostname master1 ###(192.168.56.101)
hostnamectl set-hostname node1 ###(192.168.56.102)
hostnamectl set-hostname node2 ###(192.168.56.103)
#修改/etc/hosts文件(三台机)
cat >> /etc/hosts << EOF
192.168.56.101 master1
192.168.56.102 node1
192.168.56.103 node2
EOF
下载文件
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
1.2搭建NTP服务器
以下操作为root权限操作
集群最好先将三台机的时间同步做好,避免因时间不同步导致一些问题出现,因此这里先把三台机的NTP同步环境弄好。
主ntp机器(192.168.56.101):
##主ntp机器(192.168.56.101)
yum install ntp ntpdate -y
systemctl status ntpd
#注释配置
sed -i 's/^server/#&/' /etc/ntp.conf
cat >> /etc/ntp.conf << EOF
server 127.127.1.0 iburst
EOF
systemctl start ntpd
systemctl status ntpd
ntpq -p
systemctl enable ntpd
##注:若已按1.1停止并禁用firewalld,以下两条命令会报"FirewallD is not running",可直接跳过;仅在防火墙保持开启时才需要放行NTP端口
firewall-cmd --permanent --add-port=123/udp
firewall-cmd --reload
从ntp机器(192.168.56.102,192.168.56.103):
##客户端机器:
yum install ntp ntpdate -y
systemctl status ntpd
#注释配置
sed -i 's/^server/#&/' /etc/ntp.conf
cat >> /etc/ntp.conf << EOF
server 192.168.56.101
restrict 192.168.56.101 nomodify notrap noquery
EOF
ntpdate -u 192.168.56.101
systemctl start ntpd
systemctl enable ntpd
ntpq -p
1.3ssh互信(免密码登录)
使用testuser进行安装,建立testuser账号的ssh互信,以下操作为在testuser下操作
1.##在节点(192.168.56.101)执行下面的命令:
ssh-keygen -t rsa -P '' #一路回车直到生成公钥
#从master1节点拷贝id_rsa.pub到node1主机上,并且改名为id_rsa.pub.master1,如果node1上.ssh目录不存在,就手工建一个
scp /home/testuser/.ssh/id_rsa.pub testuser@node1:/home/testuser/.ssh/id_rsa.pub.master1
#从master1节点拷贝id_rsa.pub到node2主机上,并且改名为id_rsa.pub.master1,如果node2上.ssh目录不存在,就手工建一个
scp /home/testuser/.ssh/id_rsa.pub testuser@node2:/home/testuser/.ssh/id_rsa.pub.master1
2.###在对应的主机下执行如下命令:
cat /home/testuser/.ssh/id_rsa.pub >> /home/testuser/.ssh/authorized_keys #master1主机
cat /home/testuser/.ssh/id_rsa.pub.master1 >> /home/testuser/.ssh/authorized_keys #node1,node2
3.###注意authorized_keys的权限问题
cd .ssh
chmod 600 authorized_keys
4.##测试
在master1上执行测试
ssh node1
ssh node2
1.4jdk安装
使用testuser进行安装
tar -xvf jdk-8u301-linux-x64.tar.gz -C /home/testuser
cat >> /home/testuser/.bashrc << EOF
export JAVA_HOME=/home/testuser/jdk1.8.0_301/
export PATH=\$JAVA_HOME/bin:\$PATH
export CLASSPATH=.:\$JAVA_HOME/lib/dt.jar:\$JAVA_HOME/lib/tools.jar
EOF
source /home/testuser/.bashrc
1.5Hadoop搭建(master1)
- 解压hadoop安装文件
tar -xvf hadoop-2.7.7.tar.gz -C /home/testuser
- 修改相应的配置文件
cat >> /home/testuser/.bashrc << EOF
#hadoop enviroment
export HADOOP_HOME=/home/testuser/hadoop-2.7.7/
export PATH="\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH"
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF
source /home/testuser/.bashrc
- 修改相应的配置文件
sed -i 's/^export JAVA_HOME=${JAVA_HOME}/export JAVA_HOME=\/home\/testuser\/jdk1.8.0_301\//' /home/testuser/hadoop-2.7.7/etc/hadoop/hadoop-env.sh
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/slaves << EOF
node1
node2
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/core-site.xml << EOF
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master1:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/testuser/hadoop-2.7.7/tmp</value>
</property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/hdfs-site.xml << EOF
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master1:50090</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/testuser/hadoop-2.7.7/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/testuser/hadoop-2.7.7/hdfs/data</value>
</property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/mapred-site.xml << EOF
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master1:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master1:19888</value>
</property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/yarn-site.xml << EOF
<?xml version="1.0"?>
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master1:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master1:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master1:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master1:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master1:8088</value>
</property>
</configuration>
EOF
- ####格式化一下namenode
####格式化一下namenode
hadoop namenode -format
5.###将文件传送至node1,node2节点
scp -r /home/testuser/hadoop-2.7.7 testuser@node1:/home/testuser
scp -r /home/testuser/hadoop-2.7.7 testuser@node2:/home/testuser
1.6Hadoop搭建(node1,node2)
修改环境变量
cat >> /home/testuser/.bashrc << EOF
#hadoop enviroment
export HADOOP_HOME=/home/testuser/hadoop-2.7.7/
export PATH="\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH"
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF
source /home/testuser/.bashrc
1.7Hadoop集群启动
####5.启动集群的脚本
/home/testuser/hadoop-2.7.7/sbin/start-all.sh
或
/home/testuser/hadoop-2.7.7/sbin/start-dfs.sh
/home/testuser/hadoop-2.7.7/sbin/start-yarn.sh
进程情况如下:
[testuser@master1 ~]$ jps
4257 SecondaryNameNode
4657 Jps
4406 ResourceManager
4105 NameNode
[testuser@node1 ~]$ jps
4176 NodeManager
4240 Jps
4085 DataNode
[testuser@node2 ~]$ jps
4113 Jps
4024 NodeManager
3932 DataNode
####测试(master1结点上)
cat >> wordcount.txt << EOF
Hello hadoop
hello spark
hello bigdata
EOF
hadoop fs -mkdir -p /Hadoop/Input
hadoop fs -put wordcount.txt /Hadoop/Input
hadoop jar /home/testuser/hadoop-2.7.7/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar wordcount /Hadoop/Input /Hadoop/Output
hadoop fs -cat /Hadoop/Output/*
####测试结果
[testuser@master1 ~]$ hadoop fs -cat /Hadoop/Output/*
Hello 1
bigdata 1
hadoop 1
hello 2
spark 1
hadoop集群搭建成功!
2.高可用HA集群
角色规划
master1:192.168.56.101
node1:192.168.56.102
node2:192.168.56.103

一般生产环境下不会让在root下搭建,这里我模拟在testuser下安装hadoop的过程(一些需要root操作的步骤还是需要使用root权限,如防火墙,/etc/hosts文件修改,搭建NTP服务器等)
2.1搭建前准备
以下操作为root权限操作
##防火墙停止
systemctl stop firewalld
systemctl disable firewalld
##设置主机名
hostnamectl set-hostname master1 ###(192.168.56.101)
hostnamectl set-hostname node1 ###(192.168.56.102)
hostnamectl set-hostname node2 ###(192.168.56.103)
#修改/etc/hosts文件(三台机)
cat >> /etc/hosts << EOF
192.168.56.101 master1
192.168.56.102 node1
192.168.56.103 node2
EOF
下载文件
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
2.2搭建NTP服务器
以下操作为root权限操作
集群最好先将三台机的时间同步做好,避免因时间不同步导致一些问题出现,因此这里先把三台机的NTP同步环境弄好。
主ntp机器(192.168.56.101):
##主ntp机器(192.168.56.101)
yum install ntp ntpdate -y
systemctl status ntpd
#注释配置
sed -i 's/^server/#&/' /etc/ntp.conf
cat >> /etc/ntp.conf << EOF
server 127.127.1.0 iburst
EOF
systemctl start ntpd
systemctl status ntpd
ntpq -p
systemctl enable ntpd
##注:若已按2.1停止并禁用firewalld,以下两条命令会报"FirewallD is not running",可直接跳过;仅在防火墙保持开启时才需要放行NTP端口
firewall-cmd --permanent --add-port=123/udp
firewall-cmd --reload
从ntp机器(192.168.56.102,192.168.56.103):
##客户端机器:
yum install ntp ntpdate -y
systemctl status ntpd
#注释配置
sed -i 's/^server/#&/' /etc/ntp.conf
cat >> /etc/ntp.conf << EOF
server 192.168.56.101
restrict 192.168.56.101 nomodify notrap noquery
EOF
ntpdate -u 192.168.56.101
systemctl start ntpd
systemctl enable ntpd
ntpq -p
2.3ssh互信(免密码登录)
使用testuser进行安装,建立testuser账号的ssh互信,以下操作为在testuser下操作
1.##在节点(192.168.56.101)执行下面的命令:
ssh-keygen -t rsa -P '' #一路回车直到生成公钥
#从master1节点拷贝id_rsa.pub到node1主机上,并且改名为id_rsa.pub.master1,如果node1上.ssh目录不存在,就手工建一个
scp /home/testuser/.ssh/id_rsa.pub testuser@node1:/home/testuser/.ssh/id_rsa.pub.master1
#从master1节点拷贝id_rsa.pub到node2主机上,并且改名为id_rsa.pub.master1,如果node2上.ssh目录不存在,就手工建一个
scp /home/testuser/.ssh/id_rsa.pub testuser@node2:/home/testuser/.ssh/id_rsa.pub.master1
2.###在对应的主机下执行如下命令:
cat /home/testuser/.ssh/id_rsa.pub >> /home/testuser/.ssh/authorized_keys #master1主机
cat /home/testuser/.ssh/id_rsa.pub.master1 >> /home/testuser/.ssh/authorized_keys #node1,node2
3.###注意authorized_keys的权限问题
cd .ssh
chmod 600 authorized_keys
4.##测试
在master1上执行测试
ssh node1
ssh node2
2.4jdk安装
tar -xvf jdk-8u301-linux-x64.tar.gz -C /home/testuser
cat >> /home/testuser/.bashrc << EOF
export JAVA_HOME=/home/testuser/jdk1.8.0_301/
export PATH=\$JAVA_HOME/bin:\$PATH
export CLASSPATH=.:\$JAVA_HOME/lib/dt.jar:\$JAVA_HOME/lib/tools.jar
EOF
source /home/testuser/.bashrc
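2.5zookeeper集群搭建(三台机都需要执行,myid按各自主机设置)
wget https://archive.apache.org/dist/zookeeper/zookeeper-3.4.10/zookeeper-3.4.10.tar.gz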
tar xvf zookeeper-3.4.10.tar.gz -C /home/testuser/
mkdir /home/testuser/zookeeper-3.4.10/data
mkdir /home/testuser/zookeeper-3.4.10/logs
cat >> /home/testuser/zookeeper-3.4.10/conf/zoo.cfg << EOF
tickTime=2000
dataDir=/home/testuser/zookeeper-3.4.10/data
dataLogDir=/home/testuser/zookeeper-3.4.10/logs
clientPort=2181
initLimit=5
syncLimit=2
server.1=192.168.56.101:2888:3888
server.2=192.168.56.102:2888:3888
server.3=192.168.56.103:2888:3888
EOF
echo 1 > /home/testuser/zookeeper-3.4.10/data/myid ##master1
echo 2 > /home/testuser/zookeeper-3.4.10/data/myid ##node1
echo 3 > /home/testuser/zookeeper-3.4.10/data/myid ##node2
启动---->>> /home/testuser/zookeeper-3.4.10/bin/zkServer.sh start
查看状态>>> /home/testuser/zookeeper-3.4.10/bin/zkServer.sh status
2.6Hadoop搭建(master1)
- 解压hadoop安装文件
tar -xvf hadoop-2.7.7.tar.gz -C /home/testuser
- 修改相应的配置文件
cat >> /home/testuser/.bashrc << EOF
#hadoop enviroment
export HADOOP_HOME=/home/testuser/hadoop-2.7.7/
export PATH="\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH"
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF
source /home/testuser/.bashrc
- 修改相应的配置文件
sed -i 's/^export JAVA_HOME=${JAVA_HOME}/export JAVA_HOME=\/home\/testuser\/jdk1.8.0_301\//' /home/testuser/hadoop-2.7.7/etc/hadoop/hadoop-env.sh
sed -i 's/^# export JAVA_HOME=.*/export JAVA_HOME=\/home\/testuser\/jdk1.8.0_301\//' /home/testuser/hadoop-2.7.7/etc/hadoop/mapred-env.sh
sed -i 's/^# export JAVA_HOME=.*/export JAVA_HOME=\/home\/testuser\/jdk1.8.0_301\//' /home/testuser/hadoop-2.7.7/etc/hadoop/yarn-env.sh
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/slaves << EOF
node1
node2
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/core-site.xml << EOF
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<!--默认的HDFS路径-->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<!--默认的临时目录-->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/testuser/hadoop-2.7.7/data/tmp</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>master1:2181,node1:2181,node2:2181</value>
</property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/hdfs-site.xml << EOF
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!--指定nameservices是mycluster时的namenode有哪些,这里的值也是逻辑名称,名字随便取,相互不重复即可-->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>master1:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>master1:50070</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>node1:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>node1:50070</value>
</property>
<!--启动故障自动恢复-->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!--指定mycluster出故障时,哪个实现类负责执行故障切换-->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- 指定NameNode元数据在JournalNode上的存放位置 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master1:8485;node1:8485;node2:8485/mycluster</value>
</property>
<!-- 声明journalnode服务器存储目录-->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/testuser/hadoop-2.7.7/data/journalnode/jn</value>
</property>
<!-- 配置隔离机制,即同一时刻只能有一台服务器对外响应 -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>shell(/bin/true)</value>
</property>
<!-- 使用隔离机制时需要ssh无秘钥登录-->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/testuser/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>10000</value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>100</value>
</property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/mapred-site.xml << EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!--MapReduce以Yarn方式运行-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
EOF
cat > /home/testuser/hadoop-2.7.7/etc/hadoop/yarn-site.xml << EOF
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>2000</value>
</property>
<!--是否开启RM ha,默认是开启的-->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!--启动自动恢复-->
<property>
<name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!--rm启动内置选举active-->
<property>
<name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
<value>true</value>
</property>
<!--声明两台resourcemanager的地址-->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rmcluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>master1</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>node1</value>
</property>
<!--启用自动恢复,当任务进行一半,rm坏掉,就要启动自动恢复,默认是false-->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!--状态存储地址-->
<property>
<name>yarn.resourcemanager.zk.state-store.address</name>
<value>master1:2181,node1:2181,node2:2181</value>
</property>
<!--指定zookeeper集群的地址-->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>master1:2181,node1:2181,node2:2181</value>
</property>
<!--rm1端口号-->
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>master1:8032</value>
</property>
<!--rm1调度的端口号-->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>master1:8034</value>
</property>
<!--rm1 webapp端口号-->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>master1:8088</value>
</property>
<!--rm2端口号-->
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>node1:8032</value>
</property>
<!--rm2调度的端口号-->
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>node1:8034</value>
</property>
<!--rm2 webapp端口号-->
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>node1:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!--执行mapreduce需要配置的shffle过程-->
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
EOF
- 把hadoop安装包分发给其他节点
[testuser@master1 ~]$ scp -r /home/testuser/hadoop-2.7.7/ node1:/home/testuser/
[testuser@master1 ~]$ scp -r /home/testuser/hadoop-2.7.7/ node2:/home/testuser/
2.7Hadoop搭建(node1,node2)
node1、node2从master1复制好安装目录后,也需要设置HADOOP_HOME等环境变量
cat >> /home/testuser/.bashrc << EOF
#hadoop enviroment
export HADOOP_HOME=/home/testuser/hadoop-2.7.7/
export PATH="\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH"
export HADOOP_CONF_DIR=\$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=\$HADOOP_HOME/etc/hadoop
EOF
source /home/testuser/.bashrc
2.8分别在每个journalnode节点上启动journalnode进程
cd /home/testuser/hadoop-2.7.7
[testuser@master1 hadoop-2.7.7]$ hadoop-daemon.sh start journalnode
[testuser@node1 hadoop-2.7.7]$ hadoop-daemon.sh start journalnode
[testuser@node2 hadoop-2.7.7]$ hadoop-daemon.sh start journalnode
2.9在第一个nn1节点(master1)上
####nn1节点格式化一下namenode
hdfs namenode -format
####nn1节点格式化一下zkfc
hdfs zkfc -formatZK
####启动namenode,会以控制台的方式启动,会在2.10步骤中通过ctrl+c关闭
hdfs namenode
2.10在nn2结点上(node1)
####nn2同步nn1节点元数据信息
hdfs namenode -bootstrapStandby
同步完成后关闭nn1的进程ctrl+C
2.11关闭所有结点的journalNode
####关闭所有结点的journalNode
sbin/hadoop-daemon.sh stop journalnode
2.12一键启动hdfs和yarn
#########################
###一键启动hdfs(master1)
sbin/start-dfs.sh
###一键启动yarn(master1)
sbin/start-yarn.sh
启动备用结点RM(node1)
sbin/yarn-daemon.sh start resourcemanager ###启动备用结点RM
####查看状态
bin/hdfs haadmin -getServiceState nn1
bin/hdfs haadmin -getServiceState nn2
查看RM状态
bin/yarn rmadmin -getServiceState rm1
bin/yarn rmadmin -getServiceState rm2
#########################
web界面查看yarn
http://192.168.56.101:8088
#########################
故障转换测试
##hdfs
sbin/hadoop-daemon.sh stop namenode
sbin/hadoop-daemon.sh start namenode
##yarn
sbin/yarn-daemon.sh stop resourcemanager
sbin/yarn-daemon.sh start resourcemanager
进程情况:
[testuser@master1 ~]$ jps
3954 QuorumPeerMain
12712 ResourceManager
13385 Jps
12154 NameNode
12603 DFSZKFailoverController
12479 JournalNode
[testuser@node1 hadoop-2.7.7]$ jps
4321 DataNode
3890 QuorumPeerMain
5058 ResourceManager
4583 NameNode
4488 DFSZKFailoverController
5288 Jps
4905 NodeManager
4397 JournalNode
[testuser@node2 hadoop-2.7.7]$ jps
4161 DataNode
4883 Jps
4420 NodeManager
3831 QuorumPeerMain
4236 JournalNode
2.13测试集群
cat >> wordcount.txt << EOF
Hello hadoop
hello spark
hello bigdata
EOF
hadoop fs -mkdir -p /Hadoop/Input
hadoop fs -put wordcount.txt /Hadoop/Input
hadoop jar /home/testuser/hadoop-2.7.7/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar wordcount /Hadoop/Input /Hadoop/Output
hadoop fs -cat /Hadoop/Output/*
[testuser@master1 ~]$ hadoop fs -cat /Hadoop/Output/*
Hello 1
bigdata 1
hadoop 1
hello 2
spark 1
hadoop集群搭建测试成功!
二、Hive搭建
1.下载hive
http://archive.apache.org/dist/hive/
wget http://archive.apache.org/dist/hive/hive-2.3.6/apache-hive-2.3.6-bin.tar.gz
2.安装
###1.解压
tar zxvf apache-hive-2.3.6-bin.tar.gz -C /home/testuser
###2.添加环境变量
cat >> /home/testuser/.bashrc << EOF
#hive enviroment
export HIVE_HOME=/home/testuser/apache-hive-2.3.6-bin
export PATH=\$PATH:\$HIVE_HOME/bin
EOF
source /home/testuser/.bashrc
######3.配置 Hive
cd apache-hive-2.3.6-bin/conf/
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
cp hive-log4j2.properties.template hive-log4j2.properties
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
cat >> hive-env.sh << EOF
export JAVA_HOME=/home/testuser/jdk1.8.0_301
export HADOOP_HOME=/home/testuser/hadoop-2.7.7
export HIVE_HOME=/home/testuser/apache-hive-2.3.6-bin
export HIVE_CONF_DIR=\$HIVE_HOME/conf
EOF
安装一个mysql提供给hive使用
略,这里我的mysql,账号为root密码123456,接下来的hive-site.xml中配置要用到mysql的信息
配置hive-site.xml,主要改以下配置
####4.配置hive-site.xml,主要改以下配置
<property>
<name>hive.exec.local.scratchdir</name>
<value>/tmp/scratchdir</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.downloaded.resources.dir</name>
<value>/tmp/hive_resources</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://192.168.56.1:3306/hive?createDatabaseIfNotExist=true&amp;useSSL=false&amp;characterEncoding=UTF-8</value>
<description>
JDBC connect string for a JDBC metastore.
To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>Username to use against metastore database</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>/tmp/${system:user.name}</value>
<description>Location of Hive run time structured log file</description>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>/tmp/${system:user.name}/operation_logs</value>
<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
####5.其他准备
hdfs dfs -mkdir -p /tmp
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -chmod g+w /tmp
hdfs dfs -chmod g+w /user/hive/warehouse
####需要初始化hive数据库
schematool -dbType mysql -initSchema
####启动hive
hive
注:需要将MySQL的jdbc驱动jar包放置在hive安装包的lib目录下(上面的schematool初始化同样依赖该驱动,需提前放好)
3.hive测试
####hive测试
create table t_source(id int,tel string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE;
cat >> /home/testuser/test_db.txt << EOF
1|hello
2|world
3|liujinghua
EOF
load data local inpath '/home/testuser/test_db.txt' into table t_source;
select * from t_source;
hive> select * from t_source;
OK
1 hello
2 world
3 liujinghua
Time taken: 2.089 seconds, Fetched: 3 row(s)
三、Hbase搭建
1.下载hbase
http://archive.apache.org/dist/hbase/
curl -L -O http://archive.apache.org/dist/hbase/2.3.7/hbase-2.3.7-bin.tar.gz
2.搭建zookeeper集群
参考hadoop安装高可用HA集群中的2.5节zookeeper集群搭建即可
3.Hbase搭建
###1.解压
tar -zxvf hbase-2.3.7-bin.tar.gz -C /home/testuser
###2.环境变量
cd /home/testuser/hbase-2.3.7
cat >> /home/testuser/.bashrc << EOF
#hbase enviroment
export HBASE_HOME=/home/testuser/hbase-2.3.7
export PATH=\$PATH:\$HBASE_HOME/bin
EOF
source /home/testuser/.bashrc
##hbase-site.xml修改
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<configuration>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master1,node1,node2</value>
<!-- 指定zookeeper集群节点 -->
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/testuser/zookeeper-3.4.10/data</value>
<!-- 指定zookeeper存储目录 -->
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
<!-- 指定zookeeper端口号 -->
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://master1:9000/hbase</value>
<!-- 指定HBase在HDFS上的根目录(属性名区分大小写,必须为hbase.rootdir);若使用的是HA集群,应改为hdfs://mycluster/hbase -->
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
<!-- 指定true为分布式集群部署 -->
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/home/testuser/hbase-2.3.7/tmp</value>
</property>
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
</configuration>
##hbase-env.sh修改以下配置
export JAVA_HOME=/home/testuser/jdk1.8.0_301
export HBASE_LOG_DIR=/home/testuser/hbase-2.3.7/logs
export HBASE_PID_DIR=/home/testuser/hbase-2.3.7/pids
export HBASE_MANAGES_ZK=false
##其他配置
####
cat > /home/testuser/hbase-2.3.7/conf/regionservers << EOF
master1
node1
node2
EOF
####备用节点
cat > /home/testuser/hbase-2.3.7/conf/backup-masters << EOF
node1
EOF
####因HBase启动依赖hdfs配置信息,需要将hdfs配置文件拷贝到主节点hbase的conf目录下
cp /home/testuser/hadoop-2.7.7/etc/hadoop/core-site.xml /home/testuser/hbase-2.3.7/conf/
cp /home/testuser/hadoop-2.7.7/etc/hadoop/hdfs-site.xml /home/testuser/hbase-2.3.7/conf/
####同步安装目录
scp -r /home/testuser/hbase-2.3.7 node1:/home/testuser/
scp -r /home/testuser/hbase-2.3.7 node2:/home/testuser/
####启动与停止
/home/testuser/hbase-2.3.7/bin/start-hbase.sh
/home/testuser/hbase-2.3.7/bin/stop-hbase.sh
####WEB UI访问
http://192.168.56.101:16010
http://192.168.56.102:16010
####客户端
hbase shell
####hbase基本操作
####hbase基本操作
https://blog.csdn.net/zhouleilei/article/details/7355848
##示例如:
create 't_test',{NAME => 'c1',VERSIONS => 1}
put 't_test','1','c1:username','youym'
put 't_test','1','c1:pwd','youym'
put 't_test','2','c1:username','liujh'
put 't_test','2','c1:pwd','1234youym'
scan 't_test'
disable 't_test'
drop 't_test'
4.Hive与Hbase集成
####编辑hive-site.xml配置文件, 添加hive.zookeeper.quorum, hbase.zookeeper.quorum属性
<property>
<name>hive.zookeeper.quorum</name>
<value>master1,node1,node2</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master1,node1,node2</value>
</property>
####编辑hive-env.sh文件,添加HADOOP_HOME, HBASE_HOME属性
export HADOOP_HOME=/home/testuser/hadoop-2.7.7
export HBASE_HOME=/home/testuser/hbase-2.3.7
####命令启动hive,如不报错表示hive与hbase整合成功
hive
####hbase中创建表
create 't_user', 'info'
put 't_user','1','info:name','youym'
put 't_user','1','info:sex','man'
####在hive中创建一张hbase与hive的映射表, 建表语句如下
create external table t_user (
id string,
name string,
sex string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:name,info:sex")
TBLPROPERTIES("hbase.table.name" = "t_user");
####通过hive客户端查询该表的数据
select * from t_user;
hive> select * from t_user;
OK
1 youym man
Time taken: 2.172 seconds, Fetched: 1 row(s)
####hbase中加一条数据
put 't_user','2','info:name','test'
put 't_user','2','info:sex','123456'
###hive再次查询
select * from t_user;
hive> select * from t_user;
OK
1 youym man
2 test 123456
Time taken: 0.408 seconds, Fetched: 2 row(s)
至此,hive与hbase集成成功.
更多推荐
所有评论(0)