Hive拓展
Hive安装
- 解压
tar -zxvf /resource/apache-hive-3.1.2-bin.tar.gz -C /opt/module/
- 修改
mv /opt/module/apache-hive-3.1.2-bin /opt/module/hive
- 环境变量
sudo vim /etc/profile.d/my_env.sh
# 追加内容:
#HIVE_HOME
export HIVE_HOME=/opt/module/hive
export PATH=$PATH:$HIVE_HOME/bin
# 分发后,刷新环境变量
source /etc/profile.d/my_env.sh
- 兼容性
为避免初始化过程中 guava 版本不同产生报错,将 hadoop 的 guava 复制到 hive 的 lib 中
mv /opt/module/hive/lib/guava-19.0.jar /opt/module/hive/lib/guava-19.0.jar.bak
cp /opt/module/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /opt/module/hive/lib/
MySQL安装
- 解压
cd /resource && mkdir mysql_lib
tar -xf mysql-5.7.35-1.el7.x86_64.rpm-bundle.tar -C mysql_lib/
- 卸载mariadb
sudo rpm -qa | grep mariadb | xargs sudo rpm -e --nodeps
- 安装MySQL依赖
cd mysql_lib
sudo rpm -ivh mysql-community-common-5.7.35-1.el7.x86_64.rpm
sudo rpm -ivh mysql-community-libs-5.7.35-1.el7.x86_64.rpm
sudo rpm -ivh mysql-community-libs-compat-5.7.35-1.el7.x86_64.rpm
sudo rpm -ivh mysql-community-client-5.7.35-1.el7.x86_64.rpm
sudo rpm -ivh mysql-community-server-5.7.35-1.el7.x86_64.rpm
- 启动
sudo systemctl start mysqld
- 查看临时密码
# 记录并保存
sudo cat /var/log/mysqld.log | grep password
- 进入
mysql -uroot -p'[刚刚查询到的密码]'
- 设置复杂密码
set password=password("Qs23=zs32");
- 更改密码策略
set global validate_password_policy=0;
set global validate_password_length=4;
- 设置简单密码
set password=password("123456");
- 测试
use mysql;
select user, host from user;
update user set host="%" where user="root";
flush privileges;
- 配置hive元数据
create database metastore;
配置
hive基础配置
- 引入JDBC
cp /resource/mysql-connector-java-5.1.37.jar $HIVE_HOME/lib
- 编辑配置文件
vim $HIVE_HOME/conf/hive-site.xml
编辑内容
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- jdbc连接的URL -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://hive001:3306/metastore?useSSL=false</value>
</property>
<!-- jdbc连接的Driver-->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<!-- jdbc连接的username-->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<!-- jdbc连接的password -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
</property>
<!-- Hive默认在HDFS的工作目录 -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
</property>
<!--显示当前库和表头-->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
<description>Whether to print the names of the columns in query output.</description>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
<description>Whether to include the current database in the Hive prompt.</description>
</property>
</configuration>
配置日志路径
mv $HIVE_HOME/conf/hive-log4j2.properties.template $HIVE_HOME/conf/hive-log4j2.properties
# 修改参数为
property.hive.log.dir=/opt/module/hive/logs
修改hive-env.sh
mv $HIVE_HOME/conf/hive-env.sh.template $HIVE_HOME/conf/hive-env.sh
# 修改参数为
export HADOOP_HEAPSIZE=2048
- 初始化
cd /opt/module/hive
bin/schematool -dbType mysql -initSchema -verbose
- 验证
bin/hive
启动无报错则初始化成功
mysql -uroot -p"123456"
show databases;
use metastore;
有相关信息则初始化成功
Hiveserver2配置
- 修改hadoop配置
vim $HADOOP_HOME/etc/hadoop/core-site.xml
追加配置
<!--配置所有节点的hadoop用户都可作为代理用户-->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<!--配置hadoop用户能够代理的用户组为任意组-->
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<!--配置hadoop用户能够代理的用户为任意用户-->
<property>
<name>hadoop.proxyuser.hadoop.users</name>
<value>*</value>
</property>
- Hive配置
vim hive-site.xml
追加配置
<!-- 指定hiveserver2连接的host -->
<property>
<name>hive.server2.thrift.bind.host</name>
<value>hive001</value>
</property>
<!-- 指定hiveserver2连接的端口号 -->
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
- 测试
bin/hive --service hiveserver2
bin/beeline -u jdbc:hive2://hive001:10000 -n hadoop
如下,则配置完成
Connecting to jdbc:hive2://hive001:10000
Connected to: Apache Hive (version 3.1.2)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 3.1.2 by Apache Hive
0: jdbc:hive2://hive001:10000>
可以继续使用IDEA的数据库功能连接测试
- metastore独立服务模式
配置hive-site.xml
,追加
<!-- 指定metastore服务的地址 -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://hive001:9083</value>
</property>
脚本配置
vim $HIVE_HOME/bin/hiveservices.sh
编辑内容
#!/bin/bash
# hiveservices.sh — 管理 Hive Metastore(9083)与 HiveServer2(10000)两个服务。
# Usage: hiveservices.sh start|stop|restart|status
# 依赖环境变量 HIVE_HOME;日志写入 $HIVE_HOME/logs。

HIVE_LOG_DIR="$HIVE_HOME/logs"
if [ ! -d "$HIVE_LOG_DIR" ]; then
  mkdir -p "$HIVE_LOG_DIR"
fi

# 检查进程是否运行正常,参数1为进程名,参数2为进程端口。
# stdout 输出按进程名匹配到的 pid(可能多个,空格分隔);
# 返回 0 表示进程存在且该端口确实被其中某个进程占用,否则返回 1。
function check_process()
{
  local pid ppid
  pid=$(ps -ef 2>/dev/null | grep -v grep | grep -i -- "$1" | awk '{print $2}')
  ppid=$(netstat -nltp 2>/dev/null | grep -- "$2" | awk '{print $7}' | cut -d '/' -f 1)
  echo "$pid"
  # 端口归属的 pid 必须出现在进程名匹配到的 pid 列表中(RHS 加引号 => 字面子串匹配)
  [[ "$pid" =~ "$ppid" ]] && [ -n "$ppid" ] && return 0 || return 1
}

# 启动 Metastore 与 HiveServer2;已在运行则仅提示,不重复启动。
function hive_start()
{
  local metapid server2pid cmd
  metapid=$(check_process HiveMetastore 9083)
  cmd="nohup hive --service metastore >$HIVE_LOG_DIR/metastore.log 2>&1 &"
  [ -z "$metapid" ] && eval "$cmd" || echo "Metastore服务已启动"
  server2pid=$(check_process HiveServer2 10000)
  cmd="nohup hive --service hiveserver2 >$HIVE_LOG_DIR/hiveServer2.log 2>&1 &"
  [ -z "$server2pid" ] && eval "$cmd" || echo "HiveServer2服务已启动"
}

# 停止 Metastore 与 HiveServer2;未运行则仅提示。
function hive_stop()
{
  local metapid server2pid
  metapid=$(check_process HiveMetastore 9083)
  # pid 可能包含多个进程号,此处有意不加引号以按空白拆分成多个 kill 参数
  [ -n "$metapid" ] && kill $metapid || echo "Metastore服务未启动"
  server2pid=$(check_process HiveServer2 10000)
  [ -n "$server2pid" ] && kill $server2pid || echo "HiveServer2服务未启动"
}

case "$1" in
"start")
  hive_start
  ;;
"stop")
  hive_stop
  ;;
"restart")
  hive_stop
  sleep 2
  hive_start
  ;;
"status")
  check_process HiveMetastore 9083 >/dev/null && echo "Metastore服务运行正常" || echo "Metastore服务运行异常"
  check_process HiveServer2 10000 >/dev/null && echo "HiveServer2服务运行正常" || echo "HiveServer2服务运行异常"
  ;;
*)
  echo "Invalid Args!"
  echo "Usage: $(basename "$0") start|stop|restart|status"
  ;;
esac
添加执行权限
chmod +x $HIVE_HOME/bin/hiveservices.sh
后台启动
hiveservices.sh start