Common Big Data Scripts
One-Click Start
Start the common services with one click
#!/bin/bash
if [ -z "$1" ]
then echo "please input [start|stop]"
exit 1
fi
#start hadoop
echo " ----------- $1 dfs ------------ "
ssh root@node01 "source /etc/profile;${HADOOP_HOME}/sbin/${1}-dfs.sh"
echo " ----------- $1 yarn ---------- "
ssh root@node01 "source /etc/profile;${HADOOP_HOME}/sbin/${1}-yarn.sh"
sleep 1s
echo " ----------- $1 zookeeper ----------"
#start zookeeper
for (( i=1; i<=3; i++ ))
do
# "<< EOF" is just a delimiter; any other token works. Heredocs suit multi-line remote scripts (see the sketch after this script).
echo "node0$i zk ${1} ..."
ssh root@node0$i "source /etc/profile; zkServer.sh ${1}"
echo "node0$i ${1} 完成."
done
echo " ----------- $1 kafka ------------"
# start kafka
if [ "${1}" == 'stop' ]
then
for (( i=1; i<=3; i++ ))
do
echo "node0$i kafka ${1} ..."
ssh root@node0$i "source /etc/profile;${KAFKA_HOME}/bin/kafka-server-stop.sh"
# check on the remote host and force-kill any broker the graceful stop missed
if [ `ssh root@node0$i "ps -ef | grep Kafka | grep -v grep" | wc -l` -gt 0 ]; then
ssh root@node0$i "ps -ef | grep Kafka | grep -v grep | awk '{print \$2}' | xargs kill -9"
fi
echo "node0$i ${1} done."
done
else
for (( i=1; i<=3; i++ ))
do
echo "node0$i kafka ${1} ..."
ssh root@node0$i "source /etc/profile;${KAFKA_HOME}/bin/kafka-server-${1}.sh -daemon /export/servers/kafka/config/server.properties"
echo "node0$i ${1} 完成."
done
sleep 1s
fi
# start flink
# /export/servers/flink/bin/${1}-cluster.sh
# start dolphinscheduler
# /opt/soft/dolphinscheduler/bin/start-all.sh
# systemctl restart nginx
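A hypothetical invocation, assuming the script above is saved as cluster.sh:
./cluster.sh start
./cluster.sh stop
For longer remote command sequences, the heredoc mentioned in the comment above avoids nested-quoting headaches; a minimal sketch, reusing the node01 host from the script:
# run several commands on node01 in one ssh session; EOF is the delimiter token
ssh root@node01 << EOF
source /etc/profile
jps
df -h /export
EOF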
Check service startup status
#!/bin/bash
for i in node01 node02 node03
do
echo " <<<<<<<<<<<<<<<<<<<< $i $1 <<<<<<<<<<<<<<<<<<<<<"
ssh $i "source /etc/profile;$*"
done
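For example, to list the Java processes on every node (assuming the script is saved as xcall.sh):
./xcall.sh jps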
Kafka Start and Stop
Kafka one-click start
#!/bin/bash
KAFKA_HOME=/export/server/kafka_2.12-2.4.1
for number in {1..3}
do
host=node${number}
echo ${host}
/usr/bin/ssh ${host} "cd ${KAFKA_HOME};source /etc/profile;export JMX_PORT=9988;${KAFKA_HOME}/bin/kafka-server-start.sh ${KAFKA_HOME}/config/server.properties >>/dev/null 2>&1 &"
echo "${host} started"
done
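To confirm that all three brokers registered, one option is the zookeeper-shell tool bundled with Kafka (the node1:2181 address and the absence of a ZooKeeper chroot are assumptions; broker ids depend on each server.properties):
# expect a list such as [1, 2, 3], matching the configured broker.id values
${KAFKA_HOME}/bin/zookeeper-shell.sh node1:2181 ls /brokers/ids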
Kafka one-click stop
#!/bin/bash
KAFKA_HOME=/export/server/kafka_2.12-2.4.1
for number in {1..3}
do
host=node${number}
echo ${host}
/usr/bin/ssh ${host} "cd ${KAFKA_HOME};source /etc/profile;${KAFKA_HOME}/bin/kafka-server-stop.sh"
echo "${host} stoped"
done
Zookeeper
Zookeeper start
#!/bin/bash
ZK_HOME=/export/server/zookeeper-3.4.6
for number in {1..3}
do
host=node${number}
echo ${host}
/usr/bin/ssh ${host} "cd ${ZK_HOME};source /etc/profile;${ZK_HOME}/bin/zkServer.sh start"
echo "${host} started"
done
Zookeeper stop
#!/bin/bash
ZK_HOME=/export/server/zookeeper-3.4.6
for number in {1..3}
do
host=node${number}
echo ${host}
/usr/bin/ssh ${host} "cd ${ZK_HOME};source /etc/profile;${ZK_HOME}/bin/zkServer.sh stop"
echo "${host} stoped"
done
Zookeeper status check
#!/bin/bash
ZK_HOME=/export/server/zookeeper-3.4.6
for number in {1..3}
do
host=node${number}
echo ${host}
/usr/bin/ssh ${host} "cd ${ZK_HOME};source /etc/profile;${ZK_HOME}/bin/zkServer.sh status"
done
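Since zkServer.sh takes start, stop, and status alike, the three scripts above can collapse into one parameterized wrapper; a sketch, with the filename zk.sh assumed:
#!/bin/bash
# usage: ./zk.sh [start|stop|status]
if [ -z "$1" ]; then echo "usage: $0 [start|stop|status]"; exit 1; fi
ZK_HOME=/export/server/zookeeper-3.4.6
for number in {1..3}
do
host=node${number}
echo ${host}
/usr/bin/ssh ${host} "source /etc/profile;${ZK_HOME}/bin/zkServer.sh $1"
done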
Sqoop Data Extraction and Verification
Data extraction
export SQOOP_HOME=/export/server/sqoop-1.4.7.bin_hadoop-2.6.0
$SQOOP_HOME/bin/sqoop import \
--connect jdbc:mysql://192.168.88.163:3306/insurance \
--username root \
--password 123456 \
--table dd_table \
--hive-table insurance_ods.dd_table \
--hive-import \
--hive-overwrite \
--fields-terminated-by '\t' \
--delete-target-dir \
-m 1
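Passing --password on the command line leaves the credential visible in ps output and shell history. Sqoop's documented --password-file option reads it from a 400-permission file instead; a sketch, with the HDFS path /user/root/mysql.pwd assumed:
# store the password with no trailing newline, then lock down the file
echo -n "123456" > /tmp/mysql.pwd
hdfs dfs -put /tmp/mysql.pwd /user/root/mysql.pwd
hdfs dfs -chmod 400 /user/root/mysql.pwd
# then swap "--password 123456" for: --password-file /user/root/mysql.pwd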
Extraction and verification
export SQOOP_HOME=/export/server/sqoop-1.4.7.bin_hadoop-2.6.0
$SQOOP_HOME/bin/sqoop import \
--connect jdbc:mysql://192.168.88.163:3306/insurance \
--username root \
--password 123456 \
--table dd_table \
--hive-table insurance_ods.dd_table \
--hive-import \
--hive-overwrite \
--fields-terminated-by '\t' \
--delete-target-dir \
-m 1
# 1. Count the rows in the MySQL table dd_table
mysql_log=`$SQOOP_HOME/bin/sqoop eval \
--connect jdbc:mysql://192.168.88.163:3306/insurance \
--username root \
--password 123456 \
--query "select count(1) from dd_table"
`
mysql_cnt=`echo $mysql_log | awk -F'|' '{print $4}' | awk '{print $1}'`
# 2. Count the rows in the Hive table dd_table
hive_log=`hive -S -e "select count(1) from insurance_ods.dd_table"` # -S (silent) keeps job logs out of the captured value
# 3. Compare the two counts
if [ $mysql_cnt -eq $hive_log ] ; then
echo "MySQL table rows=$mysql_cnt, Hive table rows=$hive_log: equal"
else
echo "MySQL table rows=$mysql_cnt, Hive table rows=$hive_log: not equal"
fi
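The awk pipeline above relies on sqoop eval's usual ASCII-table output: flattened onto one line by echo, the fourth '|'-separated field holds the count, and the second awk trims the surrounding spaces. Illustrative output (the value 186 is made up):
# ------------------------
# | count(1)             |
# ------------------------
# | 186                  |
# ------------------------
# after echo, one line: "------------------------ | count(1) | ------------------------ | 186 | ..."
# with -F'|', field $4 is " 186 "; awk '{print $1}' yields 186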
Hive: Load Partition Data
#!/bin/bash
dt=`date -d '1 days ago' +'%Y%m%d'`
tableName=$1
ssh node03 "/export/server/hive/bin/hive -e \"use test_ods;alter table ${tableName} add partition(dt=${dt}) location 'hdfs://node1:8020/apps/warehouse/ods.db/${tableName}/${dt}'\""
if [ $? -eq 0 ]; then
echo "load $tableName partition $dt successful."
else
echo "load $tableName partition $dt error."
fi
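A hypothetical run, assuming the script is saved as load_partition.sh and an ODS table named user_log; show partitions then confirms the new partition:
./load_partition.sh user_log
hive -e "show partitions test_ods.user_log"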