Hive库表元数据批量导出
- 应大数据同事要求,需要将 market 开头的hive库进行元数据备份,清单如下:
[hadoop@namenode1 ~]$ hadoop fs -du -h /user/hive/warehouse | sort -hrk1 | grep "/market"
781.3 M 2.3 G /user/hive/warehouse/market_metabase.db
513.6 M 1.5 G /user/hive/warehouse/market_dm.db
505.0 M 1.5 G /user/hive/warehouse/market_bdp.db
461.7 M 1.4 G /user/hive/warehouse/market_risk_box_mysql.db
167.7 M 503.0 M /user/hive/warehouse/market_risk.db
71.9 M 215.8 M /user/hive/warehouse/market_monitor_platform.db
51.7 G 155.2 G /user/hive/warehouse/market_asset.db
49.1 K 147.4 K /user/hive/warehouse/market_shwl.db
6.1 G 18.4 G /user/hive/warehouse/market_zt.db
6.0 M 18.0 M /user/hive/warehouse/market_fkd.db
5.3 M 15.8 M /user/hive/warehouse/market_wlj.db
3.3 M 9.9 M /user/hive/warehouse/market_v.db
2.9 M 8.6 M /user/hive/warehouse/market_hadoop_10.db
1.7 M 5.0 M /user/hive/warehouse/market_bxgt.db
- 为此编写了一个对hive库进行ddl备份的批量脚本,仅供参考:
#!/bin/bash
hive=jdbc:hive2://192.168.1.6:10000
date=$(date +"%Y%m%d")
dir=/opt/hive_market_ddl
mkdir -p $dir
exec=$(beeline -u $hive -n hive -e "show databases like 'market.*';" > $dir/dblist.txt)
databases=(${exec//\\n/})
for db in $(cat $dir/dblist.txt)
do
if [[ $db != +* && $db != _* && $db != \| && $db != database_name ]]
then
echo "CREATE DATABASE IF NOT EXISTS $db;" > $dir/${db}_$date.sql
exec=$(beeline -u $hive -n hive -e "use $db; show tables;")
exec=${exec//|/}
tables=(${exec//\\n/})
for tab in "${tables[@]}"
do
if [[ $tab != +* && $tab != tab_name ]]
then
echo "数据表: "$tab
exec_create_table_ddl=$(beeline -u $hive -n hive -e "use $db; show create table $tab;")
lines=(${exec_create_table_ddl//\|/})
ddl=''
for line in "${lines[@]}"
do
if [[ $line == ROW ]]; then
break
elif [[ $line != +* && $line != createtab_stmt ]]; then
ddl="$ddl $line"
fi
done;
ddl="$ddl ;"
echo $ddl >> $dir/${db}_$date.sql
fi
done;
fi
done