事件日志采集

  • 1、启动Flume Agent(适当的修改参数,128M滚动一次)

    1
    2
    # 启动flume
    flume-ng agent --conf /opt/lagou/servers/flume-1.9.0/conf --conf-file /opt/lagou/servers/flume-1.9.0/conf/flume-log2hdfs3.conf -name a1 -Dflume.root.logger=INFO,console
  • 2、生成数据(文件大小约640M,100W条事件日志)

    1
    2
    3
    cd /data/lagoudw/jars

    java -cp data-generator-1.1-SNAPSHOT-jar-with-dependencies.jar com.lagou.ecommerce.AppEvent 1000000 2020-08-02 > /data/lagoudw/logs/event/events0802.log
  • 3、数据采集完成后,检查HDFS结果

    1
    hdfs dfs -ls /user/data/logs/event

ODS层建表和数据加载

1
2
3
4
5
6
7
-- ODS-layer table over the raw event logs that Flume lands on HDFS.
-- EXTERNAL + TEXTFILE: Hive manages only the metadata, so dropping the
-- table leaves the raw files under LOCATION untouched.
DROP TABLE IF EXISTS ods.ods_log_event;
CREATE EXTERNAL TABLE ods.ods_log_event(
    `str` string COMMENT 'one raw event-log line per record'
)
PARTITIONED BY (`dt` string COMMENT 'partition date, yyyy-MM-dd')
STORED AS TEXTFILE
LOCATION '/user/data/logs/event';

创建/data/lagoudw/script/advertisement/ods_load_event_log.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
cd /data/lagoudw/script/advertisement

vim ods_load_event_log.sh

#!/bin/bash
# Register one dt partition of ods.ods_log_event so Hive can see the
# event-log files Flume already wrote under the table's LOCATION.
#
# Usage: ods_load_event_log.sh [yyyy-MM-dd]
#   With an argument: add the partition for that date.
#   Without:          default to yesterday (normal nightly-batch case).
source /etc/profile

if [ -n "$1" ]
then
    do_date=$1
else
    # $(...) is the modern, nestable form of command substitution
    # (preferred over legacy backticks).
    do_date=$(date -d "-1 day" +%F)
fi

# IF NOT EXISTS makes the script idempotent: re-running it for a date
# whose partition was already added is a no-op instead of an error.
sql="
alter table ods.ods_log_event add if not exists partition (dt='$do_date');
"

hive -e "$sql"

hive数据加载

1
2
3
4
# Backfill the ODS event-log partitions one day at a time.
for dt in 2020-07-20 2020-07-21 2020-07-22 2020-07-23
do
    sh ods_load_event_log.sh "$dt"
done