活跃会员:打开应用的会员即为活跃会员;

新增会员:第一次使用应用的用户,定义为新增会员;

留存会员:某段时间的新增会员,经过一段时间后,仍继续使用应用认为是留存会员;

活跃会员指标需求:每日、每周、每月的活跃会员数

DWD:会员的每日启动信息明细(会员都是活跃会员;某个会员可能会出现多次)

DWS:每日活跃会员信息(关键)、每周活跃会员信息、每月活跃会员信息

每日活跃会员信息 ===> 每周活跃会员信息

每日活跃会员信息 ===> 每月活跃会员信息

ADS:每日、每周、每月活跃会员数(输出)

1
2
ADS表结构:
daycnt weekcnt monthcnt dt

备注:周、月为自然周、自然月

处理过程:

1、建表(每日、每周、每月活跃会员信息)

2、每日启动明细 ===> 每日活跃会员

3、每日活跃会员 => 每周活跃会员;每日活跃会员 => 每月活跃会员

4、汇总生成ADS层的数据

创建DWS层表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
use dws;

drop table if exists dws.dws_member_start_day;

create table dws.dws_member_start_day (
`device_id` string,
`uid` string,
`app_v` string,
`os_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string
)
COMMENT '会员日启动汇总'
partitioned by(dt string)
stored as parquet;

drop table if exists dws.dws_member_start_week;

create table dws.dws_member_start_week(
`device_id` string,
`uid` string,
`app_v` string,
`os_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string,
`week` string
)
COMMENT '会员周启动汇总'
PARTITIONED BY (`dt` string)
stored as parquet;

drop table if exists dws.dws_member_start_month;

create table dws.dws_member_start_month(
`device_id` string,
`uid` string,
`app_v` string,
`os_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string,
`month` string
)
COMMENT '会员月启动汇总'
PARTITIONED BY (`dt` string)
stored as parquet;

加载DWS层数据

创建script/member_active/dws_load_member_start.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/bin/bash
source /etc/profile

# 可以输入日期;如果未输入日期取昨天的时间
if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi

# 定义要执行的SQL
# 汇总得到每日活跃会员信息;每日数据汇总得到每周、每月数据
sql="
insert overwrite table dws.dws_member_start_day
partition(dt='$do_date')
select device_id,
concat_ws('|', collect_set(uid)),
concat_ws('|', collect_set(app_v)),
concat_ws('|', collect_set(os_type)),
concat_ws('|', collect_set(language)),
concat_ws('|', collect_set(channel)),
concat_ws('|', collect_set(area)),
concat_ws('|', collect_set(brand))
from dwd.dwd_start_log
where dt='$do_date'
group by device_id;

-- 汇总得到每周活跃会员
insert overwrite table dws.dws_member_start_week
partition(dt='$do_date')
select device_id,
concat_ws('|', collect_set(uid)),
concat_ws('|', collect_set(app_v)),
concat_ws('|', collect_set(os_type)),
concat_ws('|', collect_set(language)),
concat_ws('|', collect_set(channel)),
concat_ws('|', collect_set(area)),
concat_ws('|', collect_set(brand)),
date_add(next_day('$do_date', 'mo'), -7)
from dws.dws_member_start_day
where dt >= date_add(next_day('$do_date', 'mo'), -7)
and dt <= '$do_date'
group by device_id;

-- 汇总得到每月活跃会员
insert overwrite table dws.dws_member_start_month
partition(dt='$do_date')
select device_id,
concat_ws('|', collect_set(uid)),
concat_ws('|', collect_set(app_v)),
concat_ws('|', collect_set(os_type)),
concat_ws('|', collect_set(language)),
concat_ws('|', collect_set(channel)),
concat_ws('|', collect_set(area)),
concat_ws('|', collect_set(brand)),
date_format('$do_date', 'yyyy-MM')
from dws.dws_member_start_day
where dt >= date_format('$do_date', 'yyyy-MM-01')
and dt <= '$do_date'
group by device_id;
"

hive -e "$sql"

注意shell的引号

ODS => DWD => DWS(每日、每周、每月活跃会员的汇总表)

创建ADS层表

计算当天、当周、当月活跃会员数量

1
2
3
4
5
6
7
8
9
10
11
12
use ADS;

drop table if exists ads.ads_member_active_count;

create table ads.ads_member_active_count(
`day_count` int COMMENT '当日会员数量',
`week_count` int COMMENT '当周会员数量',
`month_count` int COMMENT '当月会员数量'
)
COMMENT '活跃会员数'
partitioned by(dt string)
row format delimited fields terminated by ',';

加载ADS层数据

创建script/member_active/ads_load_member_active.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/bash
source /etc/profile

if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi

sql="
with tmp as(
select 'day' datelabel, count(*) cnt, dt
from dws.dws_member_start_day
where dt='$do_date'
group by dt
union all
select 'week' datelabel, count(*) cnt, dt
from dws.dws_member_start_week
where dt='$do_date'
group by dt
union all
select 'month' datelabel, count(*) cnt, dt
from dws.dws_member_start_month
where dt='$do_date'
group by dt
)
insert overwrite table ads.ads_member_active_count
partition(dt='$do_date')
select sum(case when datelabel='day' then cnt end) as day_count,
sum(case when datelabel='week' then cnt end) as week_count,
sum(case when datelabel='month' then cnt end) as month_count
from tmp
group by dt;
"

hive -e "$sql"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
source /etc/profile

if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi

sql="
insert overwrite table ads.ads_member_active_count
partition(dt='$do_date')
select daycnt, weekcnt, monthcnt
from (
select dt, count(*) daycnt
from dws.dws_member_start_day
where dt='$do_date'
group by dt
) day
join (
select dt, count(*) weekcnt
from dws.dws_member_start_week
where dt='$do_date'
group by dt
) week on day.dt=week.dt
join (
select dt, count(*) monthcnt
from dws.dws_member_start_month
where dt='$do_date'
group by dt
) month on day.dt=month.dt;
"

hive -e "$sql"

小结