新增会员:第一次使用应用的用户,定义为新增会员;卸载再次安装的设备,不会被算作一次新增。

新增会员先计算 => 计算会员留存

需求:每日新增会员数

计算步骤:

  • 计算新增会员

  • 更新所有会员信息

改进后方法:

  • 在所有会员信息中增加时间列,表示这个会员是哪一天成为新增会员

  • 只需要一张表:所有会员的信息(id,dt)

  • 将新增会员 插入所有会员表中

案例:如何计算新增会员

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
-- 日启动表 => DWS
drop table t1;
create table t1(
id int,
dt string
)
row format delimited fields terminated by ',';
load data local inpath '/data/lagoudw/data/t10.dat' into table t1;

-- 数据
4,2020-08-02
5,2020-08-02
6,2020-08-02
7,2020-08-02
8,2020-08-02
9,2020-08-02

-- 全量数据 => DWS
drop table t2;
create table t2(
id int,
dt string
)
row format delimited fields terminated by ',';
load data local inpath '/data/lagoudw/data/t2.dat' into table t2;

-- 数据
1,2020-08-01
2,2020-08-01
3,2020-08-01
4,2020-08-01
5,2020-08-01
6,2020-08-01

-- 找出 2020-08-02 的新用户
select t1.id, t1.dt, t2.id, t2.dt
from t1
left join t2 on t1.id=t2.id
where t1.dt="2020-08-02";

select t1.id, t1.dt
from t1
left join t2 on t1.id=t2.id
where t1.dt="2020-08-02"
and t2.id is null;

-- 将找到 2020-08-02 新用户数据插入t2表中
insert into table t2
select t1.id, t1.dt
from t1
left join t2 on t1.id=t2.id
where t1.dt="2020-08-02"
and t2.id is null;

-- 检查结果
select * from t2;

-- t1 加载 2020-08-03 的数据
14,2020-08-03
15,2020-08-03
16,2020-08-03
17,2020-08-03
18,2020-08-03
19,2020-08-03
load data local inpath '/data/lagoudw/data/t3.dat' into table t1;

-- 将找到 2020-08-03 新用户数据插入t2表中
insert into table t2
select t1.id, t1.dt
from t1
left join t2 on t1.id=t2.id
where t1.dt="2020-08-03"
and t2.id is null;

-- 检查结果
select * from t2;

创建DWS层表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
use DWS;

drop table if exists dws.dws_member_add_day;

create table dws.dws_member_add_day (
`device_id` string,
`uid` string,
`app_v` string,
`os_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string,
`dt` string
)
COMMENT '每日新增会员明细'
stored as parquet;

加载DWS层数据

创建script/member_active/dws_load_member_add_day.sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/bin/bash
source /etc/profile

if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi

sql="
insert into table dws.dws_member_add_day
select t1.device_id, t1.uid, t1.app_v, t1.os_type, t1.language, t1.channel, t1.area, t1.brand, '$do_date'
from dws.dws_member_start_day t1
left join dws.dws_member_add_day t2 on t1.device_id=t2.device_id
where t1.dt='$do_date'
and t2.device_id is null;
"

hive -e "$sql"

创建ADS层表

1
2
3
4
5
6
7
8
9
use ADS;

drop table if exists ads.ads_new_member_cnt;

create table ads.ads_new_member_cnt (
`cnt` string
)
partitioned by(dt string)
row format delimited fields terminated by ',';

加载ADS层数据

创建script/member_active/ads_load_member_add.sh

#!/bin/bash
source /etc/profile

if [ -n "$1" ]
then
  do_date=$1
else
  do_date=`date -d "-1 day" +%F`
fi

sql="
insert overwrite table ads.ads_new_member_cnt partition (dt='$do_date')
select count(1)
from dws.dws_member_add_day
where dt = '$do_date'
"

hive -e "$sql"

小结