会员活跃度分析之Datax数据导出
ADS有4张表需要从数据仓库的ADS层导入MySQL,即:Hive => MySQL
1 | ads.ads_member_active_count |
-
MySQL 建表
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41create database dwads;
-- 活跃会员数
drop table if exists dwads.ads_member_active_count;
create table dwads.ads_member_active_count(
`dt` varchar(10) COMMENT '统计日期',
`day_count` int COMMENT '当日会员数量',
`week_count` int COMMENT '当周会员数量',
`month_count` int COMMENT '当月会员数量',
primary key (dt)
);
-- 新增会员数
drop table if exists dwads.ads_new_member_cnt;
create table dwads.ads_new_member_cnt (
`dt` varchar(10) COMMENT '统计日期',
`cnt` int,
primary key (dt)
);
-- 会员留存数
drop table if exists dwads.ads_member_retention_count;
create table dwads.ads_member_retention_count (
`dt` varchar(10) COMMENT '统计日期',
`add_date` varchar(10) comment '新增日期',
`retention_day` int comment '截止当前日期留存天数',
`retention_count` bigint comment '留存数',
primary key (dt)
) COMMENT '会员留存情况';
-- 会员留存率
drop table if exists dwads.ads_member_retention_rate;
create table dwads.ads_member_retention_rate (
`dt` varchar(10) COMMENT '统计日期',
`add_date` varchar(10) comment '新增日期',
`retention_day` int comment '截止当前日期留存天数',
`retention_count` bigint comment '留存数',
`new_mid_count` bigint comment '当日会员新增数',
`retention_ratio` decimal(10,2) comment '留存率',
primary key (dt)
) COMMENT '会员留存率';-
导出活跃会员数(ads_member_active_count)
-
创建Datax的json配置文件
如:export_member_active_count.json,编写hdfsreader => mysqlwriter
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66{
"job":{
"setting":{
"speed":{
"channel":1
}
},
"content":[
{
"reader":{
"name":"hdfsreader",
"parameter":{
"path":"/user/hive/warehouse/ads.db/ads_member_active_count/dt=$do_date/*",
"defaultFS":"hdfs://192.168.91.121:9000",
"column":[
{
"type":"string",
"value":"$do_date"
},
{
"index":0,
"type":"string"
},
{
"index":1,
"type":"string"
},
{
"index":2,
"type":"string"
}
],
"fileType":"text",
"encoding":"UTF-8",
"fieldDelimiter":","
}
},
"writer":{
"name":"mysqlwriter",
"parameter":{
"writeMode":"replace",
"username":"hive",
"password":"12345678",
"column":[
"dt",
"day_count",
"week_count",
"month_count"
],
"preSql":[
""
],
"connection":[
{
"jdbcUrl":"jdbc:mysql://192.168.91.123:3306/dwads?useUnicode=true&characterEncoding=utf-8",
"table":[
"ads_member_active_count"
]
}
]
}
}
}
]
}
} -
执行Datax命令
1
2
3cd /opt/lagou/servers/datax/bin
python datax.py -p "-Ddo_date=2020-07-21" /data/script/datax_json/export_member_active_count.json -
编写导出脚本
如:export_member_active_count.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16cd /data/script/member_active
vim export_member_active_count.sh
#!/bin/bash
JSON=/data/script/datax_json
source /etc/profile
if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" $JSON/export_member_active_count.json -
执行导出脚本
1
sh export_member_active_count.sh 2020-07-22
-
-
导出新增会员数(ads_new_member_cnt)
-
创建Datax的json配置文件
如:export_new_member_cnt.json,编写hdfsreader => mysqlwriter
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56{
"job":{
"setting":{
"speed":{
"channel":1
}
},
"content":[
{
"reader":{
"name":"hdfsreader",
"parameter":{
"path":"/user/hive/warehouse/ads.db/ads_new_member_cnt/dt=$do_date/*",
"defaultFS":"hdfs://192.168.91.121:9000",
"column":[
{
"type":"string",
"value":"$do_date"
},
{
"index":0,
"type":"string"
}
],
"fileType":"text",
"encoding":"UTF-8",
"fieldDelimiter":","
}
},
"writer":{
"name":"mysqlwriter",
"parameter":{
"writeMode":"replace",
"username":"hive",
"password":"12345678",
"column":[
"dt",
"cnt"
],
"preSql":[
""
],
"connection":[
{
"jdbcUrl":"jdbc:mysql://192.168.91.123:3306/dwads?useUnicode=true&characterEncoding=utf-8",
"table":[
"ads_new_member_cnt"
]
}
]
}
}
}
]
}
} -
执行Datax命令
1
2
3cd /opt/lagou/servers/datax/bin
python datax.py -p "-Ddo_date=2020-07-21" /data/script/datax_json/export_new_member_cnt.json -
编写导出脚本
如:export_new_member_cnt.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16cd /data/script/member_active
vim export_new_member_cnt.sh
#!/bin/bash
JSON=/data/script/datax_json
source /etc/profile
if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" $JSON/export_new_member_cnt.json -
执行导出脚本
1
sh export_new_member_cnt.sh 2020-07-22
-
-
导出会员留存数(ads_member_retention_count)
-
创建Datax的json配置文件
如:export_member_retention_count.json,编写hdfsreader => mysqlwriter
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66{
"job":{
"setting":{
"speed":{
"channel":1
}
},
"content":[
{
"reader":{
"name":"hdfsreader",
"parameter":{
"path":"/user/hive/warehouse/ads.db/ads_member_retention_count/dt=$do_date/*",
"defaultFS":"hdfs://192.168.91.121:9000",
"column":[
{
"type":"string",
"value":"$do_date"
},
{
"index":0,
"type":"string"
},
{
"index":1,
"type":"string"
},
{
"index":2,
"type":"string"
}
],
"fileType":"text",
"encoding":"UTF-8",
"fieldDelimiter":","
}
},
"writer":{
"name":"mysqlwriter",
"parameter":{
"writeMode":"replace",
"username":"hive",
"password":"12345678",
"column":[
"dt",
"add_date",
"retention_day",
"retention_count"
],
"preSql":[
""
],
"connection":[
{
"jdbcUrl":"jdbc:mysql://192.168.91.123:3306/dwads?useUnicode=true&characterEncoding=utf-8",
"table":[
"ads_member_retention_count"
]
}
]
}
}
}
]
}
} -
执行Datax命令
1
2
3cd /opt/lagou/servers/datax/bin
python datax.py -p "-Ddo_date=2020-07-21" /data/script/datax_json/export_member_retention_count.json -
编写导出脚本
如:export_member_retention_count.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16cd /data/script/member_active
vim export_member_retention_count.sh
#!/bin/bash
JSON=/data/script/datax_json
source /etc/profile
if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" $JSON/export_member_retention_count.json -
执行导出脚本
1
sh export_member_retention_count.sh 2020-07-22
-
-
导出会员留存率(ads_member_retention_rate)
-
创建Datax的json配置文件
如:export_member_retention_rate.json,编写hdfsreader => mysqlwriter
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76{
"job":{
"setting":{
"speed":{
"channel":1
}
},
"content":[
{
"reader":{
"name":"hdfsreader",
"parameter":{
"path":"/user/hive/warehouse/ads.db/ads_member_retention_rate/dt=$do_date/*",
"defaultFS":"hdfs://192.168.91.121:9000",
"column":[
{
"type":"string",
"value":"$do_date"
},
{
"index":0,
"type":"string"
},
{
"index":1,
"type":"string"
},
{
"index":2,
"type":"string"
},
{
"index":3,
"type":"string"
},
{
"index":4,
"type":"string"
}
],
"fileType":"text",
"encoding":"UTF-8",
"fieldDelimiter":","
}
},
"writer":{
"name":"mysqlwriter",
"parameter":{
"writeMode":"replace",
"username":"hive",
"password":"12345678",
"column":[
"dt",
"add_date",
"retention_day",
"retention_count",
"new_mid_count",
"retention_ratio"
],
"preSql":[
""
],
"connection":[
{
"jdbcUrl":"jdbc:mysql://192.168.91.123:3306/dwads?useUnicode=true&characterEncoding=utf-8",
"table":[
"ads_member_retention_rate"
]
}
]
}
}
}
]
}
} -
执行Datax命令
1
2
3cd /opt/lagou/servers/datax/bin
python datax.py -p "-Ddo_date=2020-07-21" /data/script/datax_json/export_member_retention_rate.json -
编写导出脚本
如:export_member_retention_rate.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16cd /data/script/member_active
vim export_member_retention_rate.sh
#!/bin/bash
JSON=/data/script/datax_json
source /etc/profile
if [ -n "$1" ]
then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" $JSON/export_member_retention_rate.json -
执行导出脚本
1
sh export_member_retention_rate.sh 2020-07-22
-
-
本博客所有文章除特别声明外,均采用 CC BY-NC-SA 4.0 许可协议。转载请注明来自 WeiJia_Rao!