Decisions already made: use DataX, and export the data of 7 tables.

MySQL export modes: full export and incremental export (exporting the previous day's data).

The business data lives in MySQL; every day in the early morning, the previous day's table data is imported.

  • Tables with little data are exported from MySQL in full.

  • Tables with a lot of data, where a column can identify each day's newly added records, are exported from MySQL incrementally.


3 incremental tables:

  • Orders table: lagou_trade_orders

  • Order products table: lagou_order_product

  • Product info table: lagou_product_info

4 full tables:

  • Product category table: lagou_product_category

  • Merchant shops table: lagou_shops

  • Merchant regional organization table: lagou_shop_admin_org

  • Payment methods table: lagou_payments

Full data import

MySQL => HDFS => Hive

Load the full data every day and write it into a new partition; this also determines how the ODS tables should be created (see the sketch below).

MySQLReader ===> HdfsWriter
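Every daily full load lands as comma-delimited text files in a new dt=... directory on HDFS, so the matching ODS tables should be partitioned tables whose location and field delimiter line up with the HdfsWriter settings below. A minimal sketch for the product category table, assuming a Hive database named trade and column types taken from the DataX config (the actual DDL may differ):

# Sketch only: an ODS table matching the HdfsWriter output below.
# The database name "trade" and the Hive column types are assumptions.
hive -e "
CREATE EXTERNAL TABLE IF NOT EXISTS trade.product_category (
    catId      int,
    parentId   int,
    catName    string,
    isShow     tinyint,
    sortNum    int,
    isDel      tinyint,
    createTime string,
    level      tinyint
)
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/user/hive/warehouse/trade.db/product_category/';
"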

Product category table

Create /data/script/datax_json/product_category.json

cd /data/script/datax_json

vim product_category.json

Contents of product_category.json:

{
    "job": {
        "setting": {
            "speed": {
                "channel": 1
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "root",
                        "password": "12345678",
                        "column": [
                            "catId", "parentId", "catName", "isShow",
                            "sortNum", "isDel", "createTime", "level"
                        ],
                        "connection": [
                            {
                                "table": ["lagou_product_category"],
                                "jdbcUrl": ["jdbc:mysql://192.168.91.123:3306/ebiz"]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://192.168.91.121:9000",
                        "fileType": "text",
                        "path": "/user/hive/warehouse/trade.db/product_category/dt=$do_date",
                        "fileName": "product_category_$do_date",
                        "column": [
                            { "name": "catId",      "type": "INT" },
                            { "name": "parentId",   "type": "INT" },
                            { "name": "catName",    "type": "STRING" },
                            { "name": "isShow",     "type": "TINYINT" },
                            { "name": "sortNum",    "type": "INT" },
                            { "name": "isDel",      "type": "TINYINT" },
                            { "name": "createTime", "type": "STRING" },
                            { "name": "level",      "type": "TINYINT" }
                        ],
                        "writeMode": "append",
                        "fieldDelimiter": ","
                    }
                }
            }
        ]
    }
}

Notes:

  • Small tables do not need more than one channel; using multiple channels produces multiple small files.

  • Before running the job, create the corresponding directory on HDFS: /user/hive/warehouse/trade.db/product_category/dt=yyyy-mm-dd

[root@Linux123 ~]# do_date='2020-07-01'

# Create the target directory
[root@Linux123 ~]# hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/product_category/dt=$do_date

# Run the data migration
[root@Linux123 ~]# python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/product_category.json
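
Writing files with DataX does not register the new dt partition with Hive; until the partition exists in the metastore, queries will not see the data. A hedged example, reusing the trade.product_category table name assumed in the earlier sketch:

# Register the freshly written partition (table name is the assumption from the sketch above).
hive -e "ALTER TABLE trade.product_category ADD IF NOT EXISTS PARTITION (dt='$do_date');"

Alternatively, MSCK REPAIR TABLE picks up all missing partitions in one pass. The same step applies to every other table in this section after each load.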

Merchant shops table

Create /data/script/datax_json/shops.json

cd /data/script/datax_json

vim shops.json

Contents of shops.json:

{
    "job": {
        "setting": {
            "speed": {
                "channel": 1
            },
            "errorLimit": {
                "record": 0
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "root",
                        "password": "12345678",
                        "column": [
                            "shopId", "userId", "areaId", "shopName",
                            "shopLevel", "status", "createTime", "modifyTime"
                        ],
                        "connection": [
                            {
                                "table": ["lagou_shops"],
                                "jdbcUrl": ["jdbc:mysql://Linux123:3306/ebiz"]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://Linux121:9000",
                        "fileType": "text",
                        "path": "/user/hive/warehouse/trade.db/shops/dt=$do_date",
                        "fileName": "shops_$do_date",
                        "column": [
                            { "name": "shopId",     "type": "INT" },
                            { "name": "userId",     "type": "INT" },
                            { "name": "areaId",     "type": "INT" },
                            { "name": "shopName",   "type": "STRING" },
                            { "name": "shopLevel",  "type": "TINYINT" },
                            { "name": "status",     "type": "TINYINT" },
                            { "name": "createTime", "type": "STRING" },
                            { "name": "modifyTime", "type": "STRING" }
                        ],
                        "writeMode": "append",
                        "fieldDelimiter": ","
                    }
                }
            }
        ]
    }
}
[root@Linux123 ~]# do_date='2020-07-01'

# Create the target directory
[root@Linux123 ~]# hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/shops/dt=$do_date

# Run the data migration
[root@Linux123 ~]# python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/shops.json

Merchant regional organization table

Create /data/script/datax_json/shop_org.json

cd /data/script/datax_json

vim shop_org.json

Contents of shop_org.json:

{
    "job": {
        "setting": {
            "speed": {
                "channel": 1
            },
            "errorLimit": {
                "record": 0
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "root",
                        "password": "12345678",
                        "column": [
                            "id", "parentId", "orgName", "orgLevel", "isDelete",
                            "createTime", "updateTime", "isShow", "orgType"
                        ],
                        "connection": [
                            {
                                "table": ["lagou_shop_admin_org"],
                                "jdbcUrl": ["jdbc:mysql://Linux123:3306/ebiz"]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://Linux121:9000",
                        "fileType": "text",
                        "path": "/user/hive/warehouse/trade.db/shop_org/dt=$do_date",
                        "fileName": "shop_admin_org_$do_date.dat",
                        "column": [
                            { "name": "id",         "type": "INT" },
                            { "name": "parentId",   "type": "INT" },
                            { "name": "orgName",    "type": "STRING" },
                            { "name": "orgLevel",   "type": "TINYINT" },
                            { "name": "isDelete",   "type": "TINYINT" },
                            { "name": "createTime", "type": "STRING" },
                            { "name": "updateTime", "type": "STRING" },
                            { "name": "isShow",     "type": "TINYINT" },
                            { "name": "orgType",    "type": "TINYINT" }
                        ],
                        "writeMode": "append",
                        "fieldDelimiter": ","
                    }
                }
            }
        ]
    }
}
[root@Linux123 ~]# do_date='2020-07-01'

# Create the target directory
[root@Linux123 ~]# hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/shop_org/dt=$do_date

# Run the data migration
[root@Linux123 ~]# python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/shop_org.json

Payment methods table

Create /data/script/datax_json/payments.json

cd /data/script/datax_json

vim payments.json

Contents of payments.json:

{
    "job": {
        "setting": {
            "speed": {
                "channel": 1
            },
            "errorLimit": {
                "record": 0
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "root",
                        "password": "12345678",
                        "column": [
                            "id", "payMethod", "payName",
                            "description", "payOrder", "online"
                        ],
                        "connection": [
                            {
                                "table": ["lagou_payments"],
                                "jdbcUrl": ["jdbc:mysql://Linux123:3306/ebiz"]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://Linux121:9000",
                        "fileType": "text",
                        "path": "/user/hive/warehouse/trade.db/payments/dt=$do_date",
                        "fileName": "payments_$do_date.dat",
                        "column": [
                            { "name": "id",          "type": "INT" },
                            { "name": "payMethod",   "type": "STRING" },
                            { "name": "payName",     "type": "STRING" },
                            { "name": "description", "type": "STRING" },
                            { "name": "payOrder",    "type": "INT" },
                            { "name": "online",      "type": "TINYINT" }
                        ],
                        "writeMode": "append",
                        "fieldDelimiter": ","
                    }
                }
            }
        ]
    }
}
[root@Linux123 ~]# do_date='2020-07-01'

# Create the target directory
[root@Linux123 ~]# hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/payments/dt=$do_date

# Run the data migration
[root@Linux123 ~]# python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/payments.json
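
The four full-load jobs above differ only in the JSON file name and the target HDFS directory, so they can be driven by one small parameterized script. A minimal sketch, assuming the JSON file names used in this section (logging and failure handling are omitted):

#!/bin/bash
# full_load.sh -- run the four full-load DataX jobs for one date.
# Usage: ./full_load.sh [yyyy-mm-dd]; defaults to yesterday.
do_date=${1:-$(date -d "-1 day" +%F)}

for tbl in product_category shops shop_org payments; do
    # The target partition directory must exist before HdfsWriter runs.
    hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/${tbl}/dt=${do_date}
    # Pass the date into the JSON config via -Ddo_date.
    python $DATAX_HOME/bin/datax.py -p "-Ddo_date=${do_date}" \
        /data/script/datax_json/${tbl}.json
done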

Incremental data import

Initial data load (run once); the full export described above can serve as this initial load.

Then load the incremental data every day (each day's data forms its own partition).
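
If historical days need to be backfilled as the initial load rather than reusing the full export, the same incremental jobs can simply be run once per historical date. A rough sketch for the orders table, assuming the orders.json created below and GNU date on the node:

# Backfill sketch: run the incremental orders job once for each historical date.
start_date='2020-07-01'
end_date='2020-07-12'
dt="$start_date"
while [ "$dt" != "$(date -d "$end_date +1 day" +%F)" ]; do
    hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/orders/dt=$dt
    python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$dt" /data/script/datax_json/orders.json
    dt=$(date -d "$dt +1 day" +%F)
done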

Trade orders table

Create /data/script/datax_json/orders.json

Note: the incremental condition is based on the time column modifiedTime.

cd /data/script/datax_json

vim orders.json

Contents of orders.json:

{
    "job": {
        "setting": {
            "speed": {
                "channel": 1
            },
            "errorLimit": {
                "record": 0
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "root",
                        "password": "12345678",
                        "connection": [
                            {
                                "querySql": [
                                    "select orderId, orderNo, userId, status, productMoney, totalMoney, payMethod, isPay, areaId, tradeSrc, tradeType, isRefund, dataFlag, createTime, payTime, modifiedTime from lagou_trade_orders where date_format(modifiedTime, '%Y-%m-%d')='$do_date'"
                                ],
                                "jdbcUrl": ["jdbc:mysql://Linux123:3306/ebiz"]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://Linux121:9000",
                        "fileType": "text",
                        "path": "/user/hive/warehouse/trade.db/orders/dt=$do_date",
                        "fileName": "orders_$do_date",
                        "column": [
                            { "name": "orderId",      "type": "INT" },
                            { "name": "orderNo",      "type": "STRING" },
                            { "name": "userId",       "type": "BIGINT" },
                            { "name": "status",       "type": "TINYINT" },
                            { "name": "productMoney", "type": "FLOAT" },
                            { "name": "totalMoney",   "type": "FLOAT" },
                            { "name": "payMethod",    "type": "TINYINT" },
                            { "name": "isPay",        "type": "TINYINT" },
                            { "name": "areaId",       "type": "INT" },
                            { "name": "tradeSrc",     "type": "TINYINT" },
                            { "name": "tradeType",    "type": "INT" },
                            { "name": "isRefund",     "type": "TINYINT" },
                            { "name": "dataFlag",     "type": "TINYINT" },
                            { "name": "createTime",   "type": "STRING" },
                            { "name": "payTime",      "type": "STRING" },
                            { "name": "modifiedTime", "type": "STRING" }
                        ],
                        "writeMode": "append",
                        "fieldDelimiter": ","
                    }
                }
            }
        ]
    }
}
[root@Linux123 ~]# do_date='2020-07-12'

# Create the target directory
[root@Linux123 ~]# hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/orders/dt=$do_date

# Run the data migration
[root@Linux123 ~]# python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/orders.json
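
After an incremental run it is worth checking that the number of rows landed on HDFS matches what MySQL holds for that day. A rough check, assuming the mysql command-line client is available on this node and the credentials from the JSON config:

# Lines written to HDFS for the day (one record per line in the text files).
hdfs dfs -cat /user/hive/warehouse/trade.db/orders/dt=$do_date/* | wc -l

# Rows MySQL says were modified on that day.
mysql -h Linux123 -uroot -p12345678 ebiz -N -e \
  "select count(*) from lagou_trade_orders where date_format(modifiedTime, '%Y-%m-%d')='$do_date'"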

Order products table

Create /data/script/datax_json/order_product.json

cd /data/script/datax_json

vim order_product.json

Contents of order_product.json:

{
    "job": {
        "setting": {
            "speed": {
                "channel": 1
            },
            "errorLimit": {
                "record": 0
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "root",
                        "password": "12345678",
                        "connection": [
                            {
                                "querySql": [
                                    "select id, orderId, productId, productNum, productPrice, money, extra, createTime from lagou_order_product where date_format(createTime, '%Y-%m-%d') = '$do_date'"
                                ],
                                "jdbcUrl": ["jdbc:mysql://Linux123:3306/ebiz"]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://Linux121:9000",
                        "fileType": "text",
                        "path": "/user/hive/warehouse/trade.db/order_product/dt=$do_date",
                        "fileName": "order_product_$do_date.dat",
                        "column": [
                            { "name": "id",           "type": "INT" },
                            { "name": "orderId",      "type": "INT" },
                            { "name": "productId",    "type": "INT" },
                            { "name": "productNum",   "type": "INT" },
                            { "name": "productPrice", "type": "FLOAT" },
                            { "name": "money",        "type": "FLOAT" },
                            { "name": "extra",        "type": "STRING" },
                            { "name": "createTime",   "type": "STRING" }
                        ],
                        "writeMode": "append",
                        "fieldDelimiter": ","
                    }
                }
            }
        ]
    }
}
[root@Linux123 ~]# do_date='2020-07-12'

# Create the target directory
[root@Linux123 ~]# hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/order_product/dt=$do_date

# Run the data migration
[root@Linux123 ~]# python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/order_product.json

Product info table

Create /data/script/datax_json/product_info.json

cd /data/script/datax_json

vim product_info.json

Contents of product_info.json:

{
    "job": {
        "setting": {
            "speed": {
                "channel": 1
            },
            "errorLimit": {
                "record": 0
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "root",
                        "password": "12345678",
                        "connection": [
                            {
                                "querySql": [
                                    "select productid, productname, shopid, price, issale, status, categoryid, createtime, modifytime from lagou_product_info where date_format(modifyTime, '%Y-%m-%d') = '$do_date'"
                                ],
                                "jdbcUrl": ["jdbc:mysql://Linux123:3306/ebiz"]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        "defaultFS": "hdfs://Linux121:9000",
                        "fileType": "text",
                        "path": "/user/hive/warehouse/trade.db/product_info/dt=$do_date",
                        "fileName": "product_info_$do_date.dat",
                        "column": [
                            { "name": "productid",   "type": "BIGINT" },
                            { "name": "productname", "type": "STRING" },
                            { "name": "shopid",      "type": "STRING" },
                            { "name": "price",       "type": "FLOAT" },
                            { "name": "issale",      "type": "TINYINT" },
                            { "name": "status",      "type": "TINYINT" },
                            { "name": "categoryid",  "type": "STRING" },
                            { "name": "createtime",  "type": "STRING" },
                            { "name": "modifytime",  "type": "STRING" }
                        ],
                        "writeMode": "append",
                        "fieldDelimiter": ","
                    }
                }
            }
        ]
    }
}
[root@Linux123 ~]# do_date='2020-07-12'

# Create the target directory
[root@Linux123 ~]# hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/product_info/dt=$do_date

# Run the data migration
[root@Linux123 ~]# python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/product_info.json
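
Like the full loads, the three incremental jobs can be wrapped into one daily script and scheduled from cron, so each morning the previous day's data is pulled in automatically. A sketch under the same file-name assumptions as above:

#!/bin/bash
# incr_load.sh -- daily incremental import of the three incremental tables.
# Usage: ./incr_load.sh [yyyy-mm-dd]; defaults to yesterday.
do_date=${1:-$(date -d "-1 day" +%F)}

for tbl in orders order_product product_info; do
    hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/${tbl}/dt=${do_date}
    python $DATAX_HOME/bin/datax.py -p "-Ddo_date=${do_date}" \
        /data/script/datax_json/${tbl}.json
done

# Example crontab entry (script path is an assumption): run daily at 01:00.
# 0 1 * * * /data/script/datax_json/incr_load.sh >> /var/log/incr_load.log 2>&1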