ODS建表:

ODS层的表结构与源数据基本类似(列名及数据类型);

ODS层的表名遵循统一的规范;

ODS层建表

所有的表都是分区表;字段之间的分隔符为 , ;为表的数据数据文件指定了位置;

交易订单表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
DROP TABLE IF EXISTS `ods.ods_trade_orders`;
CREATE EXTERNAL TABLE `ods.ods_trade_orders`(
`orderid` int,
`orderno` string,
`userid` bigint,
`status` tinyint,
`productmoney` decimal(10, 0),
`totalmoney` decimal(10, 0),
`paymethod` tinyint,
`ispay` tinyint,
`areaid` int,
`tradesrc` tinyint,
`tradetype` int,
`isrefund` tinyint,
`dataflag` tinyint,
`createtime` string,
`paytime` string,
`modifiedtime` string
)
COMMENT '交易订单表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/hive/warehouse/trade.db/orders/';

订单产品表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
DROP TABLE IF EXISTS `ods.ods_trade_order_product`;
CREATE EXTERNAL TABLE `ods.ods_trade_order_product`(
`id` string,
`orderid` decimal(10,2),
`productid` string,
`productnum` string,
`productprice` string,
`money` string,
`extra` string,
`createtime` string
)
COMMENT '订单产品表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/hive/warehouse/trade.db/order_product/';

产品信息表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
DROP TABLE IF EXISTS `ods.ods_trade_product_info`;
CREATE EXTERNAL TABLE `ods.ods_trade_product_info`(
`productid` bigint,
`productname` string,
`shopid` string,
`price` decimal(10,0),
`issale` tinyint,
`status` tinyint,
`categoryid` string,
`createtime` string,
`modifytime` string
)
COMMENT '产品信息表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/hive/warehouse/trade.db/product_info/';

产品分类表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
DROP TABLE IF EXISTS `ods.ods_trade_product_category`;
CREATE EXTERNAL TABLE `ods.ods_trade_product_category`(
`catid` int,
`parentid` int,
`catname` string,
`isshow` tinyint,
`sortnum` int,
`isdel` tinyint,
`createtime` string,
`level` tinyint
)
COMMENT '产品分类表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/hive/warehouse/trade.db/product_category';

商家店铺表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
DROP TABLE IF EXISTS `ods.ods_trade_shops`;
CREATE EXTERNAL TABLE `ods.ods_trade_shops`(
`shopid` int,
`userid` int,
`areaid` int,
`shopname` string,
`shoplevel` tinyint,
`status` tinyint,
`createtime` string,
`modifytime` string
)
COMMENT '商家店铺表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/hive/warehouse/trade.db/shops';

商家地域组织表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
DROP TABLE IF EXISTS `ods.ods_trade_shop_admin_org`;
CREATE EXTERNAL TABLE `ods.ods_trade_shop_admin_org`(
`id` int,
`parentid` int,
`orgname` string,
`orglevel` tinyint,
`isdelete` tinyint,
`createtime` string,
`updatetime` string,
`isshow` tinyint,
`orgType` tinyint
)
COMMENT '商家地域组织表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/hive/warehouse/trade.db/shop_org/';

支付方式表

1
2
3
4
5
6
7
8
9
10
11
12
13
DROP TABLE IF EXISTS `ods.ods_trade_payments`;
CREATE EXTERNAL TABLE `ods.ods_trade_payments`(
`id` string,
`paymethod` string,
`payname` string,
`description` string,
`payorder` int,
`online` tinyint
)
COMMENT '支付方式表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by ','
location '/user/hive/warehouse/trade.db/payments/';

ODS层数据加载

DataX仅仅是将数据导入到了 HDFS ,数据并没有与Hive表建立关联。

脚本的任务:数据迁移、数据加载到ODS层。

对于增量加载数据而言:初始数据加载;该任务仅执行一次,不在脚本中。

创建/data/script/trade/ods_load_trade.sh。ods层数据加载。

1
2
3
cd /data/script/trade

vim ods_load_trade.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/bash
source /etc/profile

if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi

# 创建目录
hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/product_category/dt=$do_date
hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/shops/dt=$do_date
hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/shop_org/dt=$do_date
hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/payments/dt=$do_date
hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/orders/dt=$do_date
hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/order_product/dt=$do_date
hdfs dfs -mkdir -p /user/hive/warehouse/trade.db/product_info/dt=$do_date

# 数据迁移
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/product_category.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/shops.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/shop_org.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/payments.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/orders.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/order_product.json
python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" /data/script/datax_json/product_info.json

# 加载 ODS 层数据
sql="
alter table ods.ods_trade_orders add partition(dt='$do_date');
alter table ods.ods_trade_order_product add partition(dt='$do_date');
alter table ods.ods_trade_product_info add partition(dt='$do_date');
alter table ods.ods_trade_product_category add partition(dt='$do_date');
alter table ods.ods_trade_shops add partition(dt='$do_date');
alter table ods.ods_trade_shop_admin_org add partition(dt='$do_date');
alter table ods.ods_trade_payments add partition(dt='$do_date');
"

hive -e "$sql"

创建/data/script/trade/ods_delete_trade.sh。ods层数据删除。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/bin/bash
source /etc/profile

if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi

# 删除目录
hdfs dfs -rm -r /user/hive/warehouse/trade.db/product_category/dt=$do_date
hdfs dfs -rm -r /user/hive/warehouse/trade.db/shops/dt=$do_date
hdfs dfs -rm -r /user//hive/warehouse/trade.db/shop_org/dt=$do_date
hdfs dfs -rm -r /user//hive/warehouse/trade.db/payments/dt=$do_date
hdfs dfs -rm -r /user//hive/warehouse/trade.db/orders/dt=$do_date
hdfs dfs -rm -r /user//hive/warehouse/trade.db/order_product/dt=$do_date
hdfs dfs -rm -r /user//hive/warehouse/trade.db/product_info/dt=$do_date

# 加载 ODS 层数据
sql="
alter table ods.ods_trade_orders drop partition(dt='$do_date');
alter table ods.ods_trade_order_product drop partition(dt='$do_date');
alter table ods.ods_trade_product_info drop partition(dt='$do_date');
alter table ods.ods_trade_product_category drop partition(dt='$do_date');
alter table ods.ods_trade_shops drop partition(dt='$do_date');
alter table ods.ods_trade_shop_admin_org drop partition(dt='$do_date');
alter table ods.ods_trade_payments drop partition(dt='$do_date');
"

hive -e "$sql"

特点:工作量大,繁琐,容易出错;与数据采集工作在一起;