数据导出

ads.ads_trade_order_analysis 分区表,使用DataX导出到MySQL

  • 在MySQL创建对应的表

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    drop table if exists dwads.ads_trade_order_analysis;
    create table dwads.ads_trade_order_analysis(
    areatype varchar(100), -- 区域范围:区域类型(全国、大 区、城市)
    regionname varchar(100), -- 区域名称
    cityname varchar(100), -- 城市名称
    categorytype varchar(100), -- 商品分类类型(一级、二级)
    category1 varchar(100), -- 商品一级分类名称
    category2 varchar(100), -- 商品二级分类名称
    totalcount BIG INT, -- 订单数量
    total_productnum BIG INT, -- 商品数量
    totalmoney double,
    dt varchar(10)
    );
  • 创建配置文件(json)

    创建/data/script/trade/ads_trade_order_analysis.json

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    {
    "job":{
    "setting":{
    "speed":{
    "channel":1
    }
    },
    "content":[
    {
    "reader":{
    "name":"hdfsreader",
    "parameter":{
    "path":"/user/hive/warehouse/ads.db/ads_trade_order_analysis/dt=$do _date/*",
    "defaultFS":"hdfs://192.168.91.121:9000",
    "column":[
    {
    "index":0,
    "type":"string"
    },
    {
    "index":1,
    "type":"string"
    },
    {
    "index":2,
    "type":"string"
    },
    {
    "index":3,
    "type":"string"
    },
    {
    "index":4,
    "type":"string"
    },
    {
    "index":5,
    "type":"string"
    },
    {
    "index":6,
    "type":"string"
    },
    {
    "index":7,
    "type":"string"
    },
    {
    "index":8,
    "type":"string"
    },
    {
    "type":"string",
    "value":"$do_date"
    }
    ],
    "fileType":"text",
    "encoding":"UTF-8",
    "fieldDelimiter":","
    }
    },
    "writer":{
    "name":"mysqlwriter",
    "parameter":{
    "writeMode":"insert",
    "username":"hive",
    "password":"12345678",
    "column":[
    "areatype",
    "regionname",
    "cityname",
    "categorytype",
    "category1",
    "category2",
    "totalcount",
    "total_productnum",
    totalmoney",
    "dt"
    ],
    "preSql":[
    "delete from ads_trade_order_analysis where dt='$do_date'"
    ],
    "connection":[
    {
    "jdbcUrl":"jdbc:mysql://192.168.91.123:3306/dwads?useUnicode=true&characterEncoding=utf-8",
    "table":[
    "ads_trade_order_analysis"
    ]
    }
    ]
    }
    }
    }
    ]
    }
    }
  • 执行命令,使用json配置文件;测试

    1
    python /data/modules/datax/bin/datax.py -p "-Ddo_date=2020-07-12" /data/script/trade/ads_trade_order_analysis.json
  • 编写执行脚本(shell)

    创建/data/script/trade/ads_trade_order_analysis.sh

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    #!/bin/bash
    source /etc/profile
    JSON=/data/script

    if [ -n "$1" ]
    then
    do_date=$1
    else
    do_date=`date -d "-1 day" +%F`
    fi

    python $DATAX_HOME/bin/datax.py -p "-Ddo_date=$do_date" $JSON/trade/ads_trade_order_analysis.json
  • shell脚本的测试

    1
    sh /data/script/trade/ads_trade_order_analysis.sh 2020-07-12

小结


脚本调用次序:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 加载ODS数据(含DataX迁移数据)
/data/script/trade/ods_load_trade.sh

# 加载DIM层数据
/data/script/trade/dim_load_product_cat.sh /data/lagoudw/script/trade/dim_load_shop_org.sh
/data/script/trade/dim_load_payment.sh /data/lagoudw/script/trade/dim_load_product_info.sh

# 加载DWD层数据
/data/script/trade/dwd_load_trade_orders.sh

# 加载DWS层数据
/data/script/trade/dws_load_trade_orders.sh

# 加载ADS层数据
/data/script/trade/ads_load_trade_order_analysis.sh

主要技术点:

  • 拉链表。创建、使用与回滚;商品信息表、订单表(周期性事实表;分区表+拉链表)

  • 宽表(逆规范化):商品分类表、商品地域组织表、订单明细及订单明细宽表(轻度汇总的事实表)