查看系统函数

1
2
3
4
5
6
7
8
-- 查看系统自带函数
show functions;

-- 显示函数的用法信息
desc function upper;

-- 显示函数的用法信息并且有样例
desc function extended upper;

日期函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
-- 当前前日期
select current_date;
OK
_c0
2022-06-20

select unix_timestamp();
OK
_c0
1655712764

-- 建议使用current_timestamp,有没有括号都可以
select current_timestamp();
OK
_c0
2022-06-20 09:13:14.557

-- 时间戳转日期
select from_unixtime(1505456567);
OK
_c0
2017-09-15 07:22:47

select from_unixtime(1505456567, 'yyyyMMdd');
OK
_c0
20170915

select from_unixtime(1505456567, 'yyyy-MM-dd HH:mm:ss');
OK
_c0
2017-09-15 07:22:47

-- 日期转时间戳
select unix_timestamp('2019-09-15 14:23:00');
OK
_c0
1568553780

-- 计算时间差
select datediff('2020-04-18','2019-11-21');
OK
_c0
149

select datediff('2019-11-21', '2020-04-18');
OK
_c0
-149

-- 查询当月第几天
select dayofmonth(current_date);
OK
_c0
20

-- 计算月末:
select last_day(current_date);
OK
_c0
2022-06-30

-- 当月第1天:
select date_sub(current_date, dayofmonth(current_date)-1);
OK
_c0
2022-06-01

-- 下个月第1天:
select add_months(date_sub(current_date, dayofmonth(current_date)-1), 1)
OK
_c0
2022-07-01

-- 字符串转时间(字符串必须为:yyyy-MM-dd格式)
select to_date('2020-01-01');
OK
_c0
2020-01-01

select to_date('2020-01-01 12:12:12');
OK
_c0
2020-01-01

-- 日期、时间戳、字符串类型格式化输出标准时间格式
select date_format(current_timestamp(), 'yyyy-MM-dd HH:mm:ss');
OK
_c0
2022-06-20 09:30:17

select date_format(current_date(), 'yyyyMMdd');
OK
_c0
20220620

select date_format('2020-06-01', 'yyyy-MM-dd HH:mm:ss');
OK
_c0
2020-06-01 00:00:00

-- 计算emp表中,每个人的工龄
select *, round(datediff(current_date, hiredate)/365,1) workingyears
from emp;
OK
emp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno workingyears
NULL CLERK 7902 NULL NULL NULL 20 NULL NULL
7499 ALLEN SALESMAN 7698 2011-02-20 1600 300 30 11.3
7521 WARD SALESMAN 7698 2011-02-22 1250 500 30 11.3
7566 JONES MANAGER 7839 2011-04-02 2975 NULL 20 11.2
7654 MARTIN SALESMAN 7698 2011-09-28 1250 1400 30 10.7
7698 BLAKE MANAGER 7839 2011-05-01 2850 NULL 30 11.1
7782 CLARK MANAGER 7839 2011-06-09 2450 NULL 10 11.0
7788 SCOTT ANALYST 7566 2017-07-13 3000 NULL 20 4.9
7839 KING PRESIDENT NULL 2011-11-07 5000 NULL 10 10.6
7844 TURNER SALESMAN 7698 2011-09-08 1500 0 30 10.8
7876 ADAMS CLERK 7788 2017-07-13 1100 NULL 20 4.9
7900 JAMES CLERK 7698 2011-12-03 950 NULL 30 10.6
7902 FORD ANALYST 7566 2011-12-03 3000 NULL 20 10.6
7934 MILLER CLERK 7782 2012-01-23 1300 NULL 10 10.4

字符串函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
-- 转小写。lower
select lower("HELLO WORLD");
OK
_c0
hello world

-- 转大写。upper
select lower(ename), ename from emp;
OK
_c0 ename
clerk CLERK
allen ALLEN
ward WARD
jones JONES
martin MARTIN
blake BLAKE
clark CLARK
scott SCOTT
king KING
turner TURNER
adams ADAMS
james JAMES
ford FORD
miller MILLER

-- 求字符串长度。length
select length(ename), ename from emp;
OK
_c0 ename
5 CLERK
5 ALLEN
4 WARD
5 JONES
6 MARTIN
5 BLAKE
5 CLARK
5 SCOTT
4 KING
6 TURNER
5 ADAMS
5 JAMES
4 FORD
6 MILLER

-- 字符串拼接。 concat / ||
select empno || " " ||ename idname from emp;
OK
idname
NULL
7499 ALLEN
7521 WARD
7566 JONES
7654 MARTIN
7698 BLAKE
7782 CLARK
7788 SCOTT
7839 KING
7844 TURNER
7876 ADAMS
7900 JAMES
7902 FORD
7934 MILLER

select concat(empno, " " ,ename) idname from emp;
OK
idname
NULL
7499 ALLEN
7521 WARD
7566 JONES
7654 MARTIN
7698 BLAKE
7782 CLARK
7788 SCOTT
7839 KING
7844 TURNER
7876 ADAMS
7900 JAMES
7902 FORD
7934 MILLER

-- 指定分隔符。concat_ws(separator, [string | array(string)]+)
SELECT concat_ws('.', 'www', array('lagou', 'com'));
OK
_c0
www.lagou.com

select concat_ws(" ", ename, job) from emp;
OK
_c0
CLERK 7902
ALLEN SALESMAN
WARD SALESMAN
JONES MANAGER
MARTIN SALESMAN
BLAKE MANAGER
CLARK MANAGER
SCOTT ANALYST
KING PRESIDENT
TURNER SALESMAN
ADAMS CLERK
JAMES CLERK
FORD ANALYST
MILLER CLERK

-- 求子串。substr
SELECT substr('www.lagou.com', 5);
OK
_c0
lagou.com

SELECT substr('www.lagou.com', -5);
OK
_c0
u.com

SELECT substr('www.lagou.com', 5, 5);
OK
_c0
lagou

-- 字符串切分。split,注意 '.' 要转义
select split("www.lagou.com", "\\.");
OK
_c0
["www","lagou","com"]

数学函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
-- 四舍五入。round
select round(314.15926);
OK
_c0
314

select round(314.15926, 2);
OK
_c0
314.16

select round(314.15926, -2);
OK
_c0
300

-- 向上取整。ceil
select ceil(3.1415926);

-- 向下取整。floor
select floor(3.1415926);
OK
_c0
4

-- 其他数学函数包括:绝对值、平方、开方、对数运算、三角运算等
select abs(-19), power(10,4), sqrt(100), log2(4), log10(100);
OK
_c0 _c1 _c2 _c3 _c4
19 10000.0 10.0 2.0 2.0

条件函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
-- 将emp表的员工工资等级分类:0-1500、1500-3000、3000以上
-- if (boolean testCondition, T valueTrue, T valueFalseOrNull)
select sal,
if (sal<1500, 1, if (sal < 3000, 2, 3))
from emp;
OK
sal _c1
NULL 3
1600 2
1250 1
2975 2
1250 1
2850 2
2450 2
3000 3
5000 3
1500 2
1100 1
950 1
3000 3
1300 1

-- CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END
-- 复杂条件用 case when 更直观
select sal,
case when sal<=1500 then 1
when sal<=3000 then 2
else 3
end sallevel
from emp;
OK
sal sallevel
NULL 3
1600 2
1250 1
2975 2
1250 1
2850 2
2450 2
3000 2
5000 3
1500 1
1100 1
950 1
3000 2
1300 1

-- 以下语句等价
select ename, deptno,
case deptno when 10 then 'accounting'
when 20 then 'research'
when 30 then 'sales'
else 'unknown'
end deptname
from emp;

select ename, deptno,
case when deptno=10 then 'accounting'
when deptno=20 then 'research'
when deptno=30 then 'sales'
else 'unknown'
end deptname
from emp;

-- COALESCE(T v1, T v2, ...)。返回参数中的第一个非空值;如果所有值都为 NULL,那么返回NULL
select sal, coalesce(comm, 0) from emp;
OK
sal _c1
NULL 20
1600 300
1250 500
2975 0
1250 1400
2850 0
2450 0
3000 0
5000 0
1500 0
1100 0
950 0
3000 0
1300 0

-- isnull(a) isnotnull(a)
select * from emp where isnull(comm);
OK
emp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno
7566 JONES MANAGER 7839 2011-04-02 2975 NULL 20
7698 BLAKE MANAGER 7839 2011-05-01 2850 NULL 30
7782 CLARK MANAGER 7839 2011-06-09 2450 NULL 10
7788 SCOTT ANALYST 7566 2017-07-13 3000 NULL 20
7839 KING PRESIDENT NULL 2011-11-07 5000 NULL 10
7876 ADAMS CLERK 7788 2017-07-13 1100 NULL 20
7900 JAMES CLERK 7698 2011-12-03 950 NULL 30
7902 FORD ANALYST 7566 2011-12-03 3000 NULL 20
7934 MILLER CLERK 7782 2012-01-23 1300 NULL 10

select * from emp where isnotnull(comm);
OK
emp.empno emp.ename emp.job emp.mgr emp.hiredate emp.sal emp.comm emp.deptno
NULL CLERK 7902 NULL NULL NULL 20 NULL
7499 ALLEN SALESMAN 7698 2011-02-20 1600 300 30
7521 WARD SALESMAN 7698 2011-02-22 1250 500 30
7654 MARTIN SALESMAN 7698 2011-09-28 1250 1400 30
7844 TURNER SALESMAN 7698 2011-09-08 1500 0 30


-- nvl(T value, T default_value)
select empno, ename, job, mgr, hiredate, deptno, sal + nvl(comm,0) sumsal from emp;
OK
empno ename job mgr hiredate deptno sumsal
NULL CLERK 7902 NULL NULL NULL NULL
7499 ALLEN SALESMAN 7698 2011-02-20 30 1900
7521 WARD SALESMAN 7698 2011-02-22 30 1750
7566 JONES MANAGER 7839 2011-04-02 20 2975
7654 MARTIN SALESMAN 7698 2011-09-28 30 2650
7698 BLAKE MANAGER 7839 2011-05-01 30 2850
7782 CLARK MANAGER 7839 2011-06-09 10 2450
7788 SCOTT ANALYST 7566 2017-07-13 20 3000
7839 KING PRESIDENT NULL 2011-11-07 10 5000
7844 TURNER SALESMAN 7698 2011-09-08 30 1500
7876 ADAMS CLERK 7788 2017-07-13 20 1100
7900 JAMES CLERK 7698 2011-12-03 30 950
7902 FORD ANALYST 7566 2011-12-03 20 3000
7934 MILLER CLERK 7782 2012-01-23 10 1300

-- nullif(x, y) 相等为空,否则为x
SELECT nullif("b", "b"), nullif("b", "a");
OK
_c0 _c1
NULL b

UDTF函数

UDTF : User Defined Table-Generating Functions。用户定义表生成函数,一行输入,多行输出。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
-- explode,炸裂函数
-- 就是将一行中复杂的 array 或者 map 结构拆分成多行
select explode(array('A','B','C')) as col;
OK
col
A
B
C

select explode(map('a', 8, 'b', 88, 'c', 888));
OK
key value
a 8
b 88
c 888

-- UDTF's are not supported outside the SELECT clause, nor nested in expressions
-- SELECT pageid, explode(adid_list) AS myCol... is not supported
-- SELECT explode(explode(adid_list)) AS myCol... is not supported

-- lateral view 常与 表生成函数explode结合使用
-- lateral view 语法:
LATERAL VIEW udtf(expression) tableAlias AS columnAlias (',' columnAlias)* fromClause: FROM baseTable (lateralView)*

-- lateral view 的基本使用
with t1 as (
select 'OK' cola, split('www.lagou.com', '\\.') colb
)
select cola, colc from t1 lateral view explode(colb) t2 as colc;
OK
cola colc
OK www
OK lagou
OK com

UDTF 案例1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
-- 数据(id tags):
1 1,2,3
2 2,3
3 1,2

--编写sql,实现如下结果:
1 1
1 2
1 3
2 2
2 3
3 1
3 2

-- 建表加载数据
create table tab1(
id int,
tags string
)
row format delimited fields terminated by '\t';
load data local inpath '/home/hadoop/data/tab1.dat' into table tab1;

-- SQL
select id, split(tags, ',') from tab1;
select id, tag from tab1 lateral view explode(split(tags, ",")) t1 as tag;

UDTF 案例2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
-- 数据准备
lisi|Chinese:90,Math:80,English:70
wangwu|Chinese:88,Math:90,English:96
maliu|Chinese:99,Math:65,English:60

-- 创建表
create table studscore(
name string ,
score map<String,string>
)
row format delimited fields terminated by '|'
collection items terminated by ','
map keys terminated by ':';

-- 加载数据
load data local inpath '/home/hadoop/data/score.dat' overwrite into table studscore;

-- 需求:找到每个学员的最好成绩
-- 第一步,使用 explode 函数将map结构拆分为多行
select explode(score) as (subject, socre) from studscore;

-- 第二步:explode常与 lateral view 函数联用,这两个函数结合在一起能关 联其他字段
select name, subject, score1 as score
from studscore
lateral view explode(score) t1 as subject, score1;

-- 第三步:找到每个学员的最好成绩
select name, max(mark) maxscore
from (
select name, subject, mark
from studscore
lateral view explode(score) t1 as subject, mark
) t1
group by name;

with tmp as (
select name, subject, mark
from studscore
lateral view explode(score) t1 as subject, mark
)
select name, max(mark) maxscore
from tmp
group by name;

小结

将一行数据转换成多行数据,可以用于array和map类型的数据;

lateral view 与 explode 联用,解决 UDTF 不能添加额外列的问题