1、连续7天登录的用户

建表和加载数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
-- 数据。uid dt status(1 正常登录,0 异常)
1 2019-07-11 1 1 2019-07-12 1
1 2019-07-13 1 1 2019-07-14 1
1 2019-07-15 1 1 2019-07-16 1
1 2019-07-17 1 1 2019-07-18 1
2 2019-07-11 1 2 2019-07-12 1
2 2019-07-13 0 2 2019-07-14 1
2 2019-07-15 1 2 2019-07-16 0
2 2019-07-17 1 2 2019-07-18 0
3 2019-07-11 1 3 2019-07-12 1
3 2019-07-13 1 3 2019-07-14 0
3 2019-07-15 1 3 2019-07-16 1
3 2019-07-17 1 3 2019-07-18 1

-- 建表语句
create table ulogin(
uid int,
dt date,
status int
)
row format delimited fields terminated by ' ';

-- 加载数据
load data local inpath '/home/hadoop/data/ulogin.dat' into table ulogin;

实例语句

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
-- 1、使用 row_number 在组内给数据编号(rownum)
select uid, dt,
row_number() over (partition by uid order by dt) rownum
from ulogin
where status=1;

-- 2、某个值 - rownum = gid,得到结果可以作为后面分组计算的依据
select uid, dt,
date_sub(dt,
row_number() over (partition by uid order by dt)
) gid
from ulogin
where status=1;

-- 3、根据求得的gid,作为分组条件,求最终结果
select uid,
count(*) logincount
from (
select uid, dt,
date_sub(dt,
row_number() over (partition by uid order by dt)
) gid
from ulogin where status=1
) t1
group by uid, gid
having logincount>=7;

2、编写sql语句实现每班前三名,分数一样并列,同时求出前三名按名次排序的分差

建表和加载数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
-- 数据。sid class score
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87

-- 待求结果数据如下: class score rank lagscore
1901 90 1 0
1901 90 1 0
1901 83 2 -7
1901 60 3 -23
1902 99 1 0
1902 87 2 -12
1902 67 3 -20

-- 建表语句
create table stu(
sno int,
class string,
score int
)
row format delimited fields terminated by ' ';

-- 加载数据
load data local inpath '/home/hadoop/data/stu.dat' into table stu;

实例语句

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
-- 1、上排名函数,分数一样并列,所以用dense_rank
select sno, class, score,
dense_rank() over (partition by class order by score desc) as rank
from stu;

-- 2、将上一行数据下移,相减即得到分数差
with tmp as (
select sno, class, score,
dense_rank() over (partition by class order by score desc) as rank from stu
)
select class, score, rank,
score - lag(score) over (partition by class order by score desc) lagscore
from tmp
where rank<=3;

-- 3、处理 NULL
with tmp as (
select sno, class, score,
dense_rank() over (partition by class order by score desc) as rank from stu
)
select class, score, rank,
nvl(score - lag(score) over (partition by class order by score desc), 0) lagscore
from tmp
where rank<=3;

3、行 <=> 列

实例一

建表和加载数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
-- 数据:id course
1 java
1 hadoop
1 hive
1 hbase
2 java
2 hive
2 spark
2 flink
3 java
3 hadoop
3 hive
3 kafka

-- 编写sql,得到结果如下(1表示选修,0表示未选修)
id java hadoop hive hbase spark flink kafka
1 1 1 1 1 0 0 0
2 1 0 1 0 1 1 0
3 1 1 1 0 0 0 1

-- 建表加载数据
create table rowline1(
id string,
course string
)
row format delimited fields terminated by ' ';
load data local inpath '/root/data/data1.dat' into table rowline1;

实例语句

1
2
3
4
5
6
7
8
9
10
11
-- 使用case when;group by + sum
select id,
sum(case when course="java" then 1 else 0 end) as java,
sum(case when course="hadoop" then 1 else 0 end) as hadoop,
sum(case when course="hive" then 1 else 0 end) as hive,
sum(case when course="hbase" then 1 else 0 end) as hbase,
sum(case when course="spark" then 1 else 0 end) as spark,
sum(case when course="flink" then 1 else 0 end) as flink,
sum(case when course="kafka" then 1 else 0 end) as kafka
from rowline1
group by id;

实例二

建表和加载数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
-- 数据。id1 id2 flag
a b 2
a b 1
a b 3
c d 6
c d 8
c d 8

-- 编写sql实现如下结果 id1 id2 flag
a b 2|1|3
c d 6|8

-- 创建表 & 加载数据
create table rowline2(
id1 string,
id2 string,
flag int
)
row format delimited fields terminated by ' ';
load data local inpath '/root/data/data2.dat' into table rowline2;

实例语句

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
-- 第一步 将元素聚拢
select id1, id2,
collect_set(flag) flag
from rowline2
group by id1, id2;

select id1, id2,
collect_list(flag) flag
from rowline2
group by id1, id2;

select id1, id2,
sort_array(collect_set(flag)) flag
from rowline2
group by id1, id2;

-- 第二步 将元素连接在一起
select id1, id2,
concat_ws("|",
collect_set(flag)
) flag
from rowline2
group by id1, id2;

-- 这里报错,CONCAT_WS must be "string or array<string>"。加一个 类型转换即可
select id1, id2,
concat_ws("|",
collect_set(cast (flag as string))
) flag
from rowline2
group by id1, id2;
  • 还原数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
-- 创建表 rowline3
create table rowline3 as
select id1, id2,
concat_ws("|",
collect_set(cast (flag as string))
) flag
from rowline2
group by id1, id2;

-- 第一步:将复杂的数据展开
select explode(split(flag, "\\|")) flat from rowline3;

-- 第二步:lateral view 后与其他字段关联
select id1, id2, newflag
from rowline3
lateral view explode(split(flag, "\\|")) t1 as newflag;