我们有如下的用户访问数据
userId visitDate visitCount
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
U02 2017/1/23 6
U01 2017/2/22 4
要求使用SQL统计出每个用户的累积访问次数,如下表所示:
用户id 月份 小计 累积
u01 2017-01 11 11
u01 2017-02 12 23
u02 2017-01 12 12
u03 2017-01 8 8
u04 2017-01 3 3
数据集
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
u02 2017/1/23 6
u01 2017/2/22 4
1)创建表
create table action
(userId string,
visitDate string,
visitCount int)
row format delimited fields terminated by "\t";
2)修改数据格式
select userId,
date_format(regexp_replace(visitDate,"/","-"),'yyyy-MM') month,
visitCount
from action;
3)计算每人单月访问量
with a as (
select userId,
date_format(regexp_replace(visitDate, "/", "-"), 'yyyy-MM') month,
visitCount
from action
) select userId,
month,
sum(visitCount) ct
from a group by userId, month;
4)按月累计访问量
with a as (
select userId,
date_format(regexp_replace(visitDate, "/", "-"), 'yyyy-MM') month,
visitCount
from action
),
b as (select userId,
month,
sum(visitCount) ct
from a
group by userId, month)
select userId,
month,
ct,
sum(ct) over(distribute by userId sort by month)
from b;
或者也可以是
select userId,
mn,
mn_count,
sum(mn_count) over (partition by userId order by mn)
from (select userId,
mn,
sum(visitCount) mn_count
from (select userId,
date_format(regexp_replace(visitDate, '/', '-'), 'yyyy-MM') mn,
visitCount
from action) t1
group by userId, mn) t2;
运行结果
+------+-------+--+--+
|userid|month |ct|c3|
+------+-------+--+--+
|u01 |2017-01|11|11|
|u01 |2017-02|12|23|
|u02 |2017-01|12|12|
|u03 |2017-01|8 |8 |
|u04 |2017-01|3 |3 |
+------+-------+--+--+
总结
要求使用SQL统计出每个用户的累积访问次数
1 首先求出每个用户每月的累计访问次数
2 同一用户不同月份的数据进行聚合,即(distribute by userId sort by month)
如果是求每个月不同用户的累计访问次数呢?如下
with a as (
select userId,
date_format(regexp_replace(visitDate, "/", "-"), 'yyyy-MM') month,
visitCount
from action
),
b as (select userId,
month,
sum(visitCount) ct
from a
group by userId, month)
select month,
userId,
ct,
sum(ct) over (distribute by month sort by userId)
from b;
运行结果如下
+-------+------+--+--+
|month |userid|ct|c3|
+-------+------+--+--+
|2017-01|u01 |11|11|
|2017-01|u02 |12|23|
|2017-01|u03 |8 |31|
|2017-01|u04 |3 |34|
|2017-02|u01 |12|12|
+-------+------+--+--+