SQL查询日期时间列表的累积频率
时间:2020-03-06 15:03:38 来源:igfitidea点击:
我在数据库列中有一个时间列表(代表对网站的访问)。
我需要按时间间隔对它们进行分组,然后获得这些日期的"累积频率"表。
例如,我可能有:
9:01 9:04 9:11 9:13 9:22 9:24 9:28
我想将其转换为
9:05 - 2 9:15 - 4 9:25 - 6 9:30 - 7
我怎样才能做到这一点?我什至可以在SQL中轻松实现这一目标吗?我可以很容易地用C#做到这一点
解决方案
创建一个表" periods",描述我们希望将一天划分成的时段。
SELECT periods.name, count(time) FROM periods, times WHERE period.start <= times.time AND times.time < period.end GROUP BY periods.name
我应该指出,根据问题的"意图",为了对访问者的流量进行分析,我编写了此语句以汇总统一组中的计数。
否则(如"示例"组中那样)将比较5分钟间隔内的计数与10分钟间隔内的计数,这没有意义。
我们必须遵循用户需求的"意图",而不是字面意义上的"阅读"。 :-)
create table #myDates
(
myDate datetime
);
go
insert into #myDates values ('10/02/2008 09:01:23');
insert into #myDates values ('10/02/2008 09:03:23');
insert into #myDates values ('10/02/2008 09:05:23');
insert into #myDates values ('10/02/2008 09:07:23');
insert into #myDates values ('10/02/2008 09:11:23');
insert into #myDates values ('10/02/2008 09:14:23');
insert into #myDates values ('10/02/2008 09:19:23');
insert into #myDates values ('10/02/2008 09:21:23');
insert into #myDates values ('10/02/2008 09:21:23');
insert into #myDates values ('10/02/2008 09:21:23');
insert into #myDates values ('10/02/2008 09:21:23');
insert into #myDates values ('10/02/2008 09:21:23');
insert into #myDates values ('10/02/2008 09:26:23');
insert into #myDates values ('10/02/2008 09:27:23');
insert into #myDates values ('10/02/2008 09:29:23');
go
declare @interval int;
set @interval = 10;
select
convert(varchar(5), dateadd(minute,@interval - datepart(minute, myDate) % @interval, myDate), 108) timeGroup,
count(*)
from
#myDates
group by
convert(varchar(5), dateadd(minute,@interval - datepart(minute, myDate) % @interval, myDate), 108)
retuns:
timeGroup
--------- -----------
09:10 4
09:20 3
09:30 8
这使用了很多SQL技巧(SQL Server 2005):
CREATE TABLE [dbo].[stackoverflow_165571](
[visit] [datetime] NOT NULL
) ON [PRIMARY]
GO
;WITH buckets AS (
SELECT dateadd(mi, (1 + datediff(mi, 0, visit - 1 - dateadd(dd, 0, datediff(dd, 0, visit))) / 5) * 5, 0) AS visit_bucket
,COUNT(*) AS visit_count
FROM stackoverflow_165571
GROUP BY dateadd(mi, (1 + datediff(mi, 0, visit - 1 - dateadd(dd, 0, datediff(dd, 0, visit))) / 5) * 5, 0)
)
SELECT LEFT(CONVERT(varchar, l.visit_bucket, 8), 5) + ' - ' + CONVERT(varchar, SUM(r.visit_count))
FROM buckets l
LEFT JOIN buckets r
ON r.visit_bucket <= l.visit_bucket
GROUP BY l.visit_bucket
ORDER BY l.visit_bucket
请注意,它将所有时间都放在同一天,并假定它们在日期时间列中。它不像示例所做的那样唯一的事情是从时间表示中去除前导零。
创建一个表,其中包含我们希望获得的总计间隔,然后将两个表连接在一起。
如:
time_entry.time_entry
-----------------------
2008-10-02 09:01:00.000
2008-10-02 09:04:00.000
2008-10-02 09:11:00.000
2008-10-02 09:13:00.000
2008-10-02 09:22:00.000
2008-10-02 09:24:00.000
2008-10-02 09:28:00.000
time_interval.time_end
-----------------------
2008-10-02 09:05:00.000
2008-10-02 09:15:00.000
2008-10-02 09:25:00.000
2008-10-02 09:30:00.000
SELECT
ti.time_end,
COUNT(*) AS 'interval_total'
FROM time_interval ti
INNER JOIN time_entry te
ON te.time_entry < ti.time_end
GROUP BY ti.time_end;
time_end interval_total
----------------------- -------------
2008-10-02 09:05:00.000 2
2008-10-02 09:15:00.000 4
2008-10-02 09:25:00.000 6
2008-10-02 09:30:00.000 7
如果不是想要累计总数,而是想要某个范围内的总数,那么我们可以在time_interval表中添加time_start列,并将查询更改为
SELECT
ti.time_end,
COUNT(*) AS 'interval_total'
FROM time_interval ti
INNER JOIN time_entry te
ON te.time_entry >= ti.time_start
AND te.time_entry < ti.time_end
GROUP BY ti.time_end;
create table accu_times (time_val datetime not null, constraint pk_accu_times primary key (time_val));
go
insert into accu_times values ('9:01');
insert into accu_times values ('9:05');
insert into accu_times values ('9:11');
insert into accu_times values ('9:13');
insert into accu_times values ('9:22');
insert into accu_times values ('9:24');
insert into accu_times values ('9:28');
go
select rounded_time,
(
select count(*)
from accu_times as at2
where at2.time_val <= rt.rounded_time
) as accu_count
from (
select distinct
dateadd(minute, round((datepart(minute, at.time_val) + 2)*2, -1)/2,
dateadd(hour, datepart(hour, at.time_val), 0)
) as rounded_time
from accu_times as at
) as rt
go
drop table accu_times
结果是:
rounded_time accu_count ----------------------- ----------- 1900-01-01 09:05:00.000 2 1900-01-01 09:15:00.000 4 1900-01-01 09:25:00.000 6 1900-01-01 09:30:00.000 7
哦,所有这些东西太复杂了。
归一化为秒,除以存储桶间隔,截断并重新乘以:
select sec_to_time(floor(time_to_sec(d)/300)*300), count(*) from d group by sec_to_time(floor(time_to_sec(d)/300)*300)
使用罗恩·萨维奇(Ron Savage)的数据,我得到
+----------+----------+ | i | count(*) | +----------+----------+ | 09:00:00 | 1 | | 09:05:00 | 3 | | 09:10:00 | 1 | | 09:15:00 | 1 | | 09:20:00 | 6 | | 09:25:00 | 2 | | 09:30:00 | 1 | +----------+----------+
我们可能希望使用ceil()或者round()代替floor()。
更新:使用创建的表
create table d (
d datetime
);

