Aggregate at 10 minutes for last 24 hours in db - sql-server

The following query splits the data of the given time range into 5-minute buckets, but it starts the buckets from the beginning of the provided time range:
-- Sums cw.datas.value per time bucket over roughly the trailing 24 hours.
WITH MNE AS
(
    SELECT
        src.*,
        -- whole days since the zero date; identifies the calendar day of the row
        DATEDIFF(DAY, 0, src.datetime) AS dayoffset,
        -- whole minutes elapsed since the first row of the same calendar day
        DATEDIFF(SECOND,
                 MIN(src.datetime) OVER (PARTITION BY DATEDIFF(DAY, 0, src.datetime)),
                 src.datetime) / 60 AS MinOffset
    FROM cw.datas AS src
    WHERE src.DATETIME <= GETDATE()
      -- lower bound: 24 hours ago, shifted back by (seconds since midnight modulo 300)
      AND src.DATETIME > DATEADD(SECOND,
                                 -DATEDIFF(SECOND, CAST(CAST(GETDATE() AS DATE) AS DATETIME), GETDATE()) % (60 * 5),
                                 DATEADD(DAY, -1, GETDATE()))
)
SELECT
    MIN(b.datetime) AS StartDatetime,
    MAX(b.datetime) AS ENdDatetime,
    SUM(b.value)    AS TotalSum
FROM MNE AS b
-- one group per calendar day and per block of ten MinOffset values
GROUP BY b.dayoffset, (b.MinOffset - 1) / 10
ORDER BY StartDatetime
With the current time being 2020-09-14 12:28:43.793, the output looks like this:
StartDatetime,ENdDatetime,TotalSum
2020-09-13 12:25:03.000, 2020-09-13 12:31:00.000, 63763.51
2020-09-13 12:31:03.000, 2020-09-13 12:36:00.000, 48348.34
2020-09-13 12:36:03.000, 2020-09-13 12:41:00.000, 54387.69
--
--
2020-09-14 12:16:00.000, 2020-09-14 12:20:57.000, 54353.72
2020-09-14 12:21:00.000, 2020-09-14 12:25:57.000, 53780.48
2020-09-14 12:26:00.000, 2020-09-14 12:28:42.000, 25328.56
I want the buckets to be aligned to the current timestamp instead, i.e. counted backwards from 2020-09-14 12:28:43.793 over the last 24 hours.
Expected result :
StartDatetime,ENdDatetime,TotalSum
--
--
2020-09-14 11:58:43.000, 2020-09-14 12:08:43.000, 354654
2020-09-14 12:08:43.000, 2020-09-14 12:18:43.000, 354353
2020-09-14 12:18:43.000, 2020-09-14 12:28:43.000, 354665
Help appreciated.
Sample Data :
datas
datetime, abc, def, ghi, value
2020-09-11 12:22:36.000, AYSH, mains, SAE, 363.12
2020-09-11 12:22:39.000, AYSH, mains, SAE, 358.2
2020-09-11 12:22:42.000, AYSH, mains, SAE, 353.66
2020-09-11 12:22:45.000, AYSH, mains, SAE, 349.14
2020-09-11 12:22:48.000, AYSH, mains, SAE, 344.84
2020-09-11 12:22:51.000, AYSH, mains, SAE, 340.63
2020-09-11 12:22:54.000, AYSH, mains, SAE, 336.45

You did not provide table definitions for cw.datas so I cannot apply my solution to your query.
This recursive CTE generates the 144 intervals with duration of 10 minutes starting from now and going back 24 hours.
-- Steps backwards from the current time in 10-minute increments and lists
-- each step together with the timestamp 10 minutes after it.
WITH stamps (Stamp, Interval) AS
(
    -- anchor row: the current time, numbered 0
    SELECT GETDATE(), 0
    UNION ALL
    -- recursive step: 10 minutes earlier than the previous row
    SELECT DATEADD(MINUTE, -10, prev.Stamp), prev.Interval + 1
    FROM stamps AS prev
    WHERE prev.Interval < 144 -- 24 hours * 6 10-minute intervals = 144 10-minute intervals
)
SELECT
    st.Stamp                      AS FromDateTime,
    DATEADD(MINUTE, 10, st.Stamp) AS ToDateTime,
    st.Interval
FROM stamps AS st
ORDER BY st.Stamp
OPTION (MAXRECURSION 144);
Output looks like:
FromDateTime ToDateTime Interval
--------------------------- --------------------------- ---------
2020-09-13 14:00:54.740 2020-09-13 14:10:54.740 144
2020-09-13 14:10:54.740 2020-09-13 14:20:54.740 143
2020-09-13 14:20:54.740 2020-09-13 14:30:54.740 142
2020-09-13 14:30:54.740 2020-09-13 14:40:54.740 141
2020-09-13 14:40:54.740 2020-09-13 14:50:54.740 140
...
2020-09-14 13:10:54.740 2020-09-14 13:20:54.740 5
2020-09-14 13:20:54.740 2020-09-14 13:30:54.740 4
2020-09-14 13:30:54.740 2020-09-14 13:40:54.740 3
2020-09-14 13:40:54.740 2020-09-14 13:50:54.740 2
2020-09-14 13:50:54.740 2020-09-14 14:00:54.740 1
2020-09-14 14:00:54.740 2020-09-14 14:10:54.740 0
Fiddle 1
Combination with the given sample data.
Sample data
-- Sample table mirroring the columns shown in the question's sample data.
CREATE TABLE data
(
    stamp DATETIME,
    abc   NVARCHAR(4),
    def   NVARCHAR(5),
    ghi   NVARCHAR(3),
    value DECIMAL(10,2)
);

-- Seven readings, three seconds apart, all inside the same 10-minute window.
INSERT INTO data (stamp, abc, def, ghi, value)
VALUES
    ('2020-09-11 12:22:36.000', 'AYSH', 'mains', 'SAE', 363.12),
    ('2020-09-11 12:22:39.000', 'AYSH', 'mains', 'SAE', 358.2),
    ('2020-09-11 12:22:42.000', 'AYSH', 'mains', 'SAE', 353.66),
    ('2020-09-11 12:22:45.000', 'AYSH', 'mains', 'SAE', 349.14),
    ('2020-09-11 12:22:48.000', 'AYSH', 'mains', 'SAE', 344.84),
    ('2020-09-11 12:22:51.000', 'AYSH', 'mains', 'SAE', 340.63),
    ('2020-09-11 12:22:54.000', 'AYSH', 'mains', 'SAE', 336.45);
Solution
Defines a reference datetime because getdate() would produce few results with sample data for 2020-09-11.
-- Sums the sample values per 10-minute interval.
-- T-SQL local variables must start with '@'; a fixed reference time is used so
-- that the 2020-09-11 sample rows fall inside the generated intervals.
declare @refStamp datetime = '2020-09-12 00:00:00.000'; -- replacement for getdate()

with stamps as
(
    -- anchor: the interval that starts at the reference time
    select @refStamp as FromDateTime,
           dateadd(MI, 10, @refStamp) as ToDateTime,
           0 as Interval
    union all
    -- recursive step: the interval that ends where the previous one starts
    select dateadd(MI, -10, s.FromDateTime),
           s.FromDateTime,
           s.Interval + 1
    from stamps s
    where s.Interval < 144 -- 24 hours * 6 10-minute intervals = 144 10-minute intervals
)
select s.FromDateTime,
       s.ToDateTime,
       d.abc,
       sum(d.value) as SumValues
from stamps s
-- left join keeps intervals without data; they show NULL for abc and SumValues
left join data d
    on  d.stamp >= s.FromDateTime -- greater than or equal to FromDateTime
    and d.stamp <  s.ToDateTime   -- smaller than ToDateTime
where s.FromDateTime >= '2020-09-11 12:00:00.000' -- limit output results (part 1)
  and s.FromDateTime <= '2020-09-11 13:00:00.000' -- limit output results (part 2)
group by s.FromDateTime, s.ToDateTime, d.abc
order by s.FromDateTime
option(maxrecursion 144);
Result
FromDateTime ToDateTime abc SumValues
------------------------ ------------------------ ----- ----------
2020-09-11 12:00:00.000 2020-09-11 12:10:00.000 NULL NULL
2020-09-11 12:10:00.000 2020-09-11 12:20:00.000 NULL NULL
2020-09-11 12:20:00.000 2020-09-11 12:30:00.000 AYSH 2446.04
2020-09-11 12:30:00.000 2020-09-11 12:40:00.000 NULL NULL
2020-09-11 12:40:00.000 2020-09-11 12:50:00.000 NULL NULL
2020-09-11 12:50:00.000 2020-09-11 13:00:00.000 NULL NULL
2020-09-11 13:00:00.000 2020-09-11 13:10:00.000 NULL NULL
Fiddle 2

If the data is dense then it's really not necessary to use the recursive query. Just compute the number of seconds elapsed since startTime. And this form lets you easily adjust the range start and size of the interval.
-- Buckets the last 24 hours of cw.datas into fixed-size intervals counted from
-- startTime, without generating the intervals recursively.
with timeRelatedFields as (
    -- single-row parameter set: range start and bucket size
    select
        -- current time with its millisecond part removed, minus 24 hours
        dateadd(hour, -24,
            dateadd(millisecond, -datepart(millisecond, getdate()),
                getdate())) as startTime,
        600 as intervalSeconds
), tableTimeRelatedFields as (
    select
        st.startTime,
        st.intervalSeconds,
        d.value as consideredValue,
        -- zero-based bucket number: whole seconds since startTime, integer-divided by the bucket size
        datediff(second, st.startTime, d."datetime") / st.intervalSeconds as interval
    from cw.datas as d
    cross apply timeRelatedFields as st
    where d."datetime" >= st.startTime
      and d."datetime" < getdate()
)
select
    dateadd(second, intervalSeconds * interval, startTime) as StartDateTime,
    dateadd(second, intervalSeconds * (interval + 1), startTime) as EndDateTime,
    sum(consideredValue) as TotalSum
from tableTimeRelatedFields
-- startTime and intervalSeconds are the same on every row, but SQL Server still
-- requires every non-aggregated column used in the select list to be grouped
group by startTime, intervalSeconds, interval
order by StartDateTime;

Related

T-SQL : set start of the week from named date and show week nr of the date

I have code that works out the week number of a date within a period defined by two parameters:
-- Demo: number each date by week relative to @startDate, or -1 when the date
-- lies outside the [@startDate, @endDate] period.
CREATE TABLE #test
(
    job int,
    dateL datetime
)
INSERT INTO #test
VALUES (1, '2021-10-04'),
       (2, '2021-10-05'),
       (3, '2021-10-11'),
       (4, '2021-10-12')
-- T-SQL local variables must start with '@' ('#' denotes a temp table)
DECLARE @startDate datetime = '2021-10-05',
        @endDate datetime = '2021-12-03'
SELECT
    dateL,
    (CASE
        WHEN t.DateL BETWEEN @startDate and @endDate
        -- DATEDIFF(wk, ...) counts week boundaries crossed between the two dates,
        -- not 7-day spans measured from @startDate
        THEN (DATEDIFF(wk, @startDate, t.DateL)) + 1
        ELSE -1
    END) AS WeekNumber
FROM
    #test t
DROP TABLE #test
The results of this
dateL WeekNumber
---------------------------------------
2021-10-04 00:00:00.000 -1
2021-10-05 00:00:00.000 1
2021-10-11 00:00:00.000 2
2021-10-12 00:00:00.000 2
But it's not quite what I need, and I don't understand how to make it start counting from @startDate.
So it should count like:
05.10 - 11.10 - first week
12.10 - 18.10 - second week
and so on,
So it would look like that
dateL WeekNumber
-----------------------------------
2021-10-04 00:00:00.000 -1
2021-10-05 00:00:00.000 1
2021-10-11 00:00:00.000 *1*
2021-10-12 00:00:00.000 2
I tried to set @@DATEFIRST to
datepart(weekday, @startDate)
but it is simply ignored.
Could someone recommend an approach? Thanks!

Calculate time between startdate and enddate and subtracting days that have no worktime

My goal is to check whether an email is answered within 24 hours during workdays. The definition of a workday is a day for which time is registered in another table; this is because we sometimes work on a Saturday or a Sunday, and it also excludes holidays. I made a view from that table that gives a 1 if the date has work time registered or a 0 if there is none.
DateWorked | HasWorked
2021-04-01 00:00:00.000 | 1
2021-04-02 00:00:00.000 | 1
2021-04-03 00:00:00.000 | 1
2021-04-04 00:00:00.000 | 0
2021-04-05 00:00:00.000 | 1
So for example a few situations:
1. MailIncoming: 2021-04-01 16:30:00, MailAnswering: 2021-04-02 14:00:00
This one is easy, I don't have to subtract anything and the mail is answered within 24 hours.
2. MailIncoming: 2021-04-01 09:30:00, MailAnswering: 2021-04-03 14:00:00
This one is also easy, I don't have to subtract anything and the mail is not answered within 24 hours.
3. MailIncoming: 2021-04-03 12:30:00, MailAnswering: 2021-04-05 10:00:00
There is 1 day where no one has worked, so I need to subtract 1 whole day from the total time, and in that case the email is answered within 24 hours during workdays.
4. MailIncoming: 2021-04-04 11:00:00, MailAnswering: 2021-04-05 18:00:00
The remaining 13 hours of 04-04 do not count toward the '24 hours during workdays', so the email is answered within 24 hours during workdays.
Also, there can be multiple dates with zero after each other.
So the outcome I'm looking for is:
MailIncoming | MailAnswering | TotalTime | TotalTimeWithoutDaysNotWorked
2021-04-04 11:00:00.000 | 2021-04-05 18:00:00.000 | 31 | 18
How can I calculate this last column? Or am I approaching this in the wrong way?
The query needs a way to generate calculated dates between MailIncoming and MailAnswering so there can be a LEFT JOIN (or INNER JOIN) to the WorkingDay table. In this case the query uses dbo.fnTally which is known to be a fast and efficient way to generate rows.
tables
-- Calendar of days; HasNotWorked = 1 marks a day without registered work time.
DROP TABLE IF EXISTS #WorkingDay;
go
CREATE TABLE #WorkingDay
(
    DateWorked   DATE,
    HasNotWorked INT
);

-- One row per mail: when it arrived and when it was answered.
DROP TABLE IF EXISTS #MailIncoming;
go
CREATE TABLE #MailIncoming
(
    MailIncoming  DATETIME,
    MailAnswering DATETIME
);

INSERT INTO #WorkingDay (DateWorked, HasNotWorked)
VALUES
    ('2021-04-01', 0),
    ('2021-04-02', 0),
    ('2021-04-03', 0),
    ('2021-04-04', 1),
    ('2021-04-05', 0),
    ('2021-04-06', 0);

INSERT INTO #MailIncoming (MailIncoming, MailAnswering)
VALUES
    ('2021-04-01 16:30:00', '2021-04-02 14:00:00'),
    ('2021-04-01 09:30:00', '2021-04-03 14:00:00'),
    ('2021-04-03 12:30:00', '2021-04-05 10:00:00'),
    ('2021-04-04 11:00:00', '2021-04-05 18:00:00');
dbo.fnTally
CREATE FUNCTION [dbo].[fnTally]
/**********************************************************************************************************************
 Jeff Moden Script on SSC: https://www.sqlservercentral.com/scripts/create-a-tally-function-fntally

 Inline table-valued function returning a single column N of sequential integers.
   @ZeroOrOne : when 0, an extra row N = 0 is emitted ahead of the sequence; otherwise the sequence starts at 1.
   @MaxN      : the sequence 1, 2, 3, ... is cut off after @MaxN rows (TOP).
 Parameters must carry the '@' sigil; '#' is not valid for parameter names.
**********************************************************************************************************************/
        (@ZeroOrOne BIT, @MaxN BIGINT)
RETURNS TABLE WITH SCHEMABINDING AS
 RETURN WITH
  H2(N) AS ( SELECT 1
               FROM (VALUES
                     (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    ,(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
                    )V(N))            --16^2 or 256 rows
, H4(N) AS (SELECT 1 FROM H2 a, H2 b) --16^4 or 65,536 rows
, H8(N) AS (SELECT 1 FROM H4 a, H4 b) --16^8 or 4,294,967,296 rows
            -- optional leading zero row, emitted only when @ZeroOrOne = 0
            SELECT N = 0 WHERE @ZeroOrOne = 0 UNION ALL
            -- numbered rows 1..@MaxN drawn from the cross-joined row source
            SELECT TOP(@MaxN)
                   N = ROW_NUMBER() OVER (ORDER BY N)
              FROM H8
;
query
-- Per mail: hours between arrival and answer (hrs_to_ans) and the negative number of
-- hours to discount for days flagged as not worked (hrs_to_sub).
select mi.MailIncoming, mi.MailAnswering,
-- every expanded day row of a mail carries the same difference, so avg() returns that value
avg(datediff(hour, MailIncoming, MailAnswering)) hrs_to_ans,
-- a flagged day strictly between the arrival date and the answer date costs a full 24 hours
sum(case when w.HasNotWorked=1 and
v.calc_dt > mi_dt.inc_dt and
v.calc_dt < mi_dt.ans_dt
then -24
-- any other flagged day: negative hour count from the arrival time to the midnight after the arrival date
-- NOTE(review): this branch also applies when the flagged day is the answer date - confirm that is intended
when w.HasNotWorked=1
then datediff(hour, dateadd(day, 1, mi_dt.inc_dt), mi.MailIncoming)
else 0 end) hrs_to_sub
from #MailIncoming mi
-- date-only versions of both timestamps
cross apply (values (cast(MailIncoming as date),
cast(MailAnswering as date))) mi_dt(inc_dt, ans_dt)
-- one row per day offset, from 0 up to the number of date boundaries crossed
cross apply dbo.fnTally(0, datediff(day, mi.MailIncoming, mi.MailAnswering)) fn
-- the calendar date each offset stands for
cross apply (values (dateadd(day, fn.n, mi_dt.inc_dt))) v(calc_dt)
-- left join: dates missing from #WorkingDay give a NULL flag and fall into the else branch
left join #WorkingDay w on v.calc_dt=w.DateWorked
group by mi.MailIncoming, mi.MailAnswering
order by mi.MailIncoming;
MailIncoming MailAnswering hrs_to_ans hrs_to_sub
2021-04-01 09:30:00.000 2021-04-03 14:00:00.000 53 0
2021-04-01 16:30:00.000 2021-04-02 14:00:00.000 22 0
2021-04-03 12:30:00.000 2021-04-05 10:00:00.000 46 -24
2021-04-04 11:00:00.000 2021-04-05 18:00:00.000 31 -13
I suggest you to use a column HasNotWorked, so the tables are
-- WorkingDay: one row per calendar date with a HasNotWorked flag.
-- MailIncoming: arrival and answer timestamp of each mail.
create table WorkingDay(DateWorked Date, HasNotWorked int);
create table MailIncoming(MailIncoming DateTime, MailAnswering DateTime);
and the rows
-- Six consecutive days; only 2021-04-04 is flagged as not worked.
insert into WorkingDay (DateWorked, HasNotWorked)
values
    ('2021-04-01', 0),
    ('2021-04-02', 0),
    ('2021-04-03', 0),
    ('2021-04-04', 1),
    ('2021-04-05', 0),
    ('2021-04-06', 0);

-- A single mail that arrives on the flagged day.
insert into MailIncoming (MailIncoming, MailAnswering)
values ('2021-04-04 11:00:00.000', '2021-04-06 18:00:00.000');
First I calculate the start date: if the mail arrived on a working day we use the mail's own timestamp, otherwise the first following working day, with
-- startDate: when the mail's arrival date is flagged HasNotWorked = 1, take the earliest
-- later DateWorked whose flag is 0; otherwise keep MailIncoming unchanged.
case when
(select HasNotWorked from WorkingDay where DateWorked = convert(date, MailIncoming)) = 1 then
(select min(DateWorked) from WorkingDay where DateWorked > MailIncoming and HasNotWorked = 0)
else MailIncoming end as startDate
and discard the days that are not working days
-- numNotWorkingDay: sum of the HasNotWorked flags between startDate's date and
-- MailAnswering's date (both inclusive), multiplied by 24.
((select sum(HasNotWorked) from WorkingDay where DateWorked between convert(date, startDate)
and convert(date, MailAnswering)
) * 24) as numNotWorkingDay
so the query could be
-- Outer level: subtract the flagged hours from the hour difference between startDate and the answer.
select startDate, MailAnswering, MailIncoming, hour, numNotWorkingDay, hour - numNotWorkingDay hourWitoutWorkingDay
from (
-- Middle level: hour difference plus 24 * (sum of HasNotWorked flags in the date range).
select
MailAnswering, startDate, MailIncoming,
DateDiff("hh", startDate, MailAnswering) hour,
((select sum(HasNotWorked) from WorkingDay where DateWorked between convert(date, startDate)
and convert(date, MailAnswering)
) * 24) as numNotWorkingDay
from (
-- Inner level: derive startDate; a mail arriving on a flagged day starts on the
-- earliest later date whose flag is 0.
select *,
case when
(select HasNotWorked from WorkingDay where DateWorked = convert(date, MailIncoming)) = 1 then
(select min(DateWorked) from WorkingDay where DateWorked > MailIncoming and HasNotWorked = 0)
else MailIncoming end as startDate
from MailIncoming) as startCalc
) as calcTable;
sqlfiddle

SQL Server : finding gaps in employment - island and gap problem

I have been going through stack overflow to try and work this out over the last week and I still can't work out a viable solution so was wondering if anyone could offer me some help/advice?
Explanation of the data structures
I have the following tables:
Position table (zz_position) which is used to hold the details of the
position (Job ID) include the date range that it is valid for.
PosNo Description Date_From Date_To
---------------------------------------------------------
10001 System Administrator 20170101 20231231
Resource table (zz_resource) which is used to hold the details of a resource (employee) including the date that they joined the company and left it
resID description date_from date_to
------------------------------------------
100 Sam 20160101 20991231
101 Joe 20150101 20991231
Employment table (zz_employment) which is used to link position to resources within a date from and to range
PosNo resID Date_From Date_To seqNo
---------------------------------------------------
10001 100 20180101 20180401 1
10001 101 20180601 20191231 2
10001 100 20200101 20991231 3
Problem
Now due to people changing positions, a post might not be filled for a period of time and what I am trying to do is produce a report that I can use to give me a breakdown of the status of a post at any point in time.
I know that I can produce one which fully maps each day using a calendar table however what I want is a report which produces the data in the following aggregated format:
PosNo resID Date_From Date_To seqNo
-------------------------------------------------
10001 NULL 20170101 20171231 0
10001 100 20180101 20180401 1
10001 NULL 20180402 20180530 0
10001 101 20180601 20191231 2
10001 100 20200101 20231231 3
-- Sample rows for zz_employment, one per employment period of position 10001.
insert into zz_employment
values ('10001', '100', '2018-01-01 00:00:00.000', '2018-04-01 00:00:00.000', 1),
('10001', '101', '2018-06-01 00:00:00.000', '2019-12-31 00:00:00.000', 2),
('10001', '100', '2020-01-01 00:00:00.000', '2099-12-31 00:00:00.000', 3)
(Note how the report takes the rows in the table and produces a fully specified timeline of the position, where the first NULL row's date_from is taken from the position's start date and the last row's date_to is taken from the position's end date.)
Ideally I would like this as a view/function however due to the complexity I would be more than happy to have a series of T SQL statements that I can run each night as part of a data warehouse routine.
Rules
All dates are stored as datetime values truncated to the day, so a date_to refers to the date on which something ends, not to the date and time at which it ends
if the post/employment/resource has no end date then it will be denoted as 20991231
if the employment itself is open ended then the date to in the employment table is denoted as 20991231 even through the position itself might end in 20231231. Ideally I would like the result to respect the position end date.
SQL code:
-- Positions (jobs) and the date range each one is valid for.
CREATE TABLE zz_position
(
    posNo       VARCHAR(25) NOT NULL,
    description VARCHAR(25) NOT NULL,
    date_from   DATETIME    NULL,
    date_to     DATETIME    NULL
)
INSERT INTO zz_position (posNo, description, date_from, date_to)
VALUES
    ('10001', 'System Administrator', '2017-01-01 00:00:00.000', '2020-12-31 00:00:00.000')
go

-- Resources (employees) and the date range each one is with the company.
CREATE TABLE zz_resource
(
    resID       VARCHAR(25) NOT NULL,
    description VARCHAR(25) NOT NULL,
    date_from   DATETIME    NULL,
    date_to     DATETIME    NULL
)
INSERT INTO zz_resource (resID, description, date_from, date_to)
VALUES
    ('100', 'Sam', '2016-01-01 00:00:00.000', '2099-12-31 00:00:00.000'),
    ('101', 'Joe', '2015-01-01 00:00:00.000', '2099-12-31 00:00:00.000')
go

-- Links a resource to a position for a date range; seqNo numbers the periods.
CREATE TABLE zz_employment
(
    posNo     VARCHAR(25) NOT NULL,
    resID     VARCHAR(25) NOT NULL,
    date_from DATETIME    NULL,
    date_to   DATETIME    NULL,
    seqNo     INT         NULL
)
INSERT INTO zz_employment (posNo, resID, date_from, date_to, seqNo)
VALUES
    ('10001', '100', '2018-01-01 00:00:00.000', '2018-04-01 00:00:00.000', 1),
    ('10001', '101', '2018-06-01 00:00:00.000', '2019-12-31 00:00:00.000', 2),
    ('10001', '100', '2020-01-01 00:00:00.000', '2099-12-31 00:00:00.000', 3)
There are 2 caveats for this problem:
A calendar table.
A way to correctly group unemployed periods when there's an employed period in between.
The following solution uses a calendar table (SQL included) and an DATEDIFF() with anchor-date trick to group correctly for the 2nd point.
Complete DB Fiddle here.
Solution (explanation below):
-- Reports, per position, each employed period and each vacant period as a date range.
-- AllPositionDates: one row per position per calendar day inside the position's validity range.
;WITH AllPositionDates AS
(
SELECT
T.posNo,
C.GeneratedDate
FROM
zz_position AS T
INNER JOIN Calendar AS C ON C.GeneratedDate BETWEEN T.date_from AND T.date_to
),
-- AllEmployedDates: one row per employment per calendar day inside the employment's range.
AllEmployedDates AS
(
SELECT
T.posNo,
T.resID,
T.seqNo,
C.GeneratedDate
FROM
zz_employment AS T
INNER JOIN Calendar AS C ON C.GeneratedDate BETWEEN T.date_from AND T.date_to
),
-- PositionsByEmployed: every position day with the matching employment, if any.
PositionsByEmployed AS
(
SELECT
P.posNo,
P.GeneratedDate,
E.resID,
E.seqNo,
-- numbered separately for days without a matching employment (E.posNo IS NULL) and days with one,
-- in date order within each position
NullRowNumber = ROW_NUMBER() OVER (
PARTITION BY
P.posNo,
CASE WHEN E.posNo IS NULL THEN 1 ELSE 2 END
ORDER BY
P.GeneratedDate ASC)
FROM
AllPositionDates AS P
LEFT JOIN AllEmployedDates AS E ON
P.posNo = E.posNo AND
P.GeneratedDate = E.GeneratedDate
)
SELECT
P.posNo,
P.resID,
Date_From = MIN(P.GeneratedDate),
Date_To = MAX(P.GeneratedDate),
-- vacant periods have no seqNo and are reported as 0
seqNo = ISNULL(P.seqNo, 0)
FROM
PositionsByEmployed AS P
GROUP BY
P.posNo,
P.resID,
P.seqNo,
CASE WHEN P.resId IS NULL THEN P.NullRowNumber - DATEDIFF(DAY, '2000-01-01', P.GeneratedDate) END -- GroupingValue: row number minus day offset, applied to vacant days only
ORDER BY
P.posNo,
Date_From,
Date_To
The result:
posNo resID Date_From Date_To seqNo
10001 NULL 2017-01-01 2017-12-31 0
10001 100 2018-01-01 2018-04-01 1
10001 NULL 2018-04-02 2018-05-31 0
10001 101 2018-06-01 2019-12-31 2
10001 100 2020-01-01 2020-12-31 3
Explanation
First the creating of a calendar table. This holds 1 row for each day and in this example it's limited to the first and last possible day of the job positions:
-- Builds the Calendar table: one row per day between the earliest position start
-- and the latest position end. T-SQL local variables must start with '@'.
DECLARE @DateStart DATE = (SELECT MIN(P.date_from) FROM zz_position AS P)
DECLARE @DateEnd DATE = (SELECT(MAX(P.date_to)) FROM zz_position AS P)

;WITH GeneratedDates AS
(
    -- anchor: the first day
    SELECT
        GeneratedDate = @DateStart
    UNION ALL
    -- recursive step: the following day, as long as it does not pass @DateEnd
    SELECT
        GeneratedDate = DATEADD(DAY, 1, G.GeneratedDate)
    FROM
        GeneratedDates AS G
    WHERE
        DATEADD(DAY, 1, G.GeneratedDate) <= @DateEnd
)
SELECT
    DateID = IDENTITY(INT, 1, 1),
    G.GeneratedDate
INTO
    Calendar
FROM
    GeneratedDates AS G
OPTION
    (MAXRECURSION 0) -- 0 removes the default limit of 100 recursion levels
This generates the following (up to 2020-12-31, which is max date from sample data):
DateID GeneratedDate
1 2017-01-01
2 2017-01-02
3 2017-01-03
4 2017-01-04
5 2017-01-05
6 2017-01-06
7 2017-01-07
Now we use a join with a between to "spread" the periods of both the positions and the employees periods (on different CTEs), so we get 1 row for each day, for each position/employee.
-- AllPositionDates: one row per position per calendar day in the position's date range
SELECT
T.posNo,
C.GeneratedDate
FROM
zz_position AS T
INNER JOIN Calendar AS C ON C.GeneratedDate BETWEEN T.date_from AND T.date_to
-- AllEmployedDates: one row per employment per calendar day in the employment's date range
SELECT
T.posNo,
T.resID,
T.seqNo,
C.GeneratedDate
FROM
zz_employment AS T
INNER JOIN Calendar AS C ON C.GeneratedDate BETWEEN T.date_from AND T.date_to
With these, we join them together by position and date using LEFT JOIN, so we get all days of each position and the matching employee (if exists). We also calculate a row number for all NULL values for each position that we are gonna use later. Note that this row number increases 1 by 1 with each following date accordingly.
-- Same two CTEs as in the full solution, followed by the body of PositionsByEmployed on its own.
;WITH AllPositionDates AS
(
SELECT
T.posNo,
C.GeneratedDate
FROM
zz_position AS T
INNER JOIN Calendar AS C ON C.GeneratedDate BETWEEN T.date_from AND T.date_to
),
AllEmployedDates AS
(
SELECT
T.posNo,
T.resID,
T.seqNo,
C.GeneratedDate
FROM
zz_employment AS T
INNER JOIN Calendar AS C ON C.GeneratedDate BETWEEN T.date_from AND T.date_to
)
-- PositionsByEmployed: every position day with its matching employment, if any
SELECT
P.posNo,
P.GeneratedDate,
E.resID,
E.seqNo,
-- numbered in date order, separately for days with and without a matching employment
NullRowNumber = ROW_NUMBER() OVER (
PARTITION BY
P.posNo,
CASE WHEN E.posNo IS NULL THEN 1 ELSE 2 END
ORDER BY
P.GeneratedDate ASC)
FROM
AllPositionDates AS P
LEFT JOIN AllEmployedDates AS E ON
P.posNo = E.posNo AND
P.GeneratedDate = E.GeneratedDate
Now with the tricky part. If we calculate the amount of days of difference between a hard-coded date and each day, we get a similar "row number" that increases consistently for each date.
-- Shows the day offset from a fixed anchor date next to the row number.
SELECT
P.posNo,
P.GeneratedDate,
DateDiff = DATEDIFF(DAY, '2000-01-01', P.GeneratedDate),
P.NullRowNumber
FROM
PositionsByEmployed AS P -- declared in the WITH clause (see the full solution)
ORDER BY
P.posNo,
P.GeneratedDate
We get the following:
posNo GeneratedDate DateDiff NullRowNumber
10001 2017-01-01 6210 1
10001 2017-01-02 6211 2
10001 2017-01-03 6212 3
10001 2017-01-04 6213 4
10001 2017-01-05 6214 5
10001 2017-01-06 6215 6
10001 2017-01-07 6216 7
10001 2017-01-08 6217 8
10001 2017-01-09 6218 9
If we add another column with the difference between these two values, you will see that the result stays the same for every row:
-- Adds GroupingValue: the row number minus the day offset from the anchor date.
SELECT
P.posNo,
P.GeneratedDate,
DateDiff = DATEDIFF(DAY, '2000-01-01', P.GeneratedDate),
P.NullRowNumber,
GroupingValue = P.NullRowNumber - DATEDIFF(DAY, '2000-01-01', P.GeneratedDate)
FROM
PositionsByEmployed AS P
ORDER BY
P.posNo,
P.GeneratedDate
We get:
posNo GeneratedDate DateDiff NullRowNumber GroupingValue
10001 2017-01-01 6210 1 -6209
10001 2017-01-02 6211 2 -6209
10001 2017-01-03 6212 3 -6209
10001 2017-01-04 6213 4 -6209
10001 2017-01-05 6214 5 -6209
10001 2017-01-06 6215 6 -6209
10001 2017-01-07 6216 7 -6209
10001 2017-01-08 6217 8 -6209
10001 2017-01-09 6218 9 -6209
10001 2017-01-10 6219 10 -6209
But if we scroll down until we see values that are NULL for employee (from the ROW_NUMBER() PARTITION BY expression E.PosNo), we see that the rest differs, since the ROW_NUMBER() kept increasing 1 by 1 and the DATEDIFF jumped because there are employed people in between:
posNo GeneratedDate DateDiff NullRowNumber GroupingValue
10001 2017-12-28 6571 362 -6209
10001 2017-12-29 6572 363 -6209
10001 2017-12-30 6573 364 -6209
10001 2017-12-31 6574 365 -6209
...
10001 2018-04-02 6666 366 -6300
10001 2018-04-03 6667 367 -6300
10001 2018-04-04 6668 368 -6300
10001 2018-04-05 6669 369 -6300
10001 2018-04-06 6670 370 -6300
10001 2018-04-07 6671 371 -6300
We use this "GroupingValue" as an additional GROUP BY expression to correctly separate position intervals that fall outside employed intervals.

ms sql group by 2 hours

i have this ms-sql code below, running on sql server 2017:
-- Hourly in/out counts per day, built from eventlog rows.
-- Table variables must be named with '@'; '#' is not valid in DECLARE ... TABLE.

-- Staging: one row per matching event, with its date and hour split out.
DECLARE @report_int TABLE
(
    atimestamp DATETIME,
    adate DATE,
    ahour INT,
    intvalue INT,
    attribute CHAR(254),
    total INT
);

INSERT INTO @report_int
SELECT
    el.servertimestamp,
    cast(el.servertimestamp as date) adate,
    (DATEPART(HH,el.servertimestamp)) ahour,
    el.intvalue,
    el.attributes,
    0
FROM
    eventlog el
where
    el.servertimestamp BETWEEN '6/29/2017'
    AND DATEADD(dd, +1, '6/29/2019')
    and (attributes LIKE '%N<=>OlympieioIn%' OR attributes LIKE '%N<=>OlympieioOut%')
order by
    el.servertimestamp;

-- Result: one row per date and hour that has at least one staged event.
DECLARE @report TABLE
(
    adate datetime,
    ahour int,
    came_in bigint,
    gone_out bigint
);

--fill report with dates and hours
INSERT INTO @report (adate, ahour) SELECT DISTINCT adate, ahour FROM @report_int;

--reset (intentionally touches every row)
UPDATE @report set came_in = 0, gone_out = 0;

--update in totals
-- the target is updated through its alias so the join to the aggregate is unambiguous
UPDATE r
SET came_in = i.total
FROM @report AS r
INNER JOIN
(
    select adate, ahour, COUNT(*) total
    FROM @report_int
    WHERE attribute LIKE 'N<=>OlympieioIn'
    GROUP BY adate, ahour
) AS i
    ON  i.adate = r.adate
    AND i.ahour = r.ahour;

--update out totals
UPDATE r
SET gone_out = i.total
FROM @report AS r
INNER JOIN
(
    SELECT adate, ahour, COUNT(*) total
    FROM @report_int
    WHERE attribute LIKE 'N<=>OlympieioOut'
    GROUP BY adate, ahour
) AS i
    ON  i.adate = r.adate
    AND i.ahour = r.ahour;

select
    adate,
    ahour,
    -- label of the form 'HH:00 - HH:00' covering the hour and the hour after it
    RIGHT('00'+cast(ahour as varchar),2)+':00 - '+RIGHT('00'+cast(ahour+1 as varchar),2)+':00' timeframe,
    came_in,
    gone_out
from
    @report
This code shows how many people came in or gone out of a building every day. As you can see, the amount of people who come in and go out, are displayed in an hourly grouping.
Below is a screenshot of the executed code.
What i want to do is group these data in a 2-hour grouping.For example, the grouping of the 3rd and 4th hour of the executed code above, will be:
DATE(adate): 2018-05-12
Hour(ahour): 15
Timeframe: 15:00-17:00
came_in: 0
gone_out: 2
Your help will be much appreciated,
thank you.
One way to get a datetime to the last 2 hour slot would be to use:
-- Hours since the zero date, minus their remainder modulo 2, added back onto the zero date.
DATEADD(HOUR, DATEDIFF(HOUR, 0, YourDateColumn) - (DATEDIFF(HOUR, 0, YourDateColumn) % 2), 0)
If we then take some (random) sample data, you get the following:
-- Six sample timestamps, each shown next to the start of its 2-hour slot.
WITH VTE AS
(
    SELECT CONVERT(datetime2(0), smp.DT) AS DT
    FROM (VALUES ('2018-07-04T00:11:32'),
                 ('2018-07-04T01:17:12'),
                 ('2018-07-04T02:00:01'),
                 ('2018-07-04T07:49:59'),
                 ('2018-07-04T07:59:58'),
                 ('2018-07-04T08:00:00')) AS smp (DT)
)
SELECT
    DT,
    DATEADD(HOUR, DATEDIFF(HOUR, 0, DT) - (DATEDIFF(HOUR, 0, DT) % 2), 0) AS DT2h
FROM VTE;
Which returns:
DT DT2h
--------------------------- -----------------------
2018-07-04 00:11:32 2018-07-04 00:00:00.000
2018-07-04 01:17:12 2018-07-04 00:00:00.000
2018-07-04 02:00:01 2018-07-04 02:00:00.000
2018-07-04 07:49:59 2018-07-04 06:00:00.000
2018-07-04 07:59:58 2018-07-04 06:00:00.000
2018-07-04 08:00:00 2018-07-04 08:00:00.000
If you wanted a COUNT by 2 hour slot:
-- Counts the sample timestamps per 2-hour slot.
WITH VTE AS
(
    SELECT CONVERT(datetime2(0), smp.DT) AS DT
    FROM (VALUES ('2018-07-04T00:11:32'),
                 ('2018-07-04T01:17:12'),
                 ('2018-07-04T02:00:01'),
                 ('2018-07-04T07:49:59'),
                 ('2018-07-04T07:59:58'),
                 ('2018-07-04T08:00:00')) AS smp (DT)
)
SELECT
    slot.DT2h,
    COUNT(VTE.DT) AS C
FROM VTE
-- name the slot expression once so it is not repeated in the GROUP BY
CROSS APPLY (VALUES (DATEADD(HOUR, DATEDIFF(HOUR, 0, VTE.DT) - (DATEDIFF(HOUR, 0, VTE.DT) % 2), 0))) AS slot (DT2h)
GROUP BY slot.DT2h;
Which returns:
DT2h C
----------------------- -----------
2018-07-04 00:00:00.000 2
2018-07-04 02:00:00.000 1
2018-07-04 06:00:00.000 2
2018-07-04 08:00:00.000 1
I would simply manually create a timetable and join it.
The timetable would have 24 rows (one for each hour) and could look like this:
hour hourtimeframe twohourtimeframe
13 13:00 - 14:00 13:00 - 15:00

islands and gaps tsql

I have been struggling with a problem that should actually be pretty simple, but after a full week of reading, googling, experimenting and so on, my colleague and I cannot find the proper solution. :(
The problem: We have a table with two values:
an employeenumber (P_ID, int) <--- identification of employee
a date (starttime, datetime) <--- time employee checked in
We need to know what periods each employee has been working.
When two dates are less than @gap days apart, they belong to the same period
For each employee there can be multiple records for any given day, but I just need to know which dates he worked; I am not interested in the time part
As soon as there is a gap > @gap days, the next date is considered the start of a new range
A range is at least 1 day (example: 21-9-2011 | 21-09-2011) but has no maximum length. (An employee checking in every @gap - 1 days should result in a period from the first day he checked in until today)
What we think we need are the islands in this table where the gap in days is greater than a variable @gap (@gap = 30 means 30 days)
So an example:
SOURCETABLE:
P_ID | starttime
------|------------------
12121 | 24-03-2009 7:30
12121 | 24-03-2009 14:25
12345 | 27-06-2011 10:00
99999 | 01-05-2012 4:50
12345 | 27-06-2011 10:30
12345 | 28-06-2011 11:00
98765 | 13-04-2012 10:00
12345 | 21-07-2011 9:00
99999 | 03-05-2012 23:15
12345 | 21-09-2011 12:00
45454 | 12-07-2010 8:00
12345 | 21-09-2011 17:00
99999 | 06-05-2012 11:05
99999 | 20-05-2012 12:45
98765 | 26-04-2012 16:00
12345 | 07-07-2012 14:00
99999 | 01-06-2012 13:55
12345 | 13-08-2012 13:00
Now what I need as a result is:
PERIODS:
P_ID | Start | End
-------------------------------
12121 | 24-03-2009 | 24-03-2009
12345 | 27-06-2012 | 21-07-2012
12345 | 21-09-2012 | 21-09-2012
12345 | 07-07-2012 | (today) OR 13-08-2012 <-- (less than @gap days ago) OR (last date in table)
45454 | 12-07-2010 | 12-07-2010
45454 | 17-06-2012 | 17-06-2012
98765 | 13-04-2012 | 26-04-2012
99999 | 01-05-2012 | 01-06-2012
I hope this is clear this way, I already thank you for reading this far, it would be great if you could contribute!
I've done a rough script that should get you started. Haven't bothered refining the datetimes and the endpoint comparisons might need tweaking.
-- Rough islands query: one row per employee check-in that starts a new period.
-- Relies on @gap (int, days) and the table variable @SOURCETABLE(P_ID, starttime);
-- both must use the '@' sigil, '#gap' is not a valid variable name.
select
    src.P_ID,
    src.starttime,
    -- period end: the last check-in found for this start, unless that check-in is the start
    -- itself and lies within @gap days of now, in which case the period is still open
    endtime = case when src.starttime <> lst.starttime or lst.starttime < DATEADD(dd,-1 * @gap,GETDATE()) then lst.starttime else GETDATE() end,
    frst.starttime, -- debugging: result of the first outer apply
    lst.starttime   -- debugging: result of the second outer apply
from @SOURCETABLE src
-- earliest check-in of the same employee later than (this check-in - @gap days)
outer apply (select starttime = MIN(starttime) from @SOURCETABLE sub where src.p_id = sub.p_id and sub.starttime > DATEADD(dd,-1 * @gap,src.starttime)) frst
-- latest check-in of the same employee earlier than (this check-in + @gap days)
outer apply (select starttime = MAX(starttime) from @SOURCETABLE sub where src.p_id = sub.p_id and src.starttime > DATEADD(dd,-1 * @gap,sub.starttime)) lst
-- keep only check-ins that have no earlier check-in inside the @gap window
where src.starttime = frst.starttime
order by src.P_ID, src.starttime
I get the following output, which is a little different from yours, but I think it's OK:
P_ID starttime endtime starttime starttime
----------- ----------------------- ----------------------- ----------------------- -----------------------
12121 2009-03-24 07:30:00.000 2009-03-24 14:25:00.000 2009-03-24 07:30:00.000 2009-03-24 14:25:00.000
12345 2011-06-27 10:00:00.000 2011-07-21 09:00:00.000 2011-06-27 10:00:00.000 2011-07-21 09:00:00.000
12345 2011-09-21 12:00:00.000 2011-09-21 17:00:00.000 2011-09-21 12:00:00.000 2011-09-21 17:00:00.000
12345 2012-07-07 14:00:00.000 2012-07-07 14:00:00.000 2012-07-07 14:00:00.000 2012-07-07 14:00:00.000
12345 2012-08-13 13:00:00.000 2012-08-16 11:23:25.787 2012-08-13 13:00:00.000 2012-08-13 13:00:00.000
45454 2010-07-12 08:00:00.000 2010-07-12 08:00:00.000 2010-07-12 08:00:00.000 2010-07-12 08:00:00.000
98765 2012-04-13 10:00:00.000 2012-04-26 16:00:00.000 2012-04-13 10:00:00.000 2012-04-26 16:00:00.000
The last two output cols are the results of the outer apply sections, and are just there for debugging.
This is based on the following setup:
-- Test fixture: gap threshold plus an in-memory copy of the sample check-ins.
-- Local variables and table variables must be named with @ in T-SQL.
declare @gap int
set @gap = 30          -- gap threshold in days
set dateformat dmy     -- the literals below are written day-month-year
-----P_ID----|----starttime----
declare @SOURCETABLE table (P_ID int, starttime datetime)
insert into @SOURCETABLE (P_ID, starttime) values
(12121,'24-03-2009 7:30'),
(12121,'24-03-2009 14:25'),
(12345,'27-06-2011 10:00'),
(12345,'27-06-2011 10:30'),
(12345,'28-06-2011 11:00'),
(98765,'13-04-2012 10:00'),
(12345,'21-07-2011 9:00'),
(12345,'21-09-2011 12:00'),
(45454,'12-07-2010 8:00'),
(12345,'21-09-2011 17:00'),
(98765,'26-04-2012 16:00'),
(12345,'07-07-2012 14:00'),
(12345,'13-08-2012 13:00')
UPDATE: Slight rethink. Now uses a CTE to work out the gaps forwards and backwards from each item, then aggregates those:
--Get the gap between each starttime and the next and prev (use 999 to indicate non-closed intervals)
-- Expects an int variable @gap (days) and a table variable @SOURCETABLE
-- (P_ID, starttime) declared earlier in the same batch; both need the @ sigil.
-- The 999 sentinel only closes the last island while @gap < 999; with a larger
-- @gap no row satisfies nextgap > @gap and the last endtime comes back NULL.
;WITH CTE_Gaps As (
    select
        src.p_id,
        src.starttime,
        nextgap = coalesce(DATEDIFF(dd, src.starttime, nxt.starttime), 999), --Gap to the next entry
        prevgap = coalesce(DATEDIFF(dd, prv.starttime, src.starttime), 999), --Gap to the previous entry
        isold = case when DATEDIFF(dd, src.starttime, getdate()) > @gap then 1 else 0 end --Is starttime more than gap days ago?
    from
        @SOURCETABLE src
        -- An aggregate without GROUP BY always yields one row (NULL when nothing
        -- matches), so these cross applies never drop a src row.
        cross apply (
            select starttime = MIN(sub.starttime)
            from @SOURCETABLE sub
            where src.p_id = sub.p_id
              and sub.starttime > src.starttime
        ) nxt
        cross apply (
            select starttime = max(sub.starttime)
            from @SOURCETABLE sub
            where src.p_id = sub.p_id
              and sub.starttime < src.starttime
        ) prv
)
--select * from CTE_Gaps
select
    gap.p_id,
    starttime = min(gap.starttime),
    endtime = nxt.starttime
from
    CTE_Gaps gap
    --Find the next starttime where its gap to the next > @gap
    cross apply (
        select starttime = MIN(sub.starttime)
        from CTE_Gaps sub
        where gap.p_id = sub.p_id
          and sub.starttime >= gap.starttime
          and sub.nextgap > @gap
    ) nxt
-- Every row sharing the same closing starttime belongs to the same island.
group by gap.p_id, nxt.starttime
order by gap.p_id, nxt.starttime
Jon most definitely has shown us the right direction. Performance was horrible though (4 million+ records in the database). And it looked like we were missing some information. With all that we learned from you we came up with the solution below. It uses elements of all the proposed answers and cycles through 3 temp tables before finally spewing results, but performance is good enough, and so is the data it generates.
-- Builds check-in periods ("islands") per employee in three temp-table steps:
--   #temp1: distinct (employee, check-in date) pairs for productive teams
--   #temp2: one row per period start (first check-in, or first check-in after a gap)
--   #temp3: #temp2 plus a closing row per employee
-- The final select pairs row N (period start) with row N+1 (period end).
declare @gap int
set @gap = 30          -- a gap of more than @gap days starts a new period
set dateformat dmy
--------------------------------------------------------------- #temp1 --------------------------------------------------
CREATE TABLE #temp1 ( EmployeeID int, starttime date)
INSERT INTO #temp1 ( EmployeeID, starttime)
select distinct
       ck.Employee_id,
       cast(ck.starttime as date)   -- only the date part matters
from SERVER1.DB1.dbo.checkins ck    -- alias must match the ck. references
inner join SERVER1.DB1.dbo.Team t on ck.team_id = t.id
where t.productive = 1
--------------------------------------------------------------- #temp2 --------------------------------------------------
create table #temp2 (ROWNR int, Employeeid int, ENDOFCHECKIN datetime, FIRSTCHECKIN datetime)
INSERT INTO #temp2 (ROWNR, Employeeid, ENDOFCHECKIN, FIRSTCHECKIN)
-- One row per gap: ENDOFCHECKIN is the day after the last check-in before the
-- gap, FIRSTCHECKIN is the first check-in after it. Numbering starts at 2
-- because row 1 is reserved for the employee's first known date.
select Row_number() OVER (partition by t.EmployeeID ORDER BY t.prev) + 1 as ROWNR,
       t.EmployeeID,
       DATEADD(DAY, 1, t.Prev) AS start_gap,
       DATEADD(DAY, 0, t.next) AS end_gap
from
(
    select a.EmployeeID,
           a.starttime as Prev,
           (
               select min(b.starttime)
               from #temp1 as b
               where b.starttime > a.starttime and b.EmployeeID = a.EmployeeID
           ) as Next
    from #temp1 as a) as t
where datediff(day, t.prev, t.next) > @gap   -- use the configured gap, not a literal
group by t.EmployeeID,
         t.Prev,
         t.next
union all -- add first known date for Employee (ROWNR 1 never collides with the rows above)
select 1 as ROWNR,
       ct.EmployeeID,
       NULL,
       min(ct.starttime)
from #temp1 ct
group by ct.EmployeeID
--------------------------------------------------------------- #temp3 --------------------------------------------------
create table #temp3 (ROWNR int, Employeeid int, ENDOFCHECKIN datetime, STARTOFCHECKIN datetime)
INSERT INTO #temp3 (ROWNR, Employeeid, ENDOFCHECKIN, STARTOFCHECKIN)
select ROWNR,
       Employeeid,
       ENDOFCHECKIN,
       FIRSTCHECKIN
from #temp2
union all -- add last known date for Employee (ROWNR is one past the employee's #temp2 rows)
select (select count(*) from #temp2 b where b.Employeeid = ct.Employeeid)+1 as ROWNR,
       ct.Employeeid,
       (select dateadd(d,1,max(c.starttime)) from #temp1 c where c.EmployeeID = ct.Employeeid),
       NULL
from #temp2 ct
group by ct.EmployeeID
---------------------------------------finally check our data-------------------------------------------------
-- a1 supplies the period start, b1 (the next row of the same employee) its end.
-- Jaar = year, Maand = month.
select a1.Employeeid,
       a1.STARTOFCHECKIN as STARTOFCHECKIN,
       ENDOFCHECKIN = eff.effective_end,
       year(a1.STARTOFCHECKIN) as JaarSTARTOFCHECKIN,
       JaarENDOFCHECKIN = year(eff.effective_end),
       Month(a1.STARTOFCHECKIN) as MaandSTARTOFCHECKIN,
       MaandENDOFCHECKIN = month(eff.effective_end),
       (year(a1.STARTOFCHECKIN)*100)+month(a1.STARTOFCHECKIN) as JaarMaandSTARTOFCHECKIN,
       -- Year and month both come from the same end date; the original mixed
       -- the end year with the start month in one branch.
       JaarMaandENDOFCHECKIN = (year(eff.effective_end)*100)+month(eff.effective_end),
       datediff(M,a1.STARTOFCHECKIN,b1.ENDOFCHECKIN) as MONTHSCHECKEDIN
from #temp3 a1
full outer join #temp3 b1 on a1.ROWNR = b1.ROWNR -1 and a1.Employeeid = b1.Employeeid
-- Compute the effective end once so every derived column agrees on it.
cross apply (
    select effective_end = CASE
                               WHEN b1.ENDOFCHECKIN <= a1.STARTOFCHECKIN THEN a1.ENDOFCHECKIN
                               ELSE b1.ENDOFCHECKIN
                           END
) eff
-- Drops the unmatched halves produced by the full outer join.
where not (a1.STARTOFCHECKIN is null AND b1.ENDOFCHECKIN is null)
order by a1.Employeeid, a1.STARTOFCHECKIN

Resources