Combine continuous datetime intervals by type - sql-server

Say we have such a table:
declare #periods table (
s date,
e date,
t tinyint
);
with date intervals without gaps ordered by start date (s)
insert into #periods values
('2013-01-01' , '2013-01-02', 3),
('2013-01-02' , '2013-01-04', 1),
('2013-01-04' , '2013-01-05', 1),
('2013-01-05' , '2013-01-06', 2),
('2013-01-06' , '2013-01-07', 2),
('2013-01-07' , '2013-01-08', 2),
('2013-01-08' , '2013-01-09', 1);
All date intervals have different types (t).
It is required to combine date intervals of the same type where they are not broken by intervals of the other types (having all intervals ordered by start date).
So the result table should look like:
s | e | t
------------|------------|-----
2013-01-01 | 2013-01-02 | 3
2013-01-02 | 2013-01-05 | 1
2013-01-05 | 2013-01-08 | 2
2013-01-08 | 2013-01-09 | 1
Any ideas how to do this without cursor?
I've got one working solution:
-- Asker's own attempt: number each run of same-type intervals with a single
-- UPDATE that carries variables from row to row, then collapse every numbered
-- run with MIN/MAX.
declare #periods table (
s datetime primary key clustered,
e datetime,
t tinyint,
period_number int
);
insert into #periods (s, e, t) values
('2013-01-01' , '2013-01-02', 3),
('2013-01-02' , '2013-01-04', 1),
('2013-01-04' , '2013-01-05', 1),
('2013-01-05' , '2013-01-06', 2),
('2013-01-06' , '2013-01-07', 2),
('2013-01-07' , '2013-01-08', 2),
('2013-01-08' , '2013-01-09', 1);
-- #t remembers the type assigned from the previously processed row;
-- #PeriodNumber is the running run counter written into period_number.
declare #t tinyint = null;
declare #PeriodNumber int = 0;
declare #anchor date;
-- NOTE(review): the outcome depends on the rows being visited in s order,
-- which the statement itself never requests. The clustered key on s, the
-- #anchor assignment and maxdop 1 are the author's stated preconditions for
-- that; confirm before relying on it.
update #periods
set period_number = #PeriodNumber,
-- advance the counter when the row's type differs from the remembered type
-- (while #t is still NULL the comparison is not true, so the ELSE branch runs)
#PeriodNumber = case
when #t <> t
then #PeriodNumber + 1
else
#PeriodNumber
end,
#t = t,
-- reads the clustered key column into a variable; the value is not used again below
#anchor = s
option (maxdop 1);
-- one output row per numbered run: earliest start, latest end, and its type
select
s = min(s),
e = max(e),
t = min(t)
from
#periods
group by
period_number
order by
s;
but I doubt if I can rely on such a behavior of UPDATE statement?
I use SQL Server 2008 R2.
Edit:
Thanks to Daniel and this article: http://www.sqlservercentral.com/articles/T-SQL/68467/
I found three important things that were missed in the solution above:
There must be clustered index on the table
There must be anchor variable and call of the clustered column
Update statement should be executed by one processor, i.e. without parallelism
I've changed the above solution in accordance with these rules.

Since your ranges are continuous, the problem essentially becomes a gaps-and-islands one. If only you had a criterion to help you to distinguish between different sequences with the same t value, you could group all the rows using that criterion, then just take MIN(s), MAX(e) for every group.
One method of obtaining such a criterion is to use two ROW_NUMBER calls. Consider the following query:
SELECT
*,
rnk1 = ROW_NUMBER() OVER ( ORDER BY s),
rnk2 = ROW_NUMBER() OVER (PARTITION BY t ORDER BY s)
FROM #periods
;
For your example it would return the following set:
s e t rnk1 rnk2
---------- ---------- -- ---- ----
2013-01-01 2013-01-02 3 1 1
2013-01-02 2013-01-04 1 2 1
2013-01-04 2013-01-05 1 3 2
2013-01-05 2013-01-06 2 4 1
2013-01-06 2013-01-07 2 5 2
2013-01-07 2013-01-08 2 6 3
2013-01-08 2013-01-09 1 7 3
The interesting thing about the rnk1 and rnk2 rankings is that if you subtract one from the other, you will get values that, together with t, uniquely identify every distinct sequence of rows with the same t:
s e t rnk1 rnk2 rnk1 - rnk2
---------- ---------- -- ---- ---- -----------
2013-01-01 2013-01-02 3 1 1 0
2013-01-02 2013-01-04 1 2 1 1
2013-01-04 2013-01-05 1 3 2 1
2013-01-05 2013-01-06 2 4 1 3
2013-01-06 2013-01-07 2 5 2 3
2013-01-07 2013-01-08 2 6 3 3
2013-01-08 2013-01-09 1 7 3 4
Knowing that, you can easily apply grouping and aggregation. This is what the final query might look like:
-- A row's position among all rows minus its position among rows of its own
-- type stays constant for as long as the type does not change, so
-- (t, island_key) identifies one unbroken run of same-type intervals.
WITH islands AS (
    SELECT
        s,
        e,
        t,
        ROW_NUMBER() OVER (ORDER BY s)
            - ROW_NUMBER() OVER (PARTITION BY t ORDER BY s) AS island_key
    FROM #periods
)
SELECT
    MIN(s) AS s,
    MAX(e) AS e,
    t
FROM islands
GROUP BY
    island_key,
    t
;
If you like, you can play with this solution at SQL Fiddle.

How about this?
-- Merge adjacent same-type intervals without a cursor.
-- Each row carries s2, the start of the run it currently belongs to. Every
-- pass moves s2 back to the start of the same-type interval that ends exactly
-- at s2; when a pass changes nothing, s2 is the start of the whole run.
DECLARE @periods TABLE (
    s  datetime PRIMARY KEY,
    e  datetime,
    t  tinyint,
    s2 datetime  -- start of the merged run this row has been assigned to
);

INSERT INTO @periods (s, e, t) VALUES
    ('2013-01-01', '2013-01-02', 3),
    ('2013-01-02', '2013-01-04', 1),
    ('2013-01-04', '2013-01-05', 1),
    ('2013-01-05', '2013-01-06', 2),
    ('2013-01-06', '2013-01-07', 2),
    ('2013-01-07', '2013-01-08', 2),
    ('2013-01-08', '2013-01-09', 1);

-- Whole-table update on purpose: every row starts out as its own run.
UPDATE @periods SET s2 = s;

-- @@ROWCOUNT reflects the UPDATE executed immediately before each check,
-- so the loop ends after the first pass that touches no rows.
WHILE @@ROWCOUNT > 0
BEGIN
    UPDATE p2
    SET s2 = p1.s
    FROM @periods AS p1
    INNER JOIN @periods AS p2
        ON p2.t = p1.t
        AND p2.s2 = p1.e;
END;

-- One row per run: its start, its latest end, and its (single) type.
SELECT
    s2 AS s,
    MAX(e) AS e,
    MIN(t) AS t
FROM @periods
GROUP BY s2
ORDER BY s2;

A possible solution that avoids both UPDATE and a cursor is to use recursive common table expressions, like this:
-- Merge adjacent same-type intervals with a recursive CTE: every row is
-- stretched backwards and then forwards until it covers its whole run.
declare #periods table (
s date,
e date,
t tinyint
);
insert into #periods values
('2013-01-01' , '2013-01-02', 3),
('2013-01-02' , '2013-01-04', 1),
('2013-01-04' , '2013-01-05', 1),
('2013-01-05' , '2013-01-06', 2),
('2013-01-06' , '2013-01-07', 2),
('2013-01-07' , '2013-01-08', 2),
('2013-01-08' , '2013-01-09', 1);
-- Anchor member: every source row at depth n = 0.
--   fl_s = 1 when no same-type row ends where this row starts (nothing to extend to on the left)
--   fl_e = 1 when no same-type row starts where this row ends (nothing to extend to on the right)
with cte as ( select 0 as n
,p.s as s
,p.e as e
,p.t
,case when p2.s is null then 1 else 0 end fl_s
,case when p3.e is null then 1 else 0 end fl_e
from #periods p
left outer join #periods p2
on p2.e = p.s
and p2.t = p.t
left outer join #periods p3
on p3.s = p.e
and p3.t = p.t
union all
-- Recursive member 1: while the start is not final (fl_s = 0), take over the
-- start of the same-type row that ends at the current start and re-evaluate fl_s.
select n+1 as n
, p2.s as s
, p.e as e
,p.t
,case when not exists(select * from #periods p3 where p3.e =p2.s and p3.t=p2.t) then 1 else 0 end as fl_s
,p.fl_e as fl_e
from cte p
inner join #periods p2
on p2.e = p.s
and p2.t = p.t
where p.fl_s = 0
union all
-- Recursive member 2: once the start is final (fl_s = 1) but the end is not
-- (fl_e = 0), take over the end of the same-type row that starts at the
-- current end and re-evaluate fl_e.
select n+1 as n
, p.s as s
, p2.e as e
,p.t
,p.fl_s as fl_s
,case when not exists(select * from #periods p3 where p3.s =p2.e and p3.t=p2.t) then 1 else 0 end as fl_e
from cte p
inner join #periods p2
on p2.s = p.e
and p2.t = p.t
where p.fl_s = 1
and p.fl_e = 0
)
-- Keep only fully extended rows (both flags set) and collapse identical
-- ranges; count_lines is the number of extended rows that share a range.
,result as (select s,e,t,COUNT(*) as count_lines
from cte
where fl_e = 1
and fl_s = 1
group by s,e,t
)
select * from result
-- maxrecursion 0 lifts the recursion-depth limit for this statement
option(maxrecursion 0)
resultset achieved...
s e t count_lines
2013-01-01 2013-01-02 3 1
2013-01-02 2013-01-05 1 2
2013-01-05 2013-01-08 2 3
2013-01-08 2013-01-09 1 1

Hooray! I've found the solution that suits me and it is done without iteration
-- Treat the data as a chain of boundary dates, each tagged with the type of
-- the interval that follows it; keep only the boundaries where the type
-- changes, then pair each kept boundary with the next one.
WITH boundaries AS (
    -- every interval start with its type ...
    SELECT s, t
    FROM #periods
    UNION ALL
    -- ... plus the final end date, which has no type after it
    SELECT MAX(e), NULL
    FROM #periods
),
numbered AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY s) AS rn,
        s,
        t
    FROM boundaries
),
change_points AS (
    -- drop a boundary when the boundary just before it carries the same type,
    -- then renumber the survivors without gaps
    SELECT
        ROW_NUMBER() OVER (ORDER BY cur.rn) AS rn,
        cur.s,
        cur.t
    FROM numbered AS cur
    WHERE NOT EXISTS (
        SELECT *
        FROM numbered AS prev
        WHERE cur.rn = prev.rn + 1
          AND cur.t = prev.t
    )
)
SELECT
    first_cp.s AS s,
    next_cp.s AS e,
    first_cp.t
FROM change_points AS first_cp
INNER JOIN change_points AS next_cp
    ON next_cp.rn = first_cp.rn + 1;
Thanks everyone for sharing your thoughts and solutions!
Details:
cte1 returns the chain of dates with the types after them:
s t
---------- ----
2013-01-01 3
2013-01-02 1
2013-01-04 1
2013-01-05 2
2013-01-06 2
2013-01-07 2
2013-01-08 1
2013-01-09 NULL -- there is no type *after* the last date
cte2 just adds a row number to the above result:
rn s t
---- ---------- ----
1 2013-01-01 3
2 2013-01-02 1
3 2013-01-04 1
4 2013-01-05 2
5 2013-01-06 2
6 2013-01-07 2
7 2013-01-08 1
8 2013-01-09 NULL
if we output all the fields from the query in cte3 without where condition, we get the following results:
select * from cte2 a left join cte2 b on a.rn = b.rn + 1 and a.t = b.t;
rn s t rn s t
---- ---------- ---- ------ ---------- ----
1 2013-01-01 3 NULL NULL NULL
2 2013-01-02 1 NULL NULL NULL
3 2013-01-04 1 2 2013-01-02 1
4 2013-01-05 2 NULL NULL NULL
5 2013-01-06 2 4 2013-01-05 2
6 2013-01-07 2 5 2013-01-06 2
7 2013-01-08 1 NULL NULL NULL
8 2013-01-09 NULL NULL NULL NULL
For the dates where the type is repeated, there are values on the right side of the results. So we can simply remove all the rows that have values on the right side.
So cte3 returns:
rn s t
----- ---------- ----
1 2013-01-01 3
2 2013-01-02 1
3 2013-01-05 2
4 2013-01-08 1
5 2013-01-09 NULL
Note that removing some rows leaves gaps in the rn sequence, so the rows have to be renumbered.
From here only one thing left - to transform the dates to periods:
select
s = a.s,
e = b.s,
a.t
from
cte3 a
inner join cte3 b on b.rn = a.rn + 1;
and we've got the required result:
s e t
---------- ---------- ----
2013-01-01 2013-01-02 3
2013-01-02 2013-01-05 1
2013-01-05 2013-01-08 2
2013-01-08 2013-01-09 1

This is your solution, run against different data in the table:
declare #periods table (
s datetime primary key,
e datetime,
t tinyint,
period_number int
);
insert into #periods (s, e, t) values
('2013-01-01' , '2013-01-02', 3),
('2013-01-02' , '2013-01-04', 1),
('2013-01-04' , '2013-01-05', 1),
('2013-01-05' , '2013-01-06', 2),
('2013-01-09' , '2013-01-10', 2),
('2013-01-10' , '2013-01-11', 1);
declare #t tinyint = null;
declare #PeriodNumber int = 0;
update #periods
set period_number = #PeriodNumber,
#PeriodNumber = case
when #t <> t
then #PeriodNumber + 1
else
#PeriodNumber
end,
#t = t;
select
s = min(s),
e = max(e),
t = min(t)
from
#periods
group by
period_number
order by
s;
where there is a gap between
('2013-01-05' , '2013-01-06', 2),
--and
('2013-01-09' , '2013-01-10', 2),
your solution resultset is..
s e t
2013-01-01 2013-01-02 3
2013-01-02 2013-01-05 1
2013-01-05 2013-01-10 2
2013-01-10 2013-01-11 1
Wasn't the expected result set supposed to look like this?
s e t
2013-01-01 2013-01-02 3
2013-01-02 2013-01-05 1
2013-01-05 2013-01-06 2
2013-01-09 2013-01-10 2
2013-01-10 2013-01-11 1
Maybe I misunderstood the rules of your problem...

Related

Cannot increment values in a T-SQL CTE

I have a case where I need to write a CTE ( at least this seems like the best approach) . I have almost everything I need in place but one last issue. I am using a CTE to generate many millions of a records and then I will insert them into a table. The data itself is almost irrelevant except for three columns. 2 date time columns and one character column.
The idea behind the CTE is this. I want one datetime field called Start and one int field called DataValue. I will have a variable which is the count of records I want to aim for and then another variable which is the number of times I want to repeat the datetime value. I don't think I need to explain the software this data represents but basically I need to have 16 rows where the Start value is the same and then after the 16th run I want to then add 15 minutes and then repeat. Effectively there will be events in 15 minute intervals and I will need X number of rows per 15 minute interval to represent those events.
This is my code
Declare #tot as int;
Declare #inter as int;
Set #tot = 26
Set #inter = 3;
WITH mycte(DataValue,start) AS
(
SELECT 1 DataValue, cast('01/01/2011 00:00:00' as datetime) as start
UNION all
if DataValue % #inter = 0
SELECT
DataValue + 1,
cast(DateAdd(minute,15,start) as datetime)
else
select
DataValue + ,
start
FROM mycte
WHERE DataValue + 1 <= #tot)
select
m.start,
m.start,
m.Datavalue%#inter
from mycte as m
option (maxrecursion 0);
I'll change the select statement into an insert statement once I get it working but the m.DataValue%#inter will make it repeat integer when inserting so the only thing I need is to figure out how to make the start be the same 16 times in a row and then increment
It seems that I cannot have an IF statement in the CTE but I am not sure how to accomplish that but what I was going to do was basically say if the DataValue%16 was 0 then increase the value of start.
In the end I should hopefully have something like this where in this case I only repeat it 4 times
+-----------+-------------------+
| DateValue | start |
+-----------+-------------------+
| 1 | 01/01/01 00:00:00 |
| 2 | 01/01/01 00:00:00 |
| 3 | 01/01/01 00:00:00 |
| 4 | 01/01/01 00:00:00 |
| 5 | 01/01/01 00:15:00 |
| 6 | 01/01/01 00:15:00 |
| 7 | 01/01/01 00:15:00 |
| 8 | 01/01/01 00:15:00 |
Is there another way to accomplish this without conditional statements?
You can use case when as below:
-- Generate @tot rows numbered 1..@tot; the start time moves forward by
-- 15 minutes after every @inter rows.
DECLARE @tot AS int;
DECLARE @inter AS int;
SET @tot = 26;
SET @inter = 3;

WITH mycte (DataValue, start) AS
(
    -- anchor: first row at the initial start time
    SELECT 1 AS DataValue, CAST('01/01/2011 00:00:00' AS datetime) AS start
    UNION ALL
    -- each step adds one row; a row whose number is a multiple of @inter
    -- closes its group, so the row after it gets start + 15 minutes
    SELECT
        DataValue + 1 AS [Datavalue],
        CASE
            WHEN (DataValue % @inter) = 0 THEN CAST(DATEADD(minute, 15, start) AS datetime)
            ELSE [start]
        END AS [start]
    FROM mycte
    WHERE (DataValue + 1) <= @tot
)
SELECT
    m.DataValue,
    m.[start]
FROM mycte AS m
-- more than 100 recursion levels are needed for large @tot
OPTION (MAXRECURSION 0);
This will give the below result
DataValue Start
========= =============
1 2011-01-01 00:00:00.000
2 2011-01-01 00:00:00.000
3 2011-01-01 00:00:00.000
4 2011-01-01 00:15:00.000
5 2011-01-01 00:15:00.000
6 2011-01-01 00:15:00.000
7 2011-01-01 00:30:00.000
8 2011-01-01 00:30:00.000
9 2011-01-01 00:30:00.000
10 2011-01-01 00:45:00.000
11 2011-01-01 00:45:00.000
12 2011-01-01 00:45:00.000
....
26 2011-01-01 02:00:00.000
And if you dont want to use case when you can use double recursive cte as below:-
-- Same output as the CASE version, built from two recursive CTEs.
-- Declared here so the snippet runs on its own.
DECLARE @tot AS int = 26;
DECLARE @inter AS int = 3;

WITH mycte (DataValue, start) AS
(
    -- first group: up to @inter rows sharing the initial start time
    -- (never more than @tot rows in total)
    SELECT 1 AS DataValue, CAST('01/01/2011 00:00:00' AS datetime) AS start
    UNION ALL
    SELECT DataValue + 1 AS [DataValue], [start]
    FROM mycte
    WHERE (DataValue + 1) <= @inter
      AND (DataValue + 1) <= @tot
),
Increments AS
(
    -- every row spawns the row @inter positions later, 15 minutes on
    SELECT DataValue, [start]
    FROM mycte
    UNION ALL
    SELECT
        DataValue + @inter AS [DataValue],
        DATEADD(minute, 15, [start]) AS [start]
    FROM Increments
    -- test the value actually being generated; testing DataValue + 1
    -- let rows numbered above @tot through
    WHERE (DataValue + @inter) <= @tot
)
SELECT
    m.DataValue,
    m.[start]
FROM Increments AS m
ORDER BY m.DataValue
OPTION (MAXRECURSION 0);
it will give the same results.
You can do this with a tally table and some basic math. I'm not sure if your total rows are #tot or should they be #tot * #inter. If so, you just need to change the TOP clause. If you need more rows, you just need to alter the tally table generation.
-- Generate @tot rows from an inline tally (numbers) table instead of
-- recursion; the start time is derived arithmetically from the row number.
DECLARE @tot AS int;
DECLARE @inter AS int;
SET @tot = 26;
SET @inter = 3;

WITH
E (n) AS (
    -- 10 rows
    SELECT n FROM (VALUES (0), (0), (0), (0), (0), (0), (0), (0), (0), (0)) AS E (n)
),
E2 (n) AS (
    -- 10 x 10 = 100 rows
    SELECT a.n FROM E AS a CROSS JOIN E AS b
),
E4 (n) AS (
    -- 100 x 100 = 10,000 rows: the most this tally can supply
    SELECT a.n FROM E2 AS a CROSS JOIN E2 AS b
),
cteTally (n) AS (
    -- the ordering is irrelevant; only the 1..@tot numbering matters
    SELECT TOP (@tot) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n
    FROM E4
)
SELECT
    n,
    -- integer division: rows 1..@inter get +0, the next @inter rows +15 minutes, ...
    DATEADD(minute, 15 * ((n - 1) / @inter), '20110101') AS [start]
FROM cteTally;

T-SQL - 3 month moving sum - preceding null values

Using SQL Server 2016. I have the following data table (sample)
Target Date Total
-----------------
2018-01-24 1
2018-02-28 1
2018-03-02 1
2018-03-08 1
2018-03-15 1
2018-03-30 1
2018-04-16 1
2018-04-18 1
2018-04-30 1
I would like to get to get a 3 month moving sum (grouping is by month):
Target Date Total_Sum
-----------------------
2018-01-01 1
2018-02-01 2
2018-03-01 6
2018-04-01 8
OK, this should get the answer you want. First you need to total the values for each month; then you can compute a running total over the last 3 months:
CREATE TABLE SampleTable (TargetDate date, Total int);
GO
INSERT INTO SampleTable
VALUES ('20180124', 1),
('20180228', 1),
('20180302', 1),
('20180308', 1),
('20180315', 1),
('20180330', 1),
('20180416', 1),
('20180418', 1),
('20180430', 1);
GO
SELECT *
FROM SampleTable;
GO
-- Months: one row per calendar month (first day of the month) with that
-- month's total.
-- Final query: for each month, add up the month rows that fall inside the
-- three-calendar-month window ending at it. Matching on the month value
-- rather than on "2 PRECEDING" rows keeps the window at three calendar
-- months even when some month has no rows in SampleTable.
WITH Months AS (
    SELECT
        DATEADD(MONTH, DATEDIFF(MONTH, 0, TargetDate), 0) AS TargetMonth,
        SUM(Total) AS MonthTotal
    FROM SampleTable
    GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, TargetDate), 0)
)
SELECT
    m.TargetMonth,
    SUM(w.MonthTotal) AS Last3Months
FROM Months AS m
INNER JOIN Months AS w
    ON w.TargetMonth > DATEADD(MONTH, -3, m.TargetMonth)
    AND w.TargetMonth <= m.TargetMonth
GROUP BY m.TargetMonth
ORDER BY m.TargetMonth;
GO
DROP TABLE SampleTable;
GO
Pls try the below code
-- Inline sample data (dates as string literals), then a running total by
-- month number, keeping one row per distinct running total.
-- NOTE(review): the window below has no lower bound, so it accumulates from
-- the first month instead of covering three months; the result listed after
-- this query shows 9 for April where the question asks for 8. Ordering by
-- MONTH() alone also ignores the year. Confirm before reuse.
;WITH CTE(TargetDate,Total)
AS
(
SELECT '2018-01-24', 1 UNION ALL
SELECT '2018-02-28', 1 UNION ALL
SELECT '2018-03-02', 1 UNION ALL
SELECT '2018-03-08', 1 UNION ALL
SELECT '2018-03-15', 1 UNION ALL
SELECT '2018-03-30', 1 UNION ALL
SELECT '2018-04-16', 1 UNION ALL
SELECT '2018-04-18', 1 UNION ALL
SELECT '2018-04-30', 1
)
-- overwrite characters 9-10 of the date string (the day) with '01'
SELECT STUFF(TargetDate,9,2,'01') AS TargetDate
,Total_Sum
FROM
(
SELECT TargetDate,Total_Sum
-- Seq numbers the rows that share a running total, earliest date first
,ROW_NUMBER()OVER(PARTITION BY Total_Sum ORDER BY TargetDate) AS Seq
FROM
(
SELECT TargetDate
-- running total ordered by month number; rows of the same month share one value
,SUM(Total )OVER(ORDER BY MONTH(TargetDate) ) AS Total_Sum
FROM CTE
)dt
)fnl
-- keep the first row of each running-total value
WHERE Seq=1
Result
TargetDate Total_Sum
---------------------
2018-01-01 1
2018-02-01 2
2018-03-01 6
2018-04-01 9

MSSQL: Create incremental row label per group

In my table, I have a primary key and a date. What I'd like to achieve is to have an incremental label based on whether or not there is a break between the dates - column Goal.
Now, below is an example. The break column was calculated using LEAD function (I thought it might help).
I am able to solve it using T-SQL, but this would be last resort. Nothing I tried has worked so far. I am using MSSQL 2014.
PK | Date | break | Goal |
-------------------------------
1 | 03/2017 | 0 | 1 |
1 | 04/2017 | 0 | 1 |
1 | 08/2017 | 1 | 2 |
1 | 09/2017 | 0 | 2 |
1 | 10/2017 | 0 | 2 |
1 | 02/2018 | 1 | 3 |
1 | 03/2018 | 0 | 3 |
Here is a code to reproduce this example:
-- Sample data: one consumer, first-of-month dates, and the desired label (Goal).
CREATE TABLE #test
(
    ConsumerId INT,
    FullDate   DATE,
    Goal       INT
);

INSERT INTO #test (ConsumerId, FullDate, Goal)
VALUES
    (1, '2017-03-01', 1),
    (1, '2017-04-01', 1),
    (1, '2017-08-01', 2),
    (1, '2017-09-01', 2),
    (1, '2017-10-01', 2),
    (1, '2018-02-01', 3),
    (1, '2018-03-01', 3);

-- [break] = 1 when more than one month separates a row from the previous row
-- of the same consumer. A consumer's first row has no previous row, so it is
-- compared with itself and gets 0.
-- BREAK is a reserved T-SQL keyword, hence the bracketed column alias.
SELECT
    ConsumerId,
    FullDate,
    CASE
        WHEN DATEDIFF(
                 MONTH,
                 COALESCE(
                     LAG(FullDate, 1) OVER (PARTITION BY ConsumerId ORDER BY FullDate),
                     FullDate
                 ),
                 FullDate
             ) > 1
            THEN 1
        ELSE 0
    END AS [break],
    Goal
FROM #test
ORDER BY FullDate ASC;
EDIT
This is apparently a famous problem "Islands and gaps" as pointed out in the comments. And Google offers many solutions as well as other questions here at SO.
Try this...
-- Label each consumer's runs of consecutive months 1, 2, 3, ...
WITH
cte_TestGap AS (
    -- 0 for a row exactly one month after the previous row of the consumer;
    -- otherwise the row's own position, which marks the start of a new run
    SELECT
        src.ConsumerId,
        src.FullDate,
        CASE
            WHEN DATEDIFF(
                     MONTH,
                     LAG(src.FullDate, 1) OVER (PARTITION BY src.ConsumerId ORDER BY src.FullDate),
                     src.FullDate
                 ) = 1
                THEN 0
            ELSE ROW_NUMBER() OVER (PARTITION BY src.ConsumerId ORDER BY src.FullDate)
        END AS Gap
    FROM #test AS src
),
cte_SmearGap AS (
    -- a running maximum copies each run's start position onto the 0 rows after it
    SELECT
        marked.ConsumerId,
        marked.FullDate,
        MAX(marked.Gap) OVER (
            PARTITION BY marked.ConsumerId
            ORDER BY marked.FullDate
            ROWS UNBOUNDED PRECEDING
        ) AS GV
    FROM cte_TestGap AS marked
)
-- DENSE_RANK turns the start positions into gap-free run numbers
SELECT
    smeared.ConsumerId,
    smeared.FullDate,
    DENSE_RANK() OVER (PARTITION BY smeared.ConsumerId ORDER BY smeared.GV) AS GroupValue
FROM cte_SmearGap AS smeared;
An explanation of the code and how it works...
The 1st query, in cte_TestGap, uses the LAG function along with the ROW_NUMBER() function to mark the location of each gap in the data. We can see that by breaking it out and looking at its results...
WITH
cte_TestGap AS (
SELECT
t.ConsumerId, t.FullDate,
Gap = CASE
WHEN DATEDIFF(mm, t.FullDate, LAG(t.FullDate, 1) OVER (PARTITION BY t.ConsumerId ORDER BY t.FullDate)) = -1
THEN 0
ELSE ROW_NUMBER() OVER (PARTITION BY t.ConsumerId ORDER BY t.FullDate)
END
FROM
#test t
)
SELECT * FROM cte_TestGap;
cte_TestGap results...
ConsumerId FullDate Gap
----------- ---------- --------------------
1 2017-03-01 1
1 2017-04-01 0
1 2017-08-01 3
1 2017-09-01 0
1 2017-10-01 0
1 2018-02-01 6
1 2018-03-01 0
At this point we want the 0 values to take on the value of the preceding non-0 values, allowing them to be grouped together. This is done in the 2nd query (cte_SmearGap) using the MAX function with a "window frame". So if we look at the output of cte_SmearGap, we can see that...
WITH
cte_TestGap AS (
SELECT
t.ConsumerId, t.FullDate,
Gap = CASE
WHEN DATEDIFF(mm, t.FullDate, LAG(t.FullDate, 1) OVER (PARTITION BY t.ConsumerId ORDER BY t.FullDate)) = -1
THEN 0
ELSE ROW_NUMBER() OVER (PARTITION BY t.ConsumerId ORDER BY t.FullDate)
END
FROM
#test t
),
cte_SmearGap AS (
SELECT
tg.ConsumerId, tg.FullDate,
GV = MAX(tg.Gap) OVER (PARTITION BY tg.ConsumerId ORDER BY tg.FullDate ROWS UNBOUNDED PRECEDING)
FROM
cte_TestGap tg
)
SELECT * FROM cte_SmearGap;
cte_SmearGap results...
ConsumerId FullDate GV
----------- ---------- --------------------
1 2017-03-01 1
1 2017-04-01 1
1 2017-08-01 3
1 2017-09-01 3
1 2017-10-01 3
1 2018-02-01 6
1 2018-03-01 6
At this point All of the rows are in distinct groups... but... We'd like to have our group numbers in a contiguous sequence (1,2,3) as opposed to (1,3,6).
Of course that's easy enough to fix using the DENSE_Rank() function, which is what's happening in the final select...
WITH
cte_TestGap AS (
SELECT
t.ConsumerId, t.FullDate,
Gap = CASE
WHEN DATEDIFF(mm, t.FullDate, LAG(t.FullDate, 1) OVER (PARTITION BY t.ConsumerId ORDER BY t.FullDate)) = -1
THEN 0
ELSE ROW_NUMBER() OVER (PARTITION BY t.ConsumerId ORDER BY t.FullDate)
END
FROM
#test t
),
cte_SmearGap AS (
SELECT
tg.ConsumerId, tg.FullDate,
GV = MAX(tg.Gap) OVER (PARTITION BY tg.ConsumerId ORDER BY tg.FullDate ROWS UNBOUNDED PRECEDING)
FROM
cte_TestGap tg
)
SELECT
sg.ConsumerId, sg.FullDate,
GroupValue = DENSE_RANK() OVER (PARTITION BY sg.ConsumerId ORDER BY sg.GV)
FROM
cte_SmearGap sg;
The end result...
ConsumerId FullDate GroupValue
----------- ---------- --------------------
1 2017-03-01 1
1 2017-04-01 1
1 2017-08-01 2
1 2017-09-01 2
1 2017-10-01 2
1 2018-02-01 3
1 2018-03-01 3
The comment from David Browne was actually extremely useful. If you google "Islands and Gaps", there are many variations of the solution. Below is the one I liked the most.
In the end, I needed the Goal column to be able to group the dates into MIN/MAX. This solution skips this step and directly creates the aggregated range.
Here is the source.
-- Collapse each consumer's consecutive months straight into date ranges.
-- Moving a date back by its rank (in months) yields the same value for every
-- row of an unbroken monthly run, so that value (grp) identifies the run.
-- Ranking per consumer keeps one consumer's rows from shifting another's
-- groups; DENSE_RANK keeps a repeated date from splitting a run.
-- NOTE(review): assumes FullDate is always the first day of a month, as in
-- the sample data; confirm.
SELECT
    MIN(a.FullDate) AS range_start,
    MAX(a.FullDate) AS range_end
FROM (
    SELECT
        ConsumerId,
        FullDate,
        DATEADD(
            MONTH,
            -1 * DENSE_RANK() OVER (PARTITION BY ConsumerId ORDER BY FullDate),
            FullDate
        ) AS grp
    FROM #test
) AS a
GROUP BY
    a.ConsumerId,
    a.grp
ORDER BY
    a.ConsumerId,
    range_start;
And the output:
range_start | range_end |
--------------------------
2017-03-01 | 2017-04-01 |
2017-08-01 | 2017-10-01 |
2018-02-01 | 2018-03-01 |

Days difference in SQL Server

I have a question about SQL Server.
Table: holidaylist
Date | weekendStatus | Holidaystatus
2015-12-01 | 0 | 0
2015-12-02 | 0 | 0
2015-12-03 | 0 | 0
2015-12-04 | 1 | 0
2015-12-05 | 1 | 0
2015-12-06 | 0 | 1
2015-12-07 | 0 | 0
2015-12-08 | 0 | 0
2015-12-09 | 0 | 1
2015-12-10 | 0 | 0
2015-12-11 | 0 | 0
2015-12-12 | 1 | 1
2015-12-13 | 1 | 0
Table: emp
empid | doj | dos
1 | 2015-12-01 | 2015-12-06
2 |2015-12-01 | 2015-12-13
3 |2015-12-03 |2015-12-13
I want to get the number of days between doj and dos, both including and excluding the days flagged by weekendStatus and Holidaystatus.
I want output like this:
Empid | doj | dos |includeweekendandholidays | witoutincludeweekendandholidayslist
1 | 2015-12-01 |2015-12-06 | 5 | 3
2 | 2015-12-01 |2015-12-13 | 12 | 8
3 | 2015-12-03 |2015-12-13 | 10 | 6
I tried this query:
select
a.empid, a.doj, a.dos,
case
when b.weekendstatus = 1 and c.Holidaystatus = 1
then datediff(day, c.date, b.date)
end as includeweekenandholidays
case
when b.weekendstatus != 1 or c.Holidaystatus = 1
then datediff(day, c.date, b.date)
end as witoutincludeweekendandholidayslist
from
emp a
left join
holidaylist b on a.doj = b.date
left join
holidaylist c on a.dos = c.date
The above query does not give the expected result. Please tell me how to write a query that achieves this in SQL Server.
Try this :
-- For each employee, count the holidaylist days within [doj, dos]:
--   IncludeRest = every listed day in the interval
--   ExcludeRest = only the days flagged neither weekend nor holiday
SELECT
    e.empid,
    e.doj,
    e.dos,
    IncludeRest = (
        SELECT COUNT(h.[date])
        FROM holidaylist AS h
        WHERE h.[date] >= e.doj
          AND h.[date] <= e.dos
    ),
    ExcludeRest = (
        SELECT COUNT(h.[date])
        FROM holidaylist AS h
        WHERE h.[date] >= e.doj
          AND h.[date] <= e.dos
          AND h.weekendStatus = 0
          AND h.Holidaystatus = 0
    )
FROM emp AS e;
you can use a CASE in your COUNT to determine whether or not to count that day..
-- One output row per employee: the listed days in [doj, dos) and, of those,
-- the days flagged neither holiday nor weekend.
SELECT
    e.empid,
    e.doj,
    e.dos,
    COUNT(*) AS includeweekendandholidays,
    SUM(
        CASE
            WHEN hl.Holidaystatus = 0 AND hl.[weekendStatus] = 0 THEN 1
            ELSE 0
        END
    ) AS withoutincludeweekendandholidayslist
FROM emp AS e
INNER JOIN holidaylist AS hl
    ON hl.[Date] >= e.doj
    AND hl.[Date] < e.dos
GROUP BY
    e.empid,
    e.doj,
    e.dos
This might perform better since it only joins to holidaylist table on records you need..
-- Total days come from DATEDIFF; working days are counted from the
-- holidaylist rows that are neither weekend nor holiday.
-- NOTE(review): DATEDIFF(DAY, doj, dos) does not count the dos day, while
-- BETWEEN below includes both doj and dos, so the two columns measure
-- slightly different intervals; confirm which one is intended.
-- NOTE(review): because of the inner join plus WHERE, an employee with no
-- qualifying day in the interval produces no row at all.
SELECT
e.empid,
e.doj,
e.dos,
DATEDIFF(DAY, e.doj, e.dos) includeweekendandholidays,
COUNT(*) withoutincludeweekendandholidayslist
FROM
emp e
JOIN holidaylist hl ON hl.Date BETWEEN e.doj AND e.dos
WHERE
-- only rows flagged neither weekend nor holiday reach the COUNT
weekendStatus = 0
AND Holidaystatus = 0
GROUP BY
e.empid,
e.doj,
e.dos,
DATEDIFF(DAY, e.doj, e.dos)
I don't get your output though since it only appears that you're excluding weekends and not holidays..
You can use OUTER APPLY:
SELECT a.empid, a.doj, a.dos,
DATEDIFF(d, a.doj, a.dos) + 1 AS include,
DATEDIFF(d, a.doj, a.dos) + 1 - b.wd - b.hd + b.common AS without
FROM emp AS a
OUTER APPLY (
SELECT SUM(weekendStatus) AS wd,
SUM(Holidaystatus) AS hd,
COUNT(CASE WHEN weekendStatus = 1 AND Holidaystatus = 1 THEN 1 END) AS common
FROM holidaylist
WHERE [Date] BETWEEN a.doj AND a.dos) AS b
For each row of table emp, OUTER APPLY calculates weekendStatus=1 and Holidaystatus=1 rows that correspond to the interval of this row.
Calculated fields selected:
include is the total number of days of the emp interval including weekend days and holidays.
without is the total number of days of the emp interval minus weekend days and holidays. common field makes sure common weekend days and holidays are not subtracted twice.
Note: The above query includes start and end days of the interval in the calculations, so the interval considered is [doj - dos]. You can change the predicate of the WHERE clause in the OUTER APPLY operation so as to exclude start, end, or both, days of the interval.
Demo here
try another way with cross join
-- Total days in [doj, dos) per employee, and the same figure minus the
-- non-working days in that interval.
-- t1 counts each non-working day once: a day flagged both weekend and
-- holiday must not be subtracted twice.
select
    t.empid,
    t.doj,
    t.dos,
    datediff(day, t.doj, t.dos) as includeweekendandholidays,
    -- employees with no listed day in the interval have no t1 row: subtract 0
    datediff(day, t.doj, t.dos) - coalesce(t1.wes, 0) as witoutincludeweekendandholidayslist
from #emp t
left join (
    select
        emp.empid,
        sum(case when hd.Holidaystatus = 1 or hd.weekendStatus = 1 then 1 else 0 end) as wes
    from #emp emp
    inner join #holidaylist hd
        on hd.[Date] >= emp.doj
        and hd.[Date] < emp.dos
    group by emp.empid
) t1 on t.empid = t1.empid
sample data
declare #holidaylist table ([Date] date, weekendStatus int, Holidaystatus int)
insert into #holidaylist([Date], weekendStatus, Holidaystatus) values
('2015-12-01' , 0 , 0),
('2015-12-02' , 0 , 0),
('2015-12-03' , 0 , 0),
('2015-12-04' , 1 , 0),
('2015-12-05' , 1 , 0),
('2015-12-06' , 0 , 1),
('2015-12-07' , 0 , 0),
('2015-12-08' , 0 , 0),
('2015-12-09' , 0 , 1),
('2015-12-10' , 0 , 0),
('2015-12-11' , 0 , 0),
('2015-12-12' , 1 , 1),
('2015-12-13' , 1 , 0)
declare #emp table(empid int, doj date, dos date)
insert into #emp (empid,doj,dos) values
(1 , '2015-12-01' , '2015-12-06'),
(2 ,'2015-12-01' , '2015-12-13'),
(3 ,'2015-12-03' ,'2015-12-13')

How to join master table to multiple details table with extra rows of details cell filled with NULL

I am not sure how appropriate is the question title. My problem is similar to the thread How to Join Multiple Detail Tables to Header Table . But this one too giving duplicate records.
Here is my situation
I have a master table and two details tables.
MasterID | Name
----------------------- // Master table
1 Item1
2 Item2
3 Item3
4 Item4
5 Item5
-----------------------
Det1ID | FKMasterID | Value
-----------------------------
1 1 Det1-Val1
2 1 Det1-Val2
3 2 Det1-Val3
Det2ID | FKMasterID | Value
-----------------------------
1 1 Det2-Val1
2 1 Det2-Val2
3 1 Det2-Val3
4 3 Det2-Val4
5 5 Det2-Val5
----------------------------------
The Tables are somewhat like this.
When I make required left-right joins , I get result in this way.
MasterID | Name | Det1ID | Det1Value | Det2ID | Det2Value
------------------------------------------------------------
1 Item1 1 Det1-Val1 1 Det2-Val1
1 Item1 1 Det1-Val1 2 Det2-Val2
1 Item1 1 Det1-Val1 3 Det2-Val3
1 Item1 2 Det1-Val2 1 Det2-Val1
1 Item1 2 Det1-Val2 2 Det2-Val2
1 Item1 2 Det1-Val2 3 Det2-Val3
2 Item2 3 Det1-Val3 NULL NULL
3 Item3 NULL NULL 4 Det2-Val4
4 Item4 NULL NULL NULL NULL
5 Item5 NULL NULL 5 Det2-Val5
-------------------------------------------------------------
What I expect to get is
MasterID | Name | Det1ID | Det1Value | Det2ID | Det2Value
------------------------------------------------------------
1 Item1 1 Det1-Val1 1 Det2-Val1
1 Item1 2 Det1-Val2 2 Det2-Val2
1 Item1 NULL NULL 3 Det2-Val3
2 Item2 3 Det1-Val3 NULL NULL
3 Item3 NULL NULL 4 Det2-Val4
4 Item4 NULL NULL NULL NULL
5 Item5 NULL NULL 5 Det2-Val5
------------------------------------------------------------
I don't want the details value to be duplicated for any of the master item.
Is there any way to do this?? only iterate with a cursor is the way??
A little help is appreciated.
Thank you,
It proved to be a bit more tricky than I initially thought, but the following should do the trick. The code should be pretty self explanatory.
-- Pair up the detail rows of two detail tables per master record without
-- multiplying them: number each master's rows in both detail sets, then match
-- the n-th Det1 row with the n-th Det2 row. Whichever side runs out first is
-- padded with NULLs by the FULL JOIN.
WITH [master] AS (
    -- inline sample data standing in for the master table
    SELECT * FROM (VALUES
         (1, 'Item1')
        ,(2, 'Item2')
        ,(3, 'Item3')
        ,(4, 'Item4')
        ,(5, 'Item5')
    ) AS T(ID, Value)
),
Det1 AS (
    -- inline sample data standing in for the first detail table
    SELECT * FROM (VALUES
         (1, 1, 'Det1-Val1')
        ,(2, 1, 'Det1-Val2')
        ,(3, 2, 'Det1-Val3')
    ) AS T(ID, MasterID, Value)
),
Det2 AS (
    -- inline sample data standing in for the second detail table
    SELECT * FROM (VALUES
         (1, 1, 'Det2-Val1')
        ,(2, 1, 'Det2-Val2')
        ,(3, 1, 'Det2-Val3')
        ,(4, 3, 'Det2-Val4')
        ,(5, 5, 'Det2-Val5')
    ) AS T(ID, MasterID, Value)
),
Det1Numbered AS (
    -- LEFT JOIN keeps masters without Det1 rows (one row, NULL details, RowNr 1);
    -- numbering restarts at 1 for every master
    SELECT MasterID    = M.ID,
           MasterValue = M.Value,
           Det1ID      = D.ID,
           Det1Value   = D.Value,
           RowNr       = ROW_NUMBER() OVER (PARTITION BY M.ID ORDER BY D.ID)
    FROM [master] AS M
    LEFT JOIN Det1 AS D
        ON M.ID = D.MasterID
),
Det2Numbered AS (
    -- same numbering for the second detail table
    SELECT MasterID    = M.ID,
           MasterValue = M.Value,
           Det2ID      = D.ID,
           Det2Value   = D.Value,
           RowNr       = ROW_NUMBER() OVER (PARTITION BY M.ID ORDER BY D.ID)
    FROM [master] AS M
    LEFT JOIN Det2 AS D
        ON M.ID = D.MasterID
)
SELECT MasterID    = COALESCE(D1.MasterID, D2.MasterID),
       MasterValue = COALESCE(D1.MasterValue, D2.MasterValue),
       D1.Det1ID,
       D1.Det1Value,
       D2.Det2ID,
       D2.Det2Value
FROM Det1Numbered AS D1
FULL JOIN Det2Numbered AS D2
    ON D1.MasterID = D2.MasterID
    AND D2.RowNr = D1.RowNr
-- the second key fixes the order of the rows inside one master, which
-- ORDER BY MasterID alone leaves undefined
ORDER BY MasterID,
         COALESCE(D1.RowNr, D2.RowNr)
Edit: There indeed was a little bug in there, I've updated the query above. The fix is to replace PARTITION BY D.MasterID by PARTITION BY M.ID, now RowNr starts at 1 for each master record which it did not in the previous revision.
I'm not super sure but I think what you want is this:
SELECT m.MasterID, m.Name, d1.DetailsID Det1ID, d1.Value Det1Value, d2.DetailsID Det2ID, d2.Value Det2Value
FROM Details1 d1
FULL OUTER JOIN Details2 d2 ON d1.FKMasterID = d2.FKMasterID AND d1.Value = d2.Value
RIGHT JOIN Master m ON d1.FKMasterID = m.MasterID OR d2.FKMasterID = m.MasterID
This will only show both Details tables if the Values match, which seems to be what you want?

Resources