TSQL - Groups and Islands dates - sql-server

I need a help on writing an optimal query for the below problem. Have attached the query I have with me but it is highly utilizing resources.
Below is the code to achieve above said logic. Please suggest some optimal way to achieve the same
-- drop table #me
create table #ME (memid int , EffectiveDate datetime , termdate datetime)
Insert into #ME values ('123','3-Dec-16','10-Jan-17')
Insert into #ME values ('123','11-Jan-17','6-Feb-17')
Insert into #ME values ('123','7-Feb-17','5-Mar-17')
Insert into #ME values ('123','8-Mar-17','15-Apr-17')
Insert into #ME values ('123','16-Apr-17','24-May-17')
--drop table #dim
select * from #ME
declare #StartDate datetime , #CutoffDate datetime
select #StartDate= min(effectivedate),#CutoffDate = max(termdate) From #me where termdate<>'9999-12-31 00:00:00.000'
SELECT d
into #dim
FROM
(
SELECT d = DATEADD(DAY, rn - 1, #StartDate)
FROM
(
SELECT TOP (DATEDIFF(DAY, #StartDate, #CutoffDate))
rn = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
FROM sys.all_objects AS s1
CROSS JOIN sys.all_objects AS s2
-- on my system this would support > 5 million days
ORDER BY s1.[object_id]
) AS x
) AS y;
--drop table #MemEligibilityDateSpread
select MemID, D As DateSpread Into #MemEligibilityDateSpread From #Dim dim JOIN #me ME on dim.d between ME.effectivedate and me.termdate
--drop table #DateClasified
WITH CTE AS
(
SELECT MEmID,
UniqueDate = DateSpread,
DateGroup = DATEADD(dd, - ROW_NUMBER() OVER (PARTITION BY Memid ORDER BY Memid,DateSpread), DateSpread)
FROM #MemEligibilityDateSpread
GROUP BY Memid,DateSpread
)
--===== Now, if we find the MIN and MAX date for each DateGroup, we'll have the
-- Start and End dates of each group of contiguous daes. While we're at it,
-- we can also figure out how many days are in each range of days.
SELECT Memid,
StartDate = MIN(UniqueDate),
EndDate = MAX(UniqueDate)
INTO #DateClasified
FROM cte
GROUP BY Memid,DateGroup
ORDER BY Memid,StartDate
select ME.MemID,ME.EffectiveDate,ME.TermDate,DC.StartDate,DC.EndDate from #DateClasified dc join #me ME ON Me.MemID = dc.MemID
and (ME.EffectiveDate BETWEEN DC.StartDate AND DC.EndDate
OR ME.TermDate BETWEEN DC.StartDate AND DC.EndDate)

In cte0 and cte1, we create an ad-hoc tally/calendar table. Once we have that, it is a small matter to calculate and group by Island.
Currently, the tally is has a max of 10,000 days (27 years), but you can easily expand the tally table by adding , cte0 N5
;with cte0(N) as (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N))
,cte1(R,D) as (Select Row_Number() over (Order By (Select Null))
,DateAdd(DD,-1+Row_Number() over (Order By (Select Null)),(Select MinDate=min(EffectiveDate) From #ME))
From cte0 N1, cte0 N2, cte0 N3, cte0 N4)
Select MemID
,EffectiveDate
,TermDate
,SinceFrom = Min(EffectiveDate) over (Partition By Island)
,Tildate = Max(TermDate) over (Partition By Island)
From (
Select *,Island = R - Row_Number() over (Partition By MemID Order by TermDate)
From #ME A
Join cte1 B on D Between EffectiveDate and TermDate
) A
Group By MemID,Island,EffectiveDate,TermDate
Order By 1,2
Returns
MemID EffectiveDate TermDate SinceFrom Tildate
123 2016-12-03 2017-01-10 2016-12-03 2017-03-05
123 2017-01-11 2017-02-06 2016-12-03 2017-03-05
123 2017-02-07 2017-03-05 2016-12-03 2017-03-05
123 2017-03-08 2017-04-15 2017-03-08 2017-05-24
123 2017-04-16 2017-05-24 2017-03-08 2017-05-24
Edit - Now if you want a compressed dataset
Select MemID
,EffectiveDate = Min(EffectiveDate)
,TermDate = Max(TermDate)
From (
Select *,Island = R - Row_Number() over (Partition By MemID Order by TermDate)
From #ME A
Join cte1 B on D Between EffectiveDate and TermDate
) A
Group By MemID,Island
Order By 1,2
Returns
MemID EffectiveDate TermDate
123 2016-12-03 2017-03-05
123 2017-03-08 2017-05-24

Related

Find overlapping days between two periods of Date

I have two tables, each of which holds the period of dates (from date1 to date2)
i will Find overlapping days between two periods of Date in table1 and table2
Example
table1
-------------------------
id | FromDate | ToDate
1 |2000-01-01 | 2000-02-04
2 |2000-03-01 | 2000-03-29
table2
-------------------------
id | FromDate | ToDate
1 |2000-02-01 | 2000-02-07
2 |2000-03-27 | 2000-03-29
The result I want to have:
2000-02-01
2000-02-02
2000-02-03
2000-02-04
2000-03-27
2000-03-28
2000-03-29
This should work:
CREATE TABLE #t1
(
id int,
FromDate date,
ToDate date
)
CREATE TABLE #t2
(
id int,
FromDate date,
ToDate date
)
INSERT #t1 VALUES
(1, '2000-01-01', '2000-02-04'),
(2, '2000-03-01', '2000-03-29')
INSERT #t2 VALUES
(1, '2000-02-01', '2000-02-07'),
(2, '2000-03-27', '2000-03-29')
WITH DateRange AS --select range where intersection is possible
(
SELECT MAX(MinDate) MinDate,MIN(MaxDate) MaxDate,DATEDIFF(DAY,MAX(MinDate),MIN(MaxDate)) Diff
FROM (VALUES ((SELECT MIN(FromDate) FROM #t1)),((SELECT MIN(FromDate) FROM #t2))) MinDate(MinDate)
CROSS APPLY (VALUES ((SELECT MAX(ToDate) FROM #t1)),((SELECT MAX(ToDate) FROM #t2))) MaxDate(MaxDate)
), AllDates AS --generate sequence of days
(
SELECT MinDate D, MaxDate Limit
FROM DateRange
UNION ALL
SELECT DATEADD(DAY, 1, D), Limit
FROM AllDates
WHERE DATEADD(DAY, 1, D)<=Limit
) --select all days existing in any range in both tables
SELECT D
FROM AllDates
WHERE EXISTS (SELECT * FROM #t1 WHERE D>=FromDate AND D<=ToDate)
AND EXISTS (SELECT * FROM #t2 WHERE D>=FromDate AND D<=ToDate)
It's possible to do this with CTE's and recursion.
--Your sample data
DECLARE #table1 TABLE (id int PRIMARY KEY, FromDate date, ToDate date)
DECLARE #table2 TABLE (id int PRIMARY KEY, FromDate date, ToDate date)
INSERT INTO #table1 VALUES (1, '2000-01-01', '2000-02-04') , (2, '2000-03-01', '2000-03-29')
INSERT INTO #table2 VALUES (1, '2000-02-01', '2000-02-07') , (2, '2000-03-27', '2000-03-29')
--A couple CTE's
;WITH cteDates AS (
SELECT T1.id --get the min and max dates for each id
,CASE WHEN T1.FromDate > T2.FromDate THEN T1.FromDate ELSE T2.FromDate END [mindate]
,CASE WHEN T1.ToDate < T2.ToDate THEN T1.ToDate ELSE T2.ToDate END [maxdate]
FROM #table1 T1 INNER JOIN #table2 T2 ON T1.id = T2.id
)
, cteRecursion AS ( --date range for each id
SELECT id, mindate AS DateValue
FROM cteDates
UNION ALL
SELECT id, DATEADD(DAY, 1, DateValue)
FROM cteRecursion C1
WHERE DATEADD(DAY, 1, DateValue) <= (
SELECT maxDate
FROM cteDates C2
WHERE C2.id = C1.id
)
)
--SELECT query
SELECT DateValue FROM cteRecursion ORDER BY DateValue OPTION (MAXRECURSION 0)
Produces Output:
DateValue
---------
2000-02-01
2000-02-02
2000-02-03
2000-02-04
2000-03-27
2000-03-28
2000-03-29
One possible solution is the with the use of a Numbers or Tally table
;WITH cteNumbers (N)
AS(
SELECT ROW_NUMBER() OVER(ORDER BY N1.N)
FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N1(N)
CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N2 (N)
CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N3 (N)
)
SELECT T1.FromDate
FROM(
SELECT
T1.FromDate
FROM dbo.Table1 T1
UNION
SELECT
DATEADD(DAY, N, T1.FromDate)
FROM
dbo.Table1 T1
CROSS APPLY cteNumbers N
WHERE N <= DATEDIFF(DAY, T1.FromDate, T1.ToDate)
) T1
WHERE t1.FromDate IN
(
SELECT
T2.FromDate
FROM dbo.Table2 T2
UNION
SELECT
DATEADD(DAY, N, T2.FromDate)
FROM
dbo.Table2 T2
CROSS APPLY cteNumbers N
WHERE N <= DATEDIFF(DAY, T2.FromDate, T2.ToDate)
)
Result is
FromDate
2000-02-01 00:00:00.000
2000-02-02 00:00:00.000
2000-02-03 00:00:00.000
2000-02-04 00:00:00.000
2000-03-27 00:00:00.000
2000-03-28 00:00:00.000
2000-03-29 00:00:00.000
The Numbers/tally table will allow for a daterange of up to 1000 days. If you need more then add another line like so, CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N4 (N)

sql query that gets the difference between 2 recent rows for every row item that occurs more than once in a table

Sql query that gets the difference between 2 recent rows for every value that occurs more than once in a table.
for example
book value date
A 4 2017-07-17 09:16:44.480
A 2 2017-08-15 10:05:58.273
B 3 2017-04-15 10:05:58.273
C 2 2017-08-15 10:05:58.273
B 3 2017-04-13 10:05:58.273
B 3 2017-04-12 10:05:58.273
should return
A 2
B 0
Here is a solution:
SELECT book, MAX(value) - MIN(value) AS difference FROM (
SELECT book, value, ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS rownum FROM t
) AS a WHERE rownum <= 2 GROUP BY book HAVING MAX(rownum) >= 2
And here it is in SQLFiddle
SELECT id_pk FROM [table] GROUP BY [fields you whant to compare by] HAVING COUNT(*) > 1)
this select returns you the list of pk from element that are repited
so, in other select you migth get another Select like
Select * from [table] where id_pk in(
SELECT id_pk FROM [table] GROUP BY [fields you whant to compare by] HAVING COUNT(*) > 1)) limit 2
this is functional, still not good as i'm not analising complexity.
Add a rownumber before calculating:
create table #test ([book] char(1), [value] int, [date] datetime)
insert into #test values ('A', 4, '2017-07-17 09:16:44.480')
insert into #test values ('A', 2, '2017-08-15 10:05:58.273')
insert into #test values ('B', 3, '2017-04-15 10:05:58.273')
insert into #test values ('C', 2, '2017-08-15 10:05:58.273')
insert into #test values ('B', 3, '2017-04-13 10:05:58.273')
insert into #test values ('B', 3, '2017-04-12 10:05:58.273')
;with cte as(
Select ROW_NUMBER () OVER (order by [book], [date] ) as rownumber, *
from #test)
select distinct [1].book, abs(first_value([1].[Value]) over (partition by [1].book order by [1].rownumber desc) - [2].val2) as [Difference]
from cte [1]
inner join
(select rownumber, book, first_value([Value]) over (partition by book order by rownumber desc) as val2
from cte) [2] on [1].book = [2].book and [1].rownumber < [2].rownumber
I would use analytic functions:
;with CTE as (
SELECT book
,value
,LAG(value) OVER (PARTITION BY book ORDER BY date) last_value
,ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) rn
FROM MyTable
)
SELECT book
,value - last_value as value_change
FROM CTE
WHERE rn = 1
AND last_value IS NOT NULL
LAG() was added in SQL Server 2012, but even if you're on a higher version, your database must have the compatibility version set to 110 or higher for them to be available. Here's an alternative that should work on SQL Server 2005 or higher, or a database compatibility 90 or higher.
;with CTE as (
SELECT book
,value
,ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) rn
FROM MyTable
)
SELECT c1.book
c1.value - c2.value as value_change
FROM CTE c1
INNER JOIN CTE c2
ON c1.book = c2.book
WHERE c1.rn = 1
AND c2.rn = 2

Find Non Consecutive date in SQL Server

I want to find the missing NON-consecutive dates between two consecutive date.
I am posting my SQL query and temp tables to find out the results.
But I am not getting the proper results
Here is my SQL Query
drop table #temp
create table #temp(an varchar(20),dt date)
insert into #temp
select '2133783715' , '2016-10-16' union all
select '5107537880' , '2016-10-15' union all
select '6619324250' , '2016-10-15' union all
select '7146586717' , '2016-10-15' union all
select '7472381321' , '2016-10-12' union all
select '7472381321' , '2016-10-13' union all
select '7472381321' , '2016-10-14' union all
select '7472381321' , '2016-10-24' union all
select '8186056340' , '2016-10-15' union all
select '9099457123' , '2016-10-12' union all
select '9099457123' , '2016-10-13' union all
select '9099457123' , '2016-10-14' union all
select '9099457123' , '2016-10-23' union all
select '9099457123' , '2016-11-01' union all
select '9099457123' , '2016-11-02' union all
select '9099457123' , '2016-11-03' union all
select '9165074784' , '2016-10-16'
drop table #final
SELECT an,MIN(dt) AS MinDate,MAX(dt) AS MaxDate, COUNT(*) AS ConsecutiveUsage
--DateDiff(Day,LAG(MAX(dt)) OVER (partition by an ORDER BY an),MAX(dt)) nonusageDate
into #final
FROM(
SELECT an,dt,
DATEDIFF(D, ROW_NUMBER() OVER(partition by an ORDER BY dt),dt) AS Diff
FROM #temp c
)P
GROUP BY an,diff
select * from #final order by 1
an MinDate MaxDate ConsecutiveUsage
2133783715 2016-10-16 2016-10-16 1
5107537880 2016-10-15 2016-10-15 1
6619324250 2016-10-15 2016-10-15 1
7146586717 2016-10-15 2016-10-15 1
7472381321 2016-10-12 2016-10-14 3
7472381321 2016-10-24 2016-10-24 1
7472381321 2016-10-27 2016-10-28 1
8186056340 2016-10-15 2016-10-15 1
9099457123 2016-10-12 2016-10-14 3
9099457123 2016-10-23 2016-10-23 1
9165074784 2016-10-16 2016-10-16 1
But I want results of non-usage date.
I want to get those AN which has not been used continuously since 10 days.
So here output should be like this:-
an minusagesdate maxusagedate ConsecutiveNotUseddays
7472381321 2016-10-15 2016-10-23 9
7472381321 2016-10-25 2016-10-26 2
9099457123 2016-10-15 2016-10-22 8
So I just want to find out only consecutive not used dates count and their min and max dates .
try this :
with ranked as (
select f1.*,
ROW_NUMBER() over(partition by an order by dt) rang
from #temp f1
where exists
(select * from #temp f2
where f1.an=f2.an and datediff( day, f2.dt, f1.dt) >1
)
)
select an, minusagesdate, maxusagesdate, ConsecutiveNotUseddays
from (
select f1.*,
DATEADD(DAY,1, (select f2.dt from ranked f2 where f1.an=f2.an and f2.rang+1=f1.rang)) minusagesdate ,
DATEADD(DAY,-1, f1.dt) maxusagesdate ,
datediff( day, (select f2.dt from ranked f2 where f1.an=f2.an and f2.rang+1=f1.rang), f1.dt) - 1 ConsecutiveNotUseddays
from ranked f1
) tmp
where tmp.ConsecutiveNotUseddays>0
or like this
with ranked as (
select f1.*,
ROW_NUMBER() over(partition by an order by dt) rang
from #temp f1
where exists
(select * from #temp f2
where f1.an=f2.an and datediff( day, f2.dt, f1.dt) >1
)
)
select f1.an,
DATEADD(DAY,1, f3.dtbefore) minusagesdate ,
DATEADD(DAY,-1, f1.dt) maxusagesdate ,
datediff( day, f3.dtbefore, f1.dt) - 1 ConsecutiveNotUseddays
from ranked f1
outer apply
(
select top 1 f2.dt as dtbefore from ranked f2
where f1.an=f2.an and f2.rang+1=f1.rang
) f3
where datediff( day, f3.dtbefore, f1.dt) - 1>0
It looks like you're trying to count the number of days not used between the mindate and the maxdate for each an. If that's the case, then this should do the trick:
select an, min(dt) as min_dt, max(dt) as max_dt
, count(distinct dt) as daysused --this counts each day used, but only once
, datediff(day,min(dt),max(dt)) as totaldays --this is the total number of days between min and max date
, datediff(day,min(dt),max(dt)) - count(distinct dt) as daysnotused
--This takes total days - used days to give non-used days
from #temp c
group by an
having datediff(day,min(dt),max(dt)) - count(distinct dt) >= 10
As I understood you need this:
;WITH cte AS (
SELECT an,
dt,
ROW_NUMBER() OVER (PARTITION BY an ORDER BY dt) as rn
FROM #temp
)
SELECT c1.an,
c1.dt MinDate,
c2.dt MaxDate,
DATEDIFF(day,c1.dt,c2.dt) as ConsecutiveNotUseddays
FROM cte c1
INNER JOIN cte c2
ON c1.an = c2.an AND c1.rn = c2.rn-1
WHERE DATEDIFF(day,c1.dt,c2.dt) >= 10
Output:
an MinDate MaxDate ConsecutiveNotUseddays
7472381321 2016-10-14 2016-10-24 10
For 9099457123 I got two rows with 9 in ConsecutiveNotUseddays. You can check results removing WHERE statement.
On any newer version of SQL Server this should be easy:
with x as (
select *, lag(dt) over(partition by an order by dt) dt_lag
from #temp
)
select *, datediff(day, dt_lag, dt)
from x
where datediff(day, dt_lag, dt) >= 10

Split number into rows so that they sum up to the original number

I have a table [tbl] with money values
id mon
1 10.17
2 36.00
I need to split these values into rows by a set of specific ranges [1.00,10.00,25.00]. The sum of the new values grouped by id will equal the original value.
id mon sum
1 1.00 1.00
1 9.17 10.17
2 1.00 1.00
2 10.00 11.00
2 25.00 36.00
Is there any way to do this without using a cursor?
Here's one way to do it:
;with CTE as (select t2.value, t1.id, sum(t2.value)
over (partition by t1.id order by t2.value asc) as total
from table1 t1 join table2 t2 on t1.mon >= t2.limit
)
select id, value, total from CTE
union all
select t1.id, t1.mon - c.total, t1.mon
from table1 t1
outer apply (select top 1 id, total from CTE c
where c.id = t1.id order by c.value desc) c
where t1.mon > c.total
order by 1,3
This uses additional table that has the limits stored to join with the original data and then uses running total in a CTE and joins that to the original table to get the remaining amounts
You can test the example in SQL Fiddle
Here is my attempt using window functions and CROSS APPLY:
;WITH Cte(s) AS(
SELECT CAST(1 AS MONEY) UNION ALL
SELECT 10 UNION ALL
SELECT 25
)
,CteRange AS(
SELECT
s,
e = SUM(s) OVER(ORDER BY s)
FROM Cte
)
SELECT
t.id,
mon = CASE WHEN t.mon > x.e THEN x.s ELSE mon - LAG(x.e) OVER(PARTITION BY t.id ORDER BY x.s) END,
[sum] = CASE WHEN t.mon < x.e THEN t.mon ELSE x.e END
FROM tbl t
CROSS APPLY(
SELECT * FROM CteRange
)x
WHERE t.mon > x.s
UNION ALL
SELECT
t.id,
mon = t.mon - x.e,
[sum] = t.mon
FROM tbl t
CROSS APPLY(
SELECT TOP 1 e
FROM CteRange
ORDER BY e DESC
)x(e)
WHERE t.mon > e
ORDER BY t.id, mon
SQL Fiddle
This works for your given example data, you just need to predefine ranges all by yourself (I've used CROSS JOIN VALUES, but this can be done however you want/prefer). I think that's not an issue. I've used running SUM and analytic functions to achieve that.
DECLARE #tbl TABLE
(
id INT IDENTITY (1, 1)
, mon MONEY
);
INSERT INTO #tbl (mon)
VALUES (10.17), (36.00);
SELECT id
, [sum] - SUM(lagRange) OVER (PARTITION BY ID ORDER BY rangeId) AS mon
, [sum]
FROM (
SELECT id, rangeId
, LAG(rangeValue, 1, 0) OVER (PARTITION BY ID ORDER BY rangeId) AS lagRange
, CASE
WHEN SUM(rangeValue) OVER (PARTITION BY ID ORDER BY rangeId) > mon THEN mon
ELSE SUM(rangeValue) OVER (PARTITION BY ID ORDER BY rangeId)
END AS [sum]
FROM #tbl
CROSS JOIN (VALUES ((1), (1.00)), ((2), (10.00)), ((3), (25.00))) AS T(rangeId, rangeValue)
WHERE rangeValue <= mon
) AS T;
Results:
id mon sum
-----------------
1 1.00 1.00
1 9.17 10.17
2 1.00 1.00
2 10.00 11.00
2 25.00 36.00

Concatenate date ranges in SQL (T/SQL preferred)

I need to concatenate rows with a date and a code into a date range
Table with two columns that are a composite primary key (date and a code )
Date Code
1/1/2011 A
1/2/2011 A
1/3/2011 A
1/1/2011 B
1/2/2011 B
2/1/2011 A
2/2/2011 A
2/27/2011 A
2/28/2011 A
3/1/2011 A
3/2/2011 A
3/3/2011 A
3/4/2011 A
Needs to be converted to
Start Date End Date Code
1/1/2011 1/3/2011 A
2/1/2011 2/2/2011 A
1/1/2011 1/2/2011 B
2/27/2011 3/4/2011 A
Is there any other way or is a cursor loop the only way?
declare #T table
(
[Date] date,
Code char(1)
)
insert into #T values
('1/1/2011','A'),
('1/2/2011','A'),
('1/3/2011','A'),
('1/1/2011','B'),
('1/2/2011','B'),
('3/1/2011','A'),
('3/2/2011','A'),
('3/3/2011','A'),
('3/4/2011','A')
;with C as
(
select *,
datediff(day, 0, [Date]) - row_number() over(partition by Code
order by [Date]) as rn
from #T
)
select min([Date]) as StartDate,
max([Date]) as EndDate,
Code
from C
group by Code, rn
sql server 2000 has it limitations. Rewrote the solution to make it more readable.
declare #t table
(
[Date] datetime,
Code char(1)
)
insert into #T values
('1/1/2011','A'),
('1/2/2011','A'),
('1/3/2011','A'),
('1/1/2011','B'),
('1/2/2011','B'),
('3/1/2011','A'),
('3/2/2011','A'),
('3/3/2011','A'),
('3/4/2011','A')
select a.code, a.date, min(b.date)
from
(
select *
from #t t
where not exists (select 1 from #t where t.code = code and t.date -1 = date)
) a
join
(
select *
from #t t
where not exists (select 1 from #t where t.code = code and t.date = date -1)
) b
on a.code = b.code and a.date <= b.date
group by a.code, a.date
Using a DatePart function for month will get you the "groups" you want
SELECT Min(Date) as StartDate, Max(Date) as EndDate, Code
FROM ThisTable Group By DatePart(m, Date), Code

Resources