Group up rows based on date overlapping - sql-server

In a same id, if any of row's effective date and enddate overlaps then we need group it up in a unique id
In below image dategroup is the desired output column
Data is sorted in order by ID asc, EffectiveDate ASC, EndDate Desc
CREATE TABLE #DataTable (id int , EffectiveDate datetime, Enddate Datetime )
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-01 00:00:00.000' AS DateTime), CAST(N'2017-01-11 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-02 00:00:00.000' AS DateTime), CAST(N'2017-01-05 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-03 00:00:00.000' AS DateTime), CAST(N'2017-01-12 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-06 00:00:00.000' AS DateTime), CAST(N'2017-01-09 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-13 00:00:00.000' AS DateTime), CAST(N'2017-01-19 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (2, CAST(N'2017-02-01 00:00:00.000' AS DateTime), CAST(N'2017-02-11 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (2, CAST(N'2017-02-06 00:00:00.000' AS DateTime), CAST(N'2017-02-16 00:00:00.000' AS DateTime))
GO

Try this, Hope it helps. Not the most attractive code but it should work. I may clean it up later if I find some time.
;WITH cte_StepOne as
(
SELECT ROW_NUMBER() OVER (ORDER BY a.[id],
a.[EffectiveDate],
a.[Enddate]) AS SeqNo,
a.[id],
a.[EffectiveDate],
a.[Enddate],
b.[id] AS OverLapID,
b.[EffectiveDate] AS [OverLapEffectiveDate],
b.[Enddate] AS [OverLapEnddate]
FROM ##DataTable a
LEFT JOIN ##DataTable b
ON a.EffectiveDate BETWEEN b.EffectiveDate
AND b.EndDate
AND a.EffectiveDate <> b.EffectiveDate
AND a.EndDate <> b.EndDate --and a.ID <> b.ID
)
,cte_StepTwo AS
(
SELECT SeqNo,
id,
EffectiveDate,
Enddate,
LEAD(OverLapEffectiveDate, 1) OVER (ORDER BY SeqNo) AS LeadValue,LAG(id, 1) OVER (ORDER BY SeqNo) AS LeadValueID,
OverLapID,
OverLapEffectiveDate,
OverLapEnddate
FROM cte_StepOne
)
,cte_Result AS
(
SELECT id,
EffectiveDate,
Enddate,
CASE
WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN ID
WHEN OverLapID IS NULL THEN LeadValueID + 1
ELSE OverLapID
END AS OverLapID,
CASE
WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN EffectiveDate
ELSE OverLapEffectiveDate
END AS OverLapEffectiveDate,
CASE
WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN Enddate
ELSE OverLapEnddate
END AS OverLapEnddate
FROM cte_StepTwo
)
SELECT DISTINCT id,
EffectiveDate,
Enddate,
DENSE_RANK() OVER (ORDER BY ID,OverLapID) AS DateGroup
FROM cte_Result
ORDER BY id,EffectiveDate
Result:

This answer takes the approach of trying to identify records for which the running DateGroup counter should be incremented. Ultimately, we will assign a value of 1 to such records. With this assignment in hand, we can then simply take a cumulative sum to generate the DateGroup.
-- this CTE identifies all new ID records
WITH cte1 AS (
SELECT t.ID, t.EffectiveDate, t.EndDate
FROM
(
SELECT ID, EffectiveDate, EndDate,
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY EffectiveDate) rn
FROM yourTable
) t
WHERE t.rn = 1
),
-- this CTE identifies all records whose both effective and end dates
-- do not fall within the range of the start ID record
cte2 AS (
SELECT t1.ID, t1.EffectiveDate, t1.EndDate
FROM yourTable t1
INNER JOIN cte1 t2
ON t1.ID = t2.ID AND
t1.EffectiveDate NOT BETWEEN t2.EffectiveDate AND t2.EndDate AND
t1.EndDate NOT BETWEEN t2.EffectiveDate AND t2.EndDate
),
-- this CTE returns the original table with a new column, amount, which
-- contains a value of 1 should that record cause the DateGroup to be
-- incremented by 1
cte3 AS (
SELECT t1.ID, t1.EffectiveDate, t1.EndDate,
CASE WHEN t2.ID IS NOT NULL OR t3.ID IS NOT NULL THEN 1 ELSE 0 END AS amount
FROM yourTable t1
LEFT JOIN cte1 t2
ON t1.ID = t2.ID AND
t1.EffectiveDate = t2.EffectiveDate AND
t1.EndDate = t2.EndDate
LEFT JOIN cte2 t3
ON t1.ID = t3.ID AND
t1.EffectiveDate = t3.EffectiveDate AND
t1.EndDate = t3.EndDate
)
-- finally, take a cumulative sum of the 'amount' column to generate the DateGroup
SELECT t1.ID,
t1.EffectiveDate,
t1.EndDate,
SUM(t2.amount) AS DateGroup
FROM cte3 t1
INNER JOIN cte3 t2
ON t1.ID >= t2.ID AND
t1.EffectiveDate >= t2.EffectiveDate
GROUP BY t1.id, t1.EffectiveDate, t1.EndDate;
Output:
Demo here:
Rextester
Data used:
CREATE TABLE yourTable (ID int, EffectiveDate datetime, EndDate datetime);
INSERT INTO yourTable
VALUES
(1, '2017-01-01 00:00:00.000', '2017-01-11 00:00:00.000'),
(1, '2017-01-02 00:00:00.000', '2017-01-05 00:00:00.000'),
(1, '2017-01-03 00:00:00.000', '2017-01-12 00:00:00.000'),
(1, '2017-01-06 00:00:00.000', '2017-01-09 00:00:00.000'),
(1, '2017-01-13 00:00:00.000', '2017-01-19 00:00:00.000'),
(2, '2017-02-01 00:00:00.000', '2017-02-11 00:00:00.000'),
(2, '2017-02-06 00:00:00.000', '2017-02-16 00:00:00.000');

What about this? It's simpler that other solutions posted:
WITH
CTE_GetFirstRecordForEachId AS
(
SELECT
id,
EffectiveDate,
Enddate,
rn = ROW_NUMBER() OVER (PARTITION BY id ORDER BY EffectiveDate, EndDate)
FROM
#DataTable
),
CTE_GetOutOfDateRange AS
(
SELECT
a.*,
OutOfDateRange =
CASE WHEN (b.EffectiveDate>=a.EffectiveDate AND b.EffectiveDate<=b.Enddate) OR (b.Enddate>=a.EffectiveDate AND b.Enddate<=b.Enddate)
THEN 0
ELSE 1
END
FROM
#DataTable a
INNER JOIN
CTE_GetFirstRecordForEachId b ON a.id = b.id AND b.rn=1
)
SELECT
id,
Effectivedate,
Enddate,
DateGroup = DENSE_RANK() OVER (ORDER BY id, OutOfDateRange)
FROM
CTE_GetOutOfDateRange
ORDER BY
id, Effectivedate, Enddate
Output:
id Effectivedate Enddate DateGroup
----------- ----------------------- ----------------------- --------------------
1 2017-01-01 00:00:00.000 2017-01-11 00:00:00.000 1
1 2017-01-02 00:00:00.000 2017-01-05 00:00:00.000 1
1 2017-01-03 00:00:00.000 2017-01-12 00:00:00.000 1
1 2017-01-06 00:00:00.000 2017-01-09 00:00:00.000 1
1 2017-01-13 00:00:00.000 2017-01-19 00:00:00.000 2
2 2017-02-01 00:00:00.000 2017-02-11 00:00:00.000 3
2 2017-02-06 00:00:00.000 2017-02-16 00:00:00.000 3

What about this (I am still testing it)
WITH Z AS
(SELECT * FROM (SELECT ID, [EffectiveDate], ENDDate
, LAG(ID) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS ID_Prec
, LAG(EffectiveDate) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS EffDate_Prec
, LAG(ENDDate) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS EndDate_Prec
, ROW_NUMBER() OVER (ORDER BY ID, EffectiveDate,ENDDate DESC) AS RN
, 1 AS DATEGROUP
FROM #DataTable ) C WHERE RN = 1
UNION ALL
SELECT A.ID, A.EffectiveDate, A.Enddate
, A.ID_Prec, A.EffDate_Prec
, A.EndDate_Prec
, A.RN
, CASE WHEN A.ID = A.ID_PREC AND (A.EffectiveDate <=A.EndDate_Prec /* OR A.EndDate>=A.EffDate_Prec*/) THEN Z.DATEGROUP
ELSE Z.DATEGROUP+1 END AS DATEGROUP
FROM (SELECT A.ID, A.EffectiveDate, A.ENDDate
, LAG(A.ID) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS ID_Prec
, LAG(A.EffectiveDate) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS EffDate_Prec
, LAG(A.ENDDate) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS EndDate_Prec
, ROW_NUMBER() OVER (ORDER BY A.ID, A.EffectiveDate,A.ENDDate DESC) AS RN
, 1 AS DATEGROUP
FROM #DataTable A) A
INNER JOIN Z ON A.RN -1= Z.RN
)
SELECT ID, EffectiveDate, Enddate, DATEGROUP FROM Z
Output:
ID EffectiveDate Enddate DATEGROUP
----------- ----------------------- ----------------------- -----------
1 2017-01-01 00:00:00.000 2017-01-11 00:00:00.000 1
1 2017-01-02 00:00:00.000 2017-01-05 00:00:00.000 1
1 2017-01-03 00:00:00.000 2017-01-12 00:00:00.000 1
1 2017-01-06 00:00:00.000 2017-01-09 00:00:00.000 1
1 2017-01-13 00:00:00.000 2017-01-19 00:00:00.000 2
2 2017-02-01 00:00:00.000 2017-02-11 00:00:00.000 3
2 2017-02-06 00:00:00.000 2017-02-16 00:00:00.000 3

guess you are missing some test scenario in your sample date.
;with CTE as
(
select *,ROW_NUMBER()over(order by id, effectivedate)rn
from #DataTable
)
,CTE1 AS
(
select *, 1 New_ID
from cte
where rn=1
union ALL
select c.id,c.effectivedate,c.enddate,c.rn
,case when c.effectivedate between c1.effectivedate
and c1.enddate
and c.id=c1.id then c1.New_ID
else c1.New_ID+1
END
from cte c
inner join cte1 c1
on c.rn=c1.rn+1
and c.rn>1 and c.rn<=7
)
select * from cte1
drop table #DataTable

this may help you. I posted here shortest and simplest version of tsql...
WITH CTE AS (
SELECT *,
ISNULL(LAG(EffectiveDate) OVER (PARTITION BY id ORDER BY id,EffectiveDate,Enddate),EffectiveDate) AS PreviousEffDate,
ISNULL(LAG(Enddate) OVER (PARTITION BY id ORDER BY id,EffectiveDate,Enddate),Enddate) AS PreviousEndDate
FROM #DataTable)
SELECT id,
EffectiveDate,
Enddate,
DENSE_RANK() OVER (ORDER BY id,CASE
WHEN EffectiveDate BETWEEN PreviousEffDate AND PreviousEndDate OR
Enddate BETWEEN PreviousEffDate AND PreviousEndDate OR
PreviousEffDate BETWEEN EffectiveDate AND Enddate OR
PreviousEndDate BETWEEN EffectiveDate AND Enddate
THEN 0
ELSE 1
END) AS DateGroup
FROM CTE
Result:

Have got this one from another forum; altered as per my requirement . looks simple and effective.
WITH C1 AS (
SELECT *,
CASE WHEN EffectiveDate <= MAX(ISnull(EndDate,'9999-12-31 00:00:00.000')) OVER(partition by id ORDER BY EffectiveDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) THEN 0 ELSE 1 END AS isstart
FROM #DataTable
)
SELECT ID,EffectiveDate,EndDate,
SUM(isstart) OVER(ORDER BY ID ROWS UNBOUNDED PRECEDING) AS DG
FROM C1

Related

Repeated data on inserted rows

--demo setup
drop table if exists dbo.product
go
create table dbo.Product
(
ProductId int,
ProductTitle varchar(55),
ProductCategory varchar(255),
Loaddate datetime
)
insert into dbo.Product
values (1, 'Table', 'ABCD', '3/4/2018'),
(1, 'Table', 'ABCD', '3/5/2018'),
(1, 'Table', 'ABCD', '3/6/2018'),
(1, 'Table', 'XYZ', '3/7/2018'),
(1, 'Table', 'XYZ', '3/8/2018'),
(1, 'Table', 'XYZ', '3/9/2018'),
(1, 'Table', 'GHI', '3/10/2018'),
(1, 'Table', 'GHI', '3/11/2018'),
(1, 'Table', 'XYZ', '3/12/2018'),
(1, 'Table', 'XYZ', '3/13/2018')
SELECT
product.productid,
product.producttitle,
product.productcategory,
MIN(product.loaddate) AS BeginDate,
-- ,max(product.LoadDate) as BeginDate1
CASE
WHEN MAX(product.loaddate) = MAX(oa.enddate1)
THEN '12/31/9999'
ELSE MAX(product.loaddate)
END AS EndDate
FROM
dbo.product product
CROSS APPLY
(SELECT MAX(subproduct.loaddate) EndDate1
FROM dbo.product subproduct
WHERE subproduct.productid = product.productid) oa
GROUP BY
productid, producttitle, productcategory
Output:
productid
producttitle
productcategory
BeginDate
EndDate
1
Table
ABCD
2018-03-04 00:00:00.000
2018-03-06 00:00:00.000
1
Table
XYZ
2018-03-07 00:00:00.000
9999-12-31 00:00:00.000
1
Table
GHI
2018-03-10 00:00:00.000
2018-03-11 00:00:00.000
Desired output:
productid
producttitle
productcategory
BeginDate
EndDate
1
Table
ABCD
2018-03-04 00:00:00.000
2018-03-06 00:00:00.000
1
Table
XYZ
2018-03-07 00:00:00.000
2018-03-09 00:00:00.000
1
Table
GHI
2018-03-10 00:00:00.000
2018-03-11 00:00:00.000
1
Table
XYZ
2018-03-12 00:00:00.000
9999-12-31 00:00:00.000
The last two inserted rows repeat the data from Loaddate '3/7/2018'-'3/9/2018', this doesn't happen if any of the new inserted rows doesn't repeat data. The only thing that changes is the LoadDate, giving me incorrect output. how can i get something like that desired output?
Well, first of all, you need to find a sequence number over all your records. If you already have a primary key, that's good. In example you gave us, there's no such column, so let's generate it.
Then, we make pairs with start and end dates for each product's category change. Another thing is to group all these product's category changes.
Finally, we make just a simple group by:
;
with cte as ( select *,
row_number() over(partition by ProductId order by Loaddate) as rn
from product
), cte2 as ( select t1.ProductId,
t1.ProductTitle,
t1.ProductCategory,
t1.Loaddate as BeginDate,
case
when t1.ProductCategory <> t2.ProductCategory
then t1.Loaddate
else coalesce(t2.Loaddate, null)
end as EndDate,
row_number() over(order by t1.ProductId, t1.Loaddate) as rn_overall,
row_number() over(partition by t1.ProductId, t1.ProductCategory order by t1.Loaddate) as rn_category
from cte as t1
left join cte as t2
on t2.ProductId = t1.ProductId
and t2.rn = t1.rn + 1
), cte3 as ( select *,
min(rn_overall) over (partition by ProductId, ProductCategory, rn_overall - rn_category) as product_group
from cte2
)
select ProductId, ProductTitle, ProductCategory,
min(BeginDate) as BeginDate,
case
when max(case when EndDate is null then 1 else 0 end) = 0
then max(EndDate)
else null
end as EndDate
from cte3
group by ProductId, ProductTitle, ProductCategory, product_group
order by ProductId, BeginDate

Find overlapping days between two periods of Date

I have two tables, each of which holds the period of dates (from date1 to date2)
i will Find overlapping days between two periods of Date in table1 and table2
Example
table1
-------------------------
id | FromDate | ToDate
1 |2000-01-01 | 2000-02-04
2 |2000-03-01 | 2000-03-29
table2
-------------------------
id | FromDate | ToDate
1 |2000-02-01 | 2000-02-07
2 |2000-03-27 | 2000-03-29
The result I want to have:
2000-02-01
2000-02-02
2000-02-03
2000-02-04
2000-03-27
2000-03-28
2000-03-29
This should work:
CREATE TABLE #t1
(
id int,
FromDate date,
ToDate date
)
CREATE TABLE #t2
(
id int,
FromDate date,
ToDate date
)
INSERT #t1 VALUES
(1, '2000-01-01', '2000-02-04'),
(2, '2000-03-01', '2000-03-29')
INSERT #t2 VALUES
(1, '2000-02-01', '2000-02-07'),
(2, '2000-03-27', '2000-03-29')
WITH DateRange AS --select range where intersection is possible
(
SELECT MAX(MinDate) MinDate,MIN(MaxDate) MaxDate,DATEDIFF(DAY,MAX(MinDate),MIN(MaxDate)) Diff
FROM (VALUES ((SELECT MIN(FromDate) FROM #t1)),((SELECT MIN(FromDate) FROM #t2))) MinDate(MinDate)
CROSS APPLY (VALUES ((SELECT MAX(ToDate) FROM #t1)),((SELECT MAX(ToDate) FROM #t2))) MaxDate(MaxDate)
), AllDates AS --generate sequence of days
(
SELECT MinDate D, MaxDate Limit
FROM DateRange
UNION ALL
SELECT DATEADD(DAY, 1, D), Limit
FROM AllDates
WHERE DATEADD(DAY, 1, D)<=Limit
) --select all days existing in any range in both tables
SELECT D
FROM AllDates
WHERE EXISTS (SELECT * FROM #t1 WHERE D>=FromDate AND D<=ToDate)
AND EXISTS (SELECT * FROM #t2 WHERE D>=FromDate AND D<=ToDate)
It's possible to do this with CTE's and recursion.
--Your sample data
DECLARE #table1 TABLE (id int PRIMARY KEY, FromDate date, ToDate date)
DECLARE #table2 TABLE (id int PRIMARY KEY, FromDate date, ToDate date)
INSERT INTO #table1 VALUES (1, '2000-01-01', '2000-02-04') , (2, '2000-03-01', '2000-03-29')
INSERT INTO #table2 VALUES (1, '2000-02-01', '2000-02-07') , (2, '2000-03-27', '2000-03-29')
--A couple CTE's
;WITH cteDates AS (
SELECT T1.id --get the min and max dates for each id
,CASE WHEN T1.FromDate > T2.FromDate THEN T1.FromDate ELSE T2.FromDate END [mindate]
,CASE WHEN T1.ToDate < T2.ToDate THEN T1.ToDate ELSE T2.ToDate END [maxdate]
FROM #table1 T1 INNER JOIN #table2 T2 ON T1.id = T2.id
)
, cteRecursion AS ( --date range for each id
SELECT id, mindate AS DateValue
FROM cteDates
UNION ALL
SELECT id, DATEADD(DAY, 1, DateValue)
FROM cteRecursion C1
WHERE DATEADD(DAY, 1, DateValue) <= (
SELECT maxDate
FROM cteDates C2
WHERE C2.id = C1.id
)
)
--SELECT query
SELECT DateValue FROM cteRecursion ORDER BY DateValue OPTION (MAXRECURSION 0)
Produces Output:
DateValue
---------
2000-02-01
2000-02-02
2000-02-03
2000-02-04
2000-03-27
2000-03-28
2000-03-29
One possible solution is the with the use of a Numbers or Tally table
;WITH cteNumbers (N)
AS(
SELECT ROW_NUMBER() OVER(ORDER BY N1.N)
FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N1(N)
CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N2 (N)
CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N3 (N)
)
SELECT T1.FromDate
FROM(
SELECT
T1.FromDate
FROM dbo.Table1 T1
UNION
SELECT
DATEADD(DAY, N, T1.FromDate)
FROM
dbo.Table1 T1
CROSS APPLY cteNumbers N
WHERE N <= DATEDIFF(DAY, T1.FromDate, T1.ToDate)
) T1
WHERE t1.FromDate IN
(
SELECT
T2.FromDate
FROM dbo.Table2 T2
UNION
SELECT
DATEADD(DAY, N, T2.FromDate)
FROM
dbo.Table2 T2
CROSS APPLY cteNumbers N
WHERE N <= DATEDIFF(DAY, T2.FromDate, T2.ToDate)
)
Result is
FromDate
2000-02-01 00:00:00.000
2000-02-02 00:00:00.000
2000-02-03 00:00:00.000
2000-02-04 00:00:00.000
2000-03-27 00:00:00.000
2000-03-28 00:00:00.000
2000-03-29 00:00:00.000
The Numbers/tally table will allow for a daterange of up to 1000 days. If you need more then add another line like so, CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N4 (N)

T-SQL - timespan by overlapping datetime columns

I want maximum period of date range that is overlapping each other and if the period is not clashing other date ranges then I want it as it is.
I have this table:
CREATE TABLE [dbo].[table1]
(
[id] [numeric](18, 0) IDENTITY(1,1) NOT NULL,
[StartDate] [datetime] NOT NULL,
[EndDate] [datetime] NOT NULL
)
And their respective values:
INSERT INTO [dbo].[table1]
VALUES (CAST('2013-11-01 00:00:00.000' AS DateTime), CAST('2013-11-10 00:00:00.000' AS DateTime)),
(CAST('2013-11-05 00:00:00.000' AS DateTime), CAST('2013-11-15 00:00:00.000' AS DateTime)),
(CAST('2013-11-10 00:00:00.000' AS DateTime), CAST('2013-11-15 00:00:00.000' AS DateTime)),
(CAST('2013-11-10 00:00:00.000' AS DateTime), CAST('2013-11-25 00:00:00.000' AS DateTime)),
(CAST('2013-11-26 00:00:00.000' AS DateTime), CAST('2013-11-29 00:00:00.000' AS DateTime))
And expected result is:
ID StartDate EndDate
--------------------------------------------------------
1 2013-11-01 00:00:00.000 2013-11-25 00:00:00.000
2 2013-11-26 00:00:00.000 2013-11-29 00:00:00.000
Thanks in advance.
// Edit 1: Thanks.
Works, but there is a new question for breaks in the same table
INSERT INTO [dbo].[table1]
VALUES (CAST('2018-05-03 08:30:00.000' AS DateTime), CAST('2018-05-03 08:45:00.000' AS DateTime)),
(CAST('2018-05-03 08:45:00.000' AS DateTime), CAST('2018-05-03 09:30:00.000' AS DateTime)),
(CAST('2018-05-03 08:45:00.000' AS DateTime), CAST('2018-05-03 11:30:00.000' AS DateTime)),
(CAST('2018-05-03 12:45:00.000' AS DateTime), CAST('2018-05-03 13:00:00.000' AS DateTime)),
(CAST('2018-05-03 14:00:00.000' AS DateTime), CAST('2018-05-03 15:45:00.000' AS DateTime)),
(CAST('2018-05-03 14:15:00.000' AS DateTime), CAST('2018-05-03 15:30:00.000' AS DateTime))
And expected result is:
ID StartDate EndDate
--------------------------------------------------------
1 2018-05-03 08:30:00.000 2018-05-03 11:30:00.000
2 2018-05-03 12:45:00.000 2018-05-03 13:00:00.000
3 2018-05-03 14:00:00.000 2018-05-03 15:45:00.000
Very similar answer, but making use of an index and windowed functions to make the gaps and islands analysis cheaper (faster).
http://sqlfiddle.com/#!18/f19569/3
SELECT
ROW_NUMBER() OVER (ORDER BY MIN(StartDate)),
MIN(StartDate),
MAX(EndDate)
from
(
SELECT
*,
SUM(CASE WHEN PrecedingEndDate >= StartDate THEN 0 ELSE 1 END)
OVER (ORDER BY StartDate, EndDate)
AS GroupID
FROM
(
SELECT
*,
MAX(EndDate)
OVER (ORDER BY StartDate, EndDate
ROWS BETWEEN UNBOUNDED PRECEDING
AND 1 PRECEDING
)
AS PrecedingEndDate
FROM
Table1
)
look_back
)
grouped
GROUP BY
GroupID
This is a form of the gaps and islands problem.
In this case, exists and cumulative sum and group by are the route to the solution:
select row_number() over (order by min(startdate)),
min(startdate), max(enddate)
from (select t1.*, sum(isstart) over (order by startdate) as grp
from (select t1.*,
(case when exists (select 1
from table1 tt1
where tt1.startdate <= t1.enddate and tt1.enddate >= t1.startdate and tt1.id <> t1.id
)
then 0 else 1
end) as isstart
from table1 t1
) t1
) t1
group by grp;

How do I add original amount of oldest transaction in an aggregate function?

I'm trying to find out how to include the original amount of the first transaction (oldest by Posted Date) to an aggregate query.
The following finds reversed transactions ..
SELECT DISTINCT
[Account], [Voucher],
[DocumentDate],
SUM([Amount])
FROM
MyTable
WHERE
[Account] = 'abc'
GROUP BY
[Account], [Voucher], [DocumentDate]
HAVING
SUM([Amount]) = 0
How would I include the amount in the results for the transaction with the oldest posted date for each record?
For example, using the following:
Account Voucher DocumentDate PostedDate Amount
---------------------------------------------------------
abc 1 01/01/2018 08/01/2018 100.00
abc 1 01/01/2018 15/01/2018 -100.00
The expected result would be:
Account Voucher DocumentDate OriginalAmount Sum(Amount) Records
-------------------------------------------------------------------------
abc 1 01/01/2018 100.00 0.00 2
One way to do it is using a cte with first_value, sum...over and count...over.
First, create and populate sample table (Please save us this step in your future questions)
DECLARE #T AS TABLE
(
Account char(3),
Voucher int,
DocumentDate date,
PostedDate date,
Amount numeric(5,2)
)
INSERT INTO #T VALUES
('abc', 1, '2018-01-01', '2018-01-08', 100),
('abc', 1, '2018-01-01', '2018-01-15', -100)
The cte:
;WITH CTE
AS
(
SELECT [Account],
[Voucher],
[DocumentDate],
FIRST_VALUE(Amount) OVER(PARTITION BY [Account], [Voucher], [DocumentDate] ORDER BY PostedDate) AS OriginalAmount,
SUM([Amount]) OVER(PARTITION BY [Account], [Voucher], [DocumentDate]) AS [Sum(Amount)],
COUNT(*) OVER(PARTITION BY [Account], [Voucher], [DocumentDate]) Records
FROM
#T
WHERE
[Account] = 'abc'
)
The query:
SELECT DISTINCT *
FROM CTE
WHERE [Sum(Amount)] = 0
Results:
Account Voucher DocumentDate OriginalAmount Sum(Amount) Records
abc 1 01.01.2018 00:00:00 100,00 0,00 2
See a live demo on rextester.
It seems straightforward ... am I missing something?
WITH CTE AS
(
SELECT
[Account],
[Voucher],
[DocumentDate],
ROW_NUMBER() OVER (PARTITION BY [Account],[Voucher] ORDER BY [DocumentDate]) RN,
[Amount]
FROM
MyTable
WHERE
[Account] = 'abc'
)
SELECT
[Account],
[Voucher],
[DocumentDate],
max(case when RN = 1 THEN [Amount] else null end) OriginalAmount,
sum([Amount]) SUM_Amount,
count(*) Records
from cte
GROUP BY
[Account], [Voucher], [DocumentDate]
HAVING
SUM([Amount]) = 0

TSQL - Groups and Islands dates

I need a help on writing an optimal query for the below problem. Have attached the query I have with me but it is highly utilizing resources.
Below is the code to achieve above said logic. Please suggest some optimal way to achieve the same
-- drop table #me
create table #ME (memid int , EffectiveDate datetime , termdate datetime)
Insert into #ME values ('123','3-Dec-16','10-Jan-17')
Insert into #ME values ('123','11-Jan-17','6-Feb-17')
Insert into #ME values ('123','7-Feb-17','5-Mar-17')
Insert into #ME values ('123','8-Mar-17','15-Apr-17')
Insert into #ME values ('123','16-Apr-17','24-May-17')
--drop table #dim
select * from #ME
declare #StartDate datetime , #CutoffDate datetime
select #StartDate= min(effectivedate),#CutoffDate = max(termdate) From #me where termdate<>'9999-12-31 00:00:00.000'
SELECT d
into #dim
FROM
(
SELECT d = DATEADD(DAY, rn - 1, #StartDate)
FROM
(
SELECT TOP (DATEDIFF(DAY, #StartDate, #CutoffDate))
rn = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
FROM sys.all_objects AS s1
CROSS JOIN sys.all_objects AS s2
-- on my system this would support > 5 million days
ORDER BY s1.[object_id]
) AS x
) AS y;
--drop table #MemEligibilityDateSpread
select MemID, D As DateSpread Into #MemEligibilityDateSpread From #Dim dim JOIN #me ME on dim.d between ME.effectivedate and me.termdate
--drop table #DateClasified
WITH CTE AS
(
SELECT MEmID,
UniqueDate = DateSpread,
DateGroup = DATEADD(dd, - ROW_NUMBER() OVER (PARTITION BY Memid ORDER BY Memid,DateSpread), DateSpread)
FROM #MemEligibilityDateSpread
GROUP BY Memid,DateSpread
)
--===== Now, if we find the MIN and MAX date for each DateGroup, we'll have the
-- Start and End dates of each group of contiguous daes. While we're at it,
-- we can also figure out how many days are in each range of days.
SELECT Memid,
StartDate = MIN(UniqueDate),
EndDate = MAX(UniqueDate)
INTO #DateClasified
FROM cte
GROUP BY Memid,DateGroup
ORDER BY Memid,StartDate
select ME.MemID,ME.EffectiveDate,ME.TermDate,DC.StartDate,DC.EndDate from #DateClasified dc join #me ME ON Me.MemID = dc.MemID
and (ME.EffectiveDate BETWEEN DC.StartDate AND DC.EndDate
OR ME.TermDate BETWEEN DC.StartDate AND DC.EndDate)
In cte0 and cte1, we create an ad-hoc tally/calendar table. Once we have that, it is a small matter to calculate and group by Island.
Currently, the tally is has a max of 10,000 days (27 years), but you can easily expand the tally table by adding , cte0 N5
;with cte0(N) as (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N))
,cte1(R,D) as (Select Row_Number() over (Order By (Select Null))
,DateAdd(DD,-1+Row_Number() over (Order By (Select Null)),(Select MinDate=min(EffectiveDate) From #ME))
From cte0 N1, cte0 N2, cte0 N3, cte0 N4)
Select MemID
,EffectiveDate
,TermDate
,SinceFrom = Min(EffectiveDate) over (Partition By Island)
,Tildate = Max(TermDate) over (Partition By Island)
From (
Select *,Island = R - Row_Number() over (Partition By MemID Order by TermDate)
From #ME A
Join cte1 B on D Between EffectiveDate and TermDate
) A
Group By MemID,Island,EffectiveDate,TermDate
Order By 1,2
Returns
MemID EffectiveDate TermDate SinceFrom Tildate
123 2016-12-03 2017-01-10 2016-12-03 2017-03-05
123 2017-01-11 2017-02-06 2016-12-03 2017-03-05
123 2017-02-07 2017-03-05 2016-12-03 2017-03-05
123 2017-03-08 2017-04-15 2017-03-08 2017-05-24
123 2017-04-16 2017-05-24 2017-03-08 2017-05-24
Edit - Now if you want a compressed dataset
Select MemID
,EffectiveDate = Min(EffectiveDate)
,TermDate = Max(TermDate)
From (
Select *,Island = R - Row_Number() over (Partition By MemID Order by TermDate)
From #ME A
Join cte1 B on D Between EffectiveDate and TermDate
) A
Group By MemID,Island
Order By 1,2
Returns
MemID EffectiveDate TermDate
123 2016-12-03 2017-03-05
123 2017-03-08 2017-05-24

Resources