How to calculate peak-valley drawdown with SQL Server 2012? - sql-server

I'm wondering if some of the new SQL Server 2012 functions would help with this problem. Here's my DDL and sample data
-- Sample schema and data for the drawdown question: one row per transaction,
-- holding its id, its timestamp (dt) and the balance recorded for it.
-- balance is declared nullable; the sample rows below never leave it NULL.
CREATE TABLE [dbo].[transactions]
(
[transactionId] [int] NOT NULL,
[dt] [datetime] NOT NULL,
[balance] [decimal](22, 6) NULL
);
GO
-- The dt values are supplied as binary datetime literals cast to DateTime.
-- Note that transactionId order is not the same as dt order in these rows
-- (ids 188-190 come after id 192), so queries must order by dt, not by id.
INSERT [dbo].[transactions] ([transactionId], [dt], [balance]) VALUES
(174, CAST(0x0000A19600000000 AS DateTime), CAST(1000.000000 AS Decimal(22, 6))),
(178, CAST(0x0000A19700869520 AS DateTime), CAST(1100.000000 AS Decimal(22, 6))),
(179, CAST(0x0000A19700933780 AS DateTime), CAST(1212.000000 AS Decimal(22, 6))),
(180, CAST(0x0000A19700B4B9A0 AS DateTime), CAST(1342.000000 AS Decimal(22, 6))),
(181, CAST(0x0000A19700BB0AD0 AS DateTime), CAST(1198.000000 AS Decimal(22, 6))),
(182, CAST(0x0000A19700E67030 AS DateTime), CAST(1234.000000 AS Decimal(22, 6))),
(183, CAST(0x0000A19700F358E0 AS DateTime), CAST(900.000000 AS Decimal(22, 6))),
(184, CAST(0x0000A19700F58B60 AS DateTime), CAST(876.000000 AS Decimal(22, 6))),
(185, CAST(0x0000A19700F9AA10 AS DateTime), CAST(889.000000 AS Decimal(22, 6))),
(186, CAST(0x0000A19701034700 AS DateTime), CAST(1133.000000 AS Decimal(22, 6))),
(187, CAST(0x0000A19A0089E0E0 AS DateTime), CAST(1400.000000 AS Decimal(22, 6))),
(191, CAST(0x0000A19A009450C0 AS DateTime), CAST(1566.000000 AS Decimal(22, 6))),
(192, CAST(0x0000A19A00A5E4C0 AS DateTime), CAST(1800.000000 AS Decimal(22, 6))),
(188, CAST(0x0000A19A00AA49C0 AS DateTime), CAST(1900.000000 AS Decimal(22, 6))),
(189, CAST(0x0000A19A00B54640 AS DateTime), CAST(1456.000000 AS Decimal(22, 6))),
(190, CAST(0x0000A19A00CAB2A0 AS DateTime), CAST(1234.000000 AS Decimal(22, 6))),
(193, CAST(0x0000A19A00F12660 AS DateTime), CAST(1400.000000 AS Decimal(22, 6))),
(195, CAST(0x0000A19A010087E0 AS DateTime), CAST(1444.000000 AS Decimal(22, 6))),
(196, CAST(0x0000A19E00C7F380 AS DateTime), CAST(1556.000000 AS Decimal(22, 6))),
(197, CAST(0x0000A19E00FE5560 AS DateTime), CAST(1975.000000 AS Decimal(22, 6)));
I am after the largest percentage peak-valley drawdown of the balance for the series, ordered by dt. The peak to valley drawdown is the greatest percent change in a high in the balance to the lowest low before the previous high is crossed. Better described here http://www.investopedia.com/terms/p/peak-to-valley-drawdown.asp In this data set we have two drawdowns.
First one is from 1342.00 to 876.00 (-34.72%) and the second one from 1900 to 1234 (-35.05%)
The biggest peak to valley percent drawdown in this set therefore, is -35.05%. I need a SQL Server query that can provide this value. Would rather not have to use temp tables if possible. Any ideas?

I don't know that any SQL Server 2012 functionality will get this value any more succinctly or efficiently than this:
-- Largest peak-to-valley drawdown (as a negative percentage) over the series.
-- For every row the highest balance seen so far in dt order is its "peak";
-- the drawdown at that row is the percentage drop from that peak. The largest
-- such drop is the answer. The series is ordered by dt (transactionId is only
-- a tie-breaker) because the ids are not in chronological order.
-- Returns NULL when the balance never falls below an earlier balance.
;WITH running_peak AS
(
    SELECT
        t.balance,
        peak = MAX(t.balance) OVER (ORDER BY t.dt, t.transactionId
                                    ROWS UNBOUNDED PRECEDING)
    FROM dbo.transactions AS t
)
SELECT [Largest Drawdown] = -MAX((rp.peak - rp.balance) * 100.0 / NULLIF(rp.peak, 0)) -- NULLIF: a peak of 0 must not raise divide-by-zero
FROM running_peak AS rp
WHERE rp.balance < rp.peak; -- only rows that sit below their running peak are drawdowns
Result:
Largest Drawdown
----------------
-35.05263157894
I do confess, though, that this works for your sample data only because your valleys are convenient for the problem you want to solve. If you change the 4th-last row to 875 this query considers that a part of the set. In other words, I've calculated drawdown here for the entire range, rather than just the range until the high is crossed again.
I suspect there is a better way to solve this query using gap/island techniques and I will try to return to it when I can focus on it adequately.

This will miss if the first entry is a peak
-- Lists every "record" peak (the first peak and each later peak that is higher
-- than all earlier peaks) together with the lowest balance that follows it
-- before the next record peak, and the percentage drop between the two.
;with trnsCTE (ID,bal) AS
( -- get sequential ID: number the rows in dt order so neighbours are ID +/- 1
SELECT ROW_NUMBER() OVER (ORDER BY DT) as ID, [balance]
from [transactions]
),
trnsCTE2 (ID,bal) AS
( -- any peaks: a row (T2) higher than the row before it (T1) and the row after it (T3).
  -- The first row has no T1 and the last row has no T3, so neither is ever reported.
select t2.ID, t2.bal
from trnsCTE as T1
join trnsCTE as T2
on ( t2.ID = t1.ID+1
and t2.bal > t1.bal )
join trnsCTE as T3
on t3.ID = t2.ID+1
and t3.bal < t2.bal -- the follower must be below the peak itself, not below T1
)
,
trnsCTE3 (ID,bal) AS
( -- get first peak and then bigger peaks only
SELECT distinct T1.ID, T1.BAL
from trnsCTE2 as T1
where T1.ID = (select min(ID) from trnsCTE2)
or T1.bal > (select max(bal) from trnsCTE2 where trnsCTE2.ID < t1.ID)
)
-- calculate: for each record peak, take the minimum balance among the rows after it
-- and before the next record peak (or up to the end of the series for the last peak)
select t1.id as peak_id,
       t1.bal as peak_bal,
       min(trnsCTE.bal) as valley_bal,
       (t1.bal - min(trnsCTE.bal)) * 100 / t1.bal as drawdown_pct
from trnsCTE
join trnsCTE3 t1
on t1.id < trnsCTE.id
and ( trnsCTE.id < (select min(id) from trnsCTE3 where id > t1.id)
or
t1.id = ( select max(id) from trnsCTE3 ) )
group by t1.id, t1.bal
order by t1.id
This translates directly to #temp tables.
I did not use #temp tables because the OP said they did not want to use them.
-- Load #trnsCTE with every balance and its 1-based position in dt order.
INSERT INTO #trnsCTE (ID, bal)
SELECT
    ROW_NUMBER() OVER (ORDER BY DT) AS ID,
    [balance]
FROM [transactions]

-- Reports one row per retained peak: its date and balance, the date and balance of
-- the last retained turning point in the same group, and the percentage drop between them.
select peak_dt, peak_balance, trough_dt, trough_balance, (peak_balance - trough_balance) * 100.0 / peak_balance as drawdown
from (
-- For every row fetch the final row (by dt) of its peak_valley_group. NULLIF turns the
-- trough columns into NULL when that final row has the same dt / balance as the row itself.
-- NOTE(review): last_value picks the last row of the group, not the lowest balance — confirm
-- that this is the intended trough when a group contains several valleys.
select dt as peak_dt, balance as peak_balance, nullif(last_value(dt) over (partition by peak_valley_group order by dt rows between unbounded preceding and unbounded following), dt) as trough_dt, nullif(last_value(balance) over (partition by peak_valley_group order by dt rows between unbounded preceding and unbounded following), balance) as trough_balance, isPeak
from (
-- Running count of peaks in dt order: each retained peak opens a new group that also
-- contains the valleys following it.
select *, sum(isPeak) over (order by dt) as peak_valley_group
from (
-- isPeak = 1 for local peaks, 0 for local valleys; current_max_balance is the running
-- maximum balance, computed separately among the peaks and among the valleys.
select dt, balance, (case when forward_trend = -1 then 1 else 0 end) as isPeak, max(balance) over (partition by forward_trend order by dt) as current_max_balance
from (
-- Nulls for lead/lag here produce the desired result
-- (a missing neighbour makes the comparison fall to the ELSE branch, i.e. -1)
select *, (case when lead(balance, 1) over (order by dt) > balance then 1 else -1 end) as forward_trend, (case when lag(balance, 1) over (order by dt) > balance then 1 else -1 end) as backward_trend
from transactions
) t
-- keep only turning points: both neighbours higher (1 = 1) or neither neighbour higher (-1 = -1)
where forward_trend = backward_trend
) t
-- keep every valley, but only those peaks that equal the running maximum among peaks
where (isPeak = 1 and balance = current_max_balance)
or isPeak = 0
) t
) t
where isPeak = 1
order by peak_dt

Related

CTE - LEFT OUTER JOIN Performance Problem

Using SQL Server 2017.
SQL FIDDLE: LINK
-- Plans and the date on which each plan starts.
CREATE TABLE [TABLE_1]
(
    PLAN_NR decimal(28,6) NULL,
    START_DATE datetime NULL
);
-- Period numbers recorded per plan; a period number may repeat or be missing.
CREATE TABLE [TABLE_2]
(
    PLAN_NR decimal(28,6) NULL,
    PERIOD_NR decimal(28,6) NULL
);
INSERT INTO TABLE_1 (PLAN_NR, START_DATE)
VALUES (1, '2020-05-01'), (2, '2020-08-05');
INSERT INTO TABLE_2 (PLAN_NR, PERIOD_NR)
VALUES (1, 1), (1, 2), (1, 5), (1, 6), (1, 5), (1, 6), (1, 17),
(2, 2), (2, 3), (2, 5), (2, 2), (2, 17), (2, 28);
-- End the batch here: CREATE VIEW has to be the first statement of its batch.
GO
-- Lists the first 100 periods of every plan in TABLE_1.
-- A period normally lasts seven days; a period that runs across a month end is cut
-- in two, the second part starting on the first day of the new month.
CREATE VIEW ALL_PERIODS
AS
WITH rec_cte AS
(
-- weekly grid: period n starts at START_DATE + 7*(n-1) days and the next one at next_date
SELECT
PLAN_NR, START_DATE,
1 period_nr, DATEADD(day, 7, START_DATE) next_date
FROM
TABLE_1
UNION ALL
SELECT
PLAN_NR, next_date,
period_nr + 1, DATEADD(day, 7, next_date)
FROM
rec_cte
WHERE
period_nr < 100 -- 99 recursion steps, which stays below the default limit of 100
),
cte1 AS
(
SELECT
PLAN_NR, period_nr, START_DATE
FROM
rec_cte
UNION ALL
-- extra start row for weeks that cross a month end: the first day of the new month
SELECT
PLAN_NR, period_nr, DATEADD(DAY, 1, EOMONTH(next_date, -1))
FROM
rec_cte
WHERE
-- Compare against the LAST day of the week (next_date - 1 day). A week that ends exactly
-- on the last day of a month does not cross the month end; testing next_date itself
-- would add a second row with the same start date as the following week.
MONTH(START_DATE) <> MONTH(DATEADD(DAY, -1, next_date))
),
cte2 AS (
-- renumber the weekly and the split starts per plan in date order
SELECT PLAN_NR, START_DATE,
ROW_NUMBER() OVER (PARTITION BY PLAN_NR ORDER BY START_DATE) rn
FROM cte1
)
SELECT PLAN_NR, rn PERIOD_NR, START_DATE
FROM cte2
WHERE rn <= 100
Table_1 lists plans (PLAN_NR) and their start date (START_DATE).
Table_2 lists plan numbers (PLAN_NR) and periods (1 - X). Per plan number periods can appear several times but can also be missing.
A period lasts seven days, unless the period includes a change of month. Then the period is divided into a part before the end of the month and a part after the end of the month.
The view ALL_PERIODS lists 100 periods per plan according to this system.
My problem is the performance of the following select which I would like to use in a view:
-- Attach the computed start date to every (plan, period) row of TABLE_2.
-- Rows without a matching period in ALL_PERIODS are kept with a NULL START_DATE.
SELECT
    t2.PLAN_NR,
    t2.PERIOD_NR,
    a_p.START_DATE
FROM TABLE_2 AS t2
LEFT OUTER JOIN ALL_PERIODS AS a_p
    ON  a_p.PLAN_NR   = t2.PLAN_NR
    AND a_p.PERIOD_NR = t2.PERIOD_NR
From about 4000 entries in TABLE_2 the select becomes incredibly slow.
The join itself does not yet slow down the query. Only with the additional select a_p.START_DATE everything becomes incredibly slow.
I read the view into a temporary table and did the join over that and got no performance issues. (2 seconds for the 4000 entries).
So I assume that the CTE used in the view is the reason for the slow performance.
Unfortunately I can't use temporary tables in views and I would hate to write the data to a normal table.
Is there a way in SQL Server to improve the CTE lag?
Instead of a recursive CTE, generate ALL_PERIODS with a CROSS JOIN between the plan table and a "number table", either persisted or built as a non-recursive CTE.
EG
-- Non-recursive replacement for the weekly grid of the view (first CTEs only;
-- the remaining CTEs of the view follow after the trailing comma).
WITH N As
(
-- 10 x 10 cross product of two literal lists yields the numbers 1..100
select top 100 row_number() over (order by (select null)) i
from (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10) ) v1(i)
cross join (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10) ) v2(i)
),
plan_period AS
(
-- period n starts 7*(n-1) days after the plan start and the next one 7*n days after it
SELECT
PLAN_NR,
DATEADD(day, 7*(N.i - 1), START_DATE) START_DATE,
N.i period_nr, DATEADD(day, 7*N.i, START_DATE) next_date
FROM TABLE_1 CROSS JOIN N
),
If you are able to modify the view, I would recommend the following:
Add a table containing the numbers from 0 up to whatever maximum you think you will need in the database; you can use the commands below:
-- Persisted numbers table: one row per integer from 0 to the chosen upper bound.
-- The primary key keeps the values unique and gives joins an index to use.
create table numbers ( id int not null primary key )
go
;with cte as (
select 0 num
union all
select num + 1
from cte
where num < 2000 -- change this
)
insert into numbers (id)
select num from cte
-- lift the default limit of 100 recursion levels; the WHERE above ends the recursion
option (maxrecursion 0)
change the first cte in the view to this :
-- Replacement for the recursive rec_cte of the view: builds the same weekly grid by
-- pairing every plan with the numbers 0..99 (period_nr 1..100) from the numbers table.
WITH rec_cte AS
(
SELECT
PLAN_NR
, DATEADD(DAY, 7* id, START_DATE) START_DATE
, id +1 period_nr
, DATEADD(DAY, 7*( id+1), START_DATE) next_date
FROM
TABLE_1 t
CROSS JOIN numbers i
WHERE i.id <100
),...
Also consider using a temp table instead of the CTE; it might help.

T-SQL - timespan by overlapping datetime columns

I want the maximum period covered by date ranges that overlap each other; if a period does not overlap any other date range, I want it returned as it is.
I have this table:
-- Date ranges to be merged; id is generated automatically (IDENTITY).
CREATE TABLE [dbo].[table1]
(
    [id]        [numeric](18, 0) IDENTITY(1,1) NOT NULL,
    [StartDate] [datetime]       NOT NULL,
    [EndDate]   [datetime]       NOT NULL
)
And their respective values:
-- First sample set: four overlapping ranges in November 2013 and one separate range.
INSERT INTO [dbo].[table1] ([StartDate], [EndDate])
VALUES
    (CAST('2013-11-01 00:00:00.000' AS DateTime), CAST('2013-11-10 00:00:00.000' AS DateTime)),
    (CAST('2013-11-05 00:00:00.000' AS DateTime), CAST('2013-11-15 00:00:00.000' AS DateTime)),
    (CAST('2013-11-10 00:00:00.000' AS DateTime), CAST('2013-11-15 00:00:00.000' AS DateTime)),
    (CAST('2013-11-10 00:00:00.000' AS DateTime), CAST('2013-11-25 00:00:00.000' AS DateTime)),
    (CAST('2013-11-26 00:00:00.000' AS DateTime), CAST('2013-11-29 00:00:00.000' AS DateTime))
And expected result is:
ID StartDate EndDate
--------------------------------------------------------
1 2013-11-01 00:00:00.000 2013-11-25 00:00:00.000
2 2013-11-26 00:00:00.000 2013-11-29 00:00:00.000
Thanks in advance.
// Edit 1: Thanks.
Works, but there is a new question for breaks in the same table
-- Second sample set: intraday ranges on 2018-05-03 with gaps between the islands.
INSERT INTO [dbo].[table1] ([StartDate], [EndDate])
VALUES
    (CAST('2018-05-03 08:30:00.000' AS DateTime), CAST('2018-05-03 08:45:00.000' AS DateTime)),
    (CAST('2018-05-03 08:45:00.000' AS DateTime), CAST('2018-05-03 09:30:00.000' AS DateTime)),
    (CAST('2018-05-03 08:45:00.000' AS DateTime), CAST('2018-05-03 11:30:00.000' AS DateTime)),
    (CAST('2018-05-03 12:45:00.000' AS DateTime), CAST('2018-05-03 13:00:00.000' AS DateTime)),
    (CAST('2018-05-03 14:00:00.000' AS DateTime), CAST('2018-05-03 15:45:00.000' AS DateTime)),
    (CAST('2018-05-03 14:15:00.000' AS DateTime), CAST('2018-05-03 15:30:00.000' AS DateTime))
And expected result is:
ID StartDate EndDate
--------------------------------------------------------
1 2018-05-03 08:30:00.000 2018-05-03 11:30:00.000
2 2018-05-03 12:45:00.000 2018-05-03 13:00:00.000
3 2018-05-03 14:00:00.000 2018-05-03 15:45:00.000
Very similar answer, but making use of an index and windowed functions to make the gaps and islands analysis cheaper (faster).
http://sqlfiddle.com/#!18/f19569/3
-- Merges overlapping or touching date ranges into islands and numbers the islands
-- by their start date.
;WITH look_back AS
(
    -- latest end date among all rows that sort before the current one
    SELECT
        StartDate,
        EndDate,
        MAX(EndDate) OVER (ORDER BY StartDate, EndDate
                           ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS PrecedingEndDate
    FROM Table1
),
grouped AS
(
    -- a row opens a new island unless an earlier row reaches its start date;
    -- the running total of those openings is the island number
    SELECT
        StartDate,
        EndDate,
        SUM(CASE WHEN PrecedingEndDate >= StartDate THEN 0 ELSE 1 END)
            OVER (ORDER BY StartDate, EndDate) AS GroupID
    FROM look_back
)
SELECT
    ROW_NUMBER() OVER (ORDER BY MIN(StartDate)),
    MIN(StartDate),
    MAX(EndDate)
FROM grouped
GROUP BY GroupID
This is a form of the gaps and islands problem.
In this case, exists and cumulative sum and group by are the route to the solution:
-- Merges overlapping or touching date ranges into islands.
-- A row starts a new island only when no row that sorts before it (earlier start date,
-- or the same start date and a smaller id) reaches its start date. Testing for ANY
-- overlapping row instead would leave the first row of a multi-row island unflagged and
-- glue that island onto the previous one.
select row_number() over (order by min(startdate)),
       min(startdate), max(enddate)
from (select flagged.*,
             -- running count of island starts = island number; id breaks start-date ties
             sum(isstart) over (order by startdate, id rows unbounded preceding) as grp
      from (select t1.*,
                   (case when exists (select 1
                                      from table1 tt1
                                      where (tt1.startdate < t1.startdate
                                             or (tt1.startdate = t1.startdate and tt1.id < t1.id))
                                        and tt1.enddate >= t1.startdate
                                     )
                         then 0 else 1
                    end) as isstart
            from table1 t1
           ) flagged
     ) grouped
group by grp;

SQL Server Windowing - 24 Hour Window

I have the following data
-- Sample spend records: one row per purchase of a customer with its timestamp.
CREATE TABLE [dbo].[Test](
    [CustId]      [int]      NULL,
    [Spend]       [money]    NULL,
    [TimeOdSpent] [datetime] NULL,
    [ID]          [int]      IDENTITY(1,1) NOT NULL
) ON [PRIMARY]
GO
-- explicit ID values are supplied below, so identity insert must be switched on
SET IDENTITY_INSERT [dbo].[Test] ON
GO
INSERT [dbo].[Test] ([CustId], [Spend], [TimeOdSpent], [ID])
VALUES
    (11, 400.0000, CAST(N'2016-10-27 10:00:00.000' AS DateTime), 1),
    (11, 200.0000, CAST(N'2016-10-27 11:00:00.000' AS DateTime), 2),
    (11, 400.0000, CAST(N'2016-10-28 09:00:00.000' AS DateTime), 3),
    (11, 500.0000, CAST(N'2016-10-28 16:00:00.000' AS DateTime), 4)
GO
SET IDENTITY_INSERT [dbo].[Test] OFF
Expected Result should be like this
1 2016-10-27 11:00:00.000 600
2 2016-10-28 09:00:00.000 1000
3 2016-10-28 16:00:00.000 900
I want to find the instances where the spend totals more than 500 within a 24-hour period. I have been trying to write a windowing query, without luck.
You can query as below:
-- Returns every row of #customer whose calendar day (the date part of timeofuse)
-- has a total spend above 500. Sm carries that per-day total on each row.
-- Note: the window is the calendar date, not a rolling 24-hour span.
SELECT *
FROM (
    SELECT
        *,
        Sm = SUM(spend) OVER (PARTITION BY CONVERT(date, timeofuse))
    FROM #customer
) AS a
WHERE Sm > 500
This is the sort of thing I was looking for. I used the Sales.SalesOrderHeader table from AdventureWorks Instead of my simple table above
-- Per sales person, buckets orders into consecutive 24-hour cycles measured from the
-- person's first ship date and reports the cycles whose sub-totals exceed 100000.
;WITH cte1 as
(
-- each order together with the previous ship date of the same sales person
select
LAG(ShipDate) OVER(PARTITION By SalesPersonID ORDER BY ShipDate) ShipDateBefore,ShipDate, SalesPersonID,SubTotal
from Sales.SalesOrderHeader
-- compare the column directly (same rows as casting it to DATE) so an index on ShipDate stays usable
where ShipDate < '20080710' and SalesPersonID IS NOT NULL
),cte2 as
(
-- gap to the previous order in seconds; NULL for the first order of a sales person
Select ShipDateBefore, ShipDate, SalesPersonID, SubTotal
,DATEDIFF(ss,ShipDateBefore,ShipDate) as DiffInSeconds
from cte1
), cte3 as (
-- seconds elapsed since the first order of the sales person
select ShipDate, SalesPersonID, SubTotal
,SUM(DiffInSeconds) OVER(Partition BY SalesPersonID ORDER BY ShipDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as RunningTime
from cte2
),cte4 as
(
-- 86400 seconds = 24 hours; the first order has a NULL running time and falls into cycle 0
select
ShipDate, SalesPersonID, SubTotal
,ISNULL(CAST((RunningTime / 86400.00) AS INT),0) Cycle
FROM cte3
)
SELECT
SalesPersonID ,SUM(SubTotal)Total,MIN(ShipDate)DurationStart,MAX(ShipDate)DurationEnd
from cte4
GROUP by SalesPersonID,Cycle
Having SUM(SubTotal) > 100000.00

Group up rows based on date overlapping

Within the same id, if the effective date/end date ranges of any rows overlap, we need to group those rows under a unique id.
In below image dategroup is the desired output column
Data is sorted in order by ID asc, EffectiveDate ASC, EndDate Desc
-- Sample ranges: five for id 1 (the last one separate from the rest) and two for id 2.
CREATE TABLE #DataTable
(
    id            int,
    EffectiveDate datetime,
    Enddate       datetime
)
INSERT INTO #DataTable ([id], [EffectiveDate], [Enddate])
VALUES
    (1, CAST(N'2017-01-01 00:00:00.000' AS DateTime), CAST(N'2017-01-11 00:00:00.000' AS DateTime)),
    (1, CAST(N'2017-01-02 00:00:00.000' AS DateTime), CAST(N'2017-01-05 00:00:00.000' AS DateTime)),
    (1, CAST(N'2017-01-03 00:00:00.000' AS DateTime), CAST(N'2017-01-12 00:00:00.000' AS DateTime)),
    (1, CAST(N'2017-01-06 00:00:00.000' AS DateTime), CAST(N'2017-01-09 00:00:00.000' AS DateTime)),
    (1, CAST(N'2017-01-13 00:00:00.000' AS DateTime), CAST(N'2017-01-19 00:00:00.000' AS DateTime)),
    (2, CAST(N'2017-02-01 00:00:00.000' AS DateTime), CAST(N'2017-02-11 00:00:00.000' AS DateTime)),
    (2, CAST(N'2017-02-06 00:00:00.000' AS DateTime), CAST(N'2017-02-16 00:00:00.000' AS DateTime))
GO
Try this, Hope it helps. Not the most attractive code but it should work. I may clean it up later if I find some time.
-- Assigns a DateGroup number to every row by looking for another row whose range
-- contains the row's EffectiveDate and ranking the rows on the id of that match.
-- NOTE(review): this reads the global temp table ##DataTable, while the sample data of
-- the question is loaded into #DataTable — confirm which table is meant.
;WITH cte_StepOne as
(
-- Pair every row (a) with the rows (b) whose range contains a.EffectiveDate, skipping
-- b rows that share a's EffectiveDate or EndDate; rows without a match keep NULL
-- OverLap* columns. SeqNo numbers the joined rows by id, EffectiveDate, Enddate.
-- The id comparison is commented out, so matches are not restricted by id.
SELECT ROW_NUMBER() OVER (ORDER BY a.[id],
a.[EffectiveDate],
a.[Enddate]) AS SeqNo,
a.[id],
a.[EffectiveDate],
a.[Enddate],
b.[id] AS OverLapID,
b.[EffectiveDate] AS [OverLapEffectiveDate],
b.[Enddate] AS [OverLapEnddate]
FROM ##DataTable a
LEFT JOIN ##DataTable b
ON a.EffectiveDate BETWEEN b.EffectiveDate
AND b.EndDate
AND a.EffectiveDate <> b.EffectiveDate
AND a.EndDate <> b.EndDate --and a.ID <> b.ID
)
,cte_StepTwo AS
(
-- LeadValue is the OverLapEffectiveDate of the NEXT row in SeqNo order;
-- LeadValueID is the id of the PREVIOUS row (it is computed with LAG despite its name).
SELECT SeqNo,
id,
EffectiveDate,
Enddate,
LEAD(OverLapEffectiveDate, 1) OVER (ORDER BY SeqNo) AS LeadValue,LAG(id, 1) OVER (ORDER BY SeqNo) AS LeadValueID,
OverLapID,
OverLapEffectiveDate,
OverLapEnddate
FROM cte_StepOne
)
,cte_Result AS
(
-- Fill in the OverLap* columns for rows that found no containing range:
--   * a row without a match whose EffectiveDate equals the next row's overlap start
--     takes its own id / dates;
--   * any other row without a match takes the previous row's id + 1 as OverLapID;
--   * matched rows keep the values of their match.
SELECT id,
EffectiveDate,
Enddate,
CASE
WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN ID
WHEN OverLapID IS NULL THEN LeadValueID + 1
ELSE OverLapID
END AS OverLapID,
CASE
WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN EffectiveDate
ELSE OverLapEffectiveDate
END AS OverLapEffectiveDate,
CASE
WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN Enddate
ELSE OverLapEnddate
END AS OverLapEnddate
FROM cte_StepTwo
)
-- DateGroup is the dense rank of (ID, OverLapID); DISTINCT collapses rows that were
-- duplicated by several matches in the first step.
SELECT DISTINCT id,
EffectiveDate,
Enddate,
DENSE_RANK() OVER (ORDER BY ID,OverLapID) AS DateGroup
FROM cte_Result
ORDER BY id,EffectiveDate
Result:
This answer takes the approach of trying to identify records for which the running DateGroup counter should be incremented. Ultimately, we will assign a value of 1 to such records. With this assignment in hand, we can then simply take a cumulative sum to generate the DateGroup.
-- first_rows: the earliest record (by EffectiveDate) of every ID
WITH first_rows AS (
    SELECT ranked.ID, ranked.EffectiveDate, ranked.EndDate
    FROM
    (
        SELECT ID, EffectiveDate, EndDate,
               ROW_NUMBER() OVER (PARTITION BY ID ORDER BY EffectiveDate) rn
        FROM yourTable
    ) ranked
    WHERE ranked.rn = 1
),
-- outside_first: records whose effective date and end date both lie outside
-- the range of the first record of their ID
outside_first AS (
    SELECT src.ID, src.EffectiveDate, src.EndDate
    FROM yourTable src
    INNER JOIN first_rows f
        ON  src.ID = f.ID
        AND src.EffectiveDate NOT BETWEEN f.EffectiveDate AND f.EndDate
        AND src.EndDate NOT BETWEEN f.EffectiveDate AND f.EndDate
),
-- flagged: every original record plus "amount", which is 1 when the record is a
-- first record or lies outside the first record's range (it bumps the DateGroup)
flagged AS (
    SELECT src.ID, src.EffectiveDate, src.EndDate,
           CASE WHEN f.ID IS NOT NULL OR o.ID IS NOT NULL THEN 1 ELSE 0 END AS amount
    FROM yourTable src
    LEFT JOIN first_rows f
        ON  src.ID = f.ID
        AND src.EffectiveDate = f.EffectiveDate
        AND src.EndDate = f.EndDate
    LEFT JOIN outside_first o
        ON  src.ID = o.ID
        AND src.EffectiveDate = o.EffectiveDate
        AND src.EndDate = o.EndDate
)
-- DateGroup = sum of "amount" over all records with an ID and an effective date
-- not greater than those of the current record
SELECT cur.ID,
       cur.EffectiveDate,
       cur.EndDate,
       SUM(prior.amount) AS DateGroup
FROM flagged cur
INNER JOIN flagged prior
    ON  cur.ID >= prior.ID
    AND cur.EffectiveDate >= prior.EffectiveDate
GROUP BY cur.ID, cur.EffectiveDate, cur.EndDate;
Output:
Demo here:
Rextester
Data used:
-- Demo copy of the sample data used by the query above.
CREATE TABLE yourTable
(
    ID            int,
    EffectiveDate datetime,
    EndDate       datetime
);
INSERT INTO yourTable (ID, EffectiveDate, EndDate)
VALUES
    (1, '2017-01-01 00:00:00.000', '2017-01-11 00:00:00.000'),
    (1, '2017-01-02 00:00:00.000', '2017-01-05 00:00:00.000'),
    (1, '2017-01-03 00:00:00.000', '2017-01-12 00:00:00.000'),
    (1, '2017-01-06 00:00:00.000', '2017-01-09 00:00:00.000'),
    (1, '2017-01-13 00:00:00.000', '2017-01-19 00:00:00.000'),
    (2, '2017-02-01 00:00:00.000', '2017-02-11 00:00:00.000'),
    (2, '2017-02-06 00:00:00.000', '2017-02-16 00:00:00.000');
What about this? It's simpler than the other solutions posted:
-- Groups the rows of each id by whether they start within the range of the id's first
-- record: DateGroup is the dense rank of (id, OutOfDateRange).
WITH
CTE_GetFirstRecordForEachId AS
(
    -- rn = 1 marks the earliest record (by EffectiveDate, then EndDate) of every id
    SELECT
        id,
        EffectiveDate,
        Enddate,
        rn = ROW_NUMBER() OVER (PARTITION BY id ORDER BY EffectiveDate, EndDate)
    FROM
        #DataTable
),
CTE_GetOutOfDateRange AS
(
    SELECT
        a.*,
        -- 0 when the row starts no later than the end of the first record (b) of its id.
        -- b is the earliest record, so no lower-bound test is needed. The former
        -- comparisons of b.EffectiveDate / b.Enddate with b.Enddate itself added nothing,
        -- and the whole expression reduced to exactly this test.
        OutOfDateRange =
            CASE WHEN a.EffectiveDate <= b.Enddate
                 THEN 0
                 ELSE 1
            END
    FROM
        #DataTable a
    INNER JOIN
        CTE_GetFirstRecordForEachId b ON a.id = b.id AND b.rn=1
)
SELECT
    id,
    Effectivedate,
    Enddate,
    DateGroup = DENSE_RANK() OVER (ORDER BY id, OutOfDateRange)
FROM
    CTE_GetOutOfDateRange
ORDER BY
    id, Effectivedate, Enddate
Output:
id Effectivedate Enddate DateGroup
----------- ----------------------- ----------------------- --------------------
1 2017-01-01 00:00:00.000 2017-01-11 00:00:00.000 1
1 2017-01-02 00:00:00.000 2017-01-05 00:00:00.000 1
1 2017-01-03 00:00:00.000 2017-01-12 00:00:00.000 1
1 2017-01-06 00:00:00.000 2017-01-09 00:00:00.000 1
1 2017-01-13 00:00:00.000 2017-01-19 00:00:00.000 2
2 2017-02-01 00:00:00.000 2017-02-11 00:00:00.000 3
2 2017-02-06 00:00:00.000 2017-02-16 00:00:00.000 3
What about this (I am still testing it)
-- Walks the rows of #DataTable one by one (in ID, EffectiveDate, ENDDate DESC order) and
-- carries a DATEGROUP counter from each row to the next through a recursive CTE.
-- NOTE(review): one recursion level is used per row and no MAXRECURSION hint is given,
-- so the default recursion limit applies — confirm for larger tables.
WITH Z AS
-- anchor: the very first row (RN = 1) starts DATEGROUP 1
(SELECT * FROM (SELECT ID, [EffectiveDate], ENDDate
, LAG(ID) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS ID_Prec
, LAG(EffectiveDate) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS EffDate_Prec
, LAG(ENDDate) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS EndDate_Prec
, ROW_NUMBER() OVER (ORDER BY ID, EffectiveDate,ENDDate DESC) AS RN
, 1 AS DATEGROUP
FROM #DataTable ) C WHERE RN = 1
UNION ALL
-- recursive step: take the row whose RN follows the row already in Z
SELECT A.ID, A.EffectiveDate, A.Enddate
, A.ID_Prec, A.EffDate_Prec
, A.EndDate_Prec
, A.RN
-- keep the group when the previous row of the same ID ends on or after this row's
-- start; otherwise (no previous row in this ID, or a gap) open the next group
, CASE WHEN A.ID = A.ID_PREC AND (A.EffectiveDate <=A.EndDate_Prec /* OR A.EndDate>=A.EffDate_Prec*/) THEN Z.DATEGROUP
ELSE Z.DATEGROUP+1 END AS DATEGROUP
-- A: every row with the ID / dates of the preceding row of the same ID (NULL for the
-- first row of an ID) and its overall row number
FROM (SELECT A.ID, A.EffectiveDate, A.ENDDate
, LAG(A.ID) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS ID_Prec
, LAG(A.EffectiveDate) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS EffDate_Prec
, LAG(A.ENDDate) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS EndDate_Prec
, ROW_NUMBER() OVER (ORDER BY A.ID, A.EffectiveDate,A.ENDDate DESC) AS RN
, 1 AS DATEGROUP
FROM #DataTable A) A
INNER JOIN Z ON A.RN -1= Z.RN
)
SELECT ID, EffectiveDate, Enddate, DATEGROUP FROM Z
Output:
ID EffectiveDate Enddate DATEGROUP
----------- ----------------------- ----------------------- -----------
1 2017-01-01 00:00:00.000 2017-01-11 00:00:00.000 1
1 2017-01-02 00:00:00.000 2017-01-05 00:00:00.000 1
1 2017-01-03 00:00:00.000 2017-01-12 00:00:00.000 1
1 2017-01-06 00:00:00.000 2017-01-09 00:00:00.000 1
1 2017-01-13 00:00:00.000 2017-01-19 00:00:00.000 2
2 2017-02-01 00:00:00.000 2017-02-11 00:00:00.000 3
2 2017-02-06 00:00:00.000 2017-02-16 00:00:00.000 3
I guess you are missing some test scenarios in your sample data.
-- Walks the rows in (id, EffectiveDate, Enddate) order and numbers the groups of
-- overlapping ranges (New_ID). Works for any number of rows: the row count is no longer
-- hard-coded and the recursion limit is lifted.
;with CTE as
(
select id, EffectiveDate, Enddate
,ROW_NUMBER()over(order by id, EffectiveDate, Enddate)rn
from #DataTable
)
,CTE1 AS
(
-- anchor: the first row opens group 1; GroupEnd tracks the latest end date of the group
select id, EffectiveDate, Enddate, rn, Enddate as GroupEnd, 1 New_ID
from CTE
where rn=1
union ALL
select c.id, c.EffectiveDate, c.Enddate, c.rn
-- stay in the group: keep the later of the two end dates; new group: restart from this row
,case when c.id=c1.id
and c.EffectiveDate <= c1.GroupEnd
and c1.GroupEnd > c.Enddate then c1.GroupEnd
else c.Enddate
END
-- compare with the end of the whole group so far, not just with the previous row, so a
-- short range nested inside a long one does not split the group
,case when c.id=c1.id
and c.EffectiveDate <= c1.GroupEnd then c1.New_ID
else c1.New_ID+1
END
from CTE c
inner join CTE1 c1
on c.rn=c1.rn+1
)
select id, EffectiveDate, Enddate, rn, New_ID
from CTE1
-- one recursion level per row: the default limit of 100 would fail on larger tables
option (maxrecursion 0)
drop table #DataTable
this may help you. I posted here shortest and simplest version of tsql...
-- Ranks rows by (id, flag), where the flag is 0 when the row's range touches the range
-- of the previous row of the same id and 1 otherwise. The first row of an id has no
-- previous row and is compared with its own dates.
WITH with_previous AS (
    SELECT
        id,
        EffectiveDate,
        Enddate,
        ISNULL(LAG(EffectiveDate) OVER (PARTITION BY id ORDER BY id,EffectiveDate,Enddate),EffectiveDate) AS PreviousEffDate,
        ISNULL(LAG(Enddate) OVER (PARTITION BY id ORDER BY id,EffectiveDate,Enddate),Enddate) AS PreviousEndDate
    FROM #DataTable
)
SELECT
    id,
    EffectiveDate,
    Enddate,
    DENSE_RANK() OVER (
        ORDER BY
            id,
            CASE
                WHEN EffectiveDate BETWEEN PreviousEffDate AND PreviousEndDate
                  OR Enddate BETWEEN PreviousEffDate AND PreviousEndDate
                  OR PreviousEffDate BETWEEN EffectiveDate AND Enddate
                  OR PreviousEndDate BETWEEN EffectiveDate AND Enddate
                THEN 0
                ELSE 1
            END
    ) AS DateGroup
FROM with_previous
Result:
I got this one from another forum and altered it to fit my requirement. It looks simple and effective.
-- Gaps-and-islands per id: a row starts a new group (isstart = 1) unless an earlier row
-- of the same id ends on or after its EffectiveDate; DG is the running count of starts.
WITH C1 AS (
SELECT ID, EffectiveDate, EndDate,
-- a NULL EndDate is treated as open-ended (9999-12-31); the frame covers only the
-- rows before the current one, so the first row of an id always starts a group
CASE WHEN EffectiveDate <= MAX(ISnull(EndDate,'9999-12-31 00:00:00.000')) OVER(partition by id ORDER BY EffectiveDate, EndDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) THEN 0 ELSE 1 END AS isstart
FROM #DataTable
)
SELECT ID,EffectiveDate,EndDate,
-- order by the same keys as above: ordering by ID alone leaves the row order inside an
-- id undefined, so the running total could be accumulated in the wrong sequence
SUM(isstart) OVER(ORDER BY ID, EffectiveDate, EndDate ROWS UNBOUNDED PRECEDING) AS DG
FROM C1

Sql server query clarification

I have table like below,
Txn_Id Txn_Type
___________________
1 101
1 102
1 103
1 104
2 101
2 102
2 104
3 101
3 104
I want the transactions that have only txn_type 101 and 104. For example, I should get only Txn_Id "3" for the above data.
I tried the query below and it returns a result. Is it possible to achieve this with a single query?
-- Transactions that have both a 101 row and a 104 row. 101 and 104 are txn_type values,
-- so they are compared with txn_type rather than txn_id. This does not yet exclude
-- transactions that also have other types.
Select txn_id from Txn where txn_id in (Select txn_id from Txn where txn_type = 101) and txn_type = 104
-- One output row per Txn row whose type is 101 or 104.
SELECT txn_id
FROM Txn
WHERE txn_type IN (101, 104)
option 2
-- Same filter written with OR: one output row per Txn row of type 101 or 104.
SELECT txn_id
FROM Txn
WHERE txn_type = 101
   OR txn_type = 104
To get only "3"
-- Transactions that have a 101 or 104 row and no row of any other type.
-- The exclusion is written as "not one of the wanted types" instead of listing the
-- unwanted types (102, 103), so it stays correct when further types appear.
Select distinct t1.txn_id
from Txn t1
where t1.txn_type in (101, 104)
and not exists(
select 1 from Txn t2 where t2.txn_id = t1.txn_id and t2.txn_type not in (101, 104)
)
Hi. As per your comments above, you only need txn_id = 3 (the max).
Please find the code below.
-- Sample data in a table variable (table variables are named with @, not #).
DECLARE @Table1 TABLE
(txn_id int, Txn_Type int)
;
INSERT INTO @Table1
(txn_id , Txn_Type )
VALUES
(1, 101),
(1, 102),
(1, 103),
(1, 104),
(2, 101),
(2, 102),
(2, 104),
(3, 101),
(3, 104)
;
-- Highest txn_id per type, for the types 101 and 104 only.
-- The filter column is Txn_Type; the table has no column named "item".
Select max(txn_id ),Txn_Type
from @Table1 where Txn_Type in (101,104)
group by Txn_Type
As balaji pointed out, @Ayush's solution is not flexible, since it will return incorrect results if you, for example, add another pair of records to the table: (4,101) and (4,104). IMO, you have to join the table to itself for some filtering, something like this:
-- Sample data in a table variable (table variables are named with @, not #).
DECLARE @Table1 TABLE
(txn_id int, Txn_Type int);
INSERT INTO @Table1
(txn_id , Txn_Type )
VALUES
(1, 101),
(1, 102),
(1, 103),
(1, 104),
(2, 101),
(2, 102),
(2, 104),
(3, 101),
(3, 104),
(4, 101),
(4, 104);
-- Rows of the transactions that consist of exactly the types 101 and 104.
select t1.*
from @Table1 t1
inner join (select txn_id
from @Table1
group by txn_id
-- no row of another type, and both wanted types present. A plain row-count test
-- (count(*) < 3) would also accept a transaction holding just one of the two types
-- or two rows of unrelated types.
having sum(case when Txn_Type in (101,104) then 0 else 1 end) = 0
and count(distinct Txn_Type) = 2
) t2 on t2.txn_id = t1.txn_id
where t1.Txn_Type in (101,104)

Resources