Running Average for stock using Recursive CTE - sql-server

I have the below data and need to calculate the running average for each row using the amount from each of the previous rows.
CREATE TABLE [dbo].[AKTest](
[IntakeSellingPrice] [decimal](38, 20) NULL,
[IntakeSellingAmount] [decimal](38, 6) NULL,
[Item No_] [nvarchar](20) NOT NULL,
[Variant Code] [nvarchar](10) NOT NULL,
[Unit of Measure Code] [nvarchar](10) NOT NULL,
[Posting Date] [datetime] NOT NULL,
[PurchaseQty] [decimal](38, 20) NULL,
[ReceiptNo] [bigint] NULL,
[InventoryBalance] [decimal](38, 20) NOT NULL,
[NewBalance] [decimal](38, 20) NULL
) ON [PRIMARY]
GO
INSERT [dbo].[AKTest] ([IntakeSellingPrice], [IntakeSellingAmount], [Item No_], [Variant Code], [Unit of Measure Code], [Posting Date], [PurchaseQty], [ReceiptNo], [InventoryBalance], [NewBalance]) VALUES (CAST(10.00000000000000000000 AS Decimal(38, 20)), CAST(1000.000000 AS Decimal(38, 6)), N'1000001', N'NO_SIZE', N'EACH', CAST(0x0000A80800000000 AS DateTime), CAST(100.00000000000000000000 AS Decimal(38, 20)), 1, CAST(0.00000000000000000000 AS Decimal(38, 20)), CAST(100.00000000000000000000 AS Decimal(38, 20)))
GO
INSERT [dbo].[AKTest] ([IntakeSellingPrice], [IntakeSellingAmount], [Item No_], [Variant Code], [Unit of Measure Code], [Posting Date], [PurchaseQty], [ReceiptNo], [InventoryBalance], [NewBalance]) VALUES (CAST(5.00000000000000000000 AS Decimal(38, 20)), CAST(250.000000 AS Decimal(38, 6)), N'1000001', N'NO_SIZE', N'EACH', CAST(0x0000A80E00000000 AS DateTime), CAST(50.00000000000000000000 AS Decimal(38, 20)), 2, CAST(50.00000000000000000000 AS Decimal(38, 20)), CAST(100.00000000000000000000 AS Decimal(38, 20)))
GO
INSERT [dbo].[AKTest] ([IntakeSellingPrice], [IntakeSellingAmount], [Item No_], [Variant Code], [Unit of Measure Code], [Posting Date], [PurchaseQty], [ReceiptNo], [InventoryBalance], [NewBalance]) VALUES (CAST(12.50000000000000000000 AS Decimal(38, 20)), CAST(625.000000 AS Decimal(38, 6)), N'1000001', N'NO_SIZE', N'EACH', CAST(0x0000A81900000000 AS DateTime), CAST(50.00000000000000000000 AS Decimal(38, 20)), 3, CAST(60.00000000000000000000 AS Decimal(38, 20)), CAST(110.00000000000000000000 AS Decimal(38, 20)))
GO
Expected Outcome
ReceiptNo Average
_________________________
1 10.00
2 7.50
3 8.86
The formula I used to calculate it manually is shown below for the third row. The calculation is easier to follow if you start at the bottom row and work upwards.
A) I start at the bottom using receiptNo 3 where the NewBalance is 110.
B) 50 units are purchased for 12.50 = 625
C) that leaves 60 units. on the previous row 50 units are purchased for 5 = 250
D) That leaves 10 units. On the previous row 100 units are purchased for 10 = 1000, but we only need the cost of 10 of those units, so 10 x 10 = 100.
E) Add all the costs up: 625 + 250 + 100 = 975, then divide by the balance: 975 / 110 = 8.86

I don't think this is possible using ROWS BETWEEN and OVER as the logic is a bit odd?
I created a temporary table to play with the data and test the results, but basically this is just your original script with a recursive CTE added:
CREATE TABLE #AKTest (
[IntakeSellingPrice] [decimal](38, 20) NULL,
[IntakeSellingAmount] [decimal](38, 6) NULL,
[Item No_] [nvarchar](20) NOT NULL,
[Variant Code] [nvarchar](10) NOT NULL,
[Unit of Measure Code] [nvarchar](10) NOT NULL,
[Posting Date] [datetime] NOT NULL,
[PurchaseQty] [decimal](38, 20) NULL,
[ReceiptNo] [bigint] NULL,
[InventoryBalance] [decimal](38, 20) NOT NULL,
[NewBalance] [decimal](38, 20) NULL);
GO
INSERT #AKTest ([IntakeSellingPrice], [IntakeSellingAmount], [Item No_], [Variant Code], [Unit of Measure Code], [Posting Date], [PurchaseQty], [ReceiptNo], [InventoryBalance], [NewBalance]) VALUES (CAST(10.00000000000000000000 AS Decimal(38, 20)), CAST(1000.000000 AS Decimal(38, 6)), N'1000001', N'NO_SIZE', N'EACH', CAST(0x0000A80800000000 AS DateTime), CAST(100.00000000000000000000 AS Decimal(38, 20)), 1, CAST(0.00000000000000000000 AS Decimal(38, 20)), CAST(100.00000000000000000000 AS Decimal(38, 20)))
GO
INSERT #AKTest ([IntakeSellingPrice], [IntakeSellingAmount], [Item No_], [Variant Code], [Unit of Measure Code], [Posting Date], [PurchaseQty], [ReceiptNo], [InventoryBalance], [NewBalance]) VALUES (CAST(5.00000000000000000000 AS Decimal(38, 20)), CAST(250.000000 AS Decimal(38, 6)), N'1000001', N'NO_SIZE', N'EACH', CAST(0x0000A80E00000000 AS DateTime), CAST(50.00000000000000000000 AS Decimal(38, 20)), 2, CAST(50.00000000000000000000 AS Decimal(38, 20)), CAST(100.00000000000000000000 AS Decimal(38, 20)))
GO
INSERT #AKTest ([IntakeSellingPrice], [IntakeSellingAmount], [Item No_], [Variant Code], [Unit of Measure Code], [Posting Date], [PurchaseQty], [ReceiptNo], [InventoryBalance], [NewBalance]) VALUES (CAST(12.50000000000000000000 AS Decimal(38, 20)), CAST(625.000000 AS Decimal(38, 6)), N'1000001', N'NO_SIZE', N'EACH', CAST(0x0000A81900000000 AS DateTime), CAST(50.00000000000000000000 AS Decimal(38, 20)), 3, CAST(60.00000000000000000000 AS Decimal(38, 20)), CAST(110.00000000000000000000 AS Decimal(38, 20)))
GO
SELECT * FROM #AKTest;
-- Running weighted-average price per receipt.
-- For every receipt the recursive CTE walks backwards through the earlier
-- receipts (ReceiptNo - 1, ReceiptNo - 2, ...) and attributes the receipt's
-- NewBalance to the most recent purchases first, until the whole balance has
-- been attributed.
-- NOTE(review): rows are linked by ReceiptNo alone, so this assumes ReceiptNo is
-- a gap-free sequence belonging to a single item -- confirm before using it on
-- multi-item data.
WITH cte AS (
    -- Anchor: the receipt's own purchase. "remaining" is the part of the
    -- balance that still has to be attributed to earlier receipts.
    SELECT
        ReceiptNo,
        ReceiptNo AS linked_to,
        NewBalance,
        NewBalance - PurchaseQty AS remaining,
        PurchaseQty AS purchased,
        IntakeSellingPrice
    FROM
        #AKTest
    UNION ALL
    -- Recursive step: take as much of the previous receipt as is still needed.
    SELECT
        c.ReceiptNo,
        c.linked_to - 1 AS linked_to,
        -- Keep the originating receipt's balance so the divisor below is that
        -- receipt's own NewBalance, not the largest balance seen in the chain.
        c.NewBalance,
        c.remaining - a.PurchaseQty AS remaining,
        CASE WHEN a.PurchaseQty > c.remaining THEN c.remaining ELSE a.PurchaseQty END AS purchased,
        a.IntakeSellingPrice
    FROM
        cte c
        INNER JOIN #AKTest a ON a.ReceiptNo = c.linked_to - 1
    WHERE
        c.linked_to > 1
        -- Stop once the balance is fully attributed; without this guard older
        -- receipts would be added with a negative "purchased" quantity.
        AND c.remaining > 0)
SELECT
    ReceiptNo,
    -- NULLIF avoids a divide-by-zero error when a receipt leaves a zero balance.
    SUM(purchased * IntakeSellingPrice) / NULLIF(MAX(NewBalance), 0) AS avg_price
FROM
    cte
GROUP BY
    ReceiptNo
ORDER BY
    ReceiptNo;
Gets the correct answer:
ReceiptNo avg_price
1 10.000000
2 7.500000
3 8.863636
As requested, this will display all of the data from the table, with the average price on the end:
-- All columns of #AKTest plus the running weighted-average price per receipt.
-- The recursive CTE walks backwards from each receipt through the earlier
-- receipts and attributes the receipt's NewBalance to the most recent
-- purchases first.
-- NOTE(review): rows are linked by ReceiptNo alone, so this assumes ReceiptNo is
-- a gap-free sequence belonging to a single item -- confirm before using it on
-- multi-item data.
WITH cte AS (
    -- Anchor: the receipt's own purchase. "remaining" is the part of the
    -- balance that still has to be attributed to earlier receipts.
    SELECT
        ReceiptNo,
        ReceiptNo AS linked_to,
        NewBalance,
        NewBalance - PurchaseQty AS remaining,
        PurchaseQty AS purchased,
        IntakeSellingPrice
    FROM
        #AKTest
    UNION ALL
    -- Recursive step: take as much of the previous receipt as is still needed.
    SELECT
        c.ReceiptNo,
        c.linked_to - 1 AS linked_to,
        -- Keep the originating receipt's balance so the divisor in Averages is
        -- that receipt's own NewBalance, not the largest balance in the chain.
        c.NewBalance,
        c.remaining - a.PurchaseQty AS remaining,
        CASE WHEN a.PurchaseQty > c.remaining THEN c.remaining ELSE a.PurchaseQty END AS purchased,
        a.IntakeSellingPrice
    FROM
        cte c
        INNER JOIN #AKTest a ON a.ReceiptNo = c.linked_to - 1
    WHERE
        c.linked_to > 1
        -- Stop once the balance is fully attributed; without this guard older
        -- receipts would be added with a negative "purchased" quantity.
        AND c.remaining > 0),
Averages AS (
    -- One row per receipt: attributed cost divided by the receipt's balance.
    SELECT
        ReceiptNo,
        -- NULLIF avoids a divide-by-zero error when a receipt leaves a zero balance.
        SUM(purchased * IntakeSellingPrice) / NULLIF(MAX(NewBalance), 0) AS avg_price
    FROM
        cte
    GROUP BY
        ReceiptNo)
SELECT
    a.*,
    v.avg_price
FROM
    Averages v
    INNER JOIN #AKTest a ON a.ReceiptNo = v.ReceiptNo
ORDER BY
    a.ReceiptNo;

Related

How to reset the cumulative sum based on condition?

Sample data:
CREATE TABLE [dbo].[agent_sales]
(
[date] [date] NULL,
[agent] [nvarchar](50) NULL,
[sale] [int] NULL
) ON [PRIMARY]
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-03' AS Date), N'Agent A', 10)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-05' AS Date), N'Agent A', 5)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent A', 20)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent A', 2)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-05' AS Date), N'Agent B', 5)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-06' AS Date), N'Agent B', 28)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent B', 5)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent B', 10)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-02' AS Date), N'Agent C', 35)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-04' AS Date), N'Agent C', 25)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-08' AS Date), N'Agent C', 15)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent C', 10)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent D', 5)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-02' AS Date), N'Agent D', 35)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent D', 31)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent D', 10)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent E', 32)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-01' AS Date), N'Agent E', 0)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent E', 20)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent E', 12)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent F', 32)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-02' AS Date), N'Agent F', 9)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent F', 11)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent F', 12)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent G', 32)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-02' AS Date), N'Agent G', 0)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent G', 20)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent G', 8)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent H', 32)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent H', 20)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent H', 8)
SELECT statement output:
select date, agent, sale
from agent_sales
date agent sales
--------------------------------------
2021-01-03 00:00:00.000 Agent A 10
2021-02-05 00:00:00.000 Agent A 5
2021-03-10 00:00:00.000 Agent A 20
2021-04-10 00:00:00.000 Agent A 2
2021-01-05 00:00:00.000 Agent B 5
2021-02-06 00:00:00.000 Agent B 28
2021-03-10 00:00:00.000 Agent B 5
2021-04-10 00:00:00.000 Agent B 10
2021-01-02 00:00:00.000 Agent C 35
2021-02-04 00:00:00.000 Agent C 25
2021-03-08 00:00:00.000 Agent C 15
2021-04-10 00:00:00.000 Agent C 10
2021-01-01 00:00:00.000 Agent D 5
2021-02-02 00:00:00.000 Agent D 35
2021-03-10 00:00:00.000 Agent D 31
2021-04-10 00:00:00.000 Agent D 10
2021-01-01 00:00:00.000 Agent E 32
2021-02-02 00:00:00.000 Agent E 0
2021-03-10 00:00:00.000 Agent E 20
2021-04-10 00:00:00.000 Agent E 12
2021-01-01 00:00:00.000 Agent F 32
2021-02-02 00:00:00.000 Agent F 9
2021-03-10 00:00:00.000 Agent F 11
2021-04-10 00:00:00.000 Agent F 12
2021-01-01 00:00:00.000 Agent G 32
2021-02-02 00:00:00.000 Agent G 0
2021-03-10 00:00:00.000 Agent G 20
2021-04-10 00:00:00.000 Agent G 8
2021-01-01 00:00:00.000 Agent H 32
2021-03-10 00:00:00.000 Agent H 20
2021-04-10 00:00:00.000 Agent H 8
I want to get the counts of agents who have crossed 30 sales cumulatively summed, but the counter (cumulative sum logic) should get reset if an agent has not made 30 sales in last 45 days.
Expected output:
YrMon
Count_Agent_more_than_30_sales
Jan21
5
Feb21
7
Mar21
5
Apr21
6
Logic:
Jan21 - 5 since C, E, F, G, H cross 30.
Feb21 - 7 since B, C, D, E, F, G, H cumulatively cross 30.
Mar21 - 5 since A, B, C, D, F cumulatively cross 30, whereas E, G, H are excluded because it has been 45 days since their last entry cumulatively crossing 30 sales.
Apr21 - 6 since A, B, C, D, E, F cumulatively cross 30, whereas G, H are excluded because it has been 45 days since their last entry cumulatively crossing 30 sales.
My query to calculate sum over period:
-- Per-agent cumulative sales, accumulated month by month.
;WITH CTE AS
(
    -- One row per agent and sale date, labelled with a "year month" text key.
    SELECT
        CAST(YEAR([DATE]) AS VARCHAR) + ' ' + CAST(MONTH([DATE]) AS VARCHAR) YRMON,
        [DATE], AGENT, SUM(SALE) SALES
    FROM
        agent_sales
    GROUP BY
        CAST(YEAR([DATE]) AS VARCHAR) + ' ' + CAST(MONTH([DATE]) AS VARCHAR),
        AGENT, [DATE]
)
SELECT
    *,
    -- Order the window by the numeric year and month rather than the YRMON text:
    -- as a string, '2021 10' sorts before '2021 2'. Rows of the same month are
    -- still peers, so each row shows the total up to the end of its month.
    SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE])) SUMOVERPERIOD
FROM CTE
ORDER BY AGENT, [DATE]
Now I am trying to apply the logic on the calculated sum:
-- Per-agent cumulative sales by month, with a flag for totals above 30.
;WITH CTE AS (
    -- One row per agent and sale date, labelled with a "year month" text key.
    SELECT CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR) YRMON, [DATE], AGENT, SUM(SALE) SALES
    FROM agent_sales
    GROUP BY CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR), [DATE], AGENT
)
SELECT *,
    -- The windows are ordered by numeric year and month, not by the YRMON text,
    -- because as a string '2021 10' sorts before '2021 2'.
    SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE])) SUMOVERPERIOD,
    CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE]))>30 THEN 1 ELSE 0 END AS CALC
FROM CTE
ORDER BY AGENT, [DATE]
This is giving me the cumulative sum.
To check cumulative sum based on 45 day:
-- Per-agent cumulative sales by month, plus the gap in days since the agent's
-- previous sale row and a combined "above 30 and previous sale within 45 days" flag.
;WITH CTE AS (
    -- One row per agent and sale date, labelled with a "year month" text key.
    SELECT CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR) YRMON, [DATE], AGENT, SUM(SALE) SALES
    FROM agent_sales
    GROUP BY CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR), [DATE], AGENT
)
SELECT *,
    -- The cumulative windows are ordered by numeric year and month, not by the
    -- YRMON text, because as a string '2021 10' sorts before '2021 2'.
    SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE])) SUMOVERPERIOD,
    CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE]))>30 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30,
    -- LAG defaults to the row's own date, so an agent's first row has a gap of 0 days.
    [DATE],LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]) [LAG],
    DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE]) [DDIFF],
    CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE]))>30 AND DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE])<46 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30_AND_LAST_SALE_IN_45_DAYS
FROM CTE
-- Positions 3 and 2 are AGENT and [DATE]. They are kept positional here because
-- [DATE] appears twice in the select list, which makes the bare name ambiguous.
ORDER BY 3,2
How do I get the above query to reset the cumulative sum counter with 45 day logic? For example - Agent G should not show up in Mar and Apr.
Same SQL as above, but with Month-wise Agent Names:
-- Month and agent name for every row whose cumulative sales exceed 30 and whose
-- previous sale row for the same agent is at most 45 days older.
;WITH CTE AS (
    -- One row per agent and sale date, labelled with a "year month" text key.
    SELECT CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR) YRMON, [DATE], AGENT, SUM(SALE) SALES
    FROM agent_sales
    GROUP BY CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR), [DATE], AGENT
),
CTE1 as (
    SELECT *,
        -- The cumulative windows are ordered by numeric year and month, not by the
        -- YRMON text, because as a string '2021 10' sorts before '2021 2'.
        SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE])) SUMOVERPERIOD,
        CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE]))>30 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30,
        -- LAG defaults to the row's own date, so an agent's first row has a gap of 0 days.
        [DATE] AS [DT],LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]) [LAG],
        DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE]) [DDIFF],
        CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE]))>30 AND DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE])<46 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30_AND_LAST_SALE_IN_45_DAYS
    FROM CTE)
select YRMON,AGENT FROM CTE1 WHERE CUMULATIVE_ABOVE_30_AND_LAST_SALE_IN_45_DAYS=1
Same as above, but with month-wise counts:
-- Per month, the number of rows whose cumulative sales exceed 30 and whose
-- previous sale row for the same agent is at most 45 days older.
;WITH CTE AS (
    -- One row per agent and sale date, labelled with a "year month" text key.
    SELECT CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR) YRMON, [DATE], AGENT, SUM(SALE) SALES
    FROM agent_sales
    GROUP BY CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR), [DATE], AGENT
),
CTE1 as (
    SELECT *,
        -- The cumulative windows are ordered by numeric year and month, not by the
        -- YRMON text, because as a string '2021 10' sorts before '2021 2'.
        SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE])) SUMOVERPERIOD,
        CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE]))>30 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30,
        -- LAG defaults to the row's own date, so an agent's first row has a gap of 0 days.
        [DATE] AS [DT],LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]) [LAG],
        DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE]) [DDIFF],
        CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE]))>30 AND DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE])<46 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30_AND_LAST_SALE_IN_45_DAYS
    FROM CTE)
select YRMON,count(*) FROM CTE1 WHERE CUMULATIVE_ABOVE_30_AND_LAST_SALE_IN_45_DAYS=1
group by YRMON
I am unable to get query to reset the cumulative sum counter with 45 day logic.
Note: I get different results, perhaps misinterpreting one of your rules, but you will get the idea.
Try this:
-- For every month that has sales, count the agents whose sales inside the
-- look-back window ending on the month's closing day reach the target.
-- T-SQL local variables are prefixed with @ ('#' denotes a temporary table and
-- is not valid in DECLARE).
DECLARE @ClosingDay int = 21      -- day of the month on which a cycle ends
      , @CicleDays int = -45      -- window length in days; negative = look back from the closing day
      , @TargetSales int = 30     -- minimum sales inside the window for an agent to count
;
WITH AgentSaleCicle AS
(
    -- Attach to every sale row the window [CicleBegin, CicleEnd] of its month.
    -- The yyyymmdd integer is turned into a string by LTRIM and then into a date.
    SELECT
        *
        , CicleBegin = DATEADD(DAY, @CicleDays, CONVERT(date, LTRIM(YEAR([date])*10000+MONTH([date])*100+@ClosingDay)))
        , CicleEnd = CONVERT(date, LTRIM(YEAR([date])*10000+MONTH([date])*100+@ClosingDay))
    FROM [dbo].[agent_sales]
)
, AgentSaleCicleSum AS
(
    -- Total sales of the same agent that fall inside the row's window (both ends inclusive).
    SELECT
        *
        , CicleSales = (
            SELECT SUM(Sale)
            FROM [dbo].[agent_sales] IA
            WHERE IA.Agent = OA.Agent
              AND IA.[Date] BETWEEN OA.CicleBegin AND OA.CicleEnd
        )
    FROM AgentSaleCicle OA
)
SELECT
    CicleEnd
    -- DISTINCT so that an agent with several sale rows in one month counts once.
    , CicleAgentCount = COUNT(DISTINCT Agent)
FROM AgentSaleCicleSum
WHERE CicleSales >= @TargetSales
GROUP BY CicleEnd
ORDER BY CicleEnd

T-SQL - timespan by overlapping datetime columns

I want maximum period of date range that is overlapping each other and if the period is not clashing other date ranges then I want it as it is.
I have this table:
CREATE TABLE [dbo].[table1]
(
[id] [numeric](18, 0) IDENTITY(1,1) NOT NULL,
[StartDate] [datetime] NOT NULL,
[EndDate] [datetime] NOT NULL
)
And their respective values:
-- Sample day ranges. [id] is an IDENTITY column and is generated automatically,
-- so only the two date columns are listed. The literals use the ISO 8601
-- 'yyyy-mm-ddThh:mi:ss.mmm' form, which is interpreted the same way regardless
-- of the session's language/DATEFORMAT settings.
INSERT INTO [dbo].[table1] ([StartDate], [EndDate])
VALUES (CAST('2013-11-01T00:00:00.000' AS DateTime), CAST('2013-11-10T00:00:00.000' AS DateTime)),
(CAST('2013-11-05T00:00:00.000' AS DateTime), CAST('2013-11-15T00:00:00.000' AS DateTime)),
(CAST('2013-11-10T00:00:00.000' AS DateTime), CAST('2013-11-15T00:00:00.000' AS DateTime)),
(CAST('2013-11-10T00:00:00.000' AS DateTime), CAST('2013-11-25T00:00:00.000' AS DateTime)),
(CAST('2013-11-26T00:00:00.000' AS DateTime), CAST('2013-11-29T00:00:00.000' AS DateTime))
And expected result is:
ID StartDate EndDate
--------------------------------------------------------
1 2013-11-01 00:00:00.000 2013-11-25 00:00:00.000
2 2013-11-26 00:00:00.000 2013-11-29 00:00:00.000
Thanks in advance.
// Edit 1: Thanks.
Works, but there is a new question for breaks in the same table
-- Sample intra-day ranges. [id] is an IDENTITY column and is generated
-- automatically, so only the two date columns are listed. The literals use the
-- ISO 8601 'yyyy-mm-ddThh:mi:ss.mmm' form, which is interpreted the same way
-- regardless of the session's language/DATEFORMAT settings.
INSERT INTO [dbo].[table1] ([StartDate], [EndDate])
VALUES (CAST('2018-05-03T08:30:00.000' AS DateTime), CAST('2018-05-03T08:45:00.000' AS DateTime)),
(CAST('2018-05-03T08:45:00.000' AS DateTime), CAST('2018-05-03T09:30:00.000' AS DateTime)),
(CAST('2018-05-03T08:45:00.000' AS DateTime), CAST('2018-05-03T11:30:00.000' AS DateTime)),
(CAST('2018-05-03T12:45:00.000' AS DateTime), CAST('2018-05-03T13:00:00.000' AS DateTime)),
(CAST('2018-05-03T14:00:00.000' AS DateTime), CAST('2018-05-03T15:45:00.000' AS DateTime)),
(CAST('2018-05-03T14:15:00.000' AS DateTime), CAST('2018-05-03T15:30:00.000' AS DateTime))
And expected result is:
ID StartDate EndDate
--------------------------------------------------------
1 2018-05-03 08:30:00.000 2018-05-03 11:30:00.000
2 2018-05-03 12:45:00.000 2018-05-03 13:00:00.000
3 2018-05-03 14:00:00.000 2018-05-03 15:45:00.000
Very similar answer, but making use of an index and windowed functions to make the gaps and islands analysis cheaper (faster).
http://sqlfiddle.com/#!18/f19569/3
-- Gaps-and-islands: merge date ranges that overlap or touch into one range each.
;WITH look_back AS
(
    -- For every row, the latest EndDate among all rows that sort before it
    -- (NULL for the very first row).
    SELECT
        *,
        MAX(EndDate) OVER (
            ORDER BY StartDate, EndDate
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS PrecedingEndDate
    FROM Table1
),
grouped AS
(
    -- A row opens a new island when nothing before it reaches its StartDate;
    -- the running count of such rows is the island number.
    SELECT
        *,
        SUM(CASE
                WHEN PrecedingEndDate IS NULL OR PrecedingEndDate < StartDate THEN 1
                ELSE 0
            END) OVER (ORDER BY StartDate, EndDate) AS GroupID
    FROM look_back
)
-- One output row per island: sequence number, earliest start, latest end.
SELECT
    ROW_NUMBER() OVER (ORDER BY MIN(StartDate)),
    MIN(StartDate),
    MAX(EndDate)
FROM grouped
GROUP BY GroupID
This is a form of the gaps and islands problem.
In this case, exists and cumulative sum and group by are the route to the solution:
-- Gaps-and-islands: merge date ranges that overlap or touch into one range each.
-- A row starts a new island only when no row that sorts BEFORE it (earlier
-- startdate, ties broken by id) reaches its startdate. Testing for an overlap
-- with ANY other row would also mark the first row of an island as "not a
-- start", which merges that island into the preceding one.
select row_number() over (order by min(startdate)),
       min(startdate), max(enddate)
from (select t1.*,
             -- Running count of island starts = island number.
             sum(isstart) over (order by startdate, id rows unbounded preceding) as grp
      from (select t1.*,
                   (case when exists (select 1
                                      from table1 tt1
                                      where tt1.enddate >= t1.startdate
                                        and (tt1.startdate < t1.startdate
                                             or (tt1.startdate = t1.startdate and tt1.id < t1.id))
                                     )
                         then 0 else 1
                    end) as isstart
            from table1 t1
           ) t1
     ) t1
group by grp;

Calculate multiple shifts time difference in SQL Server

I have a question about SQL Server. Please tell me how to solve login and logout time calculation in SQL Server based on conditions.
If the same employee works multiple shifts on the same date: when the time difference between a logout and the next login is more than 5 hours, the next login is treated as the start of a new shift for that employee.
OnFloor time how much time he spend
OffFloor time how much time he spend
If a logout is missing, Tailgate should be 1 or -1.
Sample input data :
CREATE TABLE [dbo].[emplogindetails]
(
[Emp ID] [float] NULL,
[Area Of Access] [nvarchar](255) NULL,
[Time] [datetime] NULL
) ON [PRIMARY]
GO
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-02T09:00:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4FWhite Rm OUT', CAST(N'2017-08-02T10:30:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-03T09:30:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4FWhite Rm OUT', CAST(N'2017-08-03T12:30:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-03T12:40:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4FWhite Rm OUT', CAST(N'2017-08-03T17:10:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-03T06:30:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4FWhite Rm OUT', CAST(N'2017-08-03T08:30:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-05T23:30:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4FWhite Rm OUT', CAST(N'2017-08-06T01:55:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-06T02:15:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4FWhite Rm OUT', CAST(N'2017-08-06T06:10:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-02T11:00:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4FWhite Rm OUT', CAST(N'2017-08-02T12:00:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-02T13:00:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-06T14:01:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4FWhite Rm OUT', CAST(N'2017-08-06T15:01:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4F White Rm IN', CAST(N'2017-08-06T15:20:00.000' AS DateTime))
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time])
VALUES (1, N'K4FWhite Rm OUT', CAST(N'2017-08-06T20:01:00.000' AS DateTime))
GO
Based on the above data, the 4th is a holiday; he started a night shift on the 5th, logged out on the 6th, and then worked another shift on the same day (the 6th).
I want an output like below:
ShiftDate |ShitStartTime |ShiftEndTime |Total_Time |OnFloor |OffFloor |EmpID |Incount |OutCount |Tailgate
08/05/2017 |2017-08-05 23:30:00.000 |2017-08-06 06:10:00.000 |6:40:00 |06:00:00 |00:40:00 |1 |2 | 2 | 0
08/02/2017 |2017-08-02 09:00:00.000 |2017-08-02 13:00:00.000 |04:00:00 |02:30:00 |01:30:00 |1 |3 | 2 | 1
08/03/2017 |2017-08-03 06:30:00.000 |2017-08-03 17:10:00.000 |10:40:00 |09:30:00 |01:10:00 |1 |3 | 3 | 0
08/06/2017 |2017-08-06 14:01:00.000 |2017-08-06 20:01:00.000 |6:00:00 |05:10:00 |00:50:00 |1 |2 | 2 | 0
I tried like this:
-- Attempted per-employee, per-date shift summary.
-- The derived table "stag" is a UNION ALL of two branches (off-floor seconds and
-- on-floor seconds per event pair); "cnt" holds the IN/OUT counts per employee
-- and date. Both are joined on employee and the date of logintime.
-- Note: each event is paired with the next event only when both fall on the same
-- calendar date (the CONVERT(date, ...) join condition), so a pair that crosses
-- midnight finds no partner.
select
isnull( ShiftDate ,'1900-01-01')as ShiftDate ,
isnull( min(logintime) ,'1900-01-01') as ShitStartTime,
max( case when logouttime is null then '1900-01-01' else logouttime end )ShiftEndTime ,
convert(varchar(8),dateadd(ss,sum(datediff(second,0,dateadd(day,0,Total_Time))),0),108) Total_Time,
convert(varchar(8),dateadd(ss,sum(datediff(second,0,dateadd(day,0,OnFloor))),0),108) OnFloor,
convert(varchar(8),dateadd(ss,sum(datediff(second,0,dateadd(day,0,OffFloor))),0),108) OffFloor,
EmpID ,Incount ,OutCount, Tailgate
from (
-- Branch 1: rows whose status is 'out', paired with the employee's next event
-- when that event is an 'in'. Here "logintime" is the OUT row's time and
-- "logouttime" is the following IN row's time, so the DATEDIFF is off-floor seconds.
select
CONVERT(VARCHAR(12), ( OffFloor + cast ( OnFloor as int)) / 60 / 60 % 24)
+':'+ CONVERT(VARCHAR(2), (OffFloor + cast ( OnFloor as int)) / 60 % 60)
+':'+ CONVERT(VARCHAR(2), (OffFloor + cast ( OnFloor as int)) % 60) as Total_Time
,case when convert( varchar(10),OnFloor )='0' then '0:0:0' else OnFloor end OnFloor
, CONVERT(VARCHAR(12), (OffFloor) / 60 / 60 % 24) +':'+ CONVERT(VARCHAR(2), (OffFloor) / 60 % 60)
+':'+ CONVERT(VARCHAR(2), (OffFloor) % 60) AS OffFloor
,[Emp ID] ,[Area Of Access],status,logintime,logouttime
from (
select isnull( DATEDIFF(SECOND, a.logintime, a.logouttime) ,0) OffFloor , cast ( '0' as varchar) as OnFloor
,[Emp ID] ,[Area Of Access],status,logintime,logouttime
from (
SELECT o.time logouttime,i.Time logintime,i.[Emp ID]
,substring ( i.[Area Of Access] ,charindex('out',i.[Area Of Access]),len(i.[Area Of Access])) status
,i.[Area Of Access]
FROM test.dbo.emplogindetails i left join test.dbo.emplogindetails o
on i.[emp id] = o.[emp id]
AND CONVERT(date, i.time) = CONVERT(date, o.time)
AND o.time > i.time
AND substring ( o.[Area Of Access] ,charindex('in',o.[Area Of Access]),len(o.[Area Of Access]))='in'
and substring ( i.[Area Of Access] ,charindex('out',i.[Area Of Access]),len(i.[Area Of Access]))='out'
and o.Time=(SELECT MIN(o2.time)
FROM test.dbo.emplogindetails o2
WHERE o2.time > i.time
and o2.[Emp ID]=i.[Emp ID]
---and [emp id]='105828'
)
--where i.[emp id]='105828'
)a where a.status='out')a
union all
-- Branch 2: rows whose status is 'in', paired with the employee's next event
-- when that event is an 'out'; the DATEDIFF is on-floor seconds.
select CONVERT(VARCHAR(12), (OffFloor + OnFloor) / 60 / 60 % 24)
+':'+ CONVERT(VARCHAR(2), (OffFloor + OnFloor) / 60 % 60)
+':'+ CONVERT(VARCHAR(2), (OffFloor + OnFloor) % 60) as Calculated_Time
, CONVERT(VARCHAR(12), (OnFloor) / 60 / 60 % 24)
+':'+ CONVERT(VARCHAR(2), (OnFloor) / 60 % 60)+':'+ CONVERT(VARCHAR(2), (OnFloor) % 60) AS OnFloor
, case when convert( varchar(10),OffFloor) ='0' then '0:0:0' else OffFloor end OffFloor
,[Emp ID] ,[Area Of Access],
status,logintime,logouttime
from (
select '0' as OffFloor, isnull( DATEDIFF(SECOND, a.logintime, a.logouttime) ,0) OnFloor
,[Emp ID] ,[Area Of Access],status
,logintime,logouttime
from (
SELECT o.time logouttime,i.Time logintime,i.[Emp ID]
,substring ( i.[Area Of Access] ,charindex('in',i.[Area Of Access]),len(i.[Area Of Access])) status
,i.[Area Of Access]
FROM test.dbo.emplogindetails i left join test.dbo.emplogindetails o
on i.[emp id] = o.[emp id]
AND CONVERT(date, i.time) = CONVERT(date, o.time)
AND o.time > i.time
AND substring ( o.[Area Of Access] ,charindex('out',o.[Area Of Access]),len(o.[Area Of Access]))='out'
and substring ( i.[Area Of Access] ,charindex('in',i.[Area Of Access]),len(i.[Area Of Access]))='in'
and o.Time=(SELECT MIN(o2.time)
FROM test.dbo.emplogindetails o2
WHERE o2.time > i.time
and o2.[Emp ID]=i.[Emp ID]
---and [emp id]='105828'
)
---where i.[emp id]='105828'
)a where a.status='in')stag)stag
join
----get incount and outcount and tailgate information
-- Counts are taken per employee and per calendar date (style 101 = mm/dd/yyyy);
-- Tailgate is the difference between the IN count and the OUT count.
(select [emp id]as empid,incount,outcount,
isnull( incount-outcount ,0) as Tailgate ,Date as ShiftDate
from (
select
i.[Emp ID] ,convert(varchar(10),time,101) as Date,
count( case when substring ( i.[Area Of Access] ,charindex('in',i.[Area Of Access]),len(i.[Area Of Access]))='in'
then 'in' end )Incount
,count( case when substring ( i.[Area Of Access] ,charindex('out',i.[Area Of Access]),len(i.[Area Of Access]))='out'
then 'out' end )outcount
FROM test.dbo.emplogindetails i
--where [emp id]='105828'
group by i.[Emp ID],convert(varchar(10),time,101)
)cnt)cnt
on stag.[Emp ID]=cnt.empid and convert(varchar(10),stag.logintime,101)=cnt.ShiftDate
group by EmpID ,Incount ,OutCount, Tailgate
,isnull( ShiftDate ,'1900-01-01')
This query does not return the expected result when the same date has multiple shifts: if the difference between a logout and the next login is more than 5 hours, the next login should be treated as the start of the next shift.
as per given below logic not working for empid=5 and data looking like below.
Hi ,one records is failed as per our logic .some changes is required in logic. I did not get expected ouput for below example.
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time]) VALUES (5, N'K4F White Rm IN', CAST(N'2017-08-02T23:30:00.000' AS DateTime))
GO
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time]) VALUES (5, N' K4FWhite Rm OUT', CAST(N'2017-08-03T01:30:00.000' AS DateTime))
GO
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time]) VALUES (5, N'K4F White Rm IN', CAST(N'2017-08-03T01:40:00.000' AS DateTime))
GO
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time]) VALUES (5, N'K4F White Rm OUT', CAST(N'2017-08-03T04:00:00.000' AS DateTime))
GO
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time]) VALUES (5, N'K4F White Rm IN', CAST(N'2017-08-03T04:30:00.000' AS DateTime))
GO
INSERT [dbo].[emplogindetails] ([Emp ID], [Area Of Access], [Time]) VALUES (5, N'K4F White Rm OUT', CAST(N'2017-08-03T06:00:00.000' AS DateTime))
GO
Please tell me how to achieve this task in SQL Server
This query returns your expected output
-- Shift summary per employee: one output row per shift with start/end time,
-- total / on-floor / off-floor time (hh:mm), IN and OUT counts and a tailgate flag.
-- A new shift starts when more than 5 hours have elapsed since the employee's
-- previous event.
;with gap_flagged as (
    -- Classify each event by the suffix of [Area Of Access] so that spelling
    -- variants such as 'K4FWhite Rm OUT', ' K4FWhite Rm OUT' and
    -- 'K4F White Rm OUT' are all recognised.
    -- NOTE(review): relies on a case-insensitive collation if the suffix can be
    -- lower case -- confirm.
    select
        [Emp Id], [Area Of Access], [Time]
        , iif(rtrim([Area Of Access]) like '%IN', 1, 0) is_in
        , iif(rtrim([Area Of Access]) like '%OUT', 1, 0) is_out
        -- gr = 1 marks the first event of a shift. The gap is measured in minutes
        -- because datediff(hh, ...) counts crossed hour boundaries, not elapsed
        -- hours (08:00 -> 13:30 would count as 5). The first event of an employee
        -- has no predecessor, so the comparison is unknown and gr = 1.
        , iif(datediff(mi, lag([Time]) over (partition by [Emp Id] order by [Time]), [Time]) <= 300, 0, 1) gr
    from
        emplogindetails
), shift_numbered as (
    -- Running count of shift starts = shift number within the employee.
    select
        *, sum(gr) over (partition by [Emp Id] order by [Time] rows unbounded preceding) group_
    from
        gap_flagged
), event_timed as (
    select
        *
        -- Minutes from this event to the next event of the same shift (NULL for the last event).
        , datediff(mi, [Time], lead([Time]) over (partition by [Emp Id], group_ order by [Time])) diff
        -- A shift whose last event is an IN has no matching logout.
        , iif([Time] = max([Time]) over (partition by [Emp Id], group_) and is_in = 1, 1, 0) Tailgate
    from
        shift_numbered
), shift_totals as (
    select
        [Emp Id], min([Time]) ShitStartTime, max([Time]) ShiftEndTime
        , sum(iif(is_in = 1, diff, 0)) onF    -- minutes following an IN event
        , sum(iif(is_out = 1, diff, 0)) offF  -- minutes following an OUT event
        , sum(diff) tTime
        , sum(is_in) Incount
        , sum(is_out) OutCount
        , max(Tailgate) Tailgate
    from
        event_timed
    group by [Emp Id], group_
)
select
    cast(ShitStartTime as date) ShiftDate, ShitStartTime, ShiftEndTime
    , concat(right(concat('0', tTime/60), 2), ':', right(concat('0',tTime%60), 2)) Total_Time
    , concat(right(concat('0', onF/60), 2), ':', right(concat('0',onF%60), 2)) OnFloor
    , concat(right(concat('0', offF/60), 2), ':', right(concat('0',offF%60), 2)) OffFloor
    , [Emp Id], Incount, OutCount, Tailgate
from
    shift_totals

Capturing Wait Statistics

Based on https://www.simple-talk.com/books/sql-books/sql-server-performance-tuning-using-wait-statistics-a-beginners-guide/ (page 34), I'm trying to create a table and fill it with wait statistics.
-- Create the wait-statistics snapshot table only when it does not exist yet.
-- The guard has to test the same object that is created below (dbo.T_WaitStats);
-- testing a different name lets the CREATE run on every execution, which fails as soon
-- as the table exists.
IF OBJECT_ID(N'dbo.T_WaitStats', N'U') IS NULL
CREATE TABLE [dbo].[T_WaitStats](
[RowNum] [BIGINT] IDENTITY(1, 1) ,  -- insertion sequence
[CaptureDate] [DATETIME] ,          -- when the snapshot row was captured
[WaitType] [NVARCHAR](120) ,        -- wait type name, or the text of a header row
[Wait_S] [DECIMAL](14, 2) ,
[Resource_S] [DECIMAL](14, 2),
[Signal_S] [DECIMAL](14, 2) ,
[WaitCount] [BIGINT] ,
[Percentage] [DECIMAL](4, 2) ,
[AvgWait_S] [DECIMAL](14, 2),
[AvgRes_S] [DECIMAL](14, 2),
[AvgSig_S] [DECIMAL](14, 2));
GO
-- Header row for this capture: only [WaitType] is supplied, so the remaining
-- non-identity columns are left NULL.
INSERT INTO dbo.T_WaitStats([WaitType])
VALUES ('Wait Statistics for '+ CAST(GETDATE() AS NVARCHAR(19)));
-- Detail rows: the dynamic SQL ranks the rows of sys.dm_os_wait_stats by wait_time_ms
-- (excluding the listed wait types) and keeps a row while the summed percentage of the
-- higher-ranked rows is below 95.
-- NOTE(review): the SELECT inside the string yields 11 columns ([AvgWait_S] is aliased
-- twice, the second time over the Resource_S average), while the column list below names
-- 10 columns. INSERT ... EXEC requires the two counts to match.
INSERT INTO dbo.T_WaitStats(
[CaptureDate],
[WaitType],
[Wait_S],
[Resource_S],
[Signal_S],
[WaitCount],
[Percentage],
[AvgWait_S],
[AvgRes_S],
[AvgSig_S]
)
EXEC(
'WITH [Waits] AS(
SELECT
[wait_type],
[wait_time_ms] / 1000.0 AS [Wait_S],
([wait_time_ms] - [signal_wait_time_ms]) / 1000.0 AS [Resource_S],
[signal_wait_time_ms] / 1000.0 AS [Signal_S],
[waiting_tasks_count] AS [WaitCount],
100.0 * [wait_time_ms] / SUM ([wait_time_ms]) OVER() AS [Percentage],
ROW_NUMBER() OVER(ORDER BY [wait_time_ms] DESC) AS [RowNum]
FROM sys.dm_os_wait_stats
WHERE [wait_type] NOT IN (
N''BROKER_EVENTHANDLER'', N''BROKER_RECEIVE_WAITFOR'',
N''BROKER_TASK_STOP'', N''BROKER_TO_FLUSH'',
N''BROKER_TRANSMITTER'', N''CHECKPOINT_QUEUE'',
N''CHKPT'', N''CLR_AUTO_EVENT'',
N''CLR_MANUAL_EVENT''
)
)
SELECT
GETDATE() AS [CaptureDate],
[W1].[wait_type] AS [WaitType],
CAST ([W1].[Wait_S] AS DECIMAL(14, 2)) AS [Wait_S],
CAST ([W1].[Resource_S] AS DECIMAL(14, 2)) AS [Resource_S],
CAST ([W1].[Signal_S] AS DECIMAL(14, 2)) AS [Signal_S],
[W1].[WaitCount] AS [WaitCount],
CAST ([W1].[Percentage] AS DECIMAL(4, 2)) AS [Percentage],
CAST (([W1].[Wait_S] / [W1].[WaitCount]) AS DECIMAL (14, 4))
AS [AvgWait_S],
CAST (([W1].[Resource_S] / [W1].[WaitCount]) AS DECIMAL (14, 4))
AS [AvgWait_S],
CAST (([W1].[Resource_S] / [W1].[WaitCount]) AS DECIMAL (14, 4))
AS [AvgRes_S],
CAST (([W1].[Signal_S] / [W1].[WaitCount]) AS DECIMAL (14, 4))
AS [AvgSig_S]
FROM [Waits] AS [W1]
INNER JOIN [Waits] AS [W2]
ON [W2].[RowNum] <= [W1].[RowNum]
GROUP BY [W1].[RowNum], [W1].[wait_type], [W1].[Wait_S],
[W1].[Resource_S], [W1].[Signal_S], [W1].[WaitCount],
[W1].[Percentage]
HAVING SUM ([W2].[Percentage]) - [W1].[Percentage] < 95;'
);
The first insert works, but the second gives me an error:
(1 row(s) affected) Msg 213, Level 16, State 7, Line 1 Column name or
number of supplied values does not match table definition.
Running second statement without insert (only EXEC part) works.
What might cause this error?
There is a copy-paste error in your query: it returns 11 columns, but the INSERT column list expects 10. The duplicated line is:
CAST (([W1].[Resource_S] / [W1].[WaitCount]) AS DECIMAL (14, 4)) AS [AvgWait_S],
query -
-- Recreate the wait-statistics snapshot table from scratch.
-- Any existing dbo.T_WaitStats (and its rows) is dropped first.
IF OBJECT_ID('dbo.T_WaitStats', 'U') IS NOT NULL
DROP TABLE dbo.T_WaitStats;
GO
CREATE TABLE [dbo].[T_WaitStats] (
RowNum INT IDENTITY (1, 1) PRIMARY KEY,  -- insertion sequence
CaptureDate DATETIME,
WaitType NVARCHAR(120),
Wait_S DECIMAL(14,2),
Resource_S DECIMAL(14,2),
Signal_S DECIMAL(14,2),
WaitCount BIGINT,
-- DECIMAL(5,2) rather than DECIMAL(4,2): a share of total wait time can reach 100.00,
-- which DECIMAL(4,2) (maximum 99.99) rejects with an arithmetic overflow error.
Percentage DECIMAL(5,2),
AvgWait_S DECIMAL(14,2),
AvgRes_S DECIMAL(14,2),
AvgSig_S DECIMAL(14,2)
);
GO
-- Header row for this capture: only WaitType is supplied, the other non-identity
-- columns stay NULL.
INSERT INTO dbo.T_WaitStats ([WaitType])
VALUES ('Wait Statistics for ' + CAST(GETDATE() AS NVARCHAR(19)));

-- Detail rows: one row per top wait type.
-- The CTE feeds the INSERT directly. Nothing in the statement is built at run time, so
-- wrapping it in EXEC('...') only added quote doubling and hid column-count mistakes
-- until execution.
WITH [Waits] AS (
    SELECT
        [wait_type],
        [wait_time_ms] / 1000.0 AS [Wait_S],
        ([wait_time_ms] - [signal_wait_time_ms]) / 1000.0 AS [Resource_S],
        [signal_wait_time_ms] / 1000.0 AS [Signal_S],
        [waiting_tasks_count] AS [WaitCount],
        -- NULLIF: when no wait time has been recorded at all the share is NULL
        -- instead of a divide-by-zero error.
        100.0 * [wait_time_ms] / NULLIF(SUM([wait_time_ms]) OVER (), 0) AS [Percentage],
        -- [wait_type] breaks ties so both references to this CTE (W1 and W2 below)
        -- see the same numbering.
        ROW_NUMBER() OVER (ORDER BY [wait_time_ms] DESC, [wait_type]) AS [RowNum]
    FROM sys.dm_os_wait_stats
    WHERE [wait_type] NOT IN (
        N'BROKER_EVENTHANDLER', N'BROKER_RECEIVE_WAITFOR',
        N'BROKER_TASK_STOP', N'BROKER_TO_FLUSH',
        N'BROKER_TRANSMITTER', N'CHECKPOINT_QUEUE',
        N'CHKPT', N'CLR_AUTO_EVENT',
        N'CLR_MANUAL_EVENT'
    )
)
INSERT INTO dbo.T_WaitStats (CaptureDate, WaitType, Wait_S, Resource_S, Signal_S, WaitCount, Percentage, AvgWait_S, AvgRes_S, AvgSig_S)
SELECT
    GETDATE() AS [CaptureDate],
    [W1].[wait_type] AS [WaitType],
    [W1].[Wait_S] AS [Wait_S],
    [W1].[Resource_S] AS [Resource_S],
    [W1].[Signal_S] AS [Signal_S],
    [W1].[WaitCount] AS [WaitCount],
    [W1].[Percentage] AS [Percentage],
    -- Averages are NULL (not an error) for a wait type with a zero task count.
    [W1].[Wait_S] / NULLIF([W1].[WaitCount], 0) AS [AvgWait_S],
    [W1].[Resource_S] / NULLIF([W1].[WaitCount], 0) AS [AvgRes_S],
    [W1].[Signal_S] / NULLIF([W1].[WaitCount], 0) AS [AvgSig_S]
FROM [Waits] AS [W1]
INNER JOIN [Waits] AS [W2]
    ON [W2].[RowNum] <= [W1].[RowNum]
GROUP BY [W1].[RowNum], [W1].[wait_type], [W1].[Wait_S],
    [W1].[Resource_S], [W1].[Signal_S], [W1].[WaitCount],
    [W1].[Percentage]
-- Keep a wait type while the summed share of the higher-ranked wait types is below 95.
HAVING SUM([W2].[Percentage]) - [W1].[Percentage] < 95;

How to calculate peak-valley drawdown with SQL Server 2012?

I'm wondering if some of the new SQL Server 2012 functions would help with this problem. Here's my DDL and sample data
-- Sample balance history used by the drawdown queries below.
-- dt is given as binary datetime literals; the literals increase down the list, while
-- transactionId does not (191 and 192 are listed before 188).
CREATE TABLE dbo.transactions
(
    transactionId INT            NOT NULL,
    dt            DATETIME       NOT NULL,
    balance       DECIMAL(22, 6) NULL
);
GO
INSERT INTO dbo.transactions (transactionId, dt, balance)
VALUES
    (174, CAST(0x0000A19600000000 AS DATETIME), 1000.000000),
    (178, CAST(0x0000A19700869520 AS DATETIME), 1100.000000),
    (179, CAST(0x0000A19700933780 AS DATETIME), 1212.000000),
    (180, CAST(0x0000A19700B4B9A0 AS DATETIME), 1342.000000),
    (181, CAST(0x0000A19700BB0AD0 AS DATETIME), 1198.000000),
    (182, CAST(0x0000A19700E67030 AS DATETIME), 1234.000000),
    (183, CAST(0x0000A19700F358E0 AS DATETIME), 900.000000),
    (184, CAST(0x0000A19700F58B60 AS DATETIME), 876.000000),
    (185, CAST(0x0000A19700F9AA10 AS DATETIME), 889.000000),
    (186, CAST(0x0000A19701034700 AS DATETIME), 1133.000000),
    (187, CAST(0x0000A19A0089E0E0 AS DATETIME), 1400.000000),
    (191, CAST(0x0000A19A009450C0 AS DATETIME), 1566.000000),
    (192, CAST(0x0000A19A00A5E4C0 AS DATETIME), 1800.000000),
    (188, CAST(0x0000A19A00AA49C0 AS DATETIME), 1900.000000),
    (189, CAST(0x0000A19A00B54640 AS DATETIME), 1456.000000),
    (190, CAST(0x0000A19A00CAB2A0 AS DATETIME), 1234.000000),
    (193, CAST(0x0000A19A00F12660 AS DATETIME), 1400.000000),
    (195, CAST(0x0000A19A010087E0 AS DATETIME), 1444.000000),
    (196, CAST(0x0000A19E00C7F380 AS DATETIME), 1556.000000),
    (197, CAST(0x0000A19E00FE5560 AS DATETIME), 1975.000000);
I am after the largest percentage peak-valley drawdown of the balance for the series, ordered by dt. The peak to valley drawdown is the greatest percent change in a high in the balance to the lowest low before the previous high is crossed. Better described here http://www.investopedia.com/terms/p/peak-to-valley-drawdown.asp In this data set we have two drawdowns.
First one is from 1342.00 to 876.00 (-34.72%) and the second one from 1900 to 1234 (-35.05%)
The biggest peak to valley percent drawdown in this set therefore, is -35.05%. I need a SQL Server query that can provide this value. Would rather not have to use temp tables if possible. Any ideas?
I don't know that any SQL Server 2012 functionality will get this value any more succinctly or efficiently than this:
-- Largest percentage drop from any balance to any later, lower balance.
-- "Later" is decided by dt, the column that orders the series; transactionId is not in
-- dt order in the sample data, so it cannot stand in for time.
;WITH x AS
(
    SELECT [drop] = ((s.balance - e.balance) * 100.0 / NULLIF(s.balance, 0)) -- NULL, not an error, for a zero starting balance
    FROM dbo.transactions AS s          -- s: candidate high
    INNER JOIN dbo.transactions AS e    -- e: a later, lower balance
        ON s.dt < e.dt
       AND s.balance > e.balance
)
-- Negated so the drawdown is reported as a negative percentage.
SELECT [Largest Drawdown] = -MAX([drop]) FROM x;
Result:
Largest Drawdown
----------------
-35.05263157894
I do confess, though, that this works for your sample data only because your valleys are convenient for the problem you want to solve. If you change the 4th-last row to 875 this query considers that a part of the set. In other words, I've calculated drawdown here for the entire range, rather than just the range until the high is crossed again.
I suspect there is a better way to solve this query using gap/island techniques and I will try to return to it when I can focus on it adequately.
This will miss the case where the first entry is a peak.
-- Drawdown per qualifying peak: for each peak, the lowest balance that follows it before
-- the next qualifying peak (or up to the end of the series for the last one).
;with trnsCTE (ID,bal) AS
( -- number the rows in dt order so neighbours can be joined on ID +/- 1
SELECT ROW_NUMBER() OVER (ORDER BY DT) as ID, [balance]
from [transactions]
),
trnsCTE2 (ID,bal) AS
( -- local peaks: T2 is higher than both the row before it (T1) and the row after it (T3).
  -- The row after must be compared with the peak itself (T2); comparing it with T1
  -- misses peaks whose following value stays above the preceding one (e.g. 100, 120, 110).
  -- The first and last rows have no T1 / T3 and are therefore never reported as peaks.
select t2.ID, t2.bal
from trnsCTE as T1
inner join trnsCTE as T2
on ( t2.ID = t1.ID+1
and t2.bal > t1.bal )
inner join trnsCTE as T3
on t3.ID = t2.ID+1
and t3.bal < t2.bal
)
,
trnsCTE3 (ID,bal) AS
( -- keep the first peak, plus every peak that is higher than all earlier peaks
SELECT distinct T1.ID, T1.BAL
from trnsCTE2 as T1
where T1.ID = (select min(ID) from trnsCTE2)
or T1.bal > (select max(bal) from trnsCTE2 where trnsCTE2.ID < t1.ID)
)
-- calculate: lowest balance after each kept peak and the percentage drop to it
select t1.id, t1.bal, min(trnsCTE.bal) as trough_bal, (t1.bal - min(trnsCTE.bal)) * 100 / t1.bal as drawdown_pct
from trnsCTE
inner join trnsCTE3 t1
on t1.id < trnsCTE.id
-- rows strictly between this peak and the next kept peak ...
and ( trnsCTE.id < (select min(id) from trnsCTE3 where id > t1.id)
or
-- ... or every later row when this is the last kept peak
t1.id = ( select max(id) from trnsCTE3 ) )
group by t1.id, t1.bal
order by t1.id
This translates directly to #temp tables.
I did not use a #temp table because the OP said they did not want to use one.
-- Temp-table form of the first CTE: balances numbered in dt order.
-- #trnsCTE is not created in this snippet; it presumably exists already - confirm.
INSERT INTO #trnsCTE (ID, bal)
SELECT
    ROW_NUMBER() OVER (ORDER BY DT) AS ID,
    [balance]
FROM [transactions];
-- Peak-to-valley drawdown per peak, using window functions only.
--   trend          : direction of the balance after (forward) and before (backward) each
--                    row; NULLs from lead/lag at the ends of the series fall into the
--                    -1 branch, which is the desired result.
--   turning_points : rows where both directions agree, i.e. local peaks (-1) and local
--                    valleys (+1), with the running maximum balance per kind.
--   grouped        : keeps every valley but only peaks that equal the running peak
--                    maximum; a running count of kept peaks assigns each valley to the
--                    most recent kept peak.
--   paired         : pairs every row with the LOWEST balance in its group and that row's
--                    dt. Taking the last valley of the group instead under-reports the
--                    drawdown when an earlier valley is deeper (e.g. 100, 50, 80, 70, 120).
--                    NULLIF blanks the trough when the group holds the peak alone.
;with trend as (
    select
        dt
        , balance
        , (case when lead(balance, 1) over (order by dt) > balance then 1 else -1 end) as forward_trend
        , (case when lag(balance, 1) over (order by dt) > balance then 1 else -1 end) as backward_trend
    from transactions
),
turning_points as (
    select
        dt
        , balance
        , (case when forward_trend = -1 then 1 else 0 end) as isPeak
        , max(balance) over (partition by forward_trend order by dt) as current_max_balance
    from trend
    where forward_trend = backward_trend
),
grouped as (
    select
        dt
        , balance
        , isPeak
        , sum(isPeak) over (order by dt) as peak_valley_group
    from turning_points
    where (isPeak = 1 and balance = current_max_balance)
        or isPeak = 0
),
paired as (
    select
        dt as peak_dt
        , balance as peak_balance
        , nullif(first_value(dt) over (partition by peak_valley_group order by balance, dt rows between unbounded preceding and unbounded following), dt) as trough_dt
        , nullif(min(balance) over (partition by peak_valley_group), balance) as trough_balance
        , isPeak
    from grouped
)
select
    peak_dt
    , peak_balance
    , trough_dt
    , trough_balance
    , (peak_balance - trough_balance) * 100.0 / peak_balance as drawdown
from paired
where isPeak = 1
order by peak_dt

Resources