How to reset the cumulative sum based on condition? - sql-server

Sample data:
CREATE TABLE [dbo].[agent_sales]
(
[date] [date] NULL,
[agent] [nvarchar](50) NULL,
[sale] [int] NULL
) ON [PRIMARY]
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-03' AS Date), N'Agent A', 10)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-05' AS Date), N'Agent A', 5)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent A', 20)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent A', 2)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-05' AS Date), N'Agent B', 5)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-06' AS Date), N'Agent B', 28)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent B', 5)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent B', 10)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-02' AS Date), N'Agent C', 35)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-04' AS Date), N'Agent C', 25)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-08' AS Date), N'Agent C', 15)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent C', 10)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent D', 5)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-02' AS Date), N'Agent D', 35)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent D', 31)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent D', 10)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent E', 32)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-01' AS Date), N'Agent E', 0)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent E', 20)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent E', 12)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent F', 32)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-02' AS Date), N'Agent F', 9)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent F', 11)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent F', 12)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent G', 32)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-02-02' AS Date), N'Agent G', 0)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent G', 20)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent G', 8)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-01-01' AS Date), N'Agent H', 32)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-03-10' AS Date), N'Agent H', 20)
GO
INSERT [dbo].[agent_sales] ([date], [agent], [sale]) VALUES (CAST(N'2021-04-10' AS Date), N'Agent H', 8)
SELECT statement output:
select date, agent, sale
from agent_sales
date agent sales
--------------------------------------
2021-01-03 00:00:00.000 Agent A 10
2021-02-05 00:00:00.000 Agent A 5
2021-03-10 00:00:00.000 Agent A 20
2021-04-10 00:00:00.000 Agent A 2
2021-01-05 00:00:00.000 Agent B 5
2021-02-06 00:00:00.000 Agent B 28
2021-03-10 00:00:00.000 Agent B 5
2021-04-10 00:00:00.000 Agent B 10
2021-01-02 00:00:00.000 Agent C 35
2021-02-04 00:00:00.000 Agent C 25
2021-03-08 00:00:00.000 Agent C 15
2021-04-10 00:00:00.000 Agent C 10
2021-01-01 00:00:00.000 Agent D 5
2021-02-02 00:00:00.000 Agent D 35
2021-03-10 00:00:00.000 Agent D 31
2021-04-10 00:00:00.000 Agent D 10
2021-01-01 00:00:00.000 Agent E 32
2021-02-01 00:00:00.000 Agent E 0
2021-03-10 00:00:00.000 Agent E 20
2021-04-10 00:00:00.000 Agent E 12
2021-01-01 00:00:00.000 Agent F 32
2021-02-02 00:00:00.000 Agent F 9
2021-03-10 00:00:00.000 Agent F 11
2021-04-10 00:00:00.000 Agent F 12
2021-01-01 00:00:00.000 Agent G 32
2021-02-02 00:00:00.000 Agent G 0
2021-03-10 00:00:00.000 Agent G 20
2021-04-10 00:00:00.000 Agent G 8
2021-01-01 00:00:00.000 Agent H 32
2021-03-10 00:00:00.000 Agent H 20
2021-04-10 00:00:00.000 Agent H 8
I want to get the counts of agents who have crossed 30 sales cumulatively summed, but the counter (cumulative sum logic) should get reset if an agent has not made 30 sales in last 45 days.
Expected output:
YrMon
Count_Agent_more_than_30_sales
Jan21
5
Feb21
7
Mar21
5
Apr21
6
Logic:
Jan21 - 5 since C, E, F, G, H cross 30.
Feb21 - 7 since B, C, D, E, F, G, H cumulatively cross 30.
Mar21 - 5 since A, B, C, D, F cumulatively cross 30, whereas E, G, H are excluded because more than 45 days have passed since their last entry that cumulatively crossed 30 sales.
Apr21 - 6 since A, B, C, D, E, F cumulatively cross 30, whereas G, H are excluded because more than 45 days have passed since their last entry that cumulatively crossed 30 sales.
My query to calculate sum over period:
-- Per-agent running sales total, accumulated month by month.
-- The CTE yields one row per (agent, sale date); YRMON is only a display label.
;WITH CTE AS
(
    SELECT
        CAST(YEAR([DATE]) AS VARCHAR(4)) + ' ' + CAST(MONTH([DATE]) AS VARCHAR(2)) AS YRMON,
        [DATE],
        AGENT,
        SUM(SALE) AS SALES
    FROM
        agent_sales
    GROUP BY
        CAST(YEAR([DATE]) AS VARCHAR(4)) + ' ' + CAST(MONTH([DATE]) AS VARCHAR(2)),
        AGENT,
        [DATE]
)
SELECT
    *,
    -- Order the window by numeric year/month, not by the YRMON text:
    -- as a string '2021 10' sorts before '2021 2', which would scramble the
    -- running total once months 10-12 appear. Rows of the same month remain
    -- peers (default RANGE frame), so they share the month-level total.
    SUM(SALES) OVER (PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE])) AS SUMOVERPERIOD
FROM CTE
ORDER BY AGENT, [DATE]
Now I am trying to apply the logic on the calculated sum:
-- Per-agent running sales total plus a 0/1 flag (CALC) marking rows whose
-- running total exceeds 30.
;WITH CTE AS
(
    SELECT
        CAST(YEAR([DATE]) AS VARCHAR(4)) + ' ' + CAST(MONTH([DATE]) AS VARCHAR(2)) AS YRMON,
        [DATE],
        AGENT,
        SUM(SALE) AS SALES
    FROM agent_sales
    GROUP BY
        CAST(YEAR([DATE]) AS VARCHAR(4)) + ' ' + CAST(MONTH([DATE]) AS VARCHAR(2)),
        [DATE],
        AGENT
),
RUNNING AS
(
    SELECT
        *,
        -- Numeric year/month ordering: the YRMON text would sort '2021 10'
        -- before '2021 2' and corrupt the running total.
        SUM(SALES) OVER (PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE])) AS SUMOVERPERIOD
    FROM CTE
)
SELECT
    *,
    CASE WHEN SUMOVERPERIOD > 30 THEN 1 ELSE 0 END AS CALC
FROM RUNNING
ORDER BY AGENT, [DATE]
This is giving me the cumulative sum.
To check cumulative sum based on 45 day:
;WITH CTE AS (SELECT CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR) YRMON, [DATE], AGENT, SUM(SALE) SALES
FROM agent_sales
GROUP BY CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR), [DATE], AGENT
)
SELECT *, SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YRMON) SUMOVERPERIOD,
CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YRMON)>30 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30,
[DATE],LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]) [LAG],
DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE]) [DDIFF],
CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YRMON)>30 AND DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE])<46 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30_AND_LAST_SALE_IN_45_DAYS
FROM CTE
ORDER BY 3,2
How do I get the above query to reset the cumulative sum counter with 45 day logic? For example - Agent G should not show up in Mar and Apr.
Same SQL as above, but with Month-wise Agent Names:
;WITH CTE AS (SELECT CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR) YRMON, [DATE], AGENT, SUM(SALE) SALES
FROM agent_sales
GROUP BY CAST(YEAR([DATE]) AS VARCHAR)+' '+CAST(MONTH([DATE]) AS VARCHAR), [DATE], AGENT
),
CTE1 as (SELECT *, SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YRMON) SUMOVERPERIOD,
CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YRMON)>30 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30,
[DATE] AS [DT],LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]) [LAG],
DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE]) [DDIFF],
CASE WHEN SUM(SALES) OVER(PARTITION BY AGENT ORDER BY YRMON)>30 AND DATEDIFF(DAY,LAG([DATE],1,[DATE]) OVER(PARTITION BY AGENT ORDER BY [DATE]),[DATE])<46 THEN 1 ELSE 0 END AS CUMULATIVE_ABOVE_30_AND_LAST_SALE_IN_45_DAYS
FROM CTE)
select YRMON,AGENT FROM CTE1 WHERE CUMULATIVE_ABOVE_30_AND_LAST_SALE_IN_45_DAYS=1
Same as above, but with month-wise counts:
-- Month-wise count of agents whose running total exceeds 30 and whose previous
-- sale row is at most 45 days old.
-- NOTE: this still does not reset the running total after a 45-day gap; it only
-- filters rows by the gap to the immediately preceding sale.
;WITH CTE AS
(
    SELECT
        CAST(YEAR([DATE]) AS VARCHAR(4)) + ' ' + CAST(MONTH([DATE]) AS VARCHAR(2)) AS YRMON,
        [DATE],
        AGENT,
        SUM(SALE) AS SALES
    FROM agent_sales
    GROUP BY
        CAST(YEAR([DATE]) AS VARCHAR(4)) + ' ' + CAST(MONTH([DATE]) AS VARCHAR(2)),
        [DATE],
        AGENT
),
CTE1 AS
(
    SELECT
        *,
        -- Numeric year/month ordering; ordering by the YRMON text would place
        -- '2021 10' before '2021 2'.
        SUM(SALES) OVER (PARTITION BY AGENT ORDER BY YEAR([DATE]), MONTH([DATE])) AS SUMOVERPERIOD,
        -- Days since the agent's previous sale row; 0 for the agent's first row
        -- because LAG defaults to the current date.
        DATEDIFF(DAY, LAG([DATE], 1, [DATE]) OVER (PARTITION BY AGENT ORDER BY [DATE]), [DATE]) AS DDIFF
    FROM CTE
)
SELECT
    YRMON,
    -- Count agents, not rows: an agent with two qualifying dates in one month
    -- must be counted once.
    COUNT(DISTINCT AGENT) AS AGENT_COUNT
FROM CTE1
WHERE SUMOVERPERIOD > 30
  AND DDIFF < 46
GROUP BY YRMON
ORDER BY MIN([DATE])
I am unable to get query to reset the cumulative sum counter with 45 day logic.

Note: I get different results, perhaps misinterpreting one of your rules, but you will get the idea.
Try this:
-- Counts, per monthly closing date, the agents whose sales inside the look-back
-- window ending on that closing date reach the target.
-- T-SQL variables must start with '@' ('#' denotes temp tables and does not parse here).
DECLARE @ClosingDay  int = 21    -- day of the month on which each cycle ends
      , @CicleDays   int = -45   -- window length in days; negative = counted back from the closing day
      , @TargetSales int = 30    -- minimum window sales for an agent to be counted
;
WITH AgentSaleCicle AS
(
    -- Attach to every sale row the cycle window of the month the sale falls in.
    SELECT
        s.[date]
      , s.[agent]
      , s.[sale]
      , CicleBegin = DATEADD(DAY, @CicleDays, DATEFROMPARTS(YEAR(s.[date]), MONTH(s.[date]), @ClosingDay))
      , CicleEnd   = DATEFROMPARTS(YEAR(s.[date]), MONTH(s.[date]), @ClosingDay)
    FROM [dbo].[agent_sales] AS s
)
, AgentSaleCicleSum AS
(
    -- Total sales of the same agent inside [CicleBegin, CicleEnd] (both ends inclusive).
    SELECT
        OA.[agent]
      , OA.CicleEnd
      , CicleSales = (
            SELECT SUM(IA.[sale])
            FROM [dbo].[agent_sales] AS IA
            WHERE IA.[agent] = OA.[agent]
              AND IA.[date] BETWEEN OA.CicleBegin AND OA.CicleEnd
        )
    FROM AgentSaleCicle AS OA
)
SELECT
    CicleEnd
    -- DISTINCT so an agent with several sale rows in one month is counted once.
  , CicleAgentCount = COUNT(DISTINCT [agent])
FROM AgentSaleCicleSum
WHERE CicleSales >= @TargetSales
GROUP BY CicleEnd
ORDER BY CicleEnd

Related

T-SQL - timespan by overlapping datetime columns

I want to merge date ranges that overlap each other into the longest combined period; a range that does not overlap any other range should be returned unchanged.
I have this table:
CREATE TABLE [dbo].[table1]
(
[id] [numeric](18, 0) IDENTITY(1,1) NOT NULL,
[StartDate] [datetime] NOT NULL,
[EndDate] [datetime] NOT NULL
)
And their respective values:
INSERT INTO [dbo].[table1]
VALUES (CAST('2013-11-01 00:00:00.000' AS DateTime), CAST('2013-11-10 00:00:00.000' AS DateTime)),
(CAST('2013-11-05 00:00:00.000' AS DateTime), CAST('2013-11-15 00:00:00.000' AS DateTime)),
(CAST('2013-11-10 00:00:00.000' AS DateTime), CAST('2013-11-15 00:00:00.000' AS DateTime)),
(CAST('2013-11-10 00:00:00.000' AS DateTime), CAST('2013-11-25 00:00:00.000' AS DateTime)),
(CAST('2013-11-26 00:00:00.000' AS DateTime), CAST('2013-11-29 00:00:00.000' AS DateTime))
And expected result is:
ID StartDate EndDate
--------------------------------------------------------
1 2013-11-01 00:00:00.000 2013-11-25 00:00:00.000
2 2013-11-26 00:00:00.000 2013-11-29 00:00:00.000
Thanks in advance.
// Edit 1: Thanks.
Works, but there is a new question for breaks in the same table
INSERT INTO [dbo].[table1]
VALUES (CAST('2018-05-03 08:30:00.000' AS DateTime), CAST('2018-05-03 08:45:00.000' AS DateTime)),
(CAST('2018-05-03 08:45:00.000' AS DateTime), CAST('2018-05-03 09:30:00.000' AS DateTime)),
(CAST('2018-05-03 08:45:00.000' AS DateTime), CAST('2018-05-03 11:30:00.000' AS DateTime)),
(CAST('2018-05-03 12:45:00.000' AS DateTime), CAST('2018-05-03 13:00:00.000' AS DateTime)),
(CAST('2018-05-03 14:00:00.000' AS DateTime), CAST('2018-05-03 15:45:00.000' AS DateTime)),
(CAST('2018-05-03 14:15:00.000' AS DateTime), CAST('2018-05-03 15:30:00.000' AS DateTime))
And expected result is:
ID StartDate EndDate
--------------------------------------------------------
1 2018-05-03 08:30:00.000 2018-05-03 11:30:00.000
2 2018-05-03 12:45:00.000 2018-05-03 13:00:00.000
3 2018-05-03 14:00:00.000 2018-05-03 15:45:00.000
Very similar answer, but making use of an index and windowed functions to make the gaps and islands analysis cheaper (faster).
http://sqlfiddle.com/#!18/f19569/3
-- Gaps-and-islands: collapse overlapping [StartDate, EndDate] ranges of Table1
-- into one row per island, numbered by island start.
;WITH look_back AS
(
    -- Latest EndDate among all rows that sort before the current one
    -- (NULL for the very first row).
    SELECT
        *,
        MAX(EndDate) OVER (
            ORDER BY StartDate, EndDate
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS PrecedingEndDate
    FROM Table1
),
grouped AS
(
    -- A row opens a new island when no earlier row reaches its StartDate;
    -- the running count of such openers is the island id.
    SELECT
        *,
        SUM(CASE WHEN PrecedingEndDate >= StartDate THEN 0 ELSE 1 END)
            OVER (ORDER BY StartDate, EndDate) AS GroupID
    FROM look_back
)
SELECT
    ROW_NUMBER() OVER (ORDER BY MIN(StartDate)),
    MIN(StartDate),
    MAX(EndDate)
FROM grouped
GROUP BY GroupID
This is a form of the gaps and islands problem.
In this case, exists and cumulative sum and group by are the route to the solution:
-- Gaps-and-islands via EXISTS + cumulative sum + GROUP BY.
-- A row starts a new island when no row that begins earlier is still running at
-- its startdate. (Testing for "overlaps any other row" is not enough: two
-- separate islands that each contain several overlapping rows would all get
-- isstart = 0 and be merged into one group.)
select row_number() over (order by min(startdate)) as id,
       min(startdate) as startdate,
       max(enddate)   as enddate
from (select flagged.*,
             -- Default RANGE frame: rows sharing a startdate are peers and get
             -- the same grp value, so they stay in the same island.
             sum(isstart) over (order by startdate) as grp
      from (select t1.*,
                   (case when exists (select 1
                                      from table1 tt1
                                      where tt1.startdate < t1.startdate
                                        and tt1.enddate >= t1.startdate
                                     )
                         then 0 else 1
                    end) as isstart
            from table1 t1
           ) flagged
     ) islands
group by grp;

how to retrieve same empid have multiple deptnos with latest date in sql server

I have a question about SQL Server: how can I find rows where the same empid and name have more than one deptno,
and, when more than one deptno is found, keep them only if they belong to the latest entrydate for that empid and name?
example:
empid |name |deptno |entrydate |deptname
6 |x |90 |2018-01-29 |PM
6 |x |80 |2018-01-29 |lead
6 |x |150 |2018-02-09 |tech
6 |y |170 |2015-03-09 |jn
6 |y |110 |2017-12-01 |Tester
6 |y |120 |2017-12-01 |analyst
6 |z |130 |2016-10-08 |support
Here empid 6, name x has multiple deptnos on entrydate 2018-01-29, but we do not need these records because the latest entrydate for the same empid and name is 2018-02-09.
Another case: empid 6, name y has multiple deptnos on entrydate 2017-12-01. We need these records because 2017-12-01 is the latest entrydate for this empid and name.
Another case: empid 6, name z, entrydate 2016-10-08 does not have multiple deptnos, so it should not appear in the output.
Table :
CREATE TABLE [dbo].[empcnt](
[empid] [int] NULL,
[name] [varchar](50) NULL,
[deptno] [int] NULL,
[entrydate] [date] NULL,
[deptname] [varchar](50) NULL
)
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (1, N'a', 10, CAST(N'2016-12-24' AS Date), N'HR')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (1, N'b', 20, CAST(N'2017-10-29' AS Date), N'HR')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (3, N'd', 40, CAST(N'2015-10-10' AS Date), N'IT')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (3, N'd', 40, CAST(N'2015-10-10' AS Date), N'IT')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (4, N'e', 59, CAST(N'2016-12-17' AS Date), N'Finance')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (3, N'u', 40, CAST(N'2016-12-15' AS Date), N'CE')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (5, N'h', 60, CAST(N'2017-12-27' AS Date), N'Sales')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (1, N'a', 10, CAST(N'2016-12-24' AS Date), N'HR')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (1, N'b', 50, CAST(N'2017-10-29' AS Date), N'Manager')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (6, N'x', 90, CAST(N'2018-01-29' AS Date), N'PM')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (6, N'x', 80, CAST(N'2018-01-29' AS Date), N'lead')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (6, N'y', 110, CAST(N'2017-12-01' AS Date), N'Tester')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (6, N'y', 120, CAST(N'2017-12-01' AS Date), N'analyst')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (6, N'z', 130, CAST(N'2016-10-08' AS Date), N'support')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (6, N'x', 150, CAST(N'2018-02-09' AS Date), N'tech')
GO
INSERT [dbo].[empcnt] ([empid], [name], [deptno], [entrydate], [deptname]) VALUES (6, N'y', 170, CAST(N'2015-03-09' AS Date), N'jn')
GO
based on above data I want output like below:
empid |name |deptno |entrydate |deptname
1 |b |50 |2017-10-29 |Manager
1 |b |20 |2017-10-29 |HR
6 |y |110 |2017-12-01 |Tester
6 |y |120 |2017-12-01 |analyst
I tried like below :
-- Rows whose (empid, name, entrydate) group holds more than one distinct deptno.
-- NOTE: this does not restrict the result to each employee's latest entrydate.
SELECT *
FROM empcnt a
WHERE EXISTS
(
    SELECT 1
    FROM empcnt b
    WHERE a.empid = b.empid
      AND a.name = b.name
      AND a.entrydate = b.entrydate   -- the dangling AND after this predicate was a syntax error
    GROUP BY b.empid, b.name, b.entrydate
    HAVING COUNT(DISTINCT b.deptno) > 1
)
But the above query does not give the expected result.
Please tell me how to write a query to achieve this in SQL Server.
using a common table expression with dense_rank() and count() over() and a distinct subquery:
-- Returns the rows of each (empid, name) pair's most recent entrydate, but only
-- when that date has more than one row once exact duplicate rows are removed.
;with cte as (
select
-- c: number of rows (with a non-NULL empid) sharing this empid, name and entrydate
c = count(empid) over (partition by empid, name, entrydate)
-- dr: 1 marks the latest entrydate within each empid/name pair
, dr = dense_rank() over (partition by empid, name order by entrydate desc)
, *
-- distinct collapses fully identical rows so they are not counted twice in c
from (select distinct * from empcnt) x
)
select empid, name, deptno, entrydate, deptname
from cte
where c > 1
and dr = 1
rextester demo: http://rextester.com/LTN71730
returns:
+-------+------+--------+------------+----------+
| empid | name | deptno | entrydate | deptname |
+-------+------+--------+------------+----------+
| 1 | b | 20 | 2017-10-29 | HR |
| 1 | b | 50 | 2017-10-29 | Manager |
| 6 | y | 110 | 2017-12-01 | Tester |
| 6 | y | 120 | 2017-12-01 | analyst |
+-------+------+--------+------------+----------+
Another way using exists() and not exists()
-- Returns rows that share empid, name and entrydate with a row of a different
-- deptno, and for which no row of the same empid/name has a later entrydate.
select *
from empcnt o
-- another row for the same employee and date, but in a different department
where exists (
select 1
from empcnt i
where i.empid = o.empid
and i.name=o.name
and i.entrydate=o.entrydate
and i.deptno<>o.deptno
)
-- no later entry exists for this employee, i.e. this is the latest entrydate
and not exists (
select 1
from empcnt i
where i.empid=o.empid
and i.name=o.name
and i.entrydate>o.entrydate
)

SQL Server Windowing - 24 Hour Window

I have the following data
CREATE TABLE [dbo].[Test](
[CustId] [int] NULL,
[Spend] [money] NULL,
[TimeOdSpent] [datetime] NULL,
[ID] [int] IDENTITY(1,1) NOT NULL
) ON [PRIMARY]
GO
SET IDENTITY_INSERT [dbo].[Test] ON
GO
INSERT [dbo].[Test] ([CustId], [Spend], [TimeOdSpent], [ID])
VALUES (11, 400.0000, CAST(N'2016-10-27 10:00:00.000' AS DateTime), 1)
INSERT [dbo].[Test] ([CustId], [Spend], [TimeOdSpent], [ID])
VALUES (11, 200.0000, CAST(N'2016-10-27 11:00:00.000' AS DateTime), 2)
INSERT [dbo].[Test] ([CustId], [Spend], [TimeOdSpent], [ID])
VALUES (11, 400.0000, CAST(N'2016-10-28 09:00:00.000' AS DateTime), 3)
INSERT [dbo].[Test] ([CustId], [Spend], [TimeOdSpent], [ID])
VALUES (11, 500.0000, CAST(N'2016-10-28 16:00:00.000' AS DateTime), 4)
GO
SET IDENTITY_INSERT [dbo].[Test] OFF
Expected Result should be like this
1 2016-10-27 11:00:00.000 600
2 2016-10-28 09:00:00.000 1000
3 2016-10-28 16:00:00.000 900
I want to find the instances where the spend totals more than 500 within a 24-hour period. I have been trying to write a windowing query, without luck.
You can query as below:
-- Rows belonging to a customer/calendar-day bucket whose total spend exceeds 500.
-- Uses the question's table and column names (dbo.Test, TimeOdSpent, CustId).
-- NOTE: the buckets are calendar days, not a rolling 24-hour window.
SELECT *
FROM (
    SELECT
        *,
        Sm = SUM(Spend) OVER (PARTITION BY CustId, CONVERT(date, TimeOdSpent))
    FROM dbo.Test
) AS a
WHERE Sm > 500
This is the sort of thing I was looking for. I used the Sales.SalesOrderHeader table from AdventureWorks Instead of my simple table above
-- Splits each salesperson's shipments into consecutive 24-hour cycles measured
-- from their first shipment, and returns the cycles whose subtotal exceeds 100000.
;WITH shipments AS
(
    -- Each shipment together with the salesperson's previous shipment time.
    SELECT
        SalesPersonID,
        ShipDate,
        SubTotal,
        LAG(ShipDate) OVER (PARTITION BY SalesPersonID ORDER BY ShipDate) AS ShipDateBefore
    FROM Sales.SalesOrderHeader
    WHERE ShipDate < '20080710'        -- compare the column directly so an index on ShipDate stays usable
      AND SalesPersonID IS NOT NULL
),
gaps AS
(
    -- Gap to the previous shipment in SECONDS (NULL for the first shipment).
    SELECT
        SalesPersonID,
        ShipDate,
        SubTotal,
        DATEDIFF(ss, ShipDateBefore, ShipDate) AS DiffInSeconds
    FROM shipments
),
running AS
(
    -- Seconds elapsed since the salesperson's first shipment.
    SELECT
        SalesPersonID,
        ShipDate,
        SubTotal,
        SUM(DiffInSeconds) OVER (
            PARTITION BY SalesPersonID
            ORDER BY ShipDate
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS RunningTime
    FROM gaps
),
cycles AS
(
    -- 86400 seconds = 24 hours; the first shipment (NULL running time) is cycle 0.
    SELECT
        SalesPersonID,
        ShipDate,
        SubTotal,
        ISNULL(CAST((RunningTime / 86400.00) AS INT), 0) AS Cycle
    FROM running
)
SELECT
    SalesPersonID,
    SUM(SubTotal) AS Total,
    MIN(ShipDate) AS DurationStart,
    MAX(ShipDate) AS DurationEnd      -- was aliased DurationStart a second time
FROM cycles
GROUP BY SalesPersonID, Cycle
HAVING SUM(SubTotal) > 100000.00

Group up rows based on date overlapping

Within the same id, if the effective date/end date ranges of any rows overlap, those rows need to be grouped under one unique group id.
In below image dategroup is the desired output column
Data is sorted in order by ID asc, EffectiveDate ASC, EndDate Desc
CREATE TABLE #DataTable (id int , EffectiveDate datetime, Enddate Datetime )
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-01 00:00:00.000' AS DateTime), CAST(N'2017-01-11 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-02 00:00:00.000' AS DateTime), CAST(N'2017-01-05 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-03 00:00:00.000' AS DateTime), CAST(N'2017-01-12 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-06 00:00:00.000' AS DateTime), CAST(N'2017-01-09 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (1, CAST(N'2017-01-13 00:00:00.000' AS DateTime), CAST(N'2017-01-19 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (2, CAST(N'2017-02-01 00:00:00.000' AS DateTime), CAST(N'2017-02-11 00:00:00.000' AS DateTime))
GO
INSERT [dbo].#DataTable ([id], [EffectiveDate], [Enddate]) VALUES (2, CAST(N'2017-02-06 00:00:00.000' AS DateTime), CAST(N'2017-02-16 00:00:00.000' AS DateTime))
GO
Try this, Hope it helps. Not the most attractive code but it should work. I may clean it up later if I find some time.
;WITH cte_StepOne as
(
SELECT ROW_NUMBER() OVER (ORDER BY a.[id],
a.[EffectiveDate],
a.[Enddate]) AS SeqNo,
a.[id],
a.[EffectiveDate],
a.[Enddate],
b.[id] AS OverLapID,
b.[EffectiveDate] AS [OverLapEffectiveDate],
b.[Enddate] AS [OverLapEnddate]
FROM ##DataTable a
LEFT JOIN ##DataTable b
ON a.EffectiveDate BETWEEN b.EffectiveDate
AND b.EndDate
AND a.EffectiveDate <> b.EffectiveDate
AND a.EndDate <> b.EndDate --and a.ID <> b.ID
)
,cte_StepTwo AS
(
SELECT SeqNo,
id,
EffectiveDate,
Enddate,
LEAD(OverLapEffectiveDate, 1) OVER (ORDER BY SeqNo) AS LeadValue,LAG(id, 1) OVER (ORDER BY SeqNo) AS LeadValueID,
OverLapID,
OverLapEffectiveDate,
OverLapEnddate
FROM cte_StepOne
)
,cte_Result AS
(
SELECT id,
EffectiveDate,
Enddate,
CASE
WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN ID
WHEN OverLapID IS NULL THEN LeadValueID + 1
ELSE OverLapID
END AS OverLapID,
CASE
WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN EffectiveDate
ELSE OverLapEffectiveDate
END AS OverLapEffectiveDate,
CASE
WHEN LeadValue = EffectiveDate AND OverLapEffectiveDate IS NULL THEN Enddate
ELSE OverLapEnddate
END AS OverLapEnddate
FROM cte_StepTwo
)
SELECT DISTINCT id,
EffectiveDate,
Enddate,
DENSE_RANK() OVER (ORDER BY ID,OverLapID) AS DateGroup
FROM cte_Result
ORDER BY id,EffectiveDate
Result:
This answer takes the approach of trying to identify records for which the running DateGroup counter should be incremented. Ultimately, we will assign a value of 1 to such records. With this assignment in hand, we can then simply take a cumulative sum to generate the DateGroup.
-- this CTE identifies all new ID records
WITH cte1 AS (
SELECT t.ID, t.EffectiveDate, t.EndDate
FROM
(
SELECT ID, EffectiveDate, EndDate,
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY EffectiveDate) rn
FROM yourTable
) t
WHERE t.rn = 1
),
-- this CTE identifies all records whose both effective and end dates
-- do not fall within the range of the start ID record
cte2 AS (
SELECT t1.ID, t1.EffectiveDate, t1.EndDate
FROM yourTable t1
INNER JOIN cte1 t2
ON t1.ID = t2.ID AND
t1.EffectiveDate NOT BETWEEN t2.EffectiveDate AND t2.EndDate AND
t1.EndDate NOT BETWEEN t2.EffectiveDate AND t2.EndDate
),
-- this CTE returns the original table with a new column, amount, which
-- contains a value of 1 should that record cause the DateGroup to be
-- incremented by 1
cte3 AS (
SELECT t1.ID, t1.EffectiveDate, t1.EndDate,
CASE WHEN t2.ID IS NOT NULL OR t3.ID IS NOT NULL THEN 1 ELSE 0 END AS amount
FROM yourTable t1
LEFT JOIN cte1 t2
ON t1.ID = t2.ID AND
t1.EffectiveDate = t2.EffectiveDate AND
t1.EndDate = t2.EndDate
LEFT JOIN cte2 t3
ON t1.ID = t3.ID AND
t1.EffectiveDate = t3.EffectiveDate AND
t1.EndDate = t3.EndDate
)
-- finally, take a cumulative sum of the 'amount' column to generate the DateGroup
SELECT t1.ID,
t1.EffectiveDate,
t1.EndDate,
SUM(t2.amount) AS DateGroup
FROM cte3 t1
INNER JOIN cte3 t2
ON t1.ID >= t2.ID AND
t1.EffectiveDate >= t2.EffectiveDate
GROUP BY t1.id, t1.EffectiveDate, t1.EndDate;
Output:
Demo here:
Rextester
Data used:
CREATE TABLE yourTable (ID int, EffectiveDate datetime, EndDate datetime);
INSERT INTO yourTable
VALUES
(1, '2017-01-01 00:00:00.000', '2017-01-11 00:00:00.000'),
(1, '2017-01-02 00:00:00.000', '2017-01-05 00:00:00.000'),
(1, '2017-01-03 00:00:00.000', '2017-01-12 00:00:00.000'),
(1, '2017-01-06 00:00:00.000', '2017-01-09 00:00:00.000'),
(1, '2017-01-13 00:00:00.000', '2017-01-19 00:00:00.000'),
(2, '2017-02-01 00:00:00.000', '2017-02-11 00:00:00.000'),
(2, '2017-02-06 00:00:00.000', '2017-02-16 00:00:00.000');
What about this? It's simpler than the other solutions posted:
-- Assigns a DateGroup by comparing every row with the first (earliest) row of
-- its id: rows touching that first row's range share one group, the rest another.
-- NOTE: this yields at most two groups per id.
WITH
CTE_GetFirstRecordForEachId AS
(
    -- Number the rows of each id by start date; rn = 1 is the reference row.
    SELECT
        id,
        EffectiveDate,
        Enddate,
        rn = ROW_NUMBER() OVER (PARTITION BY id ORDER BY EffectiveDate, EndDate)
    FROM
        #DataTable
),
CTE_GetOutOfDateRange AS
(
    SELECT
        a.*,
        -- 0 when either end of row a lies inside the reference row b's range.
        -- (The previous test compared b's columns with each other, e.g.
        -- b.Enddate <= b.Enddate, instead of comparing a against b.)
        OutOfDateRange =
            CASE WHEN (a.EffectiveDate >= b.EffectiveDate AND a.EffectiveDate <= b.Enddate)
                   OR (a.Enddate >= b.EffectiveDate AND a.Enddate <= b.Enddate)
                 THEN 0
                 ELSE 1
            END
    FROM
        #DataTable a
    INNER JOIN
        CTE_GetFirstRecordForEachId b ON a.id = b.id AND b.rn = 1
)
SELECT
    id,
    Effectivedate,
    Enddate,
    DateGroup = DENSE_RANK() OVER (ORDER BY id, OutOfDateRange)
FROM
    CTE_GetOutOfDateRange
ORDER BY
    id, Effectivedate, Enddate
Output:
id Effectivedate Enddate DateGroup
----------- ----------------------- ----------------------- --------------------
1 2017-01-01 00:00:00.000 2017-01-11 00:00:00.000 1
1 2017-01-02 00:00:00.000 2017-01-05 00:00:00.000 1
1 2017-01-03 00:00:00.000 2017-01-12 00:00:00.000 1
1 2017-01-06 00:00:00.000 2017-01-09 00:00:00.000 1
1 2017-01-13 00:00:00.000 2017-01-19 00:00:00.000 2
2 2017-02-01 00:00:00.000 2017-02-11 00:00:00.000 3
2 2017-02-06 00:00:00.000 2017-02-16 00:00:00.000 3
What about this (I am still testing it)
WITH Z AS
(SELECT * FROM (SELECT ID, [EffectiveDate], ENDDate
, LAG(ID) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS ID_Prec
, LAG(EffectiveDate) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS EffDate_Prec
, LAG(ENDDate) OVER (PARTITION BY ID ORDER BY EffectiveDate, ENDDate Desc) AS EndDate_Prec
, ROW_NUMBER() OVER (ORDER BY ID, EffectiveDate,ENDDate DESC) AS RN
, 1 AS DATEGROUP
FROM #DataTable ) C WHERE RN = 1
UNION ALL
SELECT A.ID, A.EffectiveDate, A.Enddate
, A.ID_Prec, A.EffDate_Prec
, A.EndDate_Prec
, A.RN
, CASE WHEN A.ID = A.ID_PREC AND (A.EffectiveDate <=A.EndDate_Prec /* OR A.EndDate>=A.EffDate_Prec*/) THEN Z.DATEGROUP
ELSE Z.DATEGROUP+1 END AS DATEGROUP
FROM (SELECT A.ID, A.EffectiveDate, A.ENDDate
, LAG(A.ID) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS ID_Prec
, LAG(A.EffectiveDate) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS EffDate_Prec
, LAG(A.ENDDate) OVER (PARTITION BY A.ID ORDER BY A.EffectiveDate, A.ENDDate Desc) AS EndDate_Prec
, ROW_NUMBER() OVER (ORDER BY A.ID, A.EffectiveDate,A.ENDDate DESC) AS RN
, 1 AS DATEGROUP
FROM #DataTable A) A
INNER JOIN Z ON A.RN -1= Z.RN
)
SELECT ID, EffectiveDate, Enddate, DATEGROUP FROM Z
Output:
ID EffectiveDate Enddate DATEGROUP
----------- ----------------------- ----------------------- -----------
1 2017-01-01 00:00:00.000 2017-01-11 00:00:00.000 1
1 2017-01-02 00:00:00.000 2017-01-05 00:00:00.000 1
1 2017-01-03 00:00:00.000 2017-01-12 00:00:00.000 1
1 2017-01-06 00:00:00.000 2017-01-09 00:00:00.000 1
1 2017-01-13 00:00:00.000 2017-01-19 00:00:00.000 2
2 2017-02-01 00:00:00.000 2017-02-11 00:00:00.000 3
2 2017-02-06 00:00:00.000 2017-02-16 00:00:00.000 3
I guess you are missing some test scenarios in your sample data.
-- Walks the rows in (id, effectivedate, enddate) order and keeps New_ID while a
-- row's effectivedate falls inside the previous row's range for the same id;
-- otherwise New_ID is incremented.
;with CTE as
(
    -- enddate added as a tie-breaker so the numbering is deterministic
    select *, ROW_NUMBER() over (order by id, effectivedate, enddate) as rn
    from #DataTable
)
,CTE1 AS
(
    -- anchor: the first row starts group 1
    select *, 1 as New_ID
    from CTE
    where rn = 1
    union ALL
    -- step: compare row rn with row rn-1
    select c.id, c.effectivedate, c.enddate, c.rn
         , case when c.id = c1.id
                 and c.effectivedate between c1.effectivedate and c1.enddate
                then c1.New_ID
                else c1.New_ID + 1
           END
    from CTE c
    inner join CTE1 c1
        on c.rn = c1.rn + 1   -- recursion ends by itself after the last row; no hard-coded row limit
)
select * from CTE1
option (maxrecursion 0);  -- lift the default limit of 100 recursion levels (one level per row)
drop table #DataTable
this may help you. I posted here shortest and simplest version of tsql...
WITH CTE AS (
SELECT *,
ISNULL(LAG(EffectiveDate) OVER (PARTITION BY id ORDER BY id,EffectiveDate,Enddate),EffectiveDate) AS PreviousEffDate,
ISNULL(LAG(Enddate) OVER (PARTITION BY id ORDER BY id,EffectiveDate,Enddate),Enddate) AS PreviousEndDate
FROM #DataTable)
SELECT id,
EffectiveDate,
Enddate,
DENSE_RANK() OVER (ORDER BY id,CASE
WHEN EffectiveDate BETWEEN PreviousEffDate AND PreviousEndDate OR
Enddate BETWEEN PreviousEffDate AND PreviousEndDate OR
PreviousEffDate BETWEEN EffectiveDate AND Enddate OR
PreviousEndDate BETWEEN EffectiveDate AND Enddate
THEN 0
ELSE 1
END) AS DateGroup
FROM CTE
Result:
Have got this one from another forum; altered as per my requirement . looks simple and effective.
-- Gaps-and-islands per id: isstart = 1 when a row begins after every earlier
-- row of the same id has ended; the running sum of isstart is the date group.
WITH C1 AS (
    SELECT *,
           CASE WHEN EffectiveDate <= MAX(ISNULL(EndDate, '9999-12-31 00:00:00.000'))
                                          OVER (PARTITION BY id
                                                ORDER BY EffectiveDate, EndDate
                                                ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
                THEN 0
                ELSE 1   -- also the first row of each id, where the preceding MAX is NULL
           END AS isstart
    FROM #DataTable
)
SELECT ID, EffectiveDate, EndDate,
       -- The running sum must follow the same row order as the isstart
       -- calculation; ordering by ID alone leaves the order within an id
       -- undefined and can assign wrong groups.
       SUM(isstart) OVER (ORDER BY ID, EffectiveDate, EndDate ROWS UNBOUNDED PRECEDING) AS DG
FROM C1

Return previous record for holiday/weekends

A table holds the stock name, value and effective date for each stock, and there won't be any entries for holidays/weekends. If the date range I pass contains holidays/weekends, I would like the previous date's record to be returned for those days.
E.G.
Table name: Stock
ID Name Value EffectiveDate
1 IBM 200.0000 2015-12-31 00:00:00.000
2 IBM 201.4500 2016-01-04 00:00:00.000
3 IBM 201.0000 2016-01-05 00:00:00.000
4 IBM 202.0000 2016-01-06 00:00:00.000
SELECT Name, Value, EffectiveDate FROM Stock WHERE Name = 'IBM' AND EffectiveDate >= '20151231' AND EffectiveDate <= '20160105'
The above query returns top 3 records but I would like to return the below results:
Name Value EffectiveDate ActualDate
IBM 200.0000 2015-12-31 2015-12-31
IBM 200.0000 2015-12-31 2016-01-01
IBM 200.0000 2015-12-31 2016-01-02
IBM 200.0000 2015-12-31 2016-01-03
IBM 201.4500 2016-01-04 2016-01-04
IBM 201.0000 2016-01-05 2016-01-05
01/01/2016 to 03/01/2016 are holidays/weekends. I've a function which returns the previous date if I pass the holiday/weekend date. Could anyone help to write the query in SQL Server to achieve the above?
-- For every calendar day in [@MinDate, @MaxDate], returns the stock row with the
-- latest EffectiveDate on or before that day.
-- Table variables and scalar variables must start with '@'.
DECLARE @table TABLE (
    id int,
    name varchar(20),
    value decimal(10, 4),
    EffectiveDate datetime
)
INSERT INTO @table (id, name, value, EffectiveDate)
VALUES (1, 'IBM', 200.0000, '2015-12-31 00:00:00.000')
     , (2, 'IBM', 201.4500, '2016-01-04 00:00:00.000')
     , (3, 'IBM', 201.0000, '2016-01-05 00:00:00.000')
     , (4, 'IBM', 202.0000, '2016-01-06 00:00:00.000')
DECLARE @MinDate datetime = '20151231',
        @MaxDate datetime = '20160105';
WITH Dates AS (
    -- one row per calendar day in the requested range
    SELECT @MinDate AS ActualDate
    UNION ALL
    SELECT DATEADD(day, 1, ActualDate)
    FROM Dates
    WHERE ActualDate < @MaxDate
)
SELECT [Table].name
     , [Table].value
     , [Table].EffectiveDate
     , [Dates].ActualDate
FROM Dates
CROSS APPLY (
    -- most recent date with data on or before the calendar day
    SELECT MAX(EffectiveDate) AS LastEffectiveDate
    FROM @table AS [Table]
    WHERE [Table].EffectiveDate <= Dates.ActualDate
) AS CA1
INNER JOIN @table AS [Table]
    ON [Table].EffectiveDate = CA1.LastEffectiveDate
OPTION (MAXRECURSION 0)   -- allow ranges longer than the default 100 recursion levels (days)
The following code will return an ActualDate as required for the previous date if the EffectiveDate is a Weekend, however to include logic for bank holidays you will need to define these in a table and then add more logic to the below.
-- ActualDate is the preceding Friday when EffectiveDate falls on a weekend,
-- otherwise EffectiveDate itself.
-- NOTE: DATENAME(dw, ...) returns day names in the session language; the
-- comparisons below assume an English language setting.
SELECT
    Name,
    Value,
    EffectiveDate,
    CASE
        WHEN DATENAME(dw, EffectiveDate) = 'Saturday' THEN DATEADD(DAY, -1, EffectiveDate)
        WHEN DATENAME(dw, EffectiveDate) = 'Sunday'   THEN DATEADD(DAY, -2, EffectiveDate)
        ELSE EffectiveDate   -- without this branch every weekday row returned NULL
    END AS ActualDate
FROM Stock
WHERE Name = 'IBM'
  AND EffectiveDate BETWEEN '20151231' AND '20160105'
This might not be a perfect solution but this does the trick.
-- Lists every calendar day in [@MinDate, @MaxDate] plus every stock row; days
-- without a stock row take the value of the latest earlier stock row.
-- Table variables and scalar variables must start with '@'.
DECLARE @table TABLE (
    id int,
    name varchar(20),
    value decimal(10, 4),
    EffectiveDate datetime
)
INSERT INTO @table (id, name, value, EffectiveDate)
VALUES (1, 'IBM', 200.0000, '2015-12-31 00:00:00.000')
     , (2, 'IBM', 201.4500, '2016-01-04 00:00:00.000')
     , (3, 'IBM', 201.0000, '2016-01-05 00:00:00.000')
     , (4, 'IBM', 202.0000, '2016-01-06 00:00:00.000')
DECLARE @MinDate datetime = '20151231',
        @MaxDate datetime = '20160105';
SELECT
    Data.id,
    CASE
        WHEN Data.value IS NULL
        THEN (SELECT TOP (1) T1.value
              FROM @table AS T1
              WHERE T1.EffectiveDate < Data.FinalDate
              -- Order by the candidate row's own date. The unqualified FinalDate
              -- used before resolved to the outer row, so TOP (1) picked an
              -- arbitrary earlier row.
              ORDER BY T1.EffectiveDate DESC)
        ELSE Data.value
    END AS value,
    Data.FinalDate
FROM (
    SELECT
        T.id,
        T.name,
        T.value,
        COALESCE(T.EffectiveDate, H.[Date]) AS FinalDate
    FROM @table AS T
    FULL OUTER JOIN (
        -- one row per calendar day in the range, generated from a row-number tally
        SELECT TOP (DATEDIFF(DAY, @MinDate, @MaxDate) + 1)
            [Date] = DATEADD(DAY, ROW_NUMBER() OVER (ORDER BY a.object_id) - 1, @MinDate)
        FROM sys.all_objects AS a
        CROSS JOIN sys.all_objects AS b
    ) AS H
        ON T.EffectiveDate = H.[Date]
) AS Data
ORDER BY Data.FinalDate

Resources