MS SQL value change - sql-server

I have a SQL query that looks like this.
Select Timestamp, Value From [dbo].[nro_ReadRawDataByTimeFunction](
'SV/SVTP01.BONF0335-D1-W1-BL1',
'2017-11-01 00:00',
'2017-12-01 00:00')
GO
This will return
Timestamp | Value
1 2017-11-01 10:00 | 0
2 2017-11-01 11:00 | 0
3 2017-11-01 12:00 | 0
4 2017-11-01 13:00 | 1
5 2017-11-01 14:00 | 1
6 2017-11-01 15:00 | 0
7 2017-11-01 16:00 | 0
8 2017-11-01 17:00 | 0
9 2017-11-01 18:00 | 1
10 2017-11-01 19:00 | 0
The full list is a lot larger, and I'm only interested in results where the value changes from the previous result, so in this case rows 1, 4, 6, 9 and 10.
I know how to do it if it's directly from a table but not when it's from a function

You can use this construction:
-- Returns the first row of every run of equal [Value]s, i.e. the rows where the value changed.
;WITH cte AS (
-- Name the function's result set so it can be referenced twice below.
Select [Timestamp],
[Value]
From [dbo].[nro_ReadRawDataByTimeFunction](
'SV/SVTP01.BONF0335-D1-W1-BL1',
'2017-11-01 00:00',
'2017-12-01 00:00')
)
-- TOP 1 WITH TIES keeps every row that ties for first place in the ORDER BY,
-- which here means every row whose ROW_NUMBER() is 1.
SELECT TOP 1 WITH TIES c.*
FROM cte c
-- For each row c, t is the earliest later row carrying a different value
-- (all columns NULL when no such row exists, e.g. for the final run).
OUTER APPLY (
SELECT TOP 1 *
FROM cte
WHERE [Value] != c.[Value] AND c.[Timestamp] < [Timestamp]
ORDER BY [Timestamp] ASC
) t
-- Rows of one run share the same t.[Timestamp]; numbering inside that partition
-- by c.[Timestamp] gives 1 to the first row of the run.
ORDER BY ROW_NUMBER() OVER (PARTITION BY t.[Timestamp] ORDER BY c.[Timestamp] ASC)
Output:
Timestamp Value
2017-11-01 19:00 0
2017-11-01 10:00 0
2017-11-01 13:00 1
2017-11-01 15:00 0
2017-11-01 18:00 1
Explanation:
SELECT *
FROM cte c
OUTER APPLY (
SELECT TOP 1 *
FROM cte
WHERE [Value] != c.[Value] AND c.[Timestamp] < [Timestamp]
ORDER BY [Timestamp] ASC
) t
Here we select data from main table and with the help of OUTER APPLY add to each row data with different value and greater timestamp.
ROW_NUMBER() OVER (PARTITION BY t.[Timestamp] ORDER BY c.[Timestamp] ASC)
Hopefully you are familiar with ROW_NUMBER; it
returns the sequential number of a row within a partition of a result set, starting at 1 for the first row in each partition.
So, if you run the above query and add this code to SELECT, you will get:
Timestamp Value Timestamp Value rn
2017-11-01 19:00 0 NULL NULL 1
2017-11-01 10:00 0 2017-11-01 13:00 1 1
2017-11-01 11:00 0 2017-11-01 13:00 1 2
2017-11-01 12:00 0 2017-11-01 13:00 1 3
2017-11-01 13:00 1 2017-11-01 15:00 0 1
2017-11-01 14:00 1 2017-11-01 15:00 0 2
2017-11-01 15:00 0 2017-11-01 18:00 1 1
2017-11-01 16:00 0 2017-11-01 18:00 1 2
2017-11-01 17:00 0 2017-11-01 18:00 1 3
2017-11-01 18:00 1 2017-11-01 19:00 0 1
As you can see, all the rows you need are marked with 1. We could put this in another CTE or sub-query and filter on rn = 1, but we can do it all in one statement with the help of TOP 1 WITH TIES (MSDN link).

Since you are referring to SQL Server 2012 you can enjoy the new features:
-- Keep only the rows of #t whose value v differs from the previous row's value (rows ordered by d).
;WITH Hist AS (
    SELECT r,
           LAG(v) OVER (ORDER BY d) AS PreviousValue,
           v,
           LEAD(v) OVER (ORDER BY d) AS NextValue -- not used by the filter; shown only because LEAD is available too
    FROM #t
)
SELECT *
FROM Hist h
INNER JOIN #t t ON h.r = t.r
-- A NULL PreviousValue (the first row has no predecessor) keeps the row.
-- The explicit NULL test replaces the ISNULL(..., -1) sentinel, which silently
-- dropped a first row whose actual value happened to be -1.
WHERE h.PreviousValue IS NULL
   OR h.PreviousValue <> t.v
#t contains your results

If this function is a table-valued function, you can simply filter its result in the WHERE clause, or do the filtering inside the function itself (in that second case the filter value would be passed in as a parameter):
Select a.Timestamp, a.Value From [dbo].[nro_ReadRawDataByTimeFunction](
'SV/SVTP01.BONF0335-D1-W1-BL1',
'2017-11-01 00:00',
'2017-12-01 00:00') as a
WHERE a.Value = 1

What I ended up with is the following:
-- Fragment of a larger query: assign @startDate and @tagName before running.
DECLARE @startDate DATE,
        @tagName NVARCHAR(200);

WITH CTE AS (
    -- Every good-quality reading together with the value of the reading before it.
    SELECT Timestamp AS StopTime,
           Value AS [OFF],
           LAG(Value, 1) OVER (ORDER BY Timestamp) AS [ON],
           Quality
    FROM [dbo].[nrp_ReadRawDataByTimeFunction](@tagName, @startDate, DATEADD(MONTH, 2, @startDate))
    WHERE Quality & 127 = 100
),
CalenderCTE AS (
    -- One row per day from @startDate up to, but not including, one month later.
    SELECT [DATE] = DATEADD(DAY, Number, @startDate)
    FROM master..spt_values
    WHERE Type = 'P'
      AND DATEADD(DAY, Number, @startDate) < DATEADD(MONTH, 1, @startDate)
)
SELECT *
FROM CTE
FULL OUTER JOIN CalenderCTE
    ON CalenderCTE.Date = CAST(CTE.StopTime AS [Date])
-- Keep only readings whose value differs from the previous reading.
-- NOTE(review): this predicate is never true for calendar-only rows (their CTE
-- columns are NULL), so the FULL OUTER JOIN behaves like a LEFT JOIN from CTE
-- in this fragment; confirm that is intended in the full query.
WHERE CTE.[OFF] != CTE.[ON];
This is just a tiny part of the query, since it does a lot more that isn't included in the original post.
Thanks, all, for your input; it helped me on the way to the final result.

Related

Find customer lapse across variable subscription periods

Hoping someone has run across this issue previously and has a solution.
I am trying to find customers who lapse based off subscription periods rather than a single order date.
Lapse is defined by us as not making a purchase/renewal within 30 days of the end of their subscription. A customer can have multiple subscriptions simultaneously and subscriptions can vary in length.
I have a data set that includes customerIDs, Orders, the subscription start date, the subscription expire date, and that order’s rank in the customer’s order history, something like this:
-- Sample data: one row per order, with the subscription period it buys and
-- the order's rank (OrderNumber) within the customer's order history.
CREATE TABLE #Subscriptions
(
    CustomerID        INT,
    Orderid           INT,
    SubscriptionStart DATE,
    SubscriptionEnd   DATE,
    OrderNumber       INT
);

-- Explicit column list so the INSERT does not depend on column order.
INSERT INTO #Subscriptions (CustomerID, Orderid, SubscriptionStart, SubscriptionEnd, OrderNumber)
VALUES (1, 111111, '2017-01-01', '2017-12-31', 1),
       (1, 211111, '2018-01-01', '2019-12-31', 2),
       (1, 311121, '2018-10-01', '2018-10-02', 3),
       (1, 451515, '2019-02-01', '2019-02-28', 4),
       (2, 158797, '2018-07-01', '2018-07-31', 1),
       (2, 287584, '2018-09-01', '2018-12-31', 2),
       (2, 387452, '2019-01-01', '2019-01-31', 3),
       (3, 187498, '2019-01-01', '2019-02-28', 1),
       (3, 284990, '2019-02-01', '2019-02-28', 2),
       (4, 184849, '2019-02-01', '2019-02-28', 1);
Within this data set, customer 2 would have lapsed on 2018-07-31. Since Customer 1 has a subscription of 2017-01-01 - 2017-12-31 and then one that starts 2018-01-01 and ends 2019-12-31 they cannot lapse within that time period even if other orders made by the customer would qualify.
I have attempted some simple gap calculations using LEAD() and LAG(); however, I have had no success due to the variable lengths of the subscription periods, where a single subscription can span multiple other orders. Eventually, we will use this to calculate the monthly churn rate across approximately 5 million records.
You're overthinking this trying to use LEAD() and LAG(). All you need is a NOT EXISTS() function in the WHERE clause
In pseudocode:
SELECT...FROM...
WHERE {SubscriptionEnd is at least 30 days in the past}
AND NOT EXISTS(
{A row for the same Customer where the StartDate is 30 days or less after this EndDate}
)
This one looks to be a tricky one. You are correct about the problem with using the LEAD() and LAG() functions. It stems from customers being able to have multiple subscriptions of variable length. So we need to deal with that issue first. Let's begin with creating a single list of dates instead of having a list of SubscriptionStart and SubscriptionEnd.
-- Unpivot each subscription into two dated events:
-- +1 on the day it starts and -1 on the day it ends.
SELECT
    s.CustomerId,
    s.OrderId,
    ev.Activity,
    ev.ActivityDate
FROM
    #Subscriptions AS s
CROSS APPLY (
    VALUES ( 1, s.SubscriptionStart),
           (-1, s.SubscriptionEnd)
) AS ev (Activity, ActivityDate)
ORDER BY
    s.CustomerId,
    ev.ActivityDate
CustomerId OrderId Activity ActivityDate
----------- ----------- ----------- ------------
1 111111 1 2017-01-01
1 111111 -1 2017-12-31
1 211111 1 2018-01-01
1 311121 1 2018-10-01
1 311121 -1 2018-10-02
1 451515 1 2019-02-01
1 451515 -1 2019-02-28
1 211111 -1 2019-12-31
2 158797 1 2018-07-01
2 158797 -1 2018-07-31
2 287584 1 2018-09-01
2 287584 -1 2018-12-31
2 387452 1 2019-01-01
2 387452 -1 2019-01-31
3 187498 1 2019-01-01
3 284990 1 2019-02-01
3 187498 -1 2019-02-28
3 284990 -1 2019-02-28
4 184849 1 2019-02-01
4 184849 -1 2019-02-28
Notice the additional Activity field. It is 1 for the SubscriptionStart and -1 for the SubscriptionEnd.
Using this new Activity field it is possible to find places where there might be a lapse in the customer's subscriptions. At the same time use LEAD() to find the NextDate.
-- Exploration query: running count of active subscriptions per customer,
-- plus the date of the next event and the number of days until it.
;WITH SubscriptionList AS (
    -- +1 event on each subscription start
    SELECT
        CustomerId,
        OrderId,
        1 AS Activity,
        SubscriptionStart AS ActivityDate
    FROM
        #Subscriptions
    UNION ALL
    -- -1 event on each subscription end
    SELECT
        CustomerId,
        OrderId,
        -1 AS Activity,
        SubscriptionEnd AS ActivityDate
    FROM
        #Subscriptions
)
SELECT
    CustomerId,
    OrderId,
    Activity,
    -- Same-day events are ordered starts-before-ends, then by OrderId, so the
    -- running total and LEAD() are deterministic and a subscription that starts
    -- on the day another ends never produces a spurious count of 0.
    SUM(Activity) OVER (PARTITION BY CustomerId
                        ORDER BY ActivityDate, Activity DESC, OrderId
                        ROWS UNBOUNDED PRECEDING) AS SubscriptionCount,
    ActivityDate,
    -- When there is no later event, fall back to today's date.
    LEAD(ActivityDate, 1, CAST(GETDATE() AS DATE))
        OVER (PARTITION BY CustomerId
              ORDER BY ActivityDate, Activity DESC, OrderId) AS NextDate,
    DATEDIFF(DAY, ActivityDate,
             LEAD(ActivityDate, 1, CAST(GETDATE() AS DATE))
                 OVER (PARTITION BY CustomerId
                       ORDER BY ActivityDate, Activity DESC, OrderId)) AS LapsedDays
FROM
    SubscriptionList
ORDER BY
    CustomerId,
    ActivityDate,
    Activity DESC,
    OrderId
CustomerId OrderId Activity SubscriptionCount ActivityDate NextDate LapsedDays
----------- ----------- ----------- ----------------- ------------ ---------- -----------
1 111111 1 1 2017-01-01 2017-12-31 364
1 111111 -1 0 2017-12-31 2018-01-01 1
1 211111 1 1 2018-01-01 2018-10-01 273
1 311121 1 2 2018-10-01 2018-10-02 1
1 311121 -1 1 2018-10-02 2019-02-01 122
1 451515 1 2 2019-02-01 2019-02-28 27
1 451515 -1 1 2019-02-28 2019-12-31 306
1 211111 -1 0 2019-12-31 2019-02-28 -306
2 158797 1 1 2018-07-01 2018-07-31 30
2 158797 -1 0 2018-07-31 2018-09-01 32
2 287584 1 1 2018-09-01 2018-12-31 121
2 287584 -1 0 2018-12-31 2019-01-01 1
2 387452 1 1 2019-01-01 2019-01-31 30
2 387452 -1 0 2019-01-31 2019-02-28 28
3 187498 1 1 2019-01-01 2019-02-01 31
3 284990 1 2 2019-02-01 2019-02-28 27
3 187498 -1 1 2019-02-28 2019-02-28 0
3 284990 -1 0 2019-02-28 2019-02-28 0
4 184849 1 1 2019-02-01 2019-02-28 27
4 184849 -1 0 2019-02-28 2019-02-28 0
Adding running total on the Activity field will effectively give the number of active subscriptions. While it is greater than 0 a lapse is not possible. So focus in on the rows WHERE the SubscriptionCount is zero.
Using LEAD(), get the NextDate. If there isn't a next date then default to today. If the SubscriptionCount is 0 then the NextDate has to come from a new subscription, so the NextDate will be the date on which that new subscription starts. Using DATEDIFF, count the number of days between the SubscriptionEnd and the start of the next subscription; if it is > 30 days then there was a lapse. Sounds like a good WHERE clause.
-- Find lapses: moments where a customer has no active subscription and more
-- than 30 days pass before the next one starts (or before today, if none does).
;WITH SubscriptionList AS (
    -- +1 event on each subscription start
    SELECT
        CustomerId,
        OrderId,
        1 AS Activity,
        SubscriptionStart AS ActivityDate
    FROM
        #Subscriptions
    UNION ALL
    -- -1 event on each subscription end
    SELECT
        CustomerId,
        OrderId,
        -1 AS Activity,
        SubscriptionEnd AS ActivityDate
    FROM
        #Subscriptions
)
, FindLapse AS (
    SELECT
        CustomerId,
        OrderId,
        Activity,
        -- Running number of active subscriptions. Same-day events are ordered
        -- starts-before-ends, then by OrderId, so the result is deterministic and
        -- a same-day renewal never shows a count of 0.
        SUM(Activity) OVER (PARTITION BY CustomerId
                            ORDER BY ActivityDate, Activity DESC, OrderId
                            ROWS UNBOUNDED PRECEDING) AS SubscriptionCount,
        ActivityDate,
        -- Date of the next event; today's date when there is none.
        LEAD(ActivityDate, 1, CAST(GETDATE() AS DATE))
            OVER (PARTITION BY CustomerId
                  ORDER BY ActivityDate, Activity DESC, OrderId) AS NextDate
    FROM
        SubscriptionList
)
SELECT
    CustomerId,
    OrderId,
    Activity,
    SubscriptionCount,
    ActivityDate,
    NextDate,
    DATEDIFF(DAY, ActivityDate, NextDate) AS LapsedDays
FROM
    FindLapse
WHERE
    SubscriptionCount = 0
    -- A renewal within 30 days (inclusive) is not a lapse, so the gap must
    -- exceed 30 days; the previous ">= 30" flagged a renewal on day 30.
    AND DATEDIFF(DAY, ActivityDate, NextDate) > 30
ORDER BY
    CustomerId,
    ActivityDate
CustomerId OrderId Activity SubscriptionCount ActivityDate NextDate LapsedDays
----------- ----------- ----------- ----------------- ------------ ---------- -----------
2 158797 -1 0 2018-07-31 2018-09-01 32
Looks like we have a winner!

Recursive first day of each month for current getdate

Using T-SQL, I want a new column that will show me the first day of each month, for the current year of getdate().
After that I need to count the rows on this specific date. Should I do it with CTE or a temp table?
If 2012+, you can use DateFromParts()
To Get a List of Dates
Select D = DateFromParts(Year(GetDate()),N,1)
From (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12)) N(N)
Returns
D
2017-01-01
2017-02-01
2017-03-01
2017-04-01
2017-05-01
2017-06-01
2017-07-01
2017-08-01
2017-09-01
2017-10-01
2017-11-01
2017-12-01
Edit For Trans Count
To get Transactions (assuming by month). It becomes a small matter of a left join to created Dates
-- This is Just a Sample Table Variable for Demonstration.
-- Remove this and Use your actual Transaction Table
--------------------------------------------------------------
-- Table variables are declared with "@"; "Declare #name table" is not valid T-SQL.
Declare @Transactions table (TransDate date, MoreFields int)
Insert Into @Transactions (TransDate, MoreFields) values
 ('2017-02-18',6)
,('2017-02-19',9)
,('2017-03-05',5)

-- One row per month of the current year with the number of transactions in it
-- (0 for months without any, thanks to the left join).
Select TransMonth = A.MthBeg
      ,TransCount = count(B.TransDate)
 From (
        -- First day of each month of the current year
        Select MthBeg = DateFromParts(Year(GetDate()),N,1)
         From (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12)) N(N)
      ) A
 -- Half-open month range: also correct if TransDate carries a time of day.
 Left Join @Transactions B
        on B.TransDate >= A.MthBeg
       and B.TransDate <  DateAdd(month,1,A.MthBeg)
 Group By A.MthBeg
 -- Without an ORDER BY the month order of the result is not guaranteed.
 Order By A.MthBeg
Returns
TransMonth TransCount
2017-01-01 0
2017-02-01 2
2017-03-01 1
2017-04-01 0
2017-05-01 0
2017-06-01 0
2017-07-01 0
2017-08-01 0
2017-09-01 0
2017-10-01 0
2017-11-01 0
2017-12-01 0
For an adhoc table of months for a given year:
-- First day of the current year (local variables are prefixed with "@").
declare @year date = dateadd(year, datediff(year, 0, getdate()), 0);

-- Add 0..11 months to the start of the year: one row per month start.
with Months as (
    select MonthStart = dateadd(month, t.n, @year)
    from (values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11)) t(n)
)
select MonthStart
from Months
order by MonthStart; -- row order is not guaranteed without it
rextester demo: http://rextester.com/POKPM51023
returns:
+------------+
| MonthStart |
+------------+
| 2017-01-01 |
| 2017-02-01 |
| 2017-03-01 |
| 2017-04-01 |
| 2017-05-01 |
| 2017-06-01 |
| 2017-07-01 |
| 2017-08-01 |
| 2017-09-01 |
| 2017-10-01 |
| 2017-11-01 |
| 2017-12-01 |
+------------+
The first part: dateadd(year,datediff(year,0,getdate() ),0) adds the number of years since 1900-01-01 to the date 1900-01-01. So it will return the first date of the year. You can also swap year for other levels of truncation: year, quarter, month, day, hour, minute, second, et cetera.
The second part uses a common table expression and the table value constructor (values (...),(...)) to source numbers 0-11, which are added as months to the start of the year.
Not sure why you require recursive... But for first day of month you can try query like below:
Select Dateadd(day,1,eomonth(Dateadd(month, -1,getdate())))
-- First day of the current year (local variables are prefixed with "@").
declare @year date = dateadd(year, datediff(year, 0, getdate()), 0);

-- Recursive variant: generate month offsets 0..11, then add them to the year start.
WITH months (MonthNumber) AS
(
    SELECT 0
    UNION ALL
    SELECT MonthNumber + 1
    FROM months
    WHERE MonthNumber < 11   -- stops the recursion after 12 rows
)
select dateadd(month, MonthNumber, @year) as MonthStart
from months
order by MonthNumber;

MSSQL 2008 Merge Contiguous Dates With Groupings

I have searched high and low for weeks now trying to find a solution to my problem.
As far as I can ascertain, my SQL Server version (2008r2) is a limiting factor on this but, I am positive there is a solution out there.
My problem is as follows:
I have a table with potentially contiguous dates in the form of Customer-Status-DateStart-DateEnd-EventID.
I need to merge contiguous dates by customer and status - the status field can shift up and down throughout a customer's pathway.
Some example data is as follows:
-- Sample data. A table variable must be named with "@" ("DECLARE #Tbl TABLE" is not valid T-SQL).
DECLARE @Tbl TABLE([CustomerID] INT
                  ,[Status] INT
                  ,[DateStart] DATE
                  ,[DateEnd] DATE      -- NULL = still open
                  ,[EventID] INT)
INSERT INTO @Tbl ([CustomerID], [Status], [DateStart], [DateEnd], [EventID])
VALUES (1,1,'20160101','20160104',1)
      ,(1,1,'20160104','20160108',3)
      ,(1,2,'20160108','20160110',4)
      ,(1,1,'20160110','20160113',7)
      ,(1,3,'20160113','20160113',9)
      ,(1,3,'20160113',NULL,10)
      ,(2,1,'20160101',NULL,2)
      ,(3,2,'20160109','20160110',5)
      ,(3,1,'20160110','20160112',6)
      ,(3,1,'20160112','20160114',8)
Desired output:
Customer | Status | DateStart | DateEnd
---------+--------+-----------+-----------
1 | 1 | 2016-01-01| 2016-01-08
1 | 2 | 2016-01-08| 2016-01-10
1 | 1 | 2016-01-10| 2016-01-13
1 | 3 | 2016-01-13| NULL
2 | 1 | 2016-01-01| NULL
3 | 2 | 2016-01-09| 2016-01-10
3 | 1 | 2016-01-10| 2016-01-14
Any ideas / code will be greatly received.
Thanks,
Dan
Try this
-- Sample data. A table variable must be named with "@" ("DECLARE #Tbl TABLE" is not valid T-SQL).
DECLARE @Tbl TABLE([CusomerID] INT
                  ,[Status] INT
                  ,[DateStart] DATE
                  ,[DateEnd] DATE
                  ,[EventID] INT)
INSERT INTO @Tbl ([CusomerID], [Status], [DateStart], [DateEnd], [EventID])
VALUES (1,1,'20160101','20160104',1)
      ,(1,1,'20160104','20160108',3)
      ,(1,2,'20160108','20160110',4)
      ,(1,1,'20160110','20160113',7)
      ,(1,3,'20160113','20160113',9)
      ,(1,3,'20160113',NULL,10)
      ,(2,1,'20160101',NULL,2)
      ,(3,2,'20160109','20160110',5)
      ,(3,1,'20160110','20160112',6)
      ,(3,1,'20160112','20160114',8)

-- Merge consecutive rows of the same customer and status into one period
-- ("gaps and islands" via the difference of two row numbers).
;WITH CTE
AS
(
    SELECT  CusomerID ,
            Status ,
            DateStart ,
            -- Open-ended periods get a far-future sentinel so MAX() picks them
            -- as the end of the merged period; it is turned back into NULL below.
            COALESCE(DateEnd, '9999-01-01') AS DateEnd,
            EventID,
            -- Position of the row overall ...
            ROW_NUMBER() OVER (ORDER BY CusomerID, EventID) RowId,
            -- ... and within its customer/status. The difference of the two is
            -- constant for an unbroken run of the same status.
            -- NOTE(review): assumes EventID increases chronologically per customer - confirm.
            ROW_NUMBER() OVER (PARTITION BY CusomerID, Status ORDER BY EventID) StatusRowId
    FROM @Tbl
)
SELECT
    A.CusomerID ,
    A.Status ,
    A.DateStart ,
    CASE WHEN A.DateEnd = '9999-01-01' THEN NULL
         ELSE A.DateEnd END AS DateEnd
FROM
(
    SELECT
        CTE.CusomerID,
        CTE.Status,
        MIN(CTE.DateStart) AS DateStart,
        MAX(CTE.DateEnd) AS DateEnd
    FROM
        CTE
    GROUP BY
        CTE.CusomerID,
        CTE.Status,
        CTE.StatusRowId - CTE.RowId   -- island identifier
) A
ORDER BY A.CusomerID, A.DateStart
Output
CusomerID Status DateStart DateEnd
----------- ----------- ---------- ----------
1 1 2016-01-01 2016-01-08
1 2 2016-01-08 2016-01-10
1 1 2016-01-10 2016-01-13
1 3 2016-01-13 NULL
2 1 2016-01-01 NULL
3 2 2016-01-09 2016-01-10
3 1 2016-01-10 2016-01-14

How to get rows per day (Fill gaps when no records)

I have the following results
Date | EmployeeID
2015-11-18 | 1
2015-11-18 | 1
2015-11-18 | 1
2015-11-19 | 1
2015-11-19 | 1
2015-11-20 | 1
2015-11-20 | 1
2015-11-20 | 1
2015-11-25 | 1
But given a range of dates (2015-11-15 - 2015-11-30) I want to display something like this
Date | NbEmployees
2015-11-15 | 0
2015-11-16 | 0
2015-11-17 | 0
2015-11-18 | 3
2015-11-19 | 2
2015-11-20 | 3
2015-11-21 | 0
2015-11-22 | 0
2015-11-23 | 0
2015-11-24 | 0
2015-11-25 | 1
2015-11-26 | 0
2015-11-27 | 0
2015-11-28 | 0
2015-11-29 | 0
2015-11-30 | 0
I've been using this approach, but I only get the values from the table with data
DECLARE #StartDate DATE = '2015-11-15 00:00:00', #EndDate DATE = '2015-11-30 23:59:00'
DECLARE #CurrentDate DATE = #StartDate
DECLARE #DateRange TABLE (CurrentDate DATETIME)
WHILE(#CurrentDate <= #EndDate)
BEGIN
INSERT INTO #DateRange VALUES(#CurrentDate)
SET #CurrentDate = DATEADD(DAY, 1, #CurrentDate)
END
SELECT r.CurrentDate, COUNT(EmployeeID)
FROM Employee e
RIGHT JOIN #DateRange r ON e.HireDate = r.Date
Results:
Date | NbEmployees
2015-11-18 | 3
2015-11-19 | 2
2015-11-20 | 3
2015-11-25 | 1
Try it like this
-- Sample data (variables and table variables are prefixed with "@").
DECLARE @tbl TABLE([Date] DATE, EmployeeID INT);
INSERT INTO @tbl ([Date], EmployeeID) VALUES
 ('2015-11-18',1)
,('2015-11-18',1)
,('2015-11-18',1)
,('2015-11-19',1)
,('2015-11-19',1)
,('2015-11-20',1)
,('2015-11-20',1)
,('2015-11-20',1)
,('2015-11-25',1);

DECLARE @StartDate DATE = '2015-11-15 00:00:00', @EndDate DATE = '2015-11-30 23:59:00'
DECLARE @CurrentDate DATE = @StartDate
DECLARE @DateRange TABLE (CurrentDate DATETIME)

-- Fill @DateRange with one row per day of the requested range (inclusive).
WHILE(@CurrentDate <= @EndDate)
BEGIN
    INSERT INTO @DateRange VALUES(@CurrentDate)
    SET @CurrentDate = DATEADD(DAY, 1, @CurrentDate)
END

-- Every day of the range with its employee count; days without rows show 0.
SELECT CurrentDate, ISNULL(NbEmployees,0) AS NbEmployees
FROM @DateRange
LEFT JOIN
(
    -- Counts per day, computed before the join so each calendar day matches at most one row.
    SELECT COUNT(tbl.EmployeeID) AS NbEmployees
          ,tbl.[Date] AS Date
    FROM @tbl AS tbl
    GROUP BY tbl.[Date]
) AS grouped ON CurrentDate=grouped.[Date]
-- Without an ORDER BY the chronological order of the result is not guaranteed.
ORDER BY CurrentDate
The result
2015-11-15 00:00:00.000 0
2015-11-16 00:00:00.000 0
2015-11-17 00:00:00.000 0
2015-11-18 00:00:00.000 3
2015-11-19 00:00:00.000 2
2015-11-20 00:00:00.000 3
2015-11-21 00:00:00.000 0
2015-11-22 00:00:00.000 0
2015-11-23 00:00:00.000 0
2015-11-24 00:00:00.000 0
2015-11-25 00:00:00.000 1
2015-11-26 00:00:00.000 0
2015-11-27 00:00:00.000 0
2015-11-28 00:00:00.000 0
2015-11-29 00:00:00.000 0
2015-11-30 00:00:00.000 0
With something like this you could create your date-tally on the fly (avoid loops!!!)
-- Variables are prefixed with "@".
DECLARE @StartDate DATE = '2015-11-15 00:00:00', @EndDate DATE = '2015-11-30 23:59:00';

WITH DayCount(Nmbr) AS
(
    -- Running numbers 0..(number of days in the range - 1).
    -- sys.objects only serves as a row source, so the range cannot be longer
    -- than the number of rows it contains.
    SELECT TOP (DATEDIFF(DAY,@StartDate,@EndDate)+1) ROW_NUMBER() OVER(ORDER BY (SELECT NULL))-1 FROM sys.objects
)
,RunningDates(CurrentDate) AS
(
    -- Turn each number into a date by adding it to the start date.
    SELECT DATEADD(DAY,Nmbr,@StartDate) FROM DayCount
)
SELECT CurrentDate
FROM RunningDates
ORDER BY CurrentDate
This is bound to the max count of sys.objects... You'll find a lot of examples how to create running numbers on the fly or how to create a date-tally table (for example this: https://stackoverflow.com/a/32474751/5089204)
You don't need to create and maintain a list_of_dates table. You could just outer join to something like this:
For SqlServer:
SELECT
DATEADD(DAY,number,'20010101') [Date]
FROM
master..spt_values
WHERE
type = 'P'
AND DATEADD(DAY,number,'20010101') <= '20010104'
Or for Oracle, this:
select
rownum - 1 + to_date('01-JAN-2001', 'dd-mon-yyyy') dates
from
all_objects
where
rownum < to_date('01-FEB-2001', 'dd-mon-yyyy') - to_date('01-JAN-2001', 'dd-mon-yyyy') + 2
The output from this query looks like this:
DATES
---------
01-JAN-01
02-JAN-01
03-JAN-01
04-JAN-01
05-JAN-01
06-JAN-01
07-JAN-01
08-JAN-01
09-JAN-01
10-JAN-01
11-JAN-01
12-JAN-01
13-JAN-01
14-JAN-01
15-JAN-01
16-JAN-01
17-JAN-01
18-JAN-01
19-JAN-01
20-JAN-01
21-JAN-01
22-JAN-01
23-JAN-01
24-JAN-01
25-JAN-01
26-JAN-01
27-JAN-01
28-JAN-01
29-JAN-01
30-JAN-01
31-JAN-01
01-FEB-01

tsql grouping consecutive numbers in range

Is there any way to group these temperature measurement in a range with consecutive group?
I want to get group, time difference and count in between 0-7 and 8-12 and more than 12
Date Heat
01/01/2012 12:00 8
01/01/2012 12:03 9
01/01/2012 12:06 5
01/01/2012 12:09 3
01/01/2012 12:12 6
01/01/2012 12:15 7
01/01/2012 12:18 1
01/01/2012 12:21 12
01/01/2012 12:24 28
01/01/2012 12:27 25
01/01/2012 12:30 20
01/01/2012 12:33 20
01/01/2012 12:36 20
01/01/2012 12:39 12
01/01/2012 12:42 6
01/01/2012 12:45 3
01/01/2012 12:48 5
01/01/2012 12:51 7
01/01/2012 12:54 11
01/01/2012 12:57 12
01/01/2012 13:00 6
The result should be:
0-7 (01/01/2012 12:06-01/01/2012 12:18) 5
/* Rows of dataset:
01/01/2012 12:06 5
01/01/2012 12:09 3
01/01/2012 12:12 6
01/01/2012 12:15 7
01/01/2012 12:18 1
*/
0-7 (01/01/2012 12:42-01/01/2012 12:51) 4
/* Rows of dataset:
01/01/2012 12:42 6
01/01/2012 12:45 3
01/01/2012 12:48 5
01/01/2012 12:51 7
*/
8-12 (01/01/2012 12:00-01/01/2012 12:03) 2
/* Rows of dataset:
01/01/2012 12:00 8
01/01/2012 12:03 9
*/
more than 12 (01/01/2012 12:24-01/01/2012 12:36) 5
/* Rows of dataset:
01/01/2012 12:24 28
01/01/2012 12:27 25
01/01/2012 12:30 20
01/01/2012 12:33 20
01/01/2012 12:36 20
*/
8-12 (01/01/2012 12:21) 1
/* Rows of dataset:
01/01/2012 12:21 12 */
Note: because the processing order for RANK/DENSE_RANK is PARTITION BY and then ORDER BY, these functions are not useful in this case. Maybe, at some point in time, MS will introduce a supplementary syntax thus:
[DENSE_]RANK() OVER(ORDER BY fields PARTITION BY fields) so ORDER BY will be processed first and then PARTITION BY.
1) First solution (SQL2005+)
-- Test data. Created as a temp table: "DECLARE #TestData TABLE" is not valid
-- T-SQL, and a temp table keeps the name #TestData usable by later statements.
CREATE TABLE #TestData
(
    Dt   SMALLDATETIME PRIMARY KEY,
    Heat TINYINT NOT NULL
);

-- INSERT ... SELECT (no VALUES keyword); works on SQL Server 2005 as well.
INSERT #TestData(Dt, Heat)
          SELECT '2012-01-01T12:00:00', 8  UNION ALL SELECT '2012-01-01T12:03:00', 9  UNION ALL SELECT '2012-01-01T12:06:00', 5
UNION ALL SELECT '2012-01-01T12:09:00', 3  UNION ALL SELECT '2012-01-01T12:12:00', 6  UNION ALL SELECT '2012-01-01T12:15:00', 7
UNION ALL SELECT '2012-01-01T12:18:00', 1  UNION ALL SELECT '2012-01-01T12:21:00', 12 UNION ALL SELECT '2012-01-01T12:24:00', 28
UNION ALL SELECT '2012-01-01T12:27:00', 25 UNION ALL SELECT '2012-01-01T12:30:00', 20 UNION ALL SELECT '2012-01-01T12:33:00', 20
UNION ALL SELECT '2012-01-01T12:36:00', 20 UNION ALL SELECT '2012-01-01T12:39:00', 12 UNION ALL SELECT '2012-01-01T12:42:00', 6
UNION ALL SELECT '2012-01-01T12:45:00', 3  UNION ALL SELECT '2012-01-01T12:48:00', 5  UNION ALL SELECT '2012-01-01T12:51:00', 7
UNION ALL SELECT '2012-01-01T12:54:00', 11 UNION ALL SELECT '2012-01-01T12:57:00', 12 UNION ALL SELECT '2012-01-01 13:00:00', 6;

SET STATISTICS IO ON;

WITH CteSource
AS
(
    -- Classify every measurement into its heat range and number the rows chronologically.
    SELECT  a.*,
            CASE
                WHEN a.Heat >= 0 AND a.Heat <= 7  THEN 1   -- 0-7
                WHEN a.Heat >= 8 AND a.Heat <= 12 THEN 2   -- 8-12
                WHEN a.Heat > 12                  THEN 3   -- more than 12
            END AS Grp,
            ROW_NUMBER() OVER(ORDER BY a.Dt) AS RowNum
    FROM    #TestData a
), CteRecursive
AS
(
    -- Anchor: the first measurement opens group 1.
    SELECT  s.RowNum,
            s.Dt,
            s.Heat,
            s.Grp,
            1 AS DENSE_RANK_OVER_ORDERBY_PARTITIONBY
    FROM    CteSource s
    WHERE   s.RowNum = 1
    UNION ALL
    -- Step: walk to the next row; keep the group number while the range stays
    -- the same, otherwise start the next group.
    SELECT  crt.RowNum,
            crt.Dt,
            crt.Heat,
            crt.Grp,
            CASE
                WHEN crt.Grp = prev.Grp THEN prev.DENSE_RANK_OVER_ORDERBY_PARTITIONBY
                ELSE prev.DENSE_RANK_OVER_ORDERBY_PARTITIONBY + 1
            END
    FROM    CteSource crt
    INNER JOIN CteRecursive prev ON crt.RowNum = prev.RowNum + 1
)
-- One result row per consecutive run of the same heat range.
SELECT  r.DENSE_RANK_OVER_ORDERBY_PARTITIONBY,
        MAX(r.Grp) AS Grp,
        COUNT(*)   AS Cnt,
        MIN(r.Dt)  AS MinDt,
        MAX(r.Dt)  AS MaxDt
FROM    CteRecursive r
GROUP BY r.DENSE_RANK_OVER_ORDERBY_PARTITIONBY
ORDER BY r.DENSE_RANK_OVER_ORDERBY_PARTITIONBY
-- The recursion advances one row per level; the default limit of 100 levels
-- would abort the query on larger data sets.
OPTION (MAXRECURSION 0);
Results:
DENSE_RANK_OVER_ORDERBY_PARTITIONBY Grp Cnt MinDt MaxDt
----------------------------------- ----------- ----------- ----------------------- -----------------------
1 2 2 2012-01-01 12:00:00 2012-01-01 12:03:00
2 1 5 2012-01-01 12:06:00 2012-01-01 12:18:00
3 2 1 2012-01-01 12:21:00 2012-01-01 12:21:00
4 3 5 2012-01-01 12:24:00 2012-01-01 12:36:00
5 2 1 2012-01-01 12:39:00 2012-01-01 12:39:00
6 1 4 2012-01-01 12:42:00 2012-01-01 12:51:00
7 2 2 2012-01-01 12:54:00 2012-01-01 12:57:00
8 1 1 2012-01-01 13:00:00 2012-01-01 13:00:00
2) Second solution (SQL2012; better performance)
-- Group consecutive measurements that fall into the same heat range
-- (SQL Server 2012+: LAG and windowed SUM with a frame).
;WITH Classified AS
(
    -- Heat range of every measurement: 1 = 0-7, 2 = 8-12, 3 = more than 12.
    SELECT  a.*,
            CASE
                WHEN a.Heat >= 0 AND a.Heat <= 7  THEN 1
                WHEN a.Heat >= 8 AND a.Heat <= 12 THEN 2
                WHEN a.Heat > 12                  THEN 3
            END AS Grp
    FROM    #TestData a
), Flagged AS
(
    -- 1 where the range differs from the previous measurement, else 0.
    -- The first row has no predecessor (LAG is NULL) and therefore gets 0.
    SELECT  b.*,
            CASE
                WHEN LAG(b.Grp) OVER(ORDER BY b.Dt) <> b.Grp THEN 1
                ELSE 0
            END AS IsNewGroup
    FROM    Classified b
), Numbered AS
(
    -- Running total of the change flags = consecutive-group number.
    -- The explicit ROWS frame replaces the default RANGE frame.
    SELECT  c.*,
            1 + SUM(c.IsNewGroup) OVER(ORDER BY c.Dt ROWS UNBOUNDED PRECEDING) AS DENSE_RANK_OVER_ORDERBY_PARTITIONBY
    FROM    Flagged c
)
SELECT  d.DENSE_RANK_OVER_ORDERBY_PARTITIONBY,
        MAX(d.Grp) AS Grp,
        COUNT(*)   AS Cnt,     -- number of measurements in the run, as in the first solution
        MIN(d.Dt)  AS MinDt,
        MAX(d.Dt)  AS MaxDt
FROM    Numbered d
GROUP BY d.DENSE_RANK_OVER_ORDERBY_PARTITIONBY
ORDER BY d.DENSE_RANK_OVER_ORDERBY_PARTITIONBY;
Here's an alternative solution for SQL Server 2005 or newer version:
WITH auxiliary (HeatID, MinHeat, MaxHeat, HeatDescr) AS (
    -- Lookup of heat ranges; a NULL MaxHeat means "no upper bound".
              SELECT 1, 0 , 7   , '0-7'
    UNION ALL SELECT 2, 8 , 12  , '8-12'
    UNION ALL SELECT 3, 13, NULL, 'more than 12'
),
datagrouped AS (
    -- Attach the range to every measurement. The difference between the overall
    -- row number and the row number within the range is the same for all rows
    -- of one consecutive run of that range.
    SELECT
        d.*,
        a.HeatDescr,
        ROW_NUMBER() OVER (                      ORDER BY d.Date)
      - ROW_NUMBER() OVER (PARTITION BY a.HeatID ORDER BY d.Date) AS grp
    FROM data AS d
    INNER JOIN auxiliary AS a
        ON  d.Heat >= a.MinHeat
        AND d.Heat <= ISNULL(a.MaxHeat, 0x7fffffff)
)
-- One row per consecutive run, in chronological order.
SELECT
    HeatDescr,
    MIN(Date) AS DateFrom,
    MAX(Date) AS DateTo,
    COUNT(*)  AS ItemCount
FROM datagrouped
GROUP BY
    grp, HeatDescr
ORDER BY
    DateFrom
Where data is defined as follows:
CREATE TABLE data (Date datetime, Heat int);
INSERT INTO data (Date, Heat)
SELECT '01/01/2012 12:00', 8 UNION ALL
SELECT '01/01/2012 12:03', 9 UNION ALL
SELECT '01/01/2012 12:06', 5 UNION ALL
SELECT '01/01/2012 12:09', 3 UNION ALL
SELECT '01/01/2012 12:12', 6 UNION ALL
SELECT '01/01/2012 12:15', 7 UNION ALL
SELECT '01/01/2012 12:18', 1 UNION ALL
SELECT '01/01/2012 12:21', 12 UNION ALL
SELECT '01/01/2012 12:24', 28 UNION ALL
SELECT '01/01/2012 12:27', 25 UNION ALL
SELECT '01/01/2012 12:30', 20 UNION ALL
SELECT '01/01/2012 12:33', 20 UNION ALL
SELECT '01/01/2012 12:36', 20 UNION ALL
SELECT '01/01/2012 12:39', 12 UNION ALL
SELECT '01/01/2012 12:42', 6 UNION ALL
SELECT '01/01/2012 12:45', 3 UNION ALL
SELECT '01/01/2012 12:48', 5 UNION ALL
SELECT '01/01/2012 12:51', 7 UNION ALL
SELECT '01/01/2012 12:54', 11 UNION ALL
SELECT '01/01/2012 12:57', 12 UNION ALL
SELECT '01/01/2012 13:00', 6;
For the above sample, the query gives the following output:
HeatDescr DateFrom DateTo ItemCount
------------ ------------------- ------------------- ---------
8-12 2012-01-01 12:00:00 2012-01-01 12:03:00 2
0-7 2012-01-01 12:06:00 2012-01-01 12:18:00 5
8-12 2012-01-01 12:21:00 2012-01-01 12:21:00 1
more than 12 2012-01-01 12:24:00 2012-01-01 12:36:00 5
8-12 2012-01-01 12:39:00 2012-01-01 12:39:00 1
0-7 2012-01-01 12:42:00 2012-01-01 12:51:00 4
8-12 2012-01-01 12:54:00 2012-01-01 12:57:00 2
0-7 2012-01-01 13:00:00 2012-01-01 13:00:00 1
You should reach your goal using RANK()
http://msdn.microsoft.com/en-us/library/ms176102.aspx
Something like
SELECT date, heat, RANK() OVER (PARTITION BY heat ORDER BY date DESC) AS Rank
FROM tbl
Then you can GROUP the result afterwards, or write further sub-selects and UNION them, depending on the result you need.

Resources