Group a set of chain values - sql-server

I have the following table in SQL Server 2008
-- Sample data: each row states that ConvertUnitOne is equivalent to ConvertUnitTwo.
-- A table variable must be declared with an @ name; "DECLARE #name table" is not valid T-SQL.
DECLARE @UnitConvert TABLE
(
ID int identity(1,1),
ConvertUnitOne nvarchar(50),
ConvertUnitTwo nvarchar(50)
)
-- Explicit column list: the identity column ID is generated, not inserted.
INSERT INTO @UnitConvert (ConvertUnitOne, ConvertUnitTwo)
SELECT 100,500
UNION ALL SELECT 200,100
UNION ALL SELECT 500,300
UNION ALL SELECT 2000,1000
UNION ALL SELECT 3000,9000
UNION ALL SELECT 2000,700
UNION ALL SELECT 820,3000
SELECT ID, ConvertUnitOne, ConvertUnitTwo FROM @UnitConvert
Here, the value in ConvertUnitOne is equivalent to the value in ConvertUnitTwo on the same row,
so the rows form chains of linked values.
I want to display the result like this:
Group unit
1 100
200
300
500
2 700
1000
2000
3 820
3000
9000
The group number should auto-increment, based on the number of groups that can be formed.
Unit values within a group should be sorted from smallest to largest.

Thanks to Eugene Elutin from sqlservercentral.com
-- Groups units that are linked, directly or through a chain of rows, and numbers the groups.
-- A table variable must be declared with an @ name; "DECLARE #name table" is not valid T-SQL.
DECLARE @UnitConvert TABLE
(
ID int identity(1,1),
ConvertUnitOne nvarchar(50),
ConvertUnitTwo nvarchar(50)
)
-- Explicit column list: the identity column ID is generated, not inserted.
INSERT INTO @UnitConvert (ConvertUnitOne, ConvertUnitTwo)
SELECT 100,500
UNION ALL SELECT 200,100
UNION ALL SELECT 500,300
UNION ALL SELECT 2000,1000
UNION ALL SELECT 3000,9000
UNION ALL SELECT 2000,700
UNION ALL SELECT 820,3000
;WITH cteUP AS
(
-- Anchor: every direct link, read as child (ConvertUnitTwo) -> parent (ConvertUnitOne).
SELECT ConvertUnitTwo AS childUP, ConvertUnitOne AS unitUP, 0 AS Lvl
FROM @UnitConvert
UNION ALL
-- Recursive step: climb one more link up the chain, keeping the original child.
SELECT cte.childUP, u.ConvertUnitOne AS unitUP, Lvl = Lvl + 1
FROM @UnitConvert u
INNER JOIN cteUP cte ON cte.unitUP = u.ConvertUnitTwo
)
SELECT c.ConvertUnit
-- The top-most ancestor identifies the group; a unit with no ancestor is its own group key.
,DENSE_RANK() OVER (ORDER BY ISNULL(cm.unitUP, c.ConvertUnit)) AS GrpNO
FROM (SELECT ConvertUnitOne AS ConvertUnit FROM @UnitConvert
UNION
SELECT ConvertUnitTwo AS ConvertUnit FROM @UnitConvert) c
-- Highest Lvl = furthest ancestor reachable from this unit.
OUTER APPLY (SELECT TOP 1 unitUP FROM cteUP m WHERE
m.childUP = c.ConvertUnit ORDER BY Lvl DESC) cm
-- Without ORDER BY the row order is undefined. Units are stored as nvarchar, so sort by
-- length first to get numeric order for non-negative integer strings without a CAST
-- that would fail on non-numeric units.
ORDER BY GrpNO, LEN(c.ConvertUnit), c.ConvertUnit

Related

SQL Server 2016 - Running Count and Sum for a 24 hours sliding window

I am trying to count orders over a 24-hour sliding window. I have a 'datetime' field and I'm calculating the 24-hour window aggregating at the minute level. The count should restart every time the gap between two consecutive orders is over 1440 minutes, or when the running time of consecutive orders is over 1440 minutes.
Environment is SQL server 2016, I can create Temp tables but no physical tables and no memory-optimized objects (I guess anything working on 2012+ should work).
I tried an inner join on the same table and tested with recursive CTEs, ROW_NUMBER etc. but the issue is that there is never a set number of rows for the 24 hours window and the base time from which to calculate the start of the period changes. The only constant I have is the 24 hours time span.
Tried the following:
https://www.red-gate.com/simple-talk/sql/t-sql-programming/calculating-values-within-a-rolling-window-in-transact-sql/
Calculate running total / running balance
Cross apply seems to be working for the most part but in some instances - when calculating the running 24 hours window - it isn't. I tried changing the datetime conditions in the WHERE clause in many ways but I still can't figure out how to get it to work correctly.
I thought about creating a reset event at the 24-hour mark as shown here https://blog.jooq.org/2015/05/12/use-this-neat-window-function-trick-to-calculate-time-differences-in-a-time-series/ but at this point my brain is melting and I can't even get the logic straight.
-- Rebuilds the sample orders table used by the sliding-window query below.
-- DROP TABLE IF EXISTS requires SQL Server 2016 or later.
DROP TABLE IF EXISTS #Data
CREATE TABLE #Data
(
START_TIME DATETIME
,ORDER_ID NUMERIC(18,0)
,PROD_ID NUMERIC(18,0)
,ACC_ID NUMERIC(18,0)
);
-- Explicit column list so the load does not depend on the table's column order.
INSERT INTO #Data (START_TIME, ORDER_ID, PROD_ID, ACC_ID)
SELECT '2018-06-22 11:00:00.000', 198151606, 58666, 1601554883
UNION ALL SELECT '2018-07-09 10:15:00.000',2008873061,58666,1601554883
UNION ALL SELECT '2018-07-09 12:33:00.000',2009269222,58666,1601554883
UNION ALL SELECT '2018-07-10 08:29:00.000',2010735393,58666,1601554883
UNION ALL SELECT '2018-07-10 10:57:00.000',2010735584,58666,1601554883
UNION ALL SELECT '2018-06-27 23:53:00.000',1991467555,58666,2300231016
UNION ALL SELECT '2018-06-28 00:44:00.000',1991583916,58666,2300231016
UNION ALL SELECT '2018-07-04 04:15:00.000',2001154497,58666,2300231016
UNION ALL SELECT '2018-07-04 15:44:00.000',2001154818,58666,2300231016
UNION ALL SELECT '2018-07-04 21:30:00.000',2002057919,58666,2300231016
UNION ALL SELECT '2018-07-05 02:09:00.000',1200205808,58666,2300231016
UNION ALL SELECT '2018-07-05 04:15:00.000',2200205814,58666,2300231016
UNION ALL SELECT '2018-07-05 17:23:00.000',3200370070,58666,2300231016
UNION ALL SELECT '2018-07-05 18:07:00.000',4200370093,58666,2300231016
UNION ALL SELECT '2018-07-06 20:15:00.000',5200571962,58666,2300231016
UNION ALL SELECT '2018-07-07 07:45:00.000',6200571987,58666,2300231016
UNION ALL SELECT '2018-07-07 12:13:00.000',7200571993,58666,2300231016
UNION ALL SELECT '2018-07-09 18:29:00.000',8200939551,58666,2300231016
UNION ALL SELECT '2018-07-09 21:05:00.000',9200939552,58666,2300231016
UNION ALL SELECT '2018-07-11 21:31:00.000',2011107311,58666,2300231016
UNION ALL SELECT '2018-06-27 18:23:00.000',1991016382,58669,2300231016
UNION ALL SELECT '2018-06-27 19:07:00.000',1991181363,58669,2300231016
UNION ALL SELECT '2018-06-27 19:28:00.000',1991181374,58669,2300231016
UNION ALL SELECT '2018-06-28 01:44:00.000',1991583925,58669,2300231016
UNION ALL SELECT '2018-06-28 02:19:00.000',1991583946,58669,2300231016
UNION ALL SELECT '2018-07-03 10:15:00.000',1999231747,58669,2300231016
UNION ALL SELECT '2018-07-03 10:45:00.000',2000293678,58669,2300231016
UNION ALL SELECT '2018-07-03 14:22:00.000',200029380,58669,2300231016
UNION ALL SELECT '2018-07-04 19:45:00.000',2002057789,58669,2300231016
UNION ALL SELECT '2018-07-04 21:00:00.000',1200205781,58669,2300231016
UNION ALL SELECT '2018-07-05 15:12:00.000',2200254833,58669,2300231016
UNION ALL SELECT '2018-07-05 17:52:00.000',3200370071,58669,2300231016
UNION ALL SELECT '2018-07-09 22:30:00.000',4200939553,58669,2300231016
UNION ALL SELECT '2018-07-09 23:23:00.000',5200939566,58669,2300231016
UNION ALL SELECT '2018-07-30 17:45:00.000',6204364207,58666,2300231016
UNION ALL SELECT '2018-07-30 23:30:00.000',7204364211,58666,2300231016
-- Asker's attempt at a 24-hour (1440-minute) running count per ACC_ID / PROD_ID.
-- TimeBetween: minutes since the previous order in the same partition; gaps of 1440 minutes
-- or more, and the first order of a partition (where LAG returns NULL), are reported as 0.
;WITH TimeBetween AS(
SELECT
ACC_ID
,PROD_ID
,ORDER_ID
,START_TIME
,TIME_BETWEEN_ORDERS = COALESCE(CASE WHEN DATEDIFF(MINUTE, LAG(START_TIME) OVER(PARTITION BY ACC_ID, PROD_ID
ORDER BY START_TIME), START_TIME) >= 1440
THEN 0
ELSE DATEDIFF(MINUTE, LAG(START_TIME) OVER(PARTITION BY ACC_ID, PROD_ID
ORDER BY START_TIME), START_TIME)
END, 0)
FROM #Data
)
SELECT
TimeBetween.ACC_ID
,TimeBetween.PROD_ID
,TimeBetween.ORDER_ID
,TimeBetween.START_TIME
,TIME_BETWEEN_ORDERS
--Not working correctly, repeats the previous time at the end of the window when it should be 0.
-- This is a plain cumulative sum over the whole partition; nothing resets it.
,RUNNING_TIME_BETWEEN_ORDERS = SUM(TIME_BETWEEN_ORDERS) OVER(PARTITION BY ACC_ID, PROD_ID ORDER BY START_TIME)
,Running24h.*
FROM TimeBetween
-- For each order, look back over the same account/product's orders in the preceding 1440 minutes.
-- TOP 1 with ORDER BY START_TIME returns the earliest such order; COUNT(*) OVER() is the
-- number of orders in that look-back range.
CROSS APPLY(SELECT TOP 1
RUNNING_COUNT_24h = COUNT(*) OVER() --Count admin units within the time window in the WHERE clause
--Check what APPLY is returning for running time
,RUNNING_TIME_BETWEEN_ORDERS_Apply = DATEDIFF(MINUTE, StageBaseApply.START_TIME, TimeBetween.START_TIME)
--Check what APPLY is using as base event anchor for the calculation
,START_TIME_Apply = StageBaseApply.START_TIME
FROM #Data AS StageBaseApply
WHERE
StageBaseApply.ACC_ID = TimeBetween.ACC_ID
AND StageBaseApply.PROD_ID = TimeBetween.PROD_ID
-- Window is (current - 1440 minutes, current]: exclusive lower bound, inclusive upper bound.
AND (StageBaseApply.START_TIME > DATEADD(MINUTE, -1440, TimeBetween.START_TIME)
AND StageBaseApply.START_TIME <= TimeBetween.START_TIME
)
ORDER BY StageBaseApply.START_TIME
) AS Running24h
ORDER BY ACC_ID,PROD_ID, START_TIME
When the running time between orders is over 24 hours the running count should re-start from 1.
Currently it repeats the last value and the time it's using for the calculation seems to be off.
Current result from CROSS APPLY with notes on where it's not working and what it should be for what I'm trying to achieve
First create a Numbers table with at least as many rows as the minutes in the maximum time range you will ever be dealing with
-- Auxiliary numbers table holding the integers 1 .. 10,000,000.
CREATE TABLE dbo.Numbers(Number INT PRIMARY KEY);
WITH Ten(N) AS
(
    -- 10 seed rows
    SELECT v.N
    FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS v(N)
)
, Hundred(N) AS (SELECT 1 FROM Ten AS x CROSS JOIN Ten AS y)                  -- 10^2 rows
, TenThousand(N) AS (SELECT 1 FROM Hundred AS x CROSS JOIN Hundred AS y)      -- 10^4 rows
, Many(N) AS (SELECT 1 FROM TenThousand AS x CROSS JOIN TenThousand AS y)     -- 10^8 rows
, Seq AS
(
    -- Number the generated rows and keep only the first ten million.
    SELECT TOP (10000000)
           ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N
    FROM Many
)
INSERT INTO dbo.Numbers
SELECT Seq.N
FROM Seq
And then you should be able to use something like this (I'm assuming that all start times are exact minutes and there are no duplicates per ACC_ID,PROD_ID,START_TIME as shown in your example data, if there are you will need to pre-aggregate at the minute level before participating in the left join)
-- Counts, for every order, how many orders of the same ACC_ID / PROD_ID fall in the
-- 1440 one-minute slots ending at that order's minute.
-- G: first and last order time per ACC_ID / PROD_ID and the span between them in minutes.
WITH G
AS (SELECT ACC_ID,
PROD_ID,
MIN = MIN(START_TIME),
MAX = MAX(START_TIME),
Range = DATEDIFF(MINUTE, MIN(START_TIME), MAX(START_TIME))
FROM #Data
GROUP BY ACC_ID,
PROD_ID),
-- E: one row per minute slot of each group (Number 1 .. Range + 1), taken from dbo.Numbers.
E
AS (SELECT *
FROM G
JOIN dbo.Numbers N
ON N.Number <= Range + 1),
-- R: attach the order (if any) that starts exactly on each minute slot. COUNT(D.START_TIME)
-- skips the NULLs of empty slots, so the 1440-row frame counts orders, not minutes.
R AS (SELECT E.ACC_ID,
E.PROD_ID,
D.START_TIME,
Cnt = COUNT(D.START_TIME) OVER (PARTITION BY E.ACC_ID, E.PROD_ID
ORDER BY DATEADD(MINUTE, NUMBER-1, MIN)
ROWS BETWEEN 1439 PRECEDING AND CURRENT ROW)
FROM E
LEFT JOIN #Data D
ON D.ACC_ID = E.ACC_ID
AND D.PROD_ID = E.PROD_ID
AND D.START_TIME = DATEADD(MINUTE, NUMBER-1, MIN) )
-- Keep only the slots that hold an actual order.
SELECT *
FROM R
WHERE START_TIME IS NOT NULL
ORDER BY ACC_ID,
PROD_ID,
START_TIME
After finding this post on how to reset a running sum, I think I may have finally been able to crack this nut. Not sure about how well it scales but it is working.
I also added a new column for order quantity since it may be useful sometimes to track the orders running total during the same time window.
The sliding time window can be set in this CASE statement:
CASE WHEN RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG >= 1440 THEN 0 ELSE RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG
END
-- Rebuilds the sample orders table, including the order quantity column.
-- DROP TABLE IF EXISTS requires SQL Server 2016 or later.
DROP TABLE IF EXISTS #Data
CREATE TABLE #Data
(
ORDER_TIME DATETIME
,ORDER_ID NUMERIC(18,0)
,PROD_ID NUMERIC(18,0)
,ACCOUNT_ID NUMERIC(18,0)
,ORDER_QUANTITY INT
);
-- Explicit column list so the load does not depend on the table's column order.
INSERT INTO #Data (ORDER_TIME, ORDER_ID, PROD_ID, ACCOUNT_ID, ORDER_QUANTITY)
SELECT '2018-06-22 11:00:00.000', 1981516061, 158666, 1601554883,5
UNION ALL SELECT '2018-07-09 10:15:00.000',2008873062,158666,1601554883,3
UNION ALL SELECT '2018-07-09 12:33:00.000',2009269223,158666,1601554883,2
UNION ALL SELECT '2018-07-10 08:29:00.000',2010735394,158666,1601554883,4
UNION ALL SELECT '2018-07-10 10:57:00.000',2010735584,158666,1601554883,7
UNION ALL SELECT '2018-06-27 23:53:00.000',1991467553,158666,2300231016,6
UNION ALL SELECT '2018-06-28 00:44:00.000',1991583913,158666,2300231016,6
UNION ALL SELECT '2018-07-04 04:15:00.000',2001154492,158666,2300231016,4
UNION ALL SELECT '2018-07-04 15:44:00.000',2001154814,158666,2300231016,5
UNION ALL SELECT '2018-07-04 21:30:00.000',2002057915,158666,2300231016,4
UNION ALL SELECT '2018-07-05 02:09:00.000',2002058086,158666,2300231016,4
UNION ALL SELECT '2018-07-05 04:15:00.000',2002058147,158666,2300231016,3
UNION ALL SELECT '2018-07-05 17:23:00.000',2003700706,158666,2300231016,2
UNION ALL SELECT '2018-07-05 18:07:00.000',2003700938,158666,2300231016,1
UNION ALL SELECT '2018-07-06 20:15:00.000',2005719626,158666,2300231016,7
UNION ALL SELECT '2018-07-07 07:45:00.000',2005719879,158666,2300231016,8
UNION ALL SELECT '2018-07-07 12:13:00.000',2005719931,158666,2300231016,9
UNION ALL SELECT '2018-07-09 18:29:00.000',2009395510,158666,2300231016,8
UNION ALL SELECT '2018-07-09 21:05:00.000',2009395523,158666,2300231016,6
UNION ALL SELECT '2018-07-11 21:31:00.000',2011107312,158666,2300231016,5
UNION ALL SELECT '2018-06-27 18:23:00.000',1991016381,258669,2300231016,4
UNION ALL SELECT '2018-06-27 19:07:00.000',1991181365,258669,2300231016,4
UNION ALL SELECT '2018-06-27 19:28:00.000',1991181376,258669,2300231016,3
UNION ALL SELECT '2018-06-28 01:44:00.000',1991583923,258669,2300231016,9
UNION ALL SELECT '2018-06-28 02:19:00.000',1991583943,258669,2300231016,2
UNION ALL SELECT '2018-07-03 10:15:00.000',1999231742,258669,2300231016,1
UNION ALL SELECT '2018-07-03 10:45:00.000',2000293679,258669,2300231016,1
UNION ALL SELECT '2018-07-03 14:22:00.000',2000293804,258669,2300231016,3
UNION ALL SELECT '2018-07-04 19:45:00.000',2002057785,258669,2300231016,2
UNION ALL SELECT '2018-07-04 21:00:00.000',2002057813,258669,2300231016,1
UNION ALL SELECT '2018-07-05 15:12:00.000',2002548332,258669,2300231016,7
UNION ALL SELECT '2018-07-05 17:52:00.000',2003700719,258669,2300231016,6
UNION ALL SELECT '2018-07-09 22:30:00.000',2009395530,258669,2300231016,5
UNION ALL SELECT '2018-07-09 23:23:00.000',2009395666,258669,2300231016,3
UNION ALL SELECT '2018-07-30 17:45:00.000',2043642075,158666,2300231016,2
UNION ALL SELECT '2018-07-30 23:30:00.000',2043642114,158666,2300231016,4
-- Running order count and running quantity per ACCOUNT_ID / PROD_ID, restarting whenever the
-- accumulated time between consecutive orders reaches 1440 minutes (24 hours).
;WITH NextEventLag AS(
    --Each order with its sequence number and the gap (in minutes) since the previous order.
    SELECT
        ORDER_TIME
        ,ORDER_ID
        ,PROD_ID
        ,ACCOUNT_ID
        ,RowNum = ROW_NUMBER() OVER(PARTITION BY ACCOUNT_ID, PROD_ID ORDER BY ORDER_TIME)
        --NEXT_ORDER_TIME_LAG: time difference between two consecutive order times.
        --LAG defaults to the row's own ORDER_TIME, so the first order of a partition gets 0.
        ,NEXT_ORDER_TIME_LAG = DATEDIFF(MINUTE, LAG(ORDER_TIME, 1, ORDER_TIME) OVER(PARTITION BY ACCOUNT_ID, PROD_ID ORDER BY ORDER_TIME), ORDER_TIME)
        ,ORDER_QUANTITY
    FROM #Data
)
,RunningOrders AS(
    --Anchor: the first order of every partition starts a window with 0 elapsed minutes.
    SELECT
        RowNum
        ,ORDER_TIME
        ,ACCOUNT_ID
        ,PROD_ID
        ,NEXT_ORDER_TIME_LAG
        ,LAG_LESS_THAN_24h = 0
        ,ORDER_QUANTITY
    FROM NextEventLag
    WHERE RowNum = 1
    UNION ALL
    --Recursive step: move to the next order (RowNum + 1) of the same partition.
    SELECT
        NextEventLag.RowNum
        ,NextEventLag.ORDER_TIME
        ,NextEventLag.ACCOUNT_ID
        ,NextEventLag.PROD_ID
        ,NextEventLag.NEXT_ORDER_TIME_LAG
        --If the elapsed minutes so far plus the gap to this order reach 1440, reset to 0.
        --Change 1440 to the desired window length in minutes.
        ,LAG_LESS_THAN_24h = CASE WHEN RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG >= 1440 THEN 0
                                  ELSE RunningOrders.LAG_LESS_THAN_24h + NextEventLag.NEXT_ORDER_TIME_LAG
                             END
        ,NextEventLag.ORDER_QUANTITY
    FROM RunningOrders
    INNER JOIN NextEventLag ON RunningOrders.RowNum + 1 = NextEventLag.RowNum
        AND RunningOrders.ACCOUNT_ID = NextEventLag.ACCOUNT_ID
        AND RunningOrders.PROD_ID = NextEventLag.PROD_ID
)
,GroupedLags AS(
    --Groups = number of window starts (LAG_LESS_THAN_24h = 0) at or before this order.
    --The original used CROSS APPLY (SELECT TOP 1 COUNT(*) OVER(ORDER BY ...)) with no ORDER BY
    --on the TOP, so the row it returned was not guaranteed; it also re-read the recursive CTE
    --for every row. A windowed conditional sum computes the same counter in one pass.
    --The RANGE frame includes all rows sharing the current ORDER_TIME, matching the original "<=" filter.
    SELECT RunningOrders.*
        ,Groups = SUM(CASE WHEN RunningOrders.LAG_LESS_THAN_24h = 0 THEN 1 ELSE 0 END)
                  OVER(PARTITION BY RunningOrders.ACCOUNT_ID, RunningOrders.PROD_ID
                       ORDER BY RunningOrders.ORDER_TIME
                       RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
    FROM RunningOrders
)
select
    GroupedLags.ACCOUNT_ID
    ,GroupedLags.PROD_ID
    ,GroupedLags.ORDER_TIME
    ,GroupedLags.NEXT_ORDER_TIME_LAG
    ,GroupedLags.LAG_LESS_THAN_24h
    ,RUNNING_COUNT_24h = ROW_NUMBER() OVER(PARTITION BY GroupedLags.ACCOUNT_ID, GroupedLags.PROD_ID, GroupedLags.Groups ORDER BY GroupedLags.ORDER_TIME)
    ,RUNNING_SUM_24h = SUM(ORDER_QUANTITY) OVER(PARTITION BY GroupedLags.ACCOUNT_ID, GroupedLags.PROD_ID, GroupedLags.Groups ORDER BY GroupedLags.ORDER_TIME)
from GroupedLags
ORDER BY
    GroupedLags.ACCOUNT_ID
    ,GroupedLags.PROD_ID
    ,GroupedLags.ORDER_TIME
--The recursion goes one level per order in a partition; the default limit of 100 levels
--would abort the query for an account/product with more than about 100 orders.
OPTION (MAXRECURSION 0)
Here is the db<>fiddle demo

It works fine when I pass 10-15 inputs, but it takes more than 5 minutes for 30 inputs. I need to make it work for 100 inputs.

Here is my ask:
Go through the code and understand it.
As a first solution, the query should complete within 10 seconds for 30 inputs.
It should also perform well for 100 inputs.
My code:
/**************************************************
Populating the Array values in table variable
**************************************************/
-- "DECLARE #PUZZLE table" is not valid T-SQL: a # name is a temporary table and must be
-- created with CREATE TABLE. The later statements already reference #PUZZLE, so a temp table is used.
IF OBJECT_ID('tempdb..#PUZZLE') IS NOT NULL DROP TABLE #PUZZLE;
CREATE TABLE #PUZZLE(
ID int IDENTITY(1,1) NOT NULL,
Value int NOT NULL)
/****Sample 1*****/
INSERT INTO #PUZZLE (value)
--SELECT 0 UNION ALL
--SELECT -22 UNION ALL
--SELECT -33 UNION ALL
--SELECT -44 UNION ALL
--SELECT 55 UNION ALL
--SELECT -100 UNION ALL
--SELECT 100 UNION ALL
--SELECT 10 UNION ALL
--SELECT -30 UNION ALL
--SELECT -60 UNION ALL
--SELECT -60 UNION ALL
SELECT -60 UNION ALL
SELECT -10 UNION ALL
SELECT 10 UNION ALL
SELECT 10 UNION ALL
SELECT -10 UNION ALL
SELECT 0 UNION ALL
SELECT -22 UNION ALL
SELECT -33 UNION ALL
SELECT -44 UNION ALL
SELECT 55 UNION ALL
SELECT -100 UNION ALL
SELECT 100 UNION ALL
SELECT 10 UNION ALL
SELECT -30 UNION ALL
SELECT -60 UNION ALL
SELECT -60 UNION ALL
SELECT -60 UNION ALL
SELECT -10 UNION ALL
SELECT 10 UNION ALL
SELECT 10
/**************************************************
Populating possible hierarchy/path
**************************************************/
-- "DECLARE #puzHierarchy table" is not valid T-SQL: a # name is a temporary table and must be
-- created with CREATE TABLE. The queries that follow already reference #puzHierarchy.
IF OBJECT_ID('tempdb..#puzHierarchy') IS NOT NULL DROP TABLE #puzHierarchy;
CREATE TABLE #puzHierarchy (parentid int, childid int, value int)
INSERT #puzHierarchy (parentid,childid,value)
-- Root edge: the first element has no parent.
SELECT NULL AS ParentId,ID AS ChildId, Value
FROM #PUZZLE
WHERE ID = (SELECT MIN(ID) FROM #PUZZLE)
UNION ALL
-- One edge from every element B to each of the next (up to) six elements C.
SELECT B.Id,C.ID,C.Value
FROM #PUZZLE B
JOIN #PUZZLE C
ON C.ID > B.ID AND C.ID < (B.ID + 7)
--SELECT * FROM #puzHierarchy order by parentid
/*******************************************************
Logic using recursive CTE to get the path with max value
*******************************************************/
-- Enumerates every path from the last element back to the root and returns the one(s)
-- with the highest total value.
;WITH children AS
(
    -- Anchor: every edge that ends at the highest ChildId.
    SELECT h.ParentId
          ,CAST(ISNULL(CAST(h.ParentId AS NVARCHAR) + '->' ,'') + CAST(h.ChildId AS NVARCHAR) AS NVARCHAR(Max)) AS Path
          ,h.value AS PathValue
    FROM #puzHierarchy AS h
    WHERE h.ChildId = (SELECT MAX(ChildId) FROM #puzHierarchy)
    UNION ALL
    -- Recursive step: prepend the edge that leads into the current path's first node.
    SELECT up.ParentId
          ,CAST(ISNULL(CAST(up.ParentId AS NVARCHAR) + '->' ,'') + acc.Path AS NVARCHAR(Max))
          ,(up.value + acc.PathValue)
    FROM children AS acc
    INNER JOIN #puzHierarchy AS up
        ON up.ChildId = acc.ParentId
)
-- Completed paths are those that reached the root edge (ParentId IS NULL).
SELECT best.[Path], best.PathValue
FROM children AS best
WHERE best.ParentId IS NULL
  AND best.PathValue = (SELECT MAX(PathValue) FROM children WHERE ParentId IS NULL)
A. Your code goes through too many cycles/data unrelated to result you want.
B. After running your sample data, the results are not accurate.
Parentid Path PathValue
NULL 1->3->4->6->10->12->13->19->20 145
NULL 1->3->4->10->12->13->19->20 145
The first result is wrong.
Basically you just want starting from ParentId IS NULL and ChildId = 1, among ParentId = 1 finding which ChildId has the MAX value, this ChildId becomes ParentID to find next MAX value, and so on.
-- Greedy walk: start at the root edge and, at every node, follow the child with the highest value.
;WITH cte_base AS (
    -- Rank the children of each parent by value, best first.
    -- Childid is a tie-breaker so equal values always resolve the same way.
    SELECT Parentid
         , Childid
         , Value
         , ROW_NUMBER() OVER(PARTITION BY Parentid ORDER BY Value DESC, Childid) AS Rownum
    FROM #puzHierarchy
), cte_re AS (
    -- Anchor: the root edge (no parent); the path starts with its child id.
    SELECT ParentId
         , Childid
         , CAST(CAST(ChildId AS NVARCHAR) AS NVARCHAR(Max)) AS Path
         , Value As PathValue
         , Rownum
    FROM cte_base
    WHERE Parentid IS NULL
    UNION ALL
    -- Recursive step: extend the path with the best child of the node just reached.
    -- The original appended b.parentid, which equals the node already at the end of the
    -- path, so every node was repeated and the final node never appeared.
    SELECT b.parentid, b.childid
         , CAST(r.Path + '->' + CAST(b.childid AS NVARCHAR) AS NVARCHAR(Max))
         , (b.value + r.PathValue) As PathValue
         , b.Rownum
    FROM cte_base AS b
    INNER JOIN cte_re AS r
        ON b.Parentid = r.childid
    WHERE b.Rownum = 1
)
-- Returns one row per step of the walk; the row with the longest Path is the complete route.
SELECT ParentId, Childid, Path, PathValue, Rownum
FROM cte_re
(I changed your sample table variable to a temporary table.)

Trying to pivot event dates in t-sql without using a cursor

I have the following table:
What I want is to get to this:
EventTypeId 1 and 3 are valid start events and EventTypeId of 2 is the only valid end event.
I have tried to do a pivot, but I don't believe a pivot will get me the multiple events for a person in the result set.
-- Counts each person's events per event type (1, 3 and 2) as columns.
SELECT pvt.PersonId, pvt.[1], pvt.[3], pvt.[2]
FROM
(
    SELECT pe.PersonId, pe.EventTypeId, pe.EventDate
    FROM #PersonEvent AS pe
) AS src
PIVOT
(
    COUNT(EventDate)
    FOR EventTypeId IN ([1],[3],[2])
) AS pvt
-- One row per person: earliest start event (type 1 or 3) and earliest end event (type 2).
SELECT
    pe.PersonID,
    MIN(CASE WHEN pe.EventTypeId = 1 OR pe.EventTypeId = 3 THEN pe.EventDate END) AS StartDate,
    MIN(CASE WHEN pe.EventTypeId = 2 THEN pe.EventDate END) AS EndDate
FROM #PersonEvent AS pe
GROUP BY pe.PersonID
I can do a cursor, but my original table is over 90,000 rows, and this is to be for a report, so I don't think I can use that option. Any other thoughts that I might be missing?
Assuming the table is called [dbo].[PersonEventRecords] this will work...
-- Pairs each start event (type 1 or 3) with the end event (type 2) that has the next
-- EventSequence for the same person; unmatched starts and ends are kept with NULLs.
WITH StartEvents AS
(
    SELECT *
    FROM [dbo].[PersonEventRecords]
    WHERE EventTypeId = 1 OR EventTypeId = 3
),
EndEvents AS
(
    SELECT *
    FROM [dbo].[PersonEventRecords]
    WHERE EventTypeId = 2
)
SELECT
    ISNULL(s.PersonId, e.PersonId) AS PersonId,
    s.EventTypeId AS StartEventTypeId,
    s.EventDate AS StartEventDate,
    e.EventTypeId AS EndEventTypeId,
    e.EventDate AS EndEventDate
FROM StartEvents AS s
FULL OUTER JOIN EndEvents AS e
    ON e.PersonId = s.PersonId
   AND e.EventSequence - 1 = s.EventSequence
ORDER BY
    ISNULL(s.PersonId, e.PersonId),
    ISNULL(s.EventDate, e.EventDate);
/**** TEST DATA ****/
/**** TEST DATA: recreate and load [dbo].[PersonEventRecords] ****/
IF OBJECT_ID('[dbo].[PersonEventRecords]') IS NOT NULL
    DROP TABLE [dbo].[PersonEventRecords];
CREATE TABLE [dbo].[PersonEventRecords]
(
    PersonId      INT,
    EventTypeId   INT,
    EventDate     DATE,
    EventSequence INT
);
INSERT INTO [dbo].[PersonEventRecords] (PersonId, EventTypeId, EventDate, EventSequence)
VALUES
    (1, 1, '2012-10-13', 1),
    (1, 2, '2012-10-20', 2),
    (1, 1, '2012-11-01', 3),
    (1, 2, '2012-11-13', 4),
    (2, 1, '2012-05-07', 1),
    (2, 2, '2012-06-01', 2),
    (2, 3, '2012-07-01', 3),
    (2, 2, '2012-08-30', 4),
    (3, 2, '2012-04-05', 1),
    (3, 1, '2012-05-04', 2),
    (3, 2, '2012-05-24', 3),
    (4, 1, '2013-01-03', 1),
    (4, 1, '2013-02-20', 2),
    (4, 2, '2013-03-20', 3);
Try this
-- Part 1: every start event (type 1 or 3) with the end event (type 2) that immediately
-- follows it in EventSequence, if there is one.
SELECT StartEv.PersonId, StartEv.EventTypeId, StartEv.EventDate, EndEv.EventTypeId, EndEv.EventDate
FROM PersonEvent AS StartEv
OUTER APPLY(
    SELECT TOP 1 Nxt.EventTypeId, Nxt.EventDate
    FROM PersonEvent AS Nxt
    WHERE Nxt.PersonId = StartEv.PersonId
      AND Nxt.EventSequence = StartEv.EventSequence + 1
      AND Nxt.EventTypeId = 2
) AS EndEv
WHERE StartEv.EventTypeId IN (1, 3)
UNION
-- Part 2: end events that have no event at all at the preceding sequence number.
SELECT LoneEnd.PersonId, NULL, NULL, LoneEnd.EventTypeId, LoneEnd.EventDate
FROM PersonEvent AS LoneEnd
WHERE LoneEnd.EventTypeId = 2
  AND NOT EXISTS(
        SELECT *
        FROM PersonEvent AS Prev
        WHERE Prev.PersonId = LoneEnd.PersonId
          AND Prev.EventSequence = LoneEnd.EventSequence - 1)
It is not completely clear how you want the result to be ordered – add an ORDER BY as needed.

Finding the id of the nearest neighbour in SQL

I have a table, #geo, with points in geolocation.
Id geolocation
9201 0xE6100000010CE33995EB71164CC054791243B87441C0
9202 0xE6100000010C56B77A4E7A1B4CC0D15790662C6E41C0
I calculated the distance to the nearest neighbour for each data point.
I have 1000 points for each of 24 months. Here I reproduce my code with the first 19 points of one month:
-- Sample points: an id plus a geography value, supplied as a binary literal.
create table #Geo
(
id int
,geolocation geography
)
-- Loads the 19 sample points (ids 224-242).
INSERT INTO #geo (id, geolocation)
Select 224,0xE6100000010CE33995EB71164CC054791243B87441C0 UNION ALL
Select 225,0xE6100000010CE7D4BE4EA4184CC0CC947B26A07341C0 UNION ALL
Select 226,0xE6100000010C97A8DE1AD81A4CC0139B8F6B436941C0 UNION ALL
Select 227,0xE6100000010C2EAC1BEF8E164CC0DF80E03B7B7341C0 UNION ALL
Select 228,0xE6100000010CE49BD09887174CC00CADD206F57341C0 UNION ALL
Select 229,0xE6100000010C2B009DB436184CC0FD8E1B5D297441C0 UNION ALL
Select 230,0xE6100000010CFBAC32535A154CC054C72AA5677241C0 UNION ALL
Select 231,0xE6100000010CAE9E93DE37024CC0A167B3EA736141C0 UNION ALL
Select 232,0xE6100000010C70B1A206D3EC4BC0B4024356B76241C0 UNION ALL
Select 233,0xE6100000010CEA78CC40651C4CC097C5C4E6E30A41C0 UNION ALL
Select 234,0xE6100000010CDBFD2AC0770F4CC09E996038D76E41C0 UNION ALL
Select 235,0xE6100000010CA1CB487B8B794BC0C84C9AED277041C0 UNION ALL
Select 236,0xE6100000010CC0076D4108154CC07DD8A069E86E41C0 UNION ALL
Select 237,0xE6100000010C103B53E8BC1E4CC062670A9DD7E03FC0 UNION ALL
Select 238,0xE6100000010CDD94A1130A004CC0ACA6B697DEBB3FC0 UNION ALL
Select 239,0xE6100000010CAB750381252B4BC0F1DDFF2A343D41C0 UNION ALL
Select 240,0xE6100000010CD925AAB706CA4BC045813E91275D40C0 UNION ALL
Select 241,0xE6100000010CD1EB4FE2F3134BC014DA6ABD7C5441C0 UNION ALL
Select 242,0xE6100000010CB32A5F238B144CC0C3E37020037441C0
--select * from #Geo
-- Distance from each point to its nearest other point.
SELECT src.id
     , MIN(src.GeoLocation.STDistance(other.GeoLocation)) AS dist
FROM #geo AS src
INNER JOIN #geo AS other
    ON other.id <> src.id
GROUP BY src.id
id dist
224 608.936575787757
225 454.190509008084
... ...
Now I need to get the nearest neighbor's id:
id dist Id_with_minimum_distance
224 608.936575787757 ?
225 454.190509008084 ?
Thank you for your help.
You can use a subquery with row_number to filter out all except the nearest com2 rows:
-- For every point, rank all other points by distance and keep only the closest one.
-- Reads from #geo, the temp table created in the question (the original referenced a
-- permanent table named geo).
;WITH pair_distance AS
(
    -- Every ordered pair of distinct points with the distance between them.
    SELECT com1.id AS id1
         , com2.id AS id2
         , com1.GeoLocation.STDistance(com2.GeoLocation) AS dist
    FROM #geo AS com1
    INNER JOIN #geo AS com2
        ON com1.id <> com2.id
),
ranked AS
(
    -- rn = 1 is the nearest neighbour; id2 breaks ties so the result is repeatable.
    SELECT ROW_NUMBER() OVER (PARTITION BY id1 ORDER BY dist, id2) AS rn
         , id1
         , id2
         , dist
    FROM pair_distance
)
SELECT rn, id1, id2, dist
FROM ranked
WHERE rn = 1 -- Only nearest com2
Example at SQL Fiddle.

How to Calculate Median?

I need to calculate a median on a set of data, so I created a temp table and have tried to follow some articles online with zero success, here is what I am working with:
-- Per-profile answer count and score for partner 16, limited to profiles with exactly
-- five answers; then the number of such profiles and their average score.
CREATE TABLE #QuizTemp (QuizProfileID INT, Cnt INT, TotalScore INT)

INSERT INTO #QuizTemp
SELECT qa.QuizProfileID,
       COUNT(qa.QuizProfileID) AS Cnt,
       SUM(qa.AnsweredYes) AS TotalScore
FROM QuizAnswers AS qa
INNER JOIN Quizzes AS q
    ON qa.QuizID = q.QuizID
WHERE q.PartnerID = 16
GROUP BY qa.QuizProfileID
HAVING COUNT(qa.QuizProfileID) = 5

SELECT COUNT(*) AS CNT, AVG(TotalScore) AS AvgTotalScore
FROM #QuizTemp

DROP TABLE #QuizTemp
The average works great and now I need the Median.
Try capturing the row count on INSERT and then selecting the row that is in the middle using ROW_NUMBER():
-- Median of TotalScore: capture the inserted row count, number the rows by score,
-- and average the middle row (odd count) or the two middle rows (even count).
CREATE TABLE #QuizTemp (QuizProfileID INT,Cnt INT,TotalScore INT)
-- Declared once; local variables use @ ("DECLARE #Rows" is not valid T-SQL).
DECLARE @Rows int
INSERT INTO #QuizTemp (QuizProfileID, Cnt, TotalScore)
SELECT QuizAnswers.QuizProfileID, COUNT(QuizAnswers.QuizProfileID) AS Cnt, SUM(QuizAnswers.AnsweredYes) As TotalScore
FROM QuizAnswers INNER JOIN
Quizzes ON QuizAnswers.QuizID = Quizzes.QuizID
WHERE (Quizzes.PartnerID = 16)
GROUP BY QuizAnswers.QuizProfileID
HAVING COUNT(QuizAnswers.QuizProfileID)= 5
-- @@ROWCOUNT must be read immediately after the INSERT; any other statement resets it.
SELECT @Rows = @@ROWCOUNT
;with allrows as
(
-- ROW_NUMBER needs an OVER clause, and the CTE needs a FROM to read the scores.
SELECT TotalScore, ROW_NUMBER() OVER (ORDER BY TotalScore) AS RowNumber
FROM #QuizTemp
)
-- (@Rows + 1) / 2 and (@Rows + 2) / 2 are the same row for an odd count and the two middle
-- rows for an even count (integer division). 1.0 * avoids integer truncation of the average.
SELECT @Rows AS CNT, AVG(1.0 * TotalScore) AS MedianScore
FROM allrows
WHERE RowNumber IN ((@Rows + 1) / 2, (@Rows + 2) / 2)
DROP TABLE #QuizTemp
EDIT
Here is a solution without a temp table:
-- Median without a temp table: number the rows by score and pick the middle row.
-- A table variable must be declared with an @ name; "DECLARE #name table" is not valid T-SQL.
DECLARE @YourTable table (TotalScore int)
INSERT INTO @YourTable (TotalScore) Values (1)
INSERT INTO @YourTable (TotalScore) Values (2)
INSERT INTO @YourTable (TotalScore) Values (3)
INSERT INTO @YourTable (TotalScore) Values (40)
INSERT INTO @YourTable (TotalScore) Values (50)
INSERT INTO @YourTable (TotalScore) Values (60)
INSERT INTO @YourTable (TotalScore) Values (70)
;with allrows as
(
SELECT
TotalScore, ROW_NUMBER() OVER (ORDER BY TotalScore) AS RowNumber
FROM @YourTable
)
,MaxRows AS
-- CNT = total rows; Middle = CNT / 2 rounded to the nearest whole row number
-- (for an even CNT this is the lower of the two middle rows).
(SELECT MAX(RowNumber) AS CNT,CONVERT(int,ROUND(MAX(RowNumber)/2.0,0)) AS Middle FROM allrows)
SELECT
m.CNT
,(SELECT AVG(TotalScore) FROM allrows) AS AvgTotalScore
,a.TotalScore AS Median
,m.Middle AS MedianRowNumber
FROM allrows a
CROSS JOIN MaxRows m
WHERE a.RowNumber=m.Middle
OUTPUT:
CNT AvgTotalScore Median MedianRowNumber
-------------------- -------------------- ----------- --------------------
7 32 40 4
(1 row(s) affected)
if you edit the first CTE to be:
;with allrows as
(
SELECT QuizAnswers.QuizProfileID, COUNT(QuizAnswers.QuizProfileID) AS Cnt, SUM(QuizAnswers.AnsweredYes) As TotalScore
-- A select-list alias cannot be referenced inside OVER(); order by the aggregate itself.
, ROW_NUMBER() OVER (ORDER BY SUM(QuizAnswers.AnsweredYes)) AS RowNumber
FROM QuizAnswers INNER JOIN
Quizzes ON QuizAnswers.QuizID = Quizzes.QuizID
WHERE (Quizzes.PartnerID = 16)
GROUP BY QuizAnswers.QuizProfileID
HAVING COUNT(QuizAnswers.QuizProfileID)= 5
)
it should work for your query
The median of numeric values is often overthought. Just use this example:
-- "DECLARE #testTable TABLE" is not valid T-SQL: a # name is a temporary table and must be
-- created with CREATE TABLE. The statements below reference #testTable, so a temp table is used.
CREATE TABLE #testTable
(
VALUE INT
)
--INSERT INTO #testTable -- Even Test
--SELECT 3 UNION ALL
--SELECT 5 UNION ALL
--SELECT 7 UNION ALL
--SELECT 12 UNION ALL
--SELECT 13 UNION ALL
--SELECT 14 UNION ALL
--SELECT 21 UNION ALL
--SELECT 23 UNION ALL
--SELECT 23 UNION ALL
--SELECT 23 UNION ALL
--SELECT 23 UNION ALL
--SELECT 29 UNION ALL
--SELECT 40 UNION ALL
--SELECT 56
--
--INSERT INTO #testTable -- Odd Test
--SELECT 3 UNION ALL
--SELECT 5 UNION ALL
--SELECT 7 UNION ALL
--SELECT 12 UNION ALL
--SELECT 13 UNION ALL
--SELECT 14 UNION ALL
--SELECT 21 UNION ALL
--SELECT 23 UNION ALL
--SELECT 23 UNION ALL
--SELECT 23 UNION ALL
--SELECT 23 UNION ALL
--SELECT 29 UNION ALL
--SELECT 39 UNION ALL
--SELECT 40 UNION ALL
--SELECT 56
-- Median: copy the values in ascending order into a table with an identity column,
-- then average the row(s) whose ID lies in the middle.
-- "DECLARE #RowAsc TABLE" is not valid T-SQL; a # name must be created with CREATE TABLE.
CREATE TABLE #RowAsc
(
ID INT IDENTITY,
Amount INT
)
-- ORDER BY on the INSERT ... SELECT makes the identity values follow ascending VALUE.
INSERT INTO #RowAsc (Amount)
SELECT VALUE
FROM #testTable
ORDER BY VALUE ASC
-- With n rows, ID - n/2.0 lies in [0, 1] for the single middle row when n is odd and for
-- the two middle rows when n is even. 1.0 * keeps the half when two integers are averaged.
-- (The original wrapped this test in an IN subquery; its condition read only the outer row,
-- so filtering directly is equivalent.)
SELECT AVG(1.0 * ra.Amount)
FROM #RowAsc ra
WHERE ra.ID -
(
SELECT MAX(ID) / 2.0
FROM #RowAsc
) BETWEEN 0 AND 1

Resources