Recursive CTE with partition by column

Recursive CTE with partition by column - sql-server

Below is the table structure and sample data, followed by the query I am currently using:
-- Recreate the scratch table that holds the sample transactions.
DROP TABLE IF EXISTS #Transactions;

CREATE TABLE #Transactions
(
    TID int,
    amt int
);

-- Five sample rows for TID 1, loaded with an explicit column list.
INSERT INTO #Transactions (TID, amt)
VALUES
    (1,  100),
    (1,  -50),
    (1,  100),
    (1, -100),
    (1,  200);
-- Running total over #Transactions built with a recursive CTE.
;WITH y AS
(
-- Number the rows 1..n. The ordering key is TID only, so rows that share a TID
-- have no guaranteed relative order.
SELECT TID, amt, rn = ROW_NUMBER() OVER (ORDER BY TID)
FROM #Transactions
), x AS
(
-- Anchor: the first numbered row starts the running total (rt) at its own amt.
SELECT TID, rn, amt, rt = amt
FROM y
WHERE rn = 1
UNION ALL
-- Recursive step: pick the row with the next rn and add its amt to the total carried so far.
SELECT y.TID, y.rn, y.amt, x.rt + y.amt
FROM x INNER JOIN y
ON y.rn = x.rn + 1
)
SELECT TID, amt, RunningTotal = rt
FROM x
ORDER BY x.rn
-- Allow up to 10000 levels of recursion (one level per row after the first).
OPTION (MAXRECURSION 10000);
This is similar to the question "recursive cte with running balance",
but I need a running balance for each TID. Suppose I also insert the following transactions for TID = 2:
insert into #Transactions values(2, 100)
insert into #Transactions values(2, -50)
insert into #Transactions values(2, 100)
insert into #Transactions values(2, -100)
insert into #Transactions values(2, 200)
I need to achieve the same result per TID, still using the recursive CTE method and without many modifications. Please suggest a solution.

You need to include TID both in the PARTITION BY of the ROW_NUMBER() window function and in the join condition of the recursive CTE:
-- Per-TID running total via a recursive CTE.
;WITH y AS
(
    -- Restart the numbering for every TID so each group has its own rn = 1.
    -- NOTE: the table has no sequencing column, so the order of rows inside a TID
    -- is not guaranteed; ORDER BY TID is constant within the partition.
    SELECT TID, amt, rn = ROW_NUMBER() OVER (PARTITION BY TID ORDER BY TID)
    FROM #Transactions
), x AS
(
    -- Anchor: the first row of every TID seeds that TID's running total.
    SELECT TID, rn, amt, rt = amt
    FROM y
    WHERE rn = 1
    UNION ALL
    -- Recursive step: append the next row of the same TID.
    SELECT y.TID, y.rn, y.amt, x.rt + y.amt
    FROM x INNER JOIN y
        ON  y.rn  = x.rn + 1
        AND y.TID = x.TID
)
SELECT TID, amt, RunningTotal = rt
FROM x
-- Sort by TID first: rn restarts at 1 for each TID, so sorting by rn alone
-- would interleave the rows of different TIDs.
ORDER BY x.TID, x.rn
OPTION (MAXRECURSION 10000);
Is there any compelling reason why you must use a recursive CTE instead of a simple SUM() window function?

Add Partition with TID
FIDDLE DEMO
-- Per-TID running total, recursive-CTE flavour.
;WITH numbered AS
(
    -- Sequence number that restarts at 1 inside every TID.
    SELECT
        t.TID,
        t.amt,
        ROW_NUMBER() OVER (PARTITION BY t.TID ORDER BY t.TID) AS rn
    FROM #Transactions AS t
),
running AS
(
    -- Seed: row 1 of each TID carries its own amount as the total.
    SELECT n.TID, n.rn, n.amt, n.amt AS rt
    FROM numbered AS n
    WHERE n.rn = 1

    UNION ALL

    -- Step: add the following row of the same TID to the carried total.
    SELECT nxt.TID, nxt.rn, nxt.amt, prev.rt + nxt.amt
    FROM running AS prev
    INNER JOIN numbered AS nxt
        ON  nxt.TID = prev.TID
        AND nxt.rn  = prev.rn + 1
)
SELECT
    r.TID,
    r.amt,
    r.rt AS RunningTotal
FROM running AS r
ORDER BY r.TID, r.rn
OPTION (MAXRECURSION 10000);

You don't need a recursive CTE. You can simply go for a PARTITION BY based approach:
-- Running total per tid using a windowed SUM instead of recursion.
-- The window is ordered by tid only, so rows within one tid accumulate in no guaranteed order.
SELECT
    t.tid,
    t.AMT,
    SUM(t.amt) OVER (
        PARTITION BY t.tid
        ORDER BY t.tid
        ROWS UNBOUNDED PRECEDING   -- shorthand for BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS RunningTotal
FROM #Transactions AS t
Update:
Sorry, I have only just gone through the comments.
If you have to use a CTE, go with the answer by @Squirrel. If you are fine with window functions, you can use the approach above.

Related

Recursion with CTE not working for non-consecutive days but very slow with ROW_NUMBER

I have to calculate Exponential Moving Average for stock price. In the query I made the recursion based on the PriceDate. But this is actually not working since there are missing days (weekends, holidays). I have tried to do it with ROW_NUMBER but it is running really slow (more than 40 minutes before I cancel it).
I am looking for a way to do it with dates but to take into account that there are missing days. Or if it is with ROW_NUMBER, I will need a serious speed optimization:
-- 12-period exponential moving average (EMA) per ticker, seeded from the simple moving average.
DECLARE @timePeriod12 INT = 12;
DECLARE @smoothingFactor12 FLOAT = 2.0 / (@timePeriod12 + 1);

;WITH SMA AS
(
    -- 12-row simple moving average per ticker, limited to the 40 days before the latest PriceDate
    SELECT @smoothingFactor12 AS alpha
         , Ticker
         , PriceDate
         , ClosePrice
         , AVG(ClosePrice) OVER (PARTITION BY Ticker ORDER BY PriceDate ROWS BETWEEN 11 PRECEDING AND CURRENT ROW) AS sma
    FROM price.PriceHist
    -- DATEADD rather than "MAX(PriceDate) - 40": integer date arithmetic is only
    -- defined for datetime/smalldatetime, DATEADD works for every date type.
    WHERE PriceDate > (SELECT DATEADD(DAY, -40, MAX(PriceDate)) FROM price.PriceHist)
),
EMA AS
(
    -- Anchor: every SMA row seeds a chain with its sma value (no filter on the anchor).
    SELECT Ticker, PriceDate, ClosePrice, CONVERT(DECIMAL(10, 4), sma) AS ema
    FROM SMA
    UNION ALL
    -- Step: ema = alpha * close + (1 - alpha) * previous ema.
    -- NOTE: the join looks for the next calendar day, so a chain stops at any missing
    -- date (weekend, holiday) -- this is the limitation described in the surrounding text.
    SELECT curr.Ticker, curr.PriceDate, curr.ClosePrice, CONVERT(DECIMAL(10, 4), calc.ema) AS ema
    FROM EMA AS previous
    INNER JOIN SMA AS curr
        ON  curr.PriceDate = DATEADD(DAY, 1, previous.PriceDate)
        AND curr.Ticker    = previous.Ticker
    CROSS APPLY (SELECT curr.alpha * curr.ClosePrice + (1 - curr.alpha) * previous.ema AS ema) AS calc
)
INSERT INTO #tempEMA (Ticker, PriceDate, ClosePrice, EMA12)
SELECT Ticker, PriceDate, ClosePrice, ema
FROM EMA
OPTION (MAXRECURSION 0);   -- 0 removes the recursion-depth limit
GO
The result from the query above clearly shows that it is very wrong to use PriceDate the way I use it.

I took the advice from @Alex and moved the SMA calculation into a separate temp table. It worked great.
-- Stage the per-ticker SMA together with a gap-free row number, so the recursive
-- step can join on rownum instead of on calendar dates.
IF OBJECT_ID('tempdb..#tempSMA') IS NOT NULL DROP TABLE #tempSMA;

CREATE TABLE #tempSMA
(
    Ticker     VARCHAR(20),
    PriceDate  DATETIME2,
    ClosePrice DECIMAL(17,5),
    sma        DECIMAL(17,5),
    rownum     INT
);

INSERT INTO #tempSMA (Ticker, PriceDate, ClosePrice, sma, rownum)
SELECT
      Ticker
    , PriceDate
    , ClosePrice
    -- NOTE(review): 27-row window here while the EMA below uses a 12-period smoothing factor -- confirm this is intended
    , AVG(ClosePrice) OVER (PARTITION BY Ticker ORDER BY PriceDate ROWS BETWEEN 26 PRECEDING AND CURRENT ROW) AS sma
    -- Consecutive number per ticker in date order; missing calendar days leave no gaps here
    , ROW_NUMBER() OVER (PARTITION BY Ticker ORDER BY PriceDate) AS rownum
FROM price.PriceHist
-- DATEADD rather than "MAX(PriceDate) - 40": the subtraction form fails for DATETIME2/DATE
WHERE PriceDate > (SELECT DATEADD(DAY, -40, MAX(PriceDate)) FROM price.PriceHist);

DECLARE @timePeriod12 INT = 12;
DECLARE @smoothingFactor12 FLOAT = 2.0 / (@timePeriod12 + 1);

;WITH SMA AS
(
    -- Attach the smoothing factor to the staged SMA rows
    SELECT @smoothingFactor12 AS alpha
         , rownum
         , Ticker
         , PriceDate
         , ClosePrice
         , sma
    FROM #tempSMA
),
EMA AS
(
    -- Anchor: start each ticker's chain at the row whose rownum is half of the
    -- largest rownum found across all tickers, seeding the EMA with that row's sma
    SELECT Ticker, PriceDate, ClosePrice, CONVERT(DECIMAL(10, 4), sma) AS ema, rownum
    FROM SMA
    WHERE rownum = (SELECT MAX(rownum) / 2 FROM SMA)
    UNION ALL
    -- Step: ema = alpha * close + (1 - alpha) * previous ema, walking to the next rownum of the same ticker
    SELECT curr.Ticker, curr.PriceDate, curr.ClosePrice, CONVERT(DECIMAL(10, 4), calc.ema) AS ema, curr.rownum
    FROM EMA AS previous
    INNER JOIN SMA AS curr
        ON  curr.rownum = previous.rownum + 1
        AND curr.Ticker = previous.Ticker
    CROSS APPLY (SELECT curr.alpha * curr.ClosePrice + (1 - curr.alpha) * previous.ema AS ema) AS calc
)
INSERT INTO #tempEMA (Ticker, PriceDate, ClosePrice, EMA12)
SELECT Ticker, PriceDate, ClosePrice, ema
FROM EMA
OPTION (MAXRECURSION 0);   -- 0 removes the recursion-depth limit
GO

Updating Incremental value to a column in a SQL table for a group of rows

I have a situation where I have to update a column with an incremental value, based on the value of the same column in the previous row of the same group of records.
Rule for updating 'COUNT' column is:
For the very 1st row of a particular REFNO,
If Amount 1 = Amount 2 then
COUNT = 1
Else
COUNT = 0
For all other rows (excluding the 1st row) of a particular REFNO:
If Amount 1 = Amount 2 then
COUNT = COUNT from previous row for the same REFNO + 1
Else
COUNT = COUNT from previous row for the same REFNO
So the result should look like below:
Though the sample data which I have shown has only 14 records the actual table I am updating is going to have few million rows in them. So I am searching for a solution which will do a set based update rather than row by row processing !!

You can update from a CTE that uses window functions to calculate the number.
The SQL below first calculates a row_number for the equal amounts.
Then for the others that don't have an equal amount, the maximum of the previous row_number is taken.
-- Set [COUNT] on every PAYMENT row to the running number of rows (per REFNO, in [ROW ID] order)
-- whose [Amount 1] equals [Amount 2], by updating through the CTE.
WITH CTE AS
(
SELECT *,
(CASE
WHEN [Amount 1] = [Amount 2]
THEN rn
-- Non-matching row: carry forward the highest rn seen so far in this REFNO (0 when there is none yet).
ELSE MAX(rn) OVER (PARTITION BY [REFNO] ORDER BY [ROW ID] ASC ROWS UNBOUNDED PRECEDING)
END) as rnk
FROM (
SELECT
[ROW ID], [REFNO], [Amount 1], [Amount 2], [COUNT],
(CASE
WHEN [Amount 1] = [Amount 2]
-- Matching rows are numbered 1, 2, 3... within their REFNO; the IIF term puts matching and
-- non-matching rows in separate partitions so that only matching rows are counted.
THEN ROW_NUMBER() OVER (PARTITION BY [REFNO], IIF([Amount 1] = [Amount 2],0,1) ORDER BY [ROW ID] ASC)
ELSE 0
END) AS rn
FROM PAYMENT
) q
)
-- The CTE reads a single base table, so the UPDATE is applied to PAYMENT.
UPDATE CTE
SET [COUNT] = rnk;
A test on db<>fiddle here

Try this:
-- Sample data in a table variable; rowid (identity) records the insertion order.
DECLARE @t TABLE (
    rowid   int IDENTITY,
    refno   int,
    amount1 int,
    amount2 int
);

INSERT INTO @t (refno, amount1, amount2) VALUES (1000000, 100, 200);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000000, 250, 250);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000000, 300, 300);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000000, 400, 400);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000010, 400, 100);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000010, 200, 100);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000010, 100, 300);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000021, 400, 400);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000021, 200, 100);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000032, 200, 200);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000032, 300, 300);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000033, 200, 100);
INSERT INTO @t (refno, amount1, amount2) VALUES (1000033, 200, 100);

-- count = number of rows so far (within the same refno, in rowid order) where
-- amount1 = amount2. This is the stated rule: the first row is 1 or 0, and every
-- later row adds 1 to the previous count on a match and repeats it otherwise.
SELECT
    rowid,
    refno,
    amount1,
    amount2,
    SUM(CASE WHEN amount1 = amount2 THEN 1 ELSE 0 END)
        OVER (PARTITION BY refno ORDER BY rowid ROWS UNBOUNDED PRECEDING) AS [count]
FROM @t
ORDER BY rowid;

This code works for this particular data set, but there is no guarantee that it will work in every situation:
-- Sample data: RowID (identity) gives the row sequence, RefNo is the grouping key.
CREATE TABLE #tmp(
RowID INT IDENTITY(1,1),
RefNo INT,
Amount1 INT,
Amount2 INT
)
INSERT INTO #tmp(RefNo,Amount1,Amount2)
SELECT * FROM (VALUES
(100000,100,200),
(100000,250,250),
(100000,300,300),
(100000,400,400),
(100000,400,100),
(100010,200,100),
(100010,100,300),
(100010,400,400),
(100021,200,100),
(100021,200,200),
(100032,300,300),
(100032,200,100),
(100033,200,100),
(100033,200,100)) AS x(a,b,c)
-- Try1: pair every row (t1) with the row whose RowId is exactly one less in the same RefNo (t2).
-- [Count] here is a marker, not the final count: 0 when neither the current row nor that
-- neighbour has equal amounts, otherwise the RowId of the most recent matching row among the two.
-- NOTE(review): the neighbour lookup relies on RowID values being consecutive within a RefNo.
;WITH Try1 AS (SELECT t1.*, [Count] =
CASE WHEN t1.Amount1 != t1.Amount2 AND
(t2.RowId IS NULL OR t2.Amount1 != t2.Amount2) THEN 0
WHEN t1.Amount1 != t1.Amount2 AND t2.Amount1 = t2.Amount2 THEN t2.RowId
WHEN t1.Amount1 = t1.Amount2 AND t2.RowId IS NULL THEN t1.RowId
WHEN t1.Amount1 = t1.Amount2 AND t2.RowId IS NOT NULL THEN t1.RowId
END
-- NextRefNo = 1 marks a row that has no such neighbour, i.e. the first row of a RefNo.
, NextRefNo = CASE WHEN t2.RowId IS NULL THEN 1 ELSE 0 END
FROM #tmp AS t1
OUTER APPLY ( SELECT * FROM #tmp AS t2
WHERE t2.RowId = t1.RowID - 1 AND t2.RefNo = t1.RefNo) AS t2)
-- Try2: NextRefNo becomes 1 on every row of a RefNo whose first row has a non-zero marker
-- (first row has equal amounts), and 0 otherwise.
, Try2 AS (SELECT RowID, RefNo, Amount1, Amount2, [Count]
, NextRefNo = ISNULL(t2.NextRefNo,0)
FROM Try1 AS t1
OUTER APPLY ( SELECT NextRefNo FROM Try1 AS t2
WHERE t2.[Count] > 0 AND t2.NextRefNo = 1
AND t2.RefNo = t1.RefNo ) AS t2)
-- Final count: rank the distinct markers within the RefNo, shift to start at 0, and add 1
-- for groups flagged in Try2.
SELECT RowID, RefNo, Amount1, Amount2
, [Count] = DENSE_RANK() OVER(PARTITION BY RefNo ORDER BY [Count]) - 1 + NextRefNo
FROM Try2
ORDER BY RowID;

Can the same query be done with Pivot?

I have a table as
-- Scratch table of rate bands; id is an identity key assigned in insertion order.
CREATE TABLE #FinalRates
(
    id       INT IDENTITY(1,1) PRIMARY KEY,
    RateDesc NVARCHAR(50),
    Amt      DECIMAL(18,2)
);

-- Two amounts per RateDesc, inserted one row at a time.
INSERT INTO #FinalRates (RateDesc, Amt) VALUES ('100',   200);
INSERT INTO #FinalRates (RateDesc, Amt) VALUES ('100',   300);
INSERT INTO #FinalRates (RateDesc, Amt) VALUES ('50-80', 100);
INSERT INTO #FinalRates (RateDesc, Amt) VALUES ('50-80', 300);
INSERT INTO #FinalRates (RateDesc, Amt) VALUES ('30-50', 500);
INSERT INTO #FinalRates (RateDesc, Amt) VALUES ('30-50', 250);
Looking for an output as
RateDesc Amount1 Amount2
100 200 300
50-80 100 300
30-50 500 250
I have done this as
-- Build a comma-separated list of amounts per RateDesc, then split it back into two columns.
;with cte as(
select
RateDesc
,Amounts=
-- FOR XML PATH('') concatenates ',<amt>' for every row of this RateDesc; STUFF strips the leading comma.
-- NOTE(review): the inner SELECT has no ORDER BY, so the order of the amounts in the list is not guaranteed.
STUFF((Select ','+ cast(cast(Amt as int) as varchar(10))
from #FinalRates T1
where T1.RateDesc=T2.RateDesc
FOR XML PATH('')),1,1,'')
from #FinalRates T2
group by T2.RateDesc
)
select
RateDesc,
-- Commas are swapped for dots so PARSENAME can pick the parts. PARSENAME numbers parts
-- from the right, so with two amounts part 2 is the first and part 1 is the second.
Amount1 = PARSENAME(REPLACE(Amounts,',','.'),2),
Amount2 = PARSENAME(REPLACE(Amounts,',','.'),1)
From Cte
-- Clean up the temp table.
Drop table #FinalRates
Can the same be done using PIVOT?

That's so complicated. How about this?
-- Put the first two amounts of every ratedesc into Amount1 / Amount2 using conditional aggregation.
SELECT
    fr.ratedesc,
    MAX(CASE WHEN fr.seqnum = 1 THEN fr.amt END) AS Amount1,
    MAX(CASE WHEN fr.seqnum = 2 THEN fr.amt END) AS Amount2
FROM (
    -- seqnum numbers the rows of each ratedesc in id order
    SELECT
        src.ratedesc,
        src.amt,
        ROW_NUMBER() OVER (PARTITION BY src.ratedesc ORDER BY src.id) AS seqnum
    FROM #finalrates AS src
) AS fr
GROUP BY fr.ratedesc;
You could use a similar approach using pivot but conditional aggregation often performs better.
Plus, if you know you have no holes in id, you can do:
-- Variant that relies on ids being gap-free: odd ids feed Amount1, even ids feed Amount2.
SELECT
    fr.ratedesc,
    MAX(CASE WHEN fr.id % 2 = 1 THEN fr.amt END) AS Amount1,
    MAX(CASE WHEN fr.id % 2 = 0 THEN fr.amt END) AS Amount2
FROM #finalrates AS fr
GROUP BY fr.ratedesc;

Using PIVOT,
Assuming you have 2 Amt for each RateDesc.
-- PIVOT version: label each row 'Amount1', 'Amount2', ... per RateDesc, then turn the labels into columns.
SELECT p.RateDesc, p.[Amount1], p.[Amount2]
FROM
(
    SELECT
        RateDesc,
        Amt,
        -- Number by id (insertion order), not by Amt: ordering by Amt would swap the
        -- two amounts of any RateDesc whose larger amount was inserted first (e.g. '30-50').
        -- id is used only inside the window; selecting it would make PIVOT group by it.
        'Amount' + CAST(ROW_NUMBER() OVER (PARTITION BY RateDesc ORDER BY id) AS varchar(5)) AS RowVal
    FROM #FinalRates
) AS x
PIVOT
(
    MAX(Amt) FOR RowVal IN ([Amount1], [Amount2])
) AS p;

Getting the last row from a ROW_NUMBER using SQL

I am thinking there is a better way to grab the last row from a row_number instead of doing multiple nesting using T-SQL.
I need the total number of orders and the last ordered date. Say I have the following:
-- Sample orders. Created as a temp table (#T) because "DECLARE #T TABLE" is not valid
-- T-SQL and the queries that follow read FROM #T.
DROP TABLE IF EXISTS #T;

CREATE TABLE #T
(
    PERSON_ID  INT,
    ORDER_DATE DATE
);

-- 'yyyymmdd' literals are read the same way regardless of the session's date settings.
INSERT INTO #T (PERSON_ID, ORDER_DATE)
VALUES
    (1, '20160101'),
    (1, '20160102'),
    (1, '20160103'),
    (2, '20160101'),
    (2, '20160102'),
    (3, '20160101'),
    (3, '20160102'),
    (3, '20160103'),
    (3, '20160104');
What I want is:
PERSON_ID ORDER_DATE ORDER_CNT
1 2016-01-03 3
2 2016-01-02 2
3 2016-01-04 4
Is there a better way to do this besides the following:
-- Two numbering passes: count orders per person in date order, then flag the row
-- with the highest count as the last one.
;WITH counted AS
(
    SELECT
        PERSON_ID,
        ORDER_DATE,
        ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY ORDER_DATE) AS ORDER_CNT
    FROM #T
),
flagged AS
(
    SELECT
        PERSON_ID,
        ORDER_DATE,
        ORDER_CNT,
        ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY ORDER_CNT DESC) AS LAST_ROW
    FROM counted
)
SELECT PERSON_ID, ORDER_DATE, ORDER_CNT, LAST_ROW
FROM flagged
WHERE LAST_ROW = 1

Yes, you can use this:
-- One row per person: latest order date and number of orders.
SELECT
    t.PERSON_ID,
    MAX(t.ORDER_DATE) AS ORDER_DATE,
    COUNT(*)          AS ORDER_CNT
FROM #T AS t
GROUP BY t.PERSON_ID

-- Latest order per person plus the person's order count, in a single pass over #T.
;WITH ranked AS
(
    SELECT
        t.PERSON_ID,
        t.ORDER_DATE,
        -- rn = 1 marks the most recent order of each person
        ROW_NUMBER() OVER (PARTITION BY t.PERSON_ID ORDER BY t.ORDER_DATE DESC) AS rn,
        -- counts the person's rows that have a non-NULL ORDER_DATE
        COUNT(t.ORDER_DATE) OVER (PARTITION BY t.PERSON_ID) AS ORDER_CNT
    FROM #T AS t
)
SELECT
    r.PERSON_ID,
    r.ORDER_DATE,
    r.ORDER_CNT
FROM ranked AS r
WHERE r.rn = 1
ORDER BY r.PERSON_ID;

SQL Server Median Function

-- Returns the median QTYSHP for one item over the last six months.
--   @ITEM   : item code to evaluate.
--   returns : the median as decimal(21,6); NULL when no row qualifies.
-- Rows are restricted to RQDATE within the last 6 months, PRICE <> '0' and SALESMN <> 'WB'.
CREATE FUNCTION [dbo].[f_Get_Average_Order_Size_Median]
(
    @ITEM char(15)
)
RETURNS decimal(21,6)
AS
BEGIN
    DECLARE @Median decimal(21,6);

    SELECT @Median = AVG(1.0 * x.QTYSHP)
    FROM
    (
        SELECT
            o.QTYSHP,
            -- position of the row when sorted by quantity
            rn = ROW_NUMBER() OVER (ORDER BY o.QTYSHP),
            -- number of qualifying rows, repeated on every row
            c  = COUNT(*) OVER ()
        FROM dbo.tbl AS o
        WHERE o.RQDATE >= DATEADD(mm, -6, GETDATE())
          AND o.PRICE != '0'
          AND o.SALESMN != 'WB'
          AND o.item = @ITEM
    ) AS x
    -- Integer division: for an odd count both expressions name the single middle row,
    -- for an even count they name the two middle rows, which AVG then averages.
    WHERE x.rn IN ((x.c + 1) / 2, (x.c + 2) / 2);

    RETURN @Median;
END
Is this the correct way to write a median function? Please correct me; I'm still learning.

The median is the value below which 50% of the values fall (the 50th percentile). So I think the simplest way to compute it is:
1. Count the number of records (let's say this count is 'n').
2. Select the top n / 2 records (if n is odd, round n / 2 up to the next integer), sorted by the column that holds the value for which you want to calculate the median, and read the biggest (last) value of this column.
I'm not very familiar with SQL Server, but in MySQL I would do it like this:
-- MySQL: median taken as the largest value among the lowest ceil(n / 2) rows.
-- For an even row count this returns the lower of the two middle values.
SET @n   = (SELECT COUNT(*) FROM yourTable);
SET @med = CAST(CEIL(@n / 2) AS UNSIGNED);

-- LIMIT cannot take a user variable directly; a prepared statement with a
-- placeholder is the supported way to pass a computed row count.
PREPARE median_stmt FROM
    'SELECT yourColumn
     FROM (
         SELECT yourColumn
         FROM yourTable
         ORDER BY yourColumn
         LIMIT ?
     ) AS a
     ORDER BY yourColumn DESC
     LIMIT 1';
EXECUTE median_stmt USING @med;
DEALLOCATE PREPARE median_stmt;

For SQL Server 2005+ you could try this solution:
-- Median via two opposite row numberings: the middle row(s) are those whose ascending
-- and descending positions are equal (odd count) or differ by one (even count).
DECLARE @MyTable TABLE
(
    ID    INT PRIMARY KEY,
    Value NUMERIC(9,2)
);

INSERT @MyTable (ID, Value) VALUES (1, 10);
INSERT @MyTable (ID, Value) VALUES (2, 20);
INSERT @MyTable (ID, Value) VALUES (3, 30);
INSERT @MyTable (ID, Value) VALUES (4, 40);

-- Test #1: 4 rows => AVG(20,30)
SELECT AVG(y.Value) AS Median#1
FROM
(
    SELECT
        x.Value,
        -- Rank by Value (the quantity whose median is wanted). ID breaks ties in the
        -- mirrored direction so the two numberings are exact reverses of each other.
        ROW_NUMBER() OVER (ORDER BY x.Value ASC,  x.ID ASC)  AS RowNumASC,
        ROW_NUMBER() OVER (ORDER BY x.Value DESC, x.ID DESC) AS RowNumDESC
    FROM @MyTable x
) y
WHERE y.RowNumASC - y.RowNumDESC IN (-1, 0, 1);
-- End of Test #1

-- Test #2: 5 rows => AVG(30)
INSERT @MyTable (ID, Value) VALUES (5, 50);

SELECT AVG(y.Value) AS Median#2
FROM
(
    SELECT
        x.Value,
        ROW_NUMBER() OVER (ORDER BY x.Value ASC,  x.ID ASC)  AS RowNumASC,
        ROW_NUMBER() OVER (ORDER BY x.Value DESC, x.ID DESC) AS RowNumDESC
    FROM @MyTable x
) y
WHERE y.RowNumASC - y.RowNumDESC IN (-1, 0, 1);
-- End of Test #2
Results:
Median#1
---------
25.000000
Median#2
---------
30.000000

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Recursive CTE with partition by column - sql-server

Related

Recursion with CTE not working for non-consecutive days but very slow with ROW_NUMBER

Updating Incremental value to a column in a SQL table for a group of rows

Can the same query be done with Pivot?

Getting the last row from a ROW_NUMBER using SQL

SQL Server Median Function

Categories

Resources