Using recursion with a CTE in SQL Server - sql-server

I have the following table structure (this is just a sample set; my final output query has exactly the same columns).
The actual data has many more rows per index, and I have to remove a few symbols before arriving at the index value. This is a custom index to be built for internal use.
https://dbfiddle.uk/?rdbms=sqlserver_2016&fiddle=b1d5ed7db79c665d8cc179ae4cc7d4f1
This is link to the fiddle for SQL data
below is the image of the same:
I want to calculate point contribution to the index value and finally the index value.
To calculate pts contribution by each symbol the formula is :
ptsC = yesterday_index * wt * px_change / yest_close
I do not have a starting value for yesterday's index (i.e. for 17 Nov 2021), so it should be taken as 1000.
The Index Value of 18 Nov will then be 1000 + sum(ptsC)
This value should now be used to calculate ptsC for each symbol for 22-Nov and so on...
I am trying to write a recursive CTE but am not sure where I am going wrong.
Yesterday Index value should be recursively determined and thus the ptsC should be calculated.
The final output should be:
where total Point Contribution is sum of all the ptsC for the day and New index Value is yesterday Index Value + Total Point Contribution.
Below is the code I have which generates the first table:
-- Builds the per-symbol input table (weight, price change, previous close)
-- for one index. T-SQL local variables must be prefixed with @.
DECLARE @beginval AS float = 17671.65;
SET @beginval = 1000; -- base index level actually used below (overrides the initial value)
DECLARE @indexname varchar(20) = 'NIFTY ENERGY';
DECLARE @mindt AS datetime;

-- Earliest date available for the index; only this date gets a ptsC value in tbl3.
SELECT @mindt = MIN(datetime)
FROM indices_json
WHERE indexname = @indexname;

WITH tbl AS (
    -- Total index market cap (today / yesterday) per index and date.
    SELECT
        IndexName,
        datetime,
        SUM(Indexmcap_today) AS totalMcap_today,
        SUM(Indexmcap_yst) AS totalmcap_yst
    FROM indices_json
    WHERE IndexName = @indexname
    GROUP BY indexname, datetime
),
tbl2 AS (
    -- Per-symbol weights as a percentage of the total market cap.
    SELECT
        j.indexname,
        j.datetime,
        symbol,
        Indexmcap_today / d.totalMcap_today * 100 AS calc_wt_today,
        Indexmcap_yst / d.totalmcap_yst * 100 AS calc_wt_yest,
        iislPtsChange,
        adjustedClosePrice,
        pointchange
    FROM indices_json AS j
    INNER JOIN tbl AS d
        ON d.datetime = j.datetime
        AND d.IndexName = j.IndexName
),
tbl3 AS (
    -- Point contribution per symbol; computed for the first date only,
    -- NULL for every later date.
    SELECT
        indexname,
        datetime,
        symbol,
        calc_wt_today,
        calc_wt_yest,
        iislPtsChange,
        adjustedClosePrice,
        pointchange,
        CASE
            WHEN datetime = @mindt
                THEN @beginval * calc_wt_yest * iislPtsChange / adjustedClosePrice / 100
            ELSE NULL
        END AS ptsC
    FROM tbl2
),
tbl4 AS (
    -- Index value per date; NULL for all dates after the first because ptsC is NULL there.
    SELECT
        indexname,
        datetime,
        SUM(ptsC) + @beginval AS NewIndexVal,
        SUM(pointchange) AS PTSCC
    FROM tbl3
    GROUP BY indexname, datetime
),
tbl5 AS (
    -- Attach the previous date to each date.
    SELECT
        indexname,
        datetime,
        NewIndexVal,
        PTSCC,
        LAG(datetime, 1, NULL) OVER (ORDER BY datetime ASC) AS yest_dt
    FROM tbl4
),
tbl6 AS (
    -- Symbol rows enriched with the previous date.
    SELECT
        d.indexname,
        d.datetime,
        d.symbol,
        d.calc_wt_today,
        d.calc_wt_yest,
        d.iislPtsChange,
        d.adjustedClosePrice,
        d.pointchange,
        s.yest_dt
    FROM tbl2 AS d
    INNER JOIN tbl5 AS s
        ON d.datetime = s.datetime
),
tbl7 AS (
    -- Previous day's index value per symbol row; falls back to @beginval
    -- when there is no previous date.
    SELECT
        d.IndexName,
        d.datetime,
        d.symbol,
        d.calc_wt_today,
        d.calc_wt_yest,
        d.iislPtsChange,
        d.adjustedClosePrice,
        d.pointchange,
        CASE
            WHEN i.datetime IS NULL THEN @beginval
            ELSE i.NewIndexVal
        END AS yest_index
    FROM tbl6 AS d
    LEFT JOIN tbl4 AS i
        ON d.yest_dt = i.datetime
)
-- pointchange and yest_index are available in tbl7 but not part of the output.
SELECT
    IndexName,
    CONVERT(varchar(12), datetime, 106) AS date,
    symbol,
    ROUND(calc_wt_yest, 4) AS wt,
    iislPtsChange AS px_change,
    adjustedClosePrice AS yest_close
FROM tbl7 AS d
WHERE datetime <= '2021-11-24'
ORDER BY datetime;
Thanks in advance.

I found a solution for this:
1. I calculated the return of each constituent for each date.
2. I summed these returns per date and added 1 to get that date's growth factor.
3. I multiplied the growth factors of all dates cumulatively (via exp(sum(log(...)))) and scaled by the starting value to arrive at the final index value. This works.
Below is the query for the same. I did not need recursion here.
-- Computes the running index level without recursion: each day's growth
-- factor (1 + sum of weighted constituent returns) is compounded over time
-- and scaled by the starting value.
-- T-SQL local variables must be prefixed with @.
DECLARE @beginval AS float = 17671.65;           -- starting index level
DECLARE @indexname varchar(20) = 'NIFTY 50';
DECLARE @startdt AS datetime = '2021-11-01';     -- first date included in the compounding

WITH index_mcap AS (
    -- Total index market cap (today / yesterday) per index and date.
    SELECT
        IndexName,
        datetime,
        SUM(Indexmcap_today) AS totalMcap_today,
        SUM(Indexmcap_yst) AS totalmcap_yst
    FROM indices_json
    WHERE IndexName = @indexname -- and symbol!='AXISBANK'  (optional constituent exclusion)
    GROUP BY indexname, datetime
),
weights AS (
    -- Per-symbol weights as a percentage of the total market cap.
    SELECT
        j.indexname,
        j.datetime,
        symbol,
        Indexmcap_today / d.totalMcap_today * 100 AS calc_wt_today,
        Indexmcap_yst / d.totalmcap_yst * 100 AS calc_wt_yest,
        iislPtsChange,
        adjustedClosePrice,
        pointchange
    FROM indices_json AS j
    INNER JOIN index_mcap AS d
        ON d.datetime = j.datetime
        AND d.IndexName = j.IndexName
),
constituent_return AS (
    -- Weighted return of each symbol: weight (as a fraction) * price change / previous close.
    SELECT
        w.IndexName,
        w.datetime,
        w.calc_wt_yest * w.iislPtsChange / w.adjustedClosePrice / 100 AS ret
    FROM weights AS w
),
daily_factor AS (
    -- One row per index and date: growth factor for that day.
    SELECT
        indexname,
        datetime,
        1 + SUM(ret) AS tot_ret
    FROM constituent_return
    GROUP BY indexname, datetime
)
SELECT
    indexname,
    datetime AS date,
    -- exp(sum(log(x))) is a running product; daily_factor already holds one row
    -- per date, so no further GROUP BY is needed. The ROWS frame makes the
    -- running window explicit.
    ROUND(
        EXP(
            SUM(LOG(tot_ret)) OVER (
                PARTITION BY indexname
                ORDER BY datetime
                ROWS UNBOUNDED PRECEDING
            )
        ),
        6
    ) * @beginval AS final_Ret
FROM daily_factor
WHERE datetime >= @startdt
ORDER BY date;

Related

How can I refer to a LAG() function column in SQL Server?

I have a query in which I use LAG function :
-- Lists each transaction together with the previous BalanceAfter of the
-- same warehouse and code (0 when there is no previous row).
WITH Tr AS (
    SELECT
        d.Warehouse,
        t.Code,
        d.zDate,
        t.ID,
        t.QtyIn,
        t.QtyOut,
        t.BalanceAfter
    FROM DocDtls AS d
    INNER JOIN Transactions AS t
        ON d.[PrimDocNum] = t.[DocNum]
)
SELECT
    ID,
    Code,
    QtyIn,
    QtyOut,
    BalanceAfter,
    LAG(BalanceAfter, 1, 0) OVER (
        PARTITION BY Warehouse, Code
        ORDER BY Code, ID
    ) AS Prev_BlncAfter
FROM Tr;
It's working fine but when I try to add this column before FROM:
SUM(Prev_BlncAfter + QtyIn) - QtyOut AS NewBlncAfter
I get this error :
Msg 207, Level 16, State 1, Line 3
Invalid column name 'Prev_BlncAfter'
How can I fix this ? Thanks
You can create the LAG column inside the CTE instead of in the outer query. E.g.
-- Self-contained sample: table variables (prefixed with @ in T-SQL) stand in
-- for the real DocDtls / Transactions tables.
DECLARE @DocDtls TABLE (Warehouse int, zDate date, [PrimDocNum] int);
DECLARE @Transactions TABLE (code int, id int, QtyIn int, QtyOut int, balanceafter int, [DocNum] int);

-- The LAG column is computed inside the CTE so the outer query can refer to
-- it by name.
WITH Tr AS (
    SELECT
        d.Warehouse,
        t.Code,
        d.zDate,
        t.ID,
        t.QtyIn,
        t.QtyOut,
        t.BalanceAfter,
        LAG(t.BalanceAfter, 1, 0) OVER (
            PARTITION BY d.Warehouse, t.Code
            ORDER BY t.Code, t.ID
        ) AS Prev_BlncAfter
    FROM @DocDtls AS d
    INNER JOIN @Transactions AS t
        ON d.[PrimDocNum] = t.[DocNum]
)
SELECT
    ID,
    Code,
    QtyIn,
    QtyOut,
    BalanceAfter,
    SUM(Prev_BlncAfter + QtyIn) - QtyOut AS NewBlncAfter
FROM Tr
GROUP BY ID, Code, QtyIn, QtyOut, BalanceAfter;
You can nest this query so that the outer query can refer to the newly added column, or add a second CTE (another WITH entry, as you did before) and reference the column from there:
-- Two-step CTE: Tr joins documents to transactions, with_prev adds the
-- previous balance, and the final query aggregates per ID and QtyOut.
WITH Tr AS (
    SELECT
        DocDtls.Warehouse, Transactions.Code, DocDtls.zDate,
        Transactions.ID, Transactions.QtyIn, Transactions.QtyOut,
        Transactions.BalanceAfter
    FROM DocDtls
    INNER JOIN Transactions
        ON DocDtls.[PrimDocNum] = Transactions.[DocNum]
),
with_prev AS (
    SELECT
        ID, Code, QtyIn, QtyOut, BalanceAfter,
        LAG(BalanceAfter, 1, 0) OVER (PARTITION BY Warehouse, Code ORDER BY Code, ID) AS Prev_BlncAfter
    FROM Tr
)
SELECT
    SUM(Prev_BlncAfter + QtyIn) - QtyOut AS NewBlncAfter
FROM with_prev
GROUP BY ID, QtyOut;
Based on comments , I combined the two answers to get what I need :
-- Combined version: the previous balance is ordered by Code, zDate, ID
-- within each warehouse/code, then the new balance is derived per row group.
WITH Tr AS (
    SELECT
        DocDtls.Warehouse, Transactions.Code, DocDtls.zDate,
        Transactions.ID, Transactions.QtyIn, Transactions.QtyOut,
        Transactions.BalanceAfter
    FROM DocDtls
    INNER JOIN Transactions
        ON DocDtls.[PrimDocNum] = Transactions.[DocNum]
),
with_prev AS (
    SELECT
        ID, Code, QtyIn, QtyOut, BalanceAfter,
        LAG(BalanceAfter, 1, 0) OVER (PARTITION BY Warehouse, Code ORDER BY Code, zDate, ID) AS Prev_BlncAfter
    FROM Tr
)
SELECT
    ID,
    Code,
    QtyIn,
    QtyOut,
    BalanceAfter,
    SUM(Prev_BlncAfter + QtyIn) - QtyOut AS NewBlncAfter
FROM with_prev
GROUP BY ID, Code, QtyIn, QtyOut, BalanceAfter;

snowflake unsupported subquery cannot be evaluated

/* Table TEMP has a customer hash, an effective start date and an effective end date. Table CDTLS has a customer hash and an effective start date. I want to select the customer hash, the effective-from date and the customer name from TEMP and CDTLS. I calculate the CDTLS end date on the fly and compare it with TEMP.EFFECTIVE_FROM and TEMP.EFFECTIVE_TO. I get the error "unsupported subquery cannot be evaluated". */
-- Looks up the customer name for each TEMP interval.
-- The JOIN keyword was missing after LEFT.
SELECT
    TEMP.CUSTOMER_HASH,
    TEMP.EFFECTIVE_FROM,
    TEMP.EFFECTIVE_TO,
    CDTLS.NAME
FROM TEMP
LEFT JOIN CDTLS
    ON TEMP.CUSTOMER_HASH = CDTLS.CUSTOMER_HASH
    AND CDTLS.EFFECTIVE_FROM <= TEMP.EFFECTIVE_FROM
    -- The end date of a CDTLS row is derived as the next EFFECTIVE_FROM of the
    -- same customer ('9999-12-31' for the latest row). This correlated scalar
    -- subquery inside the join condition is the construct the question reports
    -- as "unsupported subquery cannot be evaluated".
    AND (
        SELECT VW.EFFECTIVE_TO
        FROM (
            SELECT
                CUSTOMER_HASH,
                EFFECTIVE_FROM,
                LEAD(EFFECTIVE_FROM, 1, '9999-12-31') OVER (
                    PARTITION BY CUSTOMER_HASH
                    ORDER BY EFFECTIVE_FROM ASC
                ) AS EFFECTIVE_TO
            FROM CUST_DETAILS
        ) AS VW
        WHERE CDTLS.CUSTOMER_HASH = VW.CUSTOMER_HASH
            AND CDTLS.EFFECTIVE_FROM = VW.EFFECTIVE_FROM
    ) >= TEMP.EFFECTIVE_TO
;
I suppose you wanted to run this query:
-- Rewrites the correlated subquery as a join while keeping LEFT JOIN
-- semantics: the derived end date is attached to each CDTLS row first, and
-- all three matching conditions stay in the ON clause. Filtering
-- VW.EFFECTIVE_TO in a WHERE clause instead would discard every TEMP row
-- without a qualifying CDTLS row, i.e. silently turn the outer join into an
-- inner join.
SELECT
    TEMP.CUSTOMER_HASH,
    TEMP.EFFECTIVE_FROM,
    TEMP.EFFECTIVE_TO,
    CDTLS.NAME
FROM TEMP
LEFT JOIN (
    SELECT
        C.CUSTOMER_HASH,
        C.NAME,
        C.EFFECTIVE_FROM,
        VW.EFFECTIVE_TO
    FROM CDTLS AS C
    INNER JOIN (
        -- End date of a row = next EFFECTIVE_FROM of the same customer,
        -- or '9999-12-31' for the latest row.
        SELECT
            CUSTOMER_HASH,
            EFFECTIVE_FROM,
            LEAD(EFFECTIVE_FROM, 1, '9999-12-31') OVER (
                PARTITION BY CUSTOMER_HASH
                ORDER BY EFFECTIVE_FROM ASC
            ) AS EFFECTIVE_TO
        FROM CUST_DETAILS
    ) AS VW
        ON C.CUSTOMER_HASH = VW.CUSTOMER_HASH
        AND C.EFFECTIVE_FROM = VW.EFFECTIVE_FROM
) AS CDTLS
    ON TEMP.CUSTOMER_HASH = CDTLS.CUSTOMER_HASH
    AND CDTLS.EFFECTIVE_FROM <= TEMP.EFFECTIVE_FROM
    AND CDTLS.EFFECTIVE_TO >= TEMP.EFFECTIVE_TO
You could also try wrapping the subquery's selected column in an aggregate such as MIN, MAX or LISTAGG, so that the subquery is guaranteed to return a single (scalar) value, and check whether that helps.
https://docs.snowflake.net/manuals/user-guide/querying-subqueries.html#differences-between-correlated-and-non-correlated-subqueries

Optimizing query with huge amount of data

How can I optimize this query? I looked at the execution plan and created all the suggested indexes. Every table holds a huge amount of data, and the query takes a very long time to execute. Looking at the query, could you please suggest where I can optimize further?
If I give little background of the query the structure like:
There are many companies
Each company can have multiple managers
Data is in pagination format
The result is filtered by parent manager. Because #parent_manager stores the names as a single comma-separated string, a second temp table, #parent_manager_filter, is created purely for filtering.
-- One row per company: comma-separated manager names and codes.
-- The PRIMARY KEY already creates a unique index on cid, so no additional
-- index on the same column is created for this table.
CREATE TABLE #parent_manager
(
    cid NUMERIC(18) PRIMARY KEY,
    name NVARCHAR(MAX),
    code NVARCHAR(MAX)
);

-- One row per (company, manager); used only to filter by manager name.
CREATE TABLE #parent_manager_filter
(
    cid NUMERIC(18),
    name NVARCHAR(1000),
    code NVARCHAR(1000)
);
CREATE INDEX cte_parent_manager_filter_idx ON #parent_manager_filter(cid);
-- Fills #parent_manager with one row per company: the names and codes of its
-- managers concatenated with ', ' via FOR XML PATH.
-- STUFF removes the leading separator; the separator ', ' is two characters
-- long, so two characters are removed (removing only one leaves a leading space).
INSERT INTO #parent_manager (cid, name, code)
SELECT DISTINCT
    mgrc.cid,
    name = CAST(STUFF((SELECT ', ' + CAST(mgr.company_name AS varchar(2000))
                       FROM manager_company AS mc
                       INNER JOIN company AS mgr ON (mc.mgr_cid = mgr.cid)
                       WHERE mc.cid = mgrc.cid
                         AND mgr.company_name IS NOT NULL
                       FOR XML PATH ('')), 1, 2, '') AS VARCHAR(2000)),
    code = CAST(STUFF((SELECT ', ' + CAST(mgr.code AS varchar(2000))
                       FROM manager_company AS mc
                       INNER JOIN company AS mgr ON (mc.mgr_cid = mgr.cid)
                       WHERE mc.cid = mgrc.cid
                         AND mgr.company_name IS NOT NULL
                       FOR XML PATH ('')), 1, 2, '') AS VARCHAR(2000))
FROM manager_company AS mgrc
INNER JOIN company AS c
    ON (mgrc.mgr_cid = c.cid)
INNER JOIN handler AS h
    ON (c.handlerId = h.handlerid)
WHERE h.handlerid = 5800657002370;
-- Fills the filter table with one row per (company, manager) for the handler.
-- The explicit column list keeps the insert correct if the table definition
-- is ever reordered or extended.
INSERT INTO #parent_manager_filter (cid, name, code)
SELECT DISTINCT
    mc.cid,
    c.company_name AS name,
    c.code AS code
FROM manager_company AS mc
INNER JOIN company AS c
    ON (mc.mgr_cid = c.cid)
INNER JOIN handler AS h
    ON (h.handlerid = c.handlerid)
WHERE h.handlerid = 5800657002370;
-- Returns the first page (rows 1-10) of companies that have at least one
-- manager in :managerList, together with the total row count.
--
-- Changes versus the original:
--   * the CTE is no longer named like the base table it reads (a CTE that
--     references its own name is treated as recursive by SQL Server);
--   * "# parent_manager_filter" (with a space) is not a valid identifier;
--   * company_name is selected inside the CTE so the ROW_NUMBER ordering can
--     use it (it was referenced but not exposed) -- NOTE(review): assumes the
--     intended sort key is company.company_name; confirm;
--   * the LEFT JOIN to the filter table was filtered in WHERE, which made it
--     an inner join anyway; EXISTS states the intent and cannot multiply rows.
WITH filtered_company AS
(
    SELECT DISTINCT
        c.cid AS cid,
        c.company_name AS company_name,
        parentManager.name AS MANAGER_NAME,
        parentManager.code AS code
    FROM company AS c
    LEFT JOIN #parent_manager AS parentManager
        ON (parentManager.cid = c.cid)
    WHERE EXISTS (
        SELECT 1
        FROM #parent_manager_filter AS parentManagerFilter
        WHERE parentManagerFilter.cid = c.cid
          AND parentManagerFilter.name IN (:managerList)
    )
),
total_rows AS
(
    SELECT
        COUNT(*) OVER () AS TOTALCOUNT,
        -- cid is a tie-breaker so paging is deterministic for equal names
        ROW_NUMBER() OVER (ORDER BY grid.company_name ASC, grid.cid ASC) AS rnum,
        grid.cid,
        grid.MANAGER_NAME,
        grid.code
    FROM filtered_company AS grid
)
SELECT
    paged.TOTALCOUNT,
    paged.rnum,
    paged.cid,
    paged.MANAGER_NAME,
    paged.code
FROM total_rows AS paged
WHERE paged.rnum >= 1
  AND paged.rnum <= 10
ORDER BY paged.rnum;

DROP TABLE #parent_manager;
DROP TABLE #parent_manager_filter;
If you are building up temp tables then I would make sure you don't miss a clustered index, else your temp table is simply a heap. You don't have one covering the filter table.
INSERT INTO #parent_manager_filter ...
CREATE CLUSTERED INDEX cte_parent_manager_filter On #parent_manager_filter(cid);

SSRS:How to return count of events per day for Month

I have a table with the following information
ID,DateTime,EventType
1,6/5/2013 9:35:00,B
1,6/5/2013 9:35:24,A
2,6/5/2013 9:35:36,B
3,6/5/2013 9:36:11,D
2,6/5/2013 9:39:16,A
3,6/5/2013 9:40:48,B
4,7/5/2013 9:35:19,B
4,7/5/2013 9:35:33,A
5,7/5/2013 9:35:53,B
5,7/5/2013 9:36:06,D
6,7/5/2013 9:39:39,A
7,7/5/2013 9:40:28,B
8,8/5/2013 9:35:02,A
7,8/5/2013 9:35:08,A
8,8/5/2013 9:35:29,B
6,8/5/2013 9:36:39,B
I need to count how many times each day an event changed state as long as the time between states was less than 30 seconds over the time period.
Basically I am looking for the following result set
6/5/2013 | 1
7/5/2013 | 2
8/5/2013 | 1
I've tried several different types of queries, but nothing works. I am using SQL Server Reporting Services 2008.
-- Sample data in a table variable (T-SQL table variables are prefixed with @).
-- EventType gets an explicit length instead of relying on the default.
DECLARE @t TABLE (ID int, [DateTime] datetime, EventType varchar(1));

INSERT INTO @t (ID, [DateTime], EventType) VALUES
(1,'6/5/2013 9:35:00','B'),
(1,'6/5/2013 9:35:24','A'),
(2,'6/5/2013 9:35:36','B'),
(3,'6/5/2013 9:36:11','D'),
(2,'6/5/2013 9:39:16','A'),
(3,'6/5/2013 9:40:48','B'),
(4,'7/5/2013 9:35:19','B'),
(4,'7/5/2013 9:35:33','A'),
(5,'7/5/2013 9:35:53','B'),
(5,'7/5/2013 9:36:06','D'),
(6,'7/5/2013 9:39:39','A'),
(7,'7/5/2013 9:40:28','B'),
(8,'8/5/2013 9:35:02','A'),
(7,'8/5/2013 9:35:08','A'),
(8,'8/5/2013 9:35:29','B'),
(6,'8/5/2013 9:36:39','B');

-- Number the events of each ID chronologically, pair every event with the
-- next one of the same ID, and count the pairs per day where the state
-- actually changed within less than 30 seconds.
WITH cte AS (
    SELECT
        ID,
        [DateTime],
        EventType,
        CAST([DateTime] AS date) AS the_date,
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY [DateTime]) AS row_num
    FROM @t
)
SELECT
    c1.the_date,
    COUNT(*) AS change_count
FROM cte AS c1
INNER JOIN cte AS c2
    ON c2.ID = c1.ID
    AND c2.row_num = c1.row_num + 1
WHERE DATEDIFF(second, c1.[DateTime], c2.[DateTime]) < 30
    -- two consecutive rows with the same EventType are not a state change
    AND c2.EventType <> c1.EventType
GROUP BY c1.the_date
ORDER BY c1.the_date;
Try this:
-- Counts, per day (dd/mm/yyyy), the event pairs of the same ID that occur on
-- the same date and are 1 to 29 seconds apart.
-- [Table] appears to be a placeholder for the real table name; TABLE is a
-- reserved word, so it must be bracket-quoted. The join condition on ID is
-- stated once, in ON.
SELECT
    CONVERT(VARCHAR(10), a.[DateTime], 103) AS [Date],
    COUNT(a.ID) AS [Count]
FROM [Table] AS a
INNER JOIN [Table] AS b
    ON a.ID = b.ID
WHERE DATEDIFF(second, a.[DateTime], b.[DateTime]) BETWEEN 1 AND 29
    AND CAST(a.[DateTime] AS date) = CAST(b.[DateTime] AS date)
GROUP BY CONVERT(VARCHAR(10), a.[DateTime], 103)

Get all parent rows that do not have a row for current date in child table?

-- Missions that have no report row at all (anti-join via LEFT JOIN + IS NULL).
SELECT mis.[MissionId]
FROM [dbo].[Mission] AS mis
LEFT JOIN [dbo].[Report] AS rep
    ON mis.[MissionId] = rep.[MissionId]
WHERE rep.ReportDate IS NULL
ORDER BY mis.[MissionId]
How can I change the above query such that it gives me all MissionId's from table [dbo].[Mission] that do not have a row in table [dbo].[Report] where [dbo].[Report].ReportDate is today?
MissionId is the primary key in table Mission and a foreign key in table Report. So I want to get all missions that do not have a row in table Report for the current date.
I've introduced some aliases to make the query easier to read, and added the needed condition. I've also changed the WHERE clause, not sure if that's required:
-- Missions with no report dated today. The date filter sits in the ON clause
-- so that missions without a matching report survive the outer join.
SELECT mis.[MissionId]
FROM [dbo].[Mission] AS mis
LEFT JOIN [dbo].[Report] AS rep
    ON rep.[MissionId] = mis.[MissionId]
    -- today's date at midnight
    AND rep.ReportDate = DATEADD(day, DATEDIFF(day, 0, GETDATE()), 0)
WHERE rep.MissionId IS NULL
ORDER BY mis.[MissionId]
This assumes that ReportDate contains dates with the time portions set to midnight. If that's not so, then a slightly more complex query is required:
-- Missions with no report whose ReportDate falls anywhere within today
-- (half-open range: today at midnight up to, but excluding, tomorrow at midnight).
SELECT mis.[MissionId]
FROM [dbo].[Mission] AS mis
WHERE NOT EXISTS (
    SELECT *
    FROM dbo.Report AS rep
    WHERE rep.MissionID = mis.MissionID
        AND rep.ReportDate >= DATEADD(day, DATEDIFF(day, 0, GETDATE()), 0)
        AND rep.ReportDate < DATEADD(day, DATEDIFF(day, 0, GETDATE()), 1)
)
ORDER BY mis.[MissionId]
GETDATE() returns the current date and time. I'm using a couple of tricks with DATEADD and DATEDIFF to take that value and turn it into the current date at midnight, and (in the second query) tomorrow's date at midnight.
Second query as a fully runnable query:
-- Runnable sample: table variables (prefixed with @ in T-SQL) stand in for
-- the real tables. Mission 2 has a report right now, mission 1 only has one
-- from yesterday.
DECLARE @mission TABLE (MissionID int NOT NULL);
INSERT INTO @mission (MissionID)
SELECT 1 UNION ALL SELECT 2;

DECLARE @report TABLE (MissionID int NOT NULL, ReportDate datetime NOT NULL);
INSERT INTO @report (MissionID, ReportDate)
SELECT 2, GETDATE() UNION ALL SELECT 1, DATEADD(day, -1, GETDATE());

-- Missions with no report inside today's half-open date range.
SELECT m.[MissionId]
FROM @mission AS m
WHERE NOT EXISTS (
    SELECT *
    FROM @report AS r
    WHERE r.MissionID = m.MissionID
        AND r.ReportDate >= DATEADD(day, DATEDIFF(day, 0, GETDATE()), 0)
        AND r.ReportDate < DATEADD(day, DATEDIFF(day, 0, GETDATE()), 1)
)
ORDER BY m.[MissionId]
Result:
MissionId
-----------
1
-- Missions with no report dated today.
-- The day()/month()/year() comparisons are replaced by an equivalent
-- half-open range on ReportDate itself, so an index on ReportDate can be
-- used (functions wrapped around the column prevent an index seek).
SELECT
    m.MissionId
FROM Mission AS m
LEFT JOIN Report AS r
    ON m.MissionId = r.MissionId
    AND r.ReportDate >= DATEADD(day, DATEDIFF(day, 0, GETDATE()), 0)  -- today at midnight
    AND r.ReportDate < DATEADD(day, DATEDIFF(day, 0, GETDATE()), 1)   -- tomorrow at midnight
WHERE r.ReportDate IS NULL
ORDER BY m.MissionId

Resources