Avoid WHILE loop and CURSOR for better performance? - sql-server

I'm wondering if someone can help simplify this procedure - and improve performance...!?
We have data on grants. 'Donors' give funds to 'Recipients' and we want to show the top 15 recipients for each donor over 3 periods: CurrentYear-20, CurrentYear-10 and CurrentYear. We publish an annual report and show percentage shares of World and GeoZone totals for each donor.
I have "inherited" this code which was written by one of my predecessors. Until we switched to using a view, execution time was around 15-30 mins. Currently, this runs in just under FOUR hours (scheduled as a Server Agent job)! Management are not happy. For various reasons, the view must continue to be used and currently has just under 900,000 rows with data from the 1950s onwards. We currently run this report for 30 (large) donors and more are added each year.
To help improve performance, I have thought about using a CTE, or using SUM() OVER (PARTITION BY ...), or a combination of these, but I'm not sure how to go about it.
Could someone point me in the right direction?
Here is the process:
create a table (variable) to hold the top 15 recipients for the current donor
create a table (variable) to hold the list of donors
populate the donor table with the donors in the order they appear in the report
loop thru the donor table and for each donor:
put the donor ID for this donor into a temp table
loop 3 times (for CurrentYear-20, CurrentYear-10, CurrentYear)
calculate the share totals for each of 18 regions/zones
print the values for each section in the report
get the next donor ID
As you may see from the above, the calculations are run 54 times (18x3) for each donor!
Here is the code (simplified):
-- #LatestYear is passed as a parameter, hardcoded here for simplicity
DECLARE #LatestYear SMALLINT ,
#CurrentYear SMALLINT ,
#DonorID SMALLINT ,
#totalWorld NUMERIC(10, 2) ,
#LoopCounter TINYINT ,
#DonorName VARCHAR(100)
SELECT #latestyear = 2012
-- create a table to hold list of top 15 recipients for each donor and their 'share' of ODA.
DECLARE #Top15 TABLE
(
Country VARCHAR(100) ,
Percentage REAL
)
-- create a table to hold list of donors, ordered as they need to appear in the report.
DECLARE #PageOrder TABLE
(
DonorID SMALLINT ,
DonorName VARCHAR(100) ,
SortOrder SMALLINT IDENTITY(1, 1)
)
-- create a table to store the "focus" donor.
DECLARE #CurrentDonor TABLE ( DonorID SMALLINT )
INSERT INTO #PageOrder
SELECT DonorID ,
DonorName
FROM dbo.LookupDonor
ORDER BY DonorName;
-- cursor to loop through the donors in SortOrder
DECLARE DonorCursor CURSOR
FOR
SELECT DonorID ,
DonorName
FROM #PageOrder
ORDER BY DonorName;
OPEN DonorCursor
FETCH NEXT FROM DonorCursor INTO #DonorID, #DonorName
WHILE ##fetch_status = 0
BEGIN
INSERT INTO pubOutput
( XMLText )
SELECT #DonorName;
-- Populate the DonorID table
INSERT INTO #CurrentDonor
VALUES ( #DonorID )
/* The following loop is invoked 3 times. The first time through, the year will be 20 years before the latest year,
the second time through, 10 years before. The last time through the year will be the latest year.
*/
SET #LoopCounter = 1
WHILE #LoopCounter <= 3
BEGIN
SELECT #CurrentYear = CASE #LoopCounter
WHEN 1 THEN #LatestYear - 20
WHEN 2 THEN #LatestYear - 10
ELSE #LatestYear
END
-- calculate the world total for the current years (year,year-1) for all recipients
SELECT #totalWorld = SUM(Amount)
FROM dbo.vData2 d
INNER JOIN ( SELECT RecipientID
FROM dbo.RecipientGroup
WHERE GroupID = 160
) c ON d.RecipientID = c.RecipientID
INNER JOIN #CurrentDonor z ON d.DonorID = z.DonorID
WHERE d.year IN ( #CurrentYear - 1, #CurrentYear )
-- calculate the GeoZones total for the current years (year,year-1)
SELECT #totalGeoZones = SUM(Amount)
FROM dbo.vDac2a d
INNER JOIN ( SELECT RecipientID
FROM dbo.GeoZones
WHERE GeoZoneID = 100
) x ON d.RecipientID = x.RecipientID
INNER JOIN #CurrentDonor z ON d.DonorCode = z.DonorCode
WHERE d.year IN ( #CurrentYear - 1, #CurrentYear )
-- Find the top 15 recipients for the current donor
INSERT INTO #Top15
SELECT TOP 15
r.RecipientName ,
( ISNULL(SUM(Amount), 0) / #totalWorld ) * 100
FROM dbo.vData2 d
INNER JOIN dbo.LookupRecipient r ON r.RecipientID = d.RecipientID
INNER JOIN #CurrentDonor z ON d.DonorID = z.DonorID
WHERE d.year IN ( #CurrentYear - 1, #CurrentYear )
GROUP BY r.RecipientName
ORDER BY 2 DESC
-- Print the top 15 recipients and total
INSERT INTO pubOutput
(
XMLText
)
SELECT country + #Separator + CAST(percentage AS VARCHAR)
FROM #Top15
ORDER BY percentage DESC
INSERT INTO pubOutput
(
XMLText
)
SELECT #Heading1 + #Separator + CAST(SUM(Percentage) AS VARCHAR)
FROM #Top15
-- Breakdown by Regionas
-- Region1
IF #totalWorld IS NOT NULL
INSERT INTO pubOutput
(
XMLText
)
SELECT 'Region1' + #Separator
+ CAST(( ISNULL(SUM(Amount), 0) / #totalWorld ) * 100 AS VARCHAR)
FROM dbo.vData2 d
INNER JOIN ( SELECT RecipientID
FROM dbo.RecipientGroup
WHERE RegionID = 1
) c ON d.RecipientID = c.RecipientID
INNER JOIN #CurrentDonor z ON d.DonorID = z.DonorID
WHERE d.year IN ( #CurrentYear - 1, #CurrentYear )
ELSE -- force output of sub-total heading
INSERT INTO pubOutput
(
XMLText
)
SELECT #Heading2 + #Separator + '--'
-- Region2-8
/* similar syntax as Region1 above, for all Regions 2-8 */
-- Total Regions
INSERT INTO pubOutput
(
XMLText
)
SELECT #Heading2 + #Separator + CAST(#totalWorld AS VARCHAR)
-- Breakdown by GeoZones 1-7
-- GeoZone1
INSERT INTO pubOutput
(
XMLText
)
SELECT 'GeoZone1' + #Separator
+ CAST(( ISNULL(SUM(Amount), 0) / #totalGeoZones ) * 100 AS VARCHAR)
FROM dbo.vDac2a d
INNER JOIN ( SELECT RecipientID
FROM dbo.GeoZones
WHERE GeoZoneID = 1
) m ON d.RecipientID = m.RecipientID
INNER JOIN #CurrentDonor z ON d.DonorCode = z.DonorCode
WHERE d.year IN ( #CurrentYear - 1, #CurrentYear )
-- GeoZones2-8
/* similar syntax as GeoZone1 above for GeoZones 2-7 */
-- Total GeoZones - currently hard-coded as 100, due to minor rounding errors
INSERT INTO pubOutput
(
XMLText
)
SELECT #Heading3 + #Separator + '100'
SET #LoopCounter = #LoopCounter + 1
END -- year loop
-- Get the next donor from the cursor
FETCH NEXT FROM DonorCursor
INTO #DonorID, #DonorName
END
-- donorcursor
-- Cleanup
CLOSE DonorCursor
DEALLOCATE DonorCursor
Many thanks in advance for any help you may be able to provide.

Avoiding cursors is a must. You can use a WHILE loop instead of a cursor. However, considering the complexity of the query, keep the cursor for the moment.
To improve performance in other way, check the number of records for below queries:
SELECT RecipientCode FROM dbo.RecipientGroup WHERE GroupID=160
SELECT RecipientCode FROM dbo.GeoZones WHERE GeoZoneID=100
SELECT RecipientID FROM dbo.RecipientGroup WHERE RegionID=1
I suggest create 3 temp tables for above query "outside" of cursor and use them inside of cursor.
Hope this helps!

Related

Single query alternative to Dynamic PIVOT

This has been a bit of a brain-teaser for me for a couple of days and I can't seem to solve it.
Basically I have a Project, Resource and Allocation table where I store on a daily basis the Project-Resource allocation. You can use the below queries to build the table structure:
CREATE TABLE Projects ([ProjectID] INT IDENTITY PRIMARY KEY, [Name] VARCHAR(100));
CREATE TABLE Resources ([ResourceID] INT IDENTITY PRIMARY KEY,[Name] VARCHAR(100));
CREATE TABLE Allocation (
[Resource] INT FOREIGN KEY REFERENCES Resources (ResourceID),
[Project] INT FOREIGN KEY REFERENCES Projects (ProjectID),
[Date] DATE);
Also, you can use the next queries to generate dummy data:
DECLARE #seed INT = 65;
;WITH proj_cte
AS (
SELECT #seed [seed]
UNION ALL
SELECT [seed] + 1
FROM proj_cte
WHERE [seed] < 90
)
INSERT INTO Projects (name)
SELECT 'Project ' + CHAR([seed])
FROM proj_cte;
;WITH res_cte
AS (
SELECT #seed [seed]
UNION ALL
SELECT [seed] + 1
FROM res_cte
WHERE [seed] < 90
)
INSERT INTO Resources (name)
SELECT 'Resource ' + CHAR([seed])
FROM res_cte;
And to populate the Allocation table with random dummy data:
CREATE UNIQUE NONCLUSTERED INDEX ncu_resource_project_date ON Allocation (Resource, Project, DATE);
INSERT INTO Allocation (Resource, Project, DATE)
SELECT DISTINCT ResourceId, ProjectID, AllocationDate
FROM (
SELECT ProjectID, abs(checksum(newid())) % 26 [ResourceId], cast(getdate() + (abs(checksum(newid())) % 26) AS DATE) [AllocationDate], abs(checksum(newid())) % 26 [filter]
FROM Projects
) f
WHERE f.ResourceId > 0
AND f.ProjectID > 0;
GO
-- Adds further random allocation dates for Resource/Project pairs that already
-- exist in Allocation. Each candidate row reuses an existing (Resource, Project)
-- pair with a new random date; the NOT EXISTS guard skips any
-- (Resource, Project, Date) combination that is already stored, so the unique
-- index ncu_resource_project_date is never violated.
INSERT INTO Allocation (Resource, Project, DATE)
SELECT DISTINCT f.ResourceId, f.ProjectID, f.AllocationDate
FROM (
    SELECT ProjectID,
           abs(checksum(newid())) % 26 [ResourceId],
           cast(getdate() + (abs(checksum(newid())) % 26) AS DATE) [AllocationDate],
           abs(checksum(newid())) % 26 [filter]
    FROM Projects
) f
INNER JOIN Allocation u
    ON u.Resource = f.ResourceId
    AND u.Project = f.ProjectID
    AND f.AllocationDate <> u.[Date]
    AND f.ResourceId > 0
    AND f.ProjectID > 0
WHERE NOT EXISTS (
    SELECT 1
    FROM Allocation uin
    WHERE uin.Resource = u.Resource
        AND uin.Project = u.Project   -- was "uin.project = uin.project", a tautology
        AND uin.[Date] = f.AllocationDate
);
-- GO is a batch separator and must be on its own line; the count repeats the batch.
GO 250
Now, the goal of all this is to write a single query (that includes CTE's) that can be parameterized, and that returns a result similar to a cross-section of the Allocation table, based on 2 dates, a @StartDate and @EndDate (which are the parameters).
The goal output should look something like this:
I've tried to extend a CTE which generates the date range between @StartDate and @EndDate and almost got to a working solution with the below query, but it has a few hurdles:
I still have to manually enter the date ranges in the PIVOT
I couldn't find an aggregate function for PIVOT that fits my logic (currently I have a "bug" because I use MIN(Project) which returns the MIN(Project) for that Resource for the entire date range).
DECLARE #srcdt DATE = '20180101', #enddt DATE = '20180116';
;WITH dates
AS (
SELECT #srcdt srcdt
UNION ALL
SELECT dateadd(day, 1, srcdt)
FROM dates
WHERE srcdt < #enddt
)
SELECT pvt.*
FROM (
SELECT r.Name AS Resource, p.Name AS Project, DATE
FROM Allocation u
INNER JOIN Projects p
ON u.Project = p.ProjectID
RIGHT JOIN Resources r
ON u.resource = r.ResourceID
) pvt_src
PIVOT (min(Project) FOR [Date] IN ([2018-05-01], [2018-05-02], [2018-05-03], [2018-05-04], [2018-05-05], [2018-05-06], [2018-05-07], [2018-05-08], [2018-05-09], [2018-05-10], [2018-05-11], [2018-05-12], [2018-05-13], [2018-05-14], [2018-05-15], [2018-05-16], [2018-05-17], [2018-05-18], [2018-05-19], [2018-05-20], [2018-05-21], [2018-05-22], [2018-05-23], [2018-05-24], [2018-05-25], [2018-05-26], [2018-05-27], [2018-05-28], [2018-05-29], [2018-05-30])) pvt
I can get this working with a dynamic PIVOT and with Dynamic SQL to generate the date range columns that I PIVOT to, but that's already more than 1 query.
It's just not possible to produce a result set of an unknown (or data-driven) number of columns without using dynamic sql. This isn't a brain-teaser, it's a brick wall.

Comma separated string total count by variable by row

This is the schema:
User_ID Page_ID Timestamp
1 48,51,94 7/26/2017 8:30
2 42,11,84 7/26/2017 9:40
3 4,16,24 7/26/2017 16:20
4 7,2,94 7/27/2017 8:00
1 48,22,94 7/27/2017 13:50
2 42,11 7/27/2017 14:00
3 4,24 7/27/2017 18:15
The code below gives aggregate count of page ids ran per user (non-unique on purpose):
SELECT User_ID, sum(len(Page_ID) - len(replace(Page_ID, ',', '')) +1) as TotalPageCount
FROM DBTABLE
group by User_ID
Output:
User_ID TotalPageCount
1 6
2 5
3 5
4 3
However, I am looking to add a (comma separated) column with page count per page id per user id. ie. a column as newsletter id 1: count, newsletter id 2: count, etc. (essentially a dictionary). Can be a different format, but needs to be descriptive at the page id level, with its respective count.
Something like this:
User_ID PageIDCount TotalPageCount
1 48:2, 51:1, 94:2, 22:1, 6
2 42:2, 11:2, 84:1, 5
3 4:2, 16:1, 24:2, 5
4 7:1, 2:1, 94:1, 3
Your help is greatly appreciated!
Edit:
As per SeanLange's amazing solution, you can change the definition of MyCTE to the one below, in order to avoid using any functions:
select user_id, page_id, page_count = count(*)
FROM (
SELECT user_id, Split.a.value('.', 'NVARCHAR(max)') AS page_id FROM
( SELECT user_id, CAST ('<M>' + REPLACE(page_id, ',', '</M><M>') + '</M>' AS XML) page_id
FROM #temp
) AS A
CROSS APPLY page_id.nodes ('/M') AS Split(a)
) x
group by user_id, page_id
Wow this is a nightmare. You are going to need a string splitter to start with. My personal favorite is this one. http://www.sqlservercentral.com/articles/Tally+Table/72993/ There are a number of other excellent choices here. https://sqlperformance.com/2012/07/t-sql-queries/split-strings
Starting with your data you will need to do something like this.
declare #Something table
(
User_ID int
, Page_ID varchar(100)
, MyDate datetime
)
insert #Something
select 1, '48,51,94', '7/26/2017 8:30' union all
select 2, '42,11,84', '7/26/2017 9:40' union all
select 3, '4,16,24', '7/26/2017 16:20' union all
select 4, '7,2,94', '7/27/2017 8:00' union all
select 1, '48,22,94', '7/27/2017 13:50' union all
select 2, '42,11', '7/27/2017 14:00' union all
select 3, '4,24', '7/27/2017 18:15'
select User_ID
, Page_ID = x.Item
, count(*)
from #Something s
cross apply dbo.DelimitedSplit8K(s.Page_ID, ',') x
group by User_ID
, x.Item
order by User_ID
, x.Item
This gets the data with the counts you want. From there you are going to have to shove this back into the denormalized structure that you want. You can do this with FOR XML. Here is an article that explains how to do that part of this. Simulating group_concat MySQL function in Microsoft SQL Server 2005?
-----EDIT-----
OK here is the complete working solution. You have obviously been working hard at trying to get this sorted out. I am using the DelimitedSplit8K function here so I didn't have to inline XML like your solution was doing.
-- MyCTE: one row per (User_ID, page id) with the number of times that user
-- viewed that page, obtained by splitting the comma-separated Page_ID column.
with MyCTE as
(
    select User_ID
        , Page_ID = x.Item
        , PageCount = count(*)
    from #Something s
    cross apply dbo.DelimitedSplit8K(s.Page_ID, ',') x
    group by User_ID
        , x.Item
)
-- GroupedPageViews: one row per user with the total view count and a
-- 'page:count, page:count, ...' list built with FOR XML PATH.
, GroupedPageViews as
(
    select c.User_ID
        , sum(c.PageCount) as TotalPageCount
        -- Each fragment is prefixed with ', ' (two characters), so STUFF must
        -- remove two characters to avoid a leading space in the result.
        -- varchar(10) leaves room for page ids and counts beyond four characters.
        , PageViews = STUFF((select ', ' + convert(varchar(10), c2.Page_ID) + ':' + convert(varchar(10), c2.PageCount)
                            from MyCTE c2
                            where c.User_ID = c2.User_ID
                            order by c2.Page_ID
                            for xml path('')), 1, 2, '')
    from MyCTE c
    group by c.User_ID
)
-- GroupedPageViews is already one row per user, so no further join or
-- grouping is needed here.
select gpv.User_ID
    , gpv.PageViews
    , gpv.TotalPageCount
from GroupedPageViews gpv
order by gpv.User_ID
This will return your data like this.
User_ID PageViews TotalPageCount
1 22:1, 48:2, 51:1, 94:2 6
2 11:2, 42:2, 84:1 5
3 16:1, 24:2, 4:2 5
4 2:1, 7:1, 94:1 3
Here you go
SELECT DISTINCT User_Id
, (
SELECT CAST(t.Value AS VARCHAR) + ':' + CAST(COUNT(t.value) AS VARCHAR) + ', '
FROM TBL_46160346_DBTABLE ii
CROSS APPLY (
SELECT *
FROM fn_ParseText2Table(Page_ID, ',')
) t
WHERE pp.User_Id = ii.User_Id
GROUP BY User_Id
, VALUE
ORDER BY User_Id
FOR XML PATH('')
) PageIDCount
, (
SELECT COUNT(*)
FROM TBL_46160346_DBTABLE ii
CROSS APPLY (
SELECT *
FROM fn_ParseText2Table(Page_ID, ',')
) t
WHERE pp.User_Id = ii.User_Id
GROUP BY User_Id
) TotalPageCount
FROM TBL_46160346_DBTABLE pp
fn_ParseText2Table function
-- Splits a delimited string into a table of BIGINT values.
--
-- Parameters:
--   @p_SourceText  the delimited text to split (leading/trailing spaces are trimmed)
--   @p_Delimeter   the delimiter string; defaults to a comma
-- Returns:
--   @retTable      one row per token, converted to BIGINT on insert.
--                  An empty (zero-length) source string yields no rows.
ALTER FUNCTION [dbo].[fn_ParseText2Table] (
    @p_SourceText VARCHAR(8000), @p_Delimeter VARCHAR(10) = ',' --default comma
)
RETURNS @retTable TABLE (Value BIGINT)
AS
BEGIN
    DECLARE @w_Continue INT, @w_StartPos INT, @w_Length INT, @w_Delimeter_pos INT, @w_tmp_txt VARCHAR(48), @w_Delimeter_Len TINYINT

    IF LEN(@p_SourceText) = 0
    BEGIN
        SET @w_Continue = 0 -- force early exit
    END
    ELSE
    BEGIN
        -- parse the original @p_SourceText array into a temp table
        SET @w_Continue = 1
        SET @w_StartPos = 1
        SET @p_SourceText = RTRIM(LTRIM(@p_SourceText))
        SET @w_Length = DATALENGTH(RTRIM(LTRIM(@p_SourceText)))
        SET @w_Delimeter_Len = LEN(@p_Delimeter)
    END

    WHILE @w_Continue = 1
    BEGIN
        -- Position of the next delimiter, relative to the current start position.
        SET @w_Delimeter_pos = CHARINDEX(@p_Delimeter, SUBSTRING(@p_SourceText, @w_StartPos, @w_Length - @w_StartPos + @w_Delimeter_Len))

        IF @w_Delimeter_pos > 0 -- delimeter(s) found, get the value
        BEGIN
            SET @w_tmp_txt = LTRIM(RTRIM(SUBSTRING(@p_SourceText, @w_StartPos, @w_Delimeter_pos - 1)))
            -- Advance past the token and the delimiter that followed it.
            SET @w_StartPos = @w_Delimeter_pos + @w_StartPos + @w_Delimeter_Len - 1
        END
        ELSE -- No more delimeters, get last value
        BEGIN
            SET @w_tmp_txt = LTRIM(RTRIM(SUBSTRING(@p_SourceText, @w_StartPos, @w_Length - @w_StartPos + @w_Delimeter_Len)))
            SELECT @w_Continue = 0
        END

        -- Implicit VARCHAR -> BIGINT conversion happens here.
        INSERT INTO @retTable
        VALUES (@w_tmp_txt)
    END

    RETURN
END

How can I pivot SUM(Premium) by each quarter between two dates?

As you can see on a picture below (Excel), I have two dates: TransEffDate and TransExpDate. How can I break the premium of $490 and put it in a quarter buckets?
How can I achieve the same in SQL?
I have this:
SELECT PolicyNumber,
TransactionEffectiveDate,
TransactionExpirationDate,
Coverage,
WrittenPremium,
CAST(YEAR(TransactionEffectiveDate) as varchar(5))+'.'+ CAST(DATEPART(QUARTER,TransactionEffectiveDate) as varchar(1)) as YearQuarter
FROM PlazaInsuranceWPDataSet
WHERE PolicyNumber ='PACA1000101-00'
ORDER BY PolicyNumber
For 1st quarter will be 0, because TransEffDate starts in a second quarter.
For 2nd quarter we need to find the number of days between TransEffDate and TransExpDate, which is 365 days, then divide Premium ($490) by 365 days, which is $1.34 per day. Then multiply 1.34 by the number of days between TransEffDate and the end of the second quarter (which is 65 days).
so something like that:
WrittenPremium/DATEDIFF(DAY,TransactionEffectiveDate,TransactionExpirationDate) * DATEDIFF(DAY,TransactionEffectiveDate, EndOfQuarter) END AS Year_Quarter_1
But how can I get EndOfQuarter dynamically for each PolicyNumber
There should be some formula for this purpose.
Thanks
Consider the following dynamic pivot.
Now, I cheated a bit by dropping the intermediate results in a temp table, but this can be changed if necessary...
By using an ad-hoc tally table in CROSS APPLY the dates and values are allocated correctly via a day-weighted methodology. In other words, the math works.
--Drop Table #TempData
Select A.[PolicyNumber]
,A.[Coverage]
,A.[Premium]
,A.[TransEff]
,A.[TransExp]
,B.*
Into #TempData
From YourTable A
Cross Apply (
Select Qtr = Format(max(DatePart(YY,D)+DatePart(QQ,D)/10.0),'0000.0')
,Value = (A.Premium/(DateDiff(DD,A.TransEff,A.TransExp)+1.0))*count(*)
From (Select Top (DateDiff(DD,A.TransEff,A.TransExp)+1) D=DateAdd(DD,Row_Number() Over (Order By (Select null))-1,A.TransEff) From master..spt_values ) D
Group By DatePart(YY,D),DatePart(QQ,D)
) B
Where PolicyNumber ='PACA1000101-00'
Declare #SQL varchar(max) = Stuff((Select Distinct ',' + QuoteName(Qtr) From #TempData Order by 1 For XML Path('') ),1,1,'')
Select #SQL = '
Select [PolicyNumber],[Coverage],[Premium],[TransEff],[TransExp],' + #SQL + '
From #TempData
Pivot (Sum([Value]) For [Qtr] in (' + #SQL + ') ) p
Order By 1,3'
Exec(#SQL);
Returns
If it helps with the visualization, the temp table looks like the image below. Then it becomes a simple PIVOT
EDIT - To Fix the Order By QTR - Notice the Order By 1
Declare #SQL varchar(max) = Stuff((Select Distinct ',' + QuoteName(Qtr) From #TempData Order by 1 For XML Path('') ),1,1,'')
Boy that's tough. Here's one way, you create a table with the quarter boundaries in it. You can add dates way into the future.
CREATE TABLE quarters(
lo DATETIME NOT NULL PRIMARY KEY,
hi DATETIME NOT NULL
);
INSERT INTO quarters VALUES ('2012-01-01','2012-04-01');
INSERT INTO quarters VALUES ('2012-04-01','2012-07-01');
INSERT INTO quarters VALUES ('2012-07-01','2012-10-01');
INSERT INTO quarters VALUES ('2012-10-01','2013-01-01');
INSERT INTO quarters VALUES ('2013-01-01','2013-04-01');
INSERT INTO quarters VALUES ('2013-04-01','2013-07-01');
INSERT INTO quarters VALUES ('2013-07-01','2013-10-01');
INSERT INTO quarters VALUES ('2013-10-01','2014-01-01');
Here's one line of policy data
CREATE TABLE Insurance (
policynumber VARCHAR(10) NOT NULL PRIMARY KEY,
premium INT,
TransEff datetime,
TransExp datetime
);
INSERT INTO Insurance VALUES ('PACA1',490,'2012-04-27','2013-04-27');
You can join this with your data table - the join condition is that the periods overlap:
SELECT datepart(YEAR,l1) y,datepart(quarter,l1) q,l1,h1,
CASE WHEN l1>l2 THEN l1 ELSE l2 END AS maxst,
CASE WHEN h1>h2 THEN h2 ELSE h1 END AS minend
FROM
(SELECT policynumber,TransEff,
CAST(lo AS INT) l1,CAST(transeff AS INT) l2,
CAST(hi AS INT) h1,CAST(transexp AS INT) h2
FROM Insurance JOIN quarters ON(hi>transeff AND lo<transexp)
) AS i;
That gives the overlapping dates:
y q l1 h1 maxst minend
2012 2 40998 41088 41024 41088
2012 3 41089 41180 41089 41180
2012 4 41181 41272 41181 41272
2013 1 41273 41362 41273 41362
2013 2 41363 41453 41363 41389
You can now do the subtraction to find how many days apply to each quarter.
SELECT policynumber pn, y, q, minend-maxstart v
FROM(
SELECT policynumber, datepart(YEAR,l1) y,datepart(quarter,l1) q,
CASE WHEN l1>l2 THEN l1 ELSE l2 END AS maxstart,
CASE WHEN h1>h2 THEN h2 ELSE h1 END AS minend
FROM
(SELECT policynumber,TransEff,
CAST(lo AS INT) l1,CAST(transeff AS INT)l2,
CAST(hi AS INT) h1,CAST(transexp AS INT)h2
FROM Insurance JOIN quarters ON(hi>transeff AND lo<transexp)
) AS i
) as x
Which gives...
pn y q v
PACA1 2012 2 65
PACA1 2012 3 92
PACA1 2012 4 92
PACA1 2013 1 90
PACA1 2013 2 26

TSQL While Loop over months in year

I have an output that I need to achieve and I am not too certain how to go about it.
I first need to start by looping over each month in the year and using that month in a select statement to check for data.
For example:
Select * from table where MONTH(A.[submissionDate]) = 1
Select * from table where MONTH(A.[submissionDate]) = 2
Select * from table where MONTH(A.[submissionDate]) = 3
My end result is to create this XML output to use with a chart plugin. It needs to include the months even if there is no data which is why I wanted to loop through each month to check for it.
<root>
<dataSet>
<areaDesc>Area 1</areaDesc>
<data>
<month>January</month>
<monthValue>1</monthValue>
<submissions>0</submissions>
</data>
<data>
<month>February</month>
<monthValue>2</monthValue>
<submissions>7</submissions>
</data>
<data>
<month>March</month>
<monthValue>3</monthValue>
<submissions>5</submissions>
</data>
</dataSet>
<dataSet>
<areaDesc>Area 2</areaDesc>
<data>
<month>January</month>
<monthValue>1</monthValue>
<submissions>0</submissions>
</data>
<data>
<month>February</month>
<monthValue>2</monthValue>
<submissions>7</submissions>
</data>
<data>
<month>March</month>
<monthValue>3</monthValue>
<submissions>5</submissions>
</data>
</dataSet>
</root>
I may be way overthinking this, but I'm hoping that talking it through may help me out a little.
Here is my current set up of how I get some other stats:
--Temp table
DECLARE #areas TABLE (
area VARCHAR (100));
IF #dept = 'global'
OR #dept = ''
BEGIN
INSERT INTO #areas (area)
SELECT DISTINCT(AreaDesc)
FROM dbo.EmpTable;
END
ELSE
BEGIN
INSERT INTO #areas
SELECT #dept;
END
IF (#action = 'compare')
BEGIN
SELECT DATENAME(month, A.[submissionDate]) AS [month],
MONTH(A.[submissionDate]) AS [monthValue],
count(A.[submissionID]) AS submissions,
B.[AreaDesc]
FROM empowermentSubmissions AS A
INNER JOIN empTable AS B
ON A.[nomineeQID] = B.[QID]
WHERE YEAR(A.[submissionDate]) = #year
AND A.[statusID] = 3
AND A.[locationID] IN (SELECT location
FROM #table)
GROUP BY DATENAME(month, A.[submissionDate]), MONTH(A.[submissionDate]), B.[AreaDesc]
ORDER BY [monthValue] ASC
FOR XML PATH ('dataSet'), TYPE, ELEMENTS, ROOT ('root');
END
ELSE
This is a great application for a "Dates" table or view. Create a new table in your database with schema like:
CREATE TABLE dbo.Dates (
Month INT,
MonthName VARCHAR(20)
)
Populate this table with the years and months you may want to aggregate over. Then, you can make your query like:
-- Submissions per area and month, including months with no submissions.
SELECT
    Area,
    Dates.MonthName,
    -- Count a Submissions column rather than *: with a LEFT JOIN, a month with
    -- no matching submissions still produces one row, so COUNT(*) would report 1.
    COUNT(Submissions.SubmissionDate) AS Count
FROM
    dbo.Dates
LEFT OUTER JOIN
    dbo.Submissions
    ON Dates.Month = MONTH(Submissions.SubmissionDate)
GROUP BY
    Dates.MonthName,
    Area
The LEFT OUTER JOIN will give you one row for every Year and Month in the dates table, and a count of any submissions on that month. You end up with output like:
Area | MonthName | Count
Area 1 | Jan | 0
Area 2 | Feb | 2
&c.
You'll want to do a FOR XML structure to get the exact result set you're looking for in one go, I think. I put this together with what I could glean about your XML. Just change the name of the table variable here to your real table name and this should work.
EDIT: changed up the query to match the definition from the posted query. Updated the data element where clause to maintain month instantiation when zero counts were found in a month.
EDIT: Added Status requirement.
EDIT: Moved areaDesc criteria for constant month output.
declare #empowermentSubmissions table (submissionID int primary key identity(1,1), submissionDate datetime, nomineeQID INT, statusID INT)
declare #empTable table (QID int primary key identity(1,1), AreaDesc varchar(10))
declare #n int = 1
while #n < 50
begin
insert into #empTable (AreaDesc) values ('Area ' + cast((#n % 2)+1 as varchar(1)))
set #n = #n + 1
end
set #n = 1
while #n < 500
begin
insert into #empowermentSubmissions (submissionDate, nomineeQID, StatusID) values (dateadd(dd,-(cast(rand()*600 as int)),getdate()), (select top 1 QID from #empTable order by newid()), 3 + (#n % 2) - (#n % 3) )
set #n = #n + 1
end
declare #year int = 2014
select (
select (
select (
select e1.areaDesc
from #empTable e1
where e1.areaDesc = e2.areaDesc
group by e1.areaDesc
for xml path(''),type
)
, (
select [month], [monthValue], count(s1.submissionID) as submissions
from (
select #year [Year]
, datename(month,dateadd(mm,RowID-1,#year-1900)) [Month]
, month(dateadd(mm,RowID-1,#year-1900)) [MonthValue]
from (
select *, row_number()over(order by name) as RowID
from master..spt_values
) d
where d.RowID <= 12
) t
left join (
select s3.submissionID, s3.submissionDate, e3.AreaDesc
from #empowermentSubmissions s3
inner join #empTable e3 on s3.nomineeQID = e3.QID
where s3.statusID = 3
and e3.areaDesc = e2.areaDesc
) s1 on year(s1.submissionDate) = t.[Year]
and month(s1.submissionDate) = t.[MonthValue]
group by [Month], [MonthValue]
order by [MonthValue]
for xml path('data'),type
)
for xml path(''),type
) dataset
from #empowermentSubmissions s2
inner join #empTable e2 on s2.nomineeQID = e2.QID
group by e2.areaDesc
for xml path(''), type
) root
for xml path (''), type
You should be able to use a tally table to get the months:
SELECT TOP 12 IDENTITY(INT,1,1) AS N
INTO #tally
FROM master.dbo.syscolumns sc1
SELECT DATENAME(MONTH,DATEADD(MONTH,t.N-1,'2014-01-01')) AS namemonth, t.N AS monthvalue, COUNT(tbl.submissionDate) AS submissions, tbl.Area
FROM #tally t
LEFT OUTER JOIN tbl ON MONTH(tbl.submissionDate) = t.N
GROUP BY t.n, tbl.Area
DROP TABLE #tally

How can I maintain a running total in a SQL Server database using VB.NET?

I am using Visual Studio 2010 to build a Windows Forms application to maintain a table in an SQL Server 2008 database. The table is named CASHBOOK and here are the further details:
DATE | DESCRIPTION | DEBIT | CREDIT | BALANCE
--------|----------------|---------|-----------|---------
1/1/2011| CASH BALANCE | | | 5000
1/1/2011| SALES | 2500 | | 7500
2/1/2011| PURCHASE | | 3000 | 4500
2/1/2011| RENT | | 4000 | 500
2/1/2011| SALES | 5000 | | 5500
I can use CASHBOOKTABLEADAPTER.INSERT(...) to insert appropriately, but my problem is how do I update the BALANCE column?
See this article by Alexander Kuznetsov
Denormalizing to enforce business rules: Running Totals
You can try an insert with a subquery, something like following:
-- Insert a new debit row whose BALANCE is the latest stored balance plus the debit.
-- NOTE(review): assumes CASHBOOK has an increasing ID column that identifies the
-- most recent row -- confirm; TOP (1) without ORDER BY returns an arbitrary row.
INSERT INTO CASHBOOK ( DESCRIPTION, DEBIT, BALANCE )
SELECT 'asdf', 2500, ( SELECT TOP (1) BALANCE FROM CASHBOOK ORDER BY ID DESC ) + 2500
It's a bit heavy handed, but here's a way to update the full table with balance information.
update
a
set
a.Balance = (
select sum(isnull(x.debit, 0.0) - isnull(x.credit, 0.0))
from cashbook x
where x.Date < a.Date
or (x.Date = a.Date and x.ID <= a.ID)
) + (
select top 1 y.Balance
from cashbook y
where y.debit is null
and y.credit is null
order by y.ID
)
from
cashbook a
Now that's useful only if you HAVE to have the balance in the table. A more appropriate solution might be to create a UDF that encompasses this logic and call that to calculate the balance field for a specific row only when you need it. It really all depends on your usage.
-- Returns the running balance for the cashbook row with the given ID.
--
-- Parameters:
--   @id  ID of the cashbook row whose balance is wanted
-- Returns:
--   The opening balance plus the sum of (debit - credit) over every row dated
--   before the target row, or on the same date with an ID less than or equal
--   to it. Returns 0.0 when no row has the given ID, and NULL when no opening
--   row (debit and credit both NULL) exists.
create function dbo.GetBalance(@id int) returns decimal(12, 2) as
begin
    declare @result decimal(12, 2) = 0.0

    select
        @result = (
            -- net movement up to and including the target row
            select sum(isnull(x.debit, 0.0) - isnull(x.credit, 0.0))
            from cashbook x
            where x.Date < a.Date
                or (x.Date = a.Date and x.ID <= a.ID)
        ) + (
            -- opening balance: the first row with neither a debit nor a credit
            select top 1 y.Balance
            from cashbook y
            where y.debit is null
                and y.credit is null
            order by y.ID
        )
    from
        cashbook a   -- was misspelled "cashback"
    where
        a.ID = @id

    return @result
end
Why do you need to? This is something that should be calculated as a reporting / viewing function. I would suggest either creating a view with a running total column (various ways to achieve this).
Alternatively if you're viewing this in VB.Net calculate it in your app.
I agree with Joel, you should be calculating this at runtime, not storing the running totals in the database. Here's an example of how to figure out the running totals using a recursive cte in sql server:
declare #values table (ID int identity(1,1), Value decimal(4,2))
declare #i int
insert into #values values (1.00)
insert into #values values (2.00)
insert into #values values (3.00)
insert into #values values (4.00)
insert into #values values (5.00)
insert into #values values (6.00)
select #i=min(ID) from #values
;with a as
(
select ID, Value, Value as RunningTotal
from #values
where ID=#i
union all
select b.ID, b.Value, cast(b.Value + a.RunningTotal as decimal(4,2)) as RunningTotal
from #values b
inner join a
on b.ID=a.ID+1
)
select * from a
here's a blog on recursive queries: Recursive CTEs
Also here's a lengthy discussion about running totals.
One potential problem with recursive CTEs is the maximum depth limit of 32767, which can be prohibitive in a production environment.
In this solution you add an id column that is ordinal to the transaction sequence and then update the balance column in place.
declare #t table(id int identity(1,1) not null
, [DATE] date not null
, [DESCRIPTION] varchar(80) null
, [DEBIT] money not null default(0)
, [CREDIT] money not null default(0)
, [BALANCE] money not null default(0)
);
declare #bal money=0;
insert into #t([DATE],[DESCRIPTION],[DEBIT],[CREDIT],[BALANCE])
select '1/1/2011','CASH BALANCE',0,0,5000 UNION ALL
select '1/1/2011','SALES',2500,0,0 UNION ALL
select '2/1/2011','PURCHASE',0,3000,0 UNION ALL
select '2/1/2011','RENT',0,4000,0 UNION ALL
select '2/1/2011','SALES',5000,0,0;
set #bal=(select top 1 [BALANCE] from #t order by id); /* opening balance is stored but not computed, so we simply look it up here. */
update t
set #bal=t.[BALANCE]=(t.[DEBIT]-t.[CREDIT])+#bal
output
inserted.*
from #t t
left join #t t0 on t0.id+1=t.id; /*should order by id by default, but to be safe we force the issue here. */

Resources