Recursive CTE trouble - sql-server

I'm having some trouble coming up with the CTE way of doing a task. I already have a loop method and it is fast enough, but I wanted to do it in a proper way to better learn and understand CTE usage.
SQL:
-- Question setup: a tier table (UpperLimit / Factor per Year and Active flag) and the
-- first CTE attempt. This version only numbers the tiers; it does not yet reduce the
-- income tier by tier, so every row reports the full @Income and a WeightedValue of 0.0.
-- T-SQL local variables and table variables must start with '@'.
DECLARE @Income MONEY   = 125000.00,
        @Active INT     = 0,
        @Year   CHAR(4) = '2018';

DECLARE @T TABLE ([Year] CHAR(4), Active INT, UpperLimit MONEY, Factor DECIMAL(6,3));

INSERT INTO @T ([Year], Active, UpperLimit, Factor)
VALUES ('2018', 0, 5000.0,      1.00),
       ('2018', 0, 100000.0,    0.85),
       ('2018', 0, 500000.0,    0.80),
       ('2018', 0, 999999999.0, 0.75);

WITH GradientCTE ([Year], Active, UpperLimit, Factor, [Income], WeightedValue, [Row])
AS
(
    SELECT [Year], Active, UpperLimit, Factor,
           @Income AS [Income],
           CAST(0.0 AS DECIMAL(16,3)) AS WeightedValue,
           -- tiers are numbered from the lowest UpperLimit upwards within each Year/Active group
           ROW_NUMBER() OVER (PARTITION BY [Year], Active ORDER BY UpperLimit ASC) AS [Row]
    FROM @T
)
SELECT [Year], Active, UpperLimit, Factor, [Income], WeightedValue, [Row]
FROM GradientCTE
ORDER BY UpperLimit;
TLDR version; current output:
Year Active UpperLimit Factor Income WeightedValue Row
2018 0 5000.000 1.000 125000.000 0.000 1
2018 0 100000.000 0.850 125000.000 0.000 2
2018 0 500000.000 0.800 125000.000 0.000 3
2018 0 999999999.000 0.750 125000.000 0.000 4
What I would like:
Year Active UpperLimit Factor Income WeightedValue Row
2018 0 5000.000 1.000 125000.000 5000.000 1
2018 0 100000.000 0.850 120000.000 85000.000 2
2018 0 500000.000 0.800 20000.000 16000.000 3
2018 0 999999999.000 0.750 0.000 0.000 4
Explained:
Currently, the looping logic goes over the set row by row and reduces @Income by the UpperLimit for each row until no money is left. It multiplies the amount consumed in each row by the Factor to get a weighted amount. So, in the example provided, the starting income is 125,000.00. The first 5000 are at full weight (1.00), so we reduce the income by 5000 and move to the next row, saving the summed weighted value. This is done until income is 0. So, 125,000 should come out to (5000 * 1.0) + (100000 * 0.85) + (20000 * 0.80) + (0.00 * 0.75) or 106,000 total if summed.

Thanks to Ross Bush's answer, it led me down the right track to solve the problem. From a maintenance standpoint, I think a looping pattern is easier to understand, so I likely won't implement the CTE version and performance isn't an issue as the data set is tiny.
-- Recursive-CTE version of the tiered weighting: walks the tiers in UpperLimit order,
-- subtracting each tier's UpperLimit from the remaining income and weighting the amount
-- consumed in that tier by the tier's Factor.
DECLARE @Income DECIMAL(18,3) = 125000.00,
        @Active INT           = 0,
        @Year   CHAR(4)       = '2018';

DECLARE @T TABLE ([Year] CHAR(4), Active INT, UpperLimit DECIMAL(18,3), Factor DECIMAL(18,3));

INSERT INTO @T ([Year], Active, UpperLimit, Factor)
VALUES ('2018', 0, 5000.0,      1.00),
       ('2018', 0, 100000.0,    0.85),
       ('2018', 0, 500000.0,    0.80),
       ('2018', 0, 999999999.0, 0.75);

WITH GradientCTE
AS
(
    -- One synthetic seed row (Row 0) per Year/Active group; it carries the starting income.
    SELECT DISTINCT
           [Year], Active, UpperLimit = 0.00, Factor = 0.00, [Row] = 0
    FROM @T
    UNION ALL
    -- The real tiers, numbered 1..n from the lowest UpperLimit upwards.
    SELECT [Year], Active, UpperLimit, Factor,
           ROW_NUMBER() OVER (PARTITION BY [Year], Active ORDER BY UpperLimit ASC) AS [Row]
    FROM @T
),
Reduce AS
(
    -- Anchor: start from the seed row. Filtering on [Row] = 0 (rather than UpperLimit = 0)
    -- keeps a real tier whose UpperLimit happens to be 0 from acting as a second anchor.
    SELECT [Year], Active,
           CAST(@Income AS DECIMAL(18,3)) AS [RemainingIncome],
           [Row],
           Factor,
           UpperLimit,
           CAST(0.00 AS DECIMAL(18,3)) AS WeightedValue
    FROM GradientCTE
    WHERE [Row] = 0
    UNION ALL
    -- Recursive step: move to the next tier of the same Year/Active group.
    SELECT G.[Year], G.Active,
           -- income left after this tier, clamped at 0; the outer CAST pins the column
           -- type to the anchor's DECIMAL(18,3), which a recursive CTE requires
           CAST(CASE WHEN R.[RemainingIncome] - G.UpperLimit < 0
                     THEN 0
                     ELSE R.[RemainingIncome] - G.UpperLimit
                END AS DECIMAL(18,3)) AS [RemainingIncome],
           G.[Row],
           G.Factor,
           G.UpperLimit,
           -- weight either the whole tier or whatever income was still left on entry
           CAST(CASE WHEN R.[RemainingIncome] > G.UpperLimit
                     THEN G.UpperLimit * G.Factor
                     ELSE R.[RemainingIncome] * G.Factor
                END AS DECIMAL(18,3)) AS WeightedValue
    FROM GradientCTE G
    INNER JOIN Reduce R
        ON R.[Row] = G.[Row] - 1
       AND G.[Year] = R.[Year]
       AND G.Active = R.Active
)
SELECT [Year], Active, [RemainingIncome], [Row], Factor, UpperLimit, WeightedValue
FROM Reduce;
-- For the total only, replace the final SELECT with:
--   SELECT [Year], Active, SUM(WeightedValue) FROM Reduce GROUP BY [Year], Active;

You can reduce the results within another CTE, recursively. I added a UNION with a zero row to the first set to produce the first line showing the starting income.
-- Sketch of the recursive reduction: each tier's ReduceAmount (UpperLimit * Factor) is
-- subtracted from the running Income, clamped at 0. Note that this subtracts the
-- weighted amount, and that tiers are chained by Row only (no Year/Active in the join).
DECLARE @Income DECIMAL(18,3) = 125000.00,
        @Active INT           = 0,
        @Year   CHAR(4)       = '2018';

DECLARE @T TABLE ([Year] CHAR(4), Active INT, UpperLimit DECIMAL(18,3), Factor DECIMAL(18,3));

INSERT INTO @T ([Year], Active, UpperLimit, Factor)
VALUES ('2018', 0, 5000.0,      1.00),
       ('2018', 0, 100000.0,    0.85),
       ('2018', 0, 500000.0,    0.80),
       ('2018', 0, 999999999.0, 0.75);

WITH GradientCTE
AS
(
    -- Seed row (Row 0) so the first output line shows the starting income.
    SELECT ReduceAmount = 0, UpperLimit = 0.00, Factor = 0.00, Row = 0
    UNION ALL
    SELECT ReduceAmount = UpperLimit * Factor, UpperLimit, Factor,
           ROW_NUMBER() OVER (PARTITION BY [Year], Active ORDER BY UpperLimit ASC) AS [Row]
    FROM @T
),
Reduce AS
(
    -- Anchor on the seed row itself; filtering on ReduceAmount = 0 would also pick up
    -- any real tier whose UpperLimit or Factor is 0.
    SELECT Income = CAST(@Income AS DECIMAL(18,3)),
           Row,
           ReduceAmount
    FROM GradientCTE
    WHERE Row = 0
    UNION ALL
    SELECT Income = CAST(CASE WHEN R.Income - G.ReduceAmount < 0
                              THEN 0
                              ELSE R.Income - G.ReduceAmount
                         END AS DECIMAL(18,3)),   -- must match the anchor's type exactly
           G.Row,
           G.ReduceAmount
    FROM GradientCTE G
    INNER JOIN Reduce R ON R.Row = G.Row - 1
)
SELECT Income, Row, ReduceAmount
FROM Reduce
WHERE Income >= 0;

-- Variable-carrying UPDATE ("running update") version: copies the tiers into #tmp with a
-- row number, then uses one UPDATE whose SET list both assigns variables and writes
-- columns, so @Income is carried from row to row.
-- NOTE(review): this depends on the rows being processed in [Row] order, which the UPDATE
-- statement itself does not state anywhere - verify on your data before relying on it.
-- NOTE(review): the self-join t1.Row = t2.Row + 1 only matches rows that have a successor,
-- so the highest-numbered tier is never updated - confirm that is intended.
DECLARE @Income  MONEY   = 125000.00,
        @Active  INT     = 0,
        @Year    CHAR(4) = '2018',
        @vIncome MONEY   = 0;   -- income as it was before the current row's reduction

DECLARE @T TABLE ([Year] CHAR(4), Active INT, UpperLimit MONEY, Factor DECIMAL(6,3));

INSERT INTO @T ([Year], Active, UpperLimit, Factor)
VALUES ('2018', 0, 5000.0,      1.00),
       ('2018', 0, 100000.0,    0.85),
       ('2018', 0, 500000.0,    0.80),
       ('2018', 0, 999999999.0, 0.75);

SELECT *,
       ROW_NUMBER() OVER (PARTITION BY [Year], Active ORDER BY UpperLimit ASC) AS [Row],
       CAST(0.0 AS DECIMAL(16,3)) AS Income,
       CAST(0.0 AS DECIMAL(16,3)) AS WeightedValue
INTO #tmp
FROM @T t1;

UPDATE t2
SET @vIncome = @Income,
    @Income  = CASE WHEN (@Income > t2.UpperLimit)
                    THEN @Income - t2.UpperLimit
                    ELSE 0
               END,
    t2.Income = @Income,
    t2.WeightedValue = CASE WHEN (@vIncome > t2.UpperLimit)
                            THEN (t2.UpperLimit * t2.Factor)
                            ELSE @vIncome * t2.Factor
                       END
FROM #tmp t1
INNER JOIN #tmp t2 ON (t1.Row = t2.Row + 1);

SELECT * FROM #tmp;

DROP TABLE #tmp;

Related

How to do this in SQL Server query instead of function?

I have a table that has a string in one of its columns.
My table look like this:
RowCnt
Lvl
TargetID
Codes
1000
1
0
1,1,0,1,0,1,...,1,0,0,0,0
1000
1
1
0,0,1,0,1,0,...,0,1,1,1,1
1000
1
2
1,0,0,0,1,1,...,0,0,0,0,0
1000
1
3
0,1,1,1,0,1,...,1,1,1,1,1
1000
1
4
1,1,0,0,1,0,...,0,0,1,0,0
1000
2
0
0,0,1,1,0,1,...,0,1,0,1,1
1000
2
1
0,1,0,1,1,1,...,1,1,1,1,0
1000
2
2
0,0,0,0,0,1,...,0,0,0,0,1
1500
1
0
1,1,1,1,1,0,...,1,1,1,1,0
1500
1
1
1,0,0,0,0,1,...,0,0,0,0,1
I have to compare each line with each line and see how many of digits differ in the Codes column.
So the first record 1,1,0,1,0,1,...,1,0,0,0,0 will be compared with the 2nd 0,0,1,0,1,0,...,0,1,1,1,1 and find that 14 out of 328 digits are different, then compared with the 3rd record 1,0,0,0,1,1,...,0,0,0,0,0 and find 29 / 328, and so on until it has been compared with all records.
Then compare the 2nd record with the 3rd, then the 4th, and so on until all pairs are done.
My table has around 2000 records and I assume that would take around 4 million operations.
I have built a function to do the comparison
-- dbo.Sim: fraction of positions at which two comma-separated integer lists hold the
-- same value.
--   @x, @y  : comma-separated lists (each element is inserted into an int column)
--   returns : matches / compared positions as decimal(18,10); NULL when no positions
--             could be compared (previously a divide-by-zero error)
-- NOTE(review): positions come from the identity column filled from STRING_SPLIT; the
-- answer below points out that STRING_SPLIT's output order is not guaranteed.
ALTER FUNCTION [dbo].[Sim]
(
    @x varchar(max),
    @y varchar(max)
)
RETURNS decimal(18,10)
AS
BEGIN
    DECLARE @Xt AS TABLE (id int identity, x int);
    DECLARE @Yt AS TABLE (id int identity, y int);
    DECLARE @Match int;
    DECLARE @All int;

    INSERT INTO @Xt (x)
    SELECT value FROM STRING_SPLIT(@x, ',');

    INSERT INTO @Yt (y)
    SELECT value FROM STRING_SPLIT(@y, ',');

    -- positions present in both lists that hold the same value
    SELECT @Match = COUNT(*)
    FROM @Xt xx
    INNER JOIN @Yt yy ON xx.id = yy.id AND xx.x = yy.y;

    -- positions present in both lists
    SELECT @All = COUNT(*)
    FROM @Xt xx
    INNER JOIN @Yt yy ON xx.id = yy.id;

    RETURN 1.0 * @Match / NULLIF(@All, 0);
END
and my query is like this
-- Score every pair of targets that share a RowCnt (lower TargetID on the left) with the
-- scalar dbo.sim function and store the pairs in TargetFilled.
INSERT INTO TargetFilled (RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim)
SELECT
    pairs.RowCnt,
    pairs.Lvl,
    pairs.a_TargetID,
    pairs.b_TargetID,
    pairs.a_codes,
    pairs.b_codes,
    pairs.sim
FROM (
    SELECT
        lhs.RowCnt,
        lhs.Lvl,
        lhs.TargetID                   AS a_TargetID,
        rhs.TargetID                   AS b_TargetID,
        lhs.codes                      AS a_codes,
        rhs.codes                      AS b_codes,
        dbo.sim(lhs.codes, rhs.codes)  AS sim
    FROM TargetsComp AS lhs
    INNER JOIN TargetsComp AS rhs
        ON lhs.RowCnt = rhs.RowCnt
       AND lhs.TargetID < rhs.TargetID
) AS pairs
ORDER BY pairs.RowCnt, pairs.Lvl, pairs.sim DESC
but my method fills C drive and does not finish!!
Any better way?
A faster version of your function would be an inline Table Valued Function.
-- dbo.Similar (STRING_SPLIT variant): inline table-valued function returning one row
-- with Pct = fraction of positions at which the two comma-separated lists differ.
--   @x, @y : comma-separated lists
--   Pct    : differing positions / compared positions; NULL when nothing could be
--            compared (NULLIF avoids a divide-by-zero error)
-- Positions are assigned with ROW_NUMBER() over an arbitrary order, so this relies on
-- STRING_SPLIT returning elements in string order.
CREATE OR ALTER FUNCTION dbo.Similar (@x varchar(max), @y varchar(max))
RETURNS TABLE AS RETURN
SELECT COUNT(CASE WHEN xJ.value <> yJ.value THEN 1 END) * 1.0 / NULLIF(COUNT(*), 0) AS Pct
FROM (
    SELECT *,
           ROW_NUMBER() OVER (ORDER BY (SELECT 1)) rn
    FROM STRING_SPLIT(@x, ',')
) xJ
JOIN (
    SELECT *,
           ROW_NUMBER() OVER (ORDER BY (SELECT 1)) rn
    FROM STRING_SPLIT(@y, ',')
) yJ ON yJ.rn = xJ.rn;
However, STRING_SPLIT with a row-number is not guaranteed to always return results in the actual order of the string. It may do it once, it may do it a million times, but there is always a chance the compiler could rearrange things. So instead you could use OPENJSON
-- dbo.Similar (OPENJSON variant): wraps each comma-separated list in [ ] so it parses as
-- a JSON array, then pairs elements by their array index ([key]).
--   Pct : differing positions / compared positions; NULL when nothing could be compared
--         (NULLIF avoids a divide-by-zero error)
CREATE OR ALTER FUNCTION dbo.Similar (@x varchar(max), @y varchar(max))
RETURNS TABLE AS RETURN
SELECT COUNT(CASE WHEN xJ.value <> yJ.value THEN 1 END) * 1.0 / NULLIF(COUNT(*), 0) AS Pct
FROM OPENJSON('[' + @x + ']') xJ
JOIN OPENJSON('[' + @y + ']') yJ ON yJ.[key] = xJ.[key];
You would use it like this
-- Score every pair of targets sharing a RowCnt with the inline table-valued function
-- dbo.Similar (CROSS APPLY needs the table-valued function, not the scalar dbo.Sim)
-- and store the pairs in TargetFilled.
WITH Y AS (
    select
        a.RowCnt,
        a.Lvl,
        a.TargetID a_TargetID,
        b.targetid b_TargetID,
        a.codes a_codes,
        b.codes b_codes,
        sim.Pct sim
    from TargetsComp A
    inner join TargetsComp B
        on a.RowCnt = b.RowCnt
        and a.TargetID < b.TargetID
    CROSS APPLY dbo.Similar(a.codes, b.codes) sim
)
insert into TargetFilled
    (RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim)
SELECT RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim
FROM Y;
-- Pct is a fraction between 0 and 1, so to skip pairs that differ at every position
-- you may want to add (before the semicolon):
--   WHERE sim < 1
I have removed the ORDER BY from the insert as I don't think it's necessary.
You should index your table as follows
-- Supports the self-join on RowCnt with the TargetID range comparison.
CREATE CLUSTERED INDEX CX_TargetsComp_RowCnt_TargetID ON TargetsComp (RowCnt, TargetID);

In T-SQL, how can I create a new partition every time I hit a certain criteria, and keep that partition until the next time I see that criteria?

Here's the output I'm hoping to achieve:
I'd like to start with the most recent ReportMonth working backwards and start a new partition every time there's a MonthsBetween > 2. I'd like to avoid loops if possible.
EDIT: Here's the create statement for the table
-- Sample data: one row per report month with the gap (in months) recorded for that row.
CREATE TABLE #temp (
    MonthsBetween int,
    ReportMonth   date
)

INSERT INTO #temp (MonthsBetween, ReportMonth)
VALUES (0, '2019-12-01'),
       (1, '2019-11-01'),
       (1, '2019-10-01'),
       (3, '2019-07-01'),
       (1, '2019-06-01'),
       (3, '2019-03-01')
You may try with the following approach using LAG() and windowed SUM():
Statement (for groups based on difference between each two values for MonthsBetween > 2):
-- Walk the rows from the newest ReportMonth backwards. A row opens a new partition when
-- it has no previous MonthsBetween value or when its MonthsBetween exceeds the previous
-- row's by 2 or more; the running count of such rows is the partition id.
SELECT
    prev.MonthsBetween,
    prev.ReportMonth,
    SUM(
        CASE
            WHEN prev.PrevMonthsBetween IS NULL THEN 1
            WHEN prev.MonthsBetween - prev.PrevMonthsBetween >= 2 THEN 1
            ELSE 0
        END
    ) OVER (ORDER BY prev.ReportMonth DESC) AS PartitionID
FROM (
    SELECT
        MonthsBetween,
        ReportMonth,
        LAG(MonthsBetween) OVER (ORDER BY ReportMonth DESC) AS PrevMonthsBetween
    FROM #temp
) prev
Statement (for groups based on value for MonthsBetween > 2):
-- Every row whose MonthsBetween is greater than 2 starts a new partition; counting those
-- rows from the newest ReportMonth down to the current row (plus 1) gives the partition id.
SELECT
    t.MonthsBetween,
    t.ReportMonth,
    1 + COUNT(CASE WHEN t.MonthsBetween > 2 THEN 1 END)
            OVER (ORDER BY t.ReportMonth DESC) AS PartitionID
FROM #temp t
Result:
MonthsBetween ReportMonth PartitionID
0 2019-12-01 1
1 2019-11-01 1
1 2019-10-01 1
3 2019-07-01 2
1 2019-06-01 2
3 2019-03-01 3
Looks like your MonthsBetween uses a Lag or Lead function already(or precalculated)
-- Sample data plus the partitioning query: each row with MonthsBetween above 2 bumps the
-- running counter, evaluated from the newest ReportMonth backwards.
create table #temp (
    MonthsBetween int null,
    ReportMonth date null
)

-- MonthsBetween is an int column, so load it with int literals rather than strings
insert into #temp (MonthsBetween, ReportMonth)
values (0, '2019-12-01'),
       (1, '2019-11-01'),
       (1, '2019-10-01'),
       (3, '2019-07-01'),
       (1, '2019-06-01'),
       (3, '2019-03-01')

select t.MonthsBetween,
       t.ReportMonth,
       sum(case when t.MonthsBetween <= 2 then 0 else 1 end)
           over (order by t.[ReportMonth] DESC) + 1 as [PartitionID]
from #temp t

SQL Query - Multiple Table Join With Grouping Functions that Keep Branch Structure

I have exhausted my search for a solution and would like to post my question to see if a solution exists.
I need to write a report to show the debits and credits per branch. The report needs also show if branches have had no DBs or CRs.
For simplicity I have scaled down my tables to try highlight my issue.
My first table holds my Branch Data
BranchNo BranchName
1 Main
2 Mgorogoro
3 Arusha
My second table holds all Debit Transactions
txid Narrative Amount Date BranchNo
1 Test 1 50.00 2014/11/26 1
2 Test 2 20.00 2014/11/27 3
I've written a SQL statement that gives me the results I need:-
-- Cursor version of the report: for every distinct date in Debits_OUT, insert one row
-- per branch with that branch's debit total for the date (0 when it has none).
-- T-SQL variables (including cursor and table variables) start with '@'; the fetch
-- status is the system function @@FETCH_STATUS.
DECLARE @get_Dates CURSOR;
DECLARE @Date VarChar(10);
DECLARE @tbl TABLE
(
    DebitOutCount int,
    BranchCode VarChar(250),
    TxDate VarChar(10)
);

SET @get_Dates = CURSOR FOR
    Select Debits_OUT.Date FROM Debits_OUT GROUP BY Debits_OUT.Date ORDER BY Debits_OUT.Date;

OPEN @get_Dates;
FETCH NEXT FROM @get_Dates into @Date;
WHILE (@@FETCH_STATUS = 0)
BEGIN
    INSERT INTO @tbl (DebitOutCount, BranchCode, TxDate)
    SELECT
        -- correlated total for this branch on the current cursor date
        (SELECT ISNULL(SUM(DB_OUT.Amount), 0)
         FROM Debits_OUT AS DB_OUT
         WHERE B.BranchNo = DB_OUT.BranchNo AND DB_OUT.Date = @Date) AS DebitOutValue,
        CAST(B.BranchNo As VarChar(10)) + ' ' + B.BranchName As [Branch Names],
        @Date
    From exBranches As B;

    FETCH NEXT FROM @get_Dates into @Date;
END
CLOSE @get_Dates;
DEALLOCATE @get_Dates;

SELECT DebitOutCount, BranchCode, TxDate FROM @tbl;
The result is in the format that I need:-
DebitOutCount BranchCode TxDate
50 1 Main 2014/11/26
0 2 Mgorogoro 2014/11/26
0 3 Arusha 2014/11/26
0 1 Main 2014/11/27
0 2 Mgorogoro 2014/11/27
20 3 Arusha 2014/11/27
However, the report tools and Views cannot work with the above. I have tried Left Joins - but the problem is the result set will not keep the branch numbers for dates where there were zero transactions. For Example:-
-- LEFT JOIN attempt from the question: totals per branch and debit date. A branch with
-- no debits at all still appears once, but with a NULL date rather than once per date.
SELECT
    ISNULL(SUM(dbt.Amount), 0),
    CAST(br.BranchNo As VarChar(10)) + ' ' + br.BranchName As [Branch Names],
    dbt.Date
From exBranches As br
LEFT JOIN Debits_OUT AS dbt
    ON br.BranchNo = dbt.BranchNo
GROUP BY
    dbt.Date,
    br.BranchNo,
    br.BranchName
ORDER BY
    dbt.Date,
    br.BranchNo,
    br.BranchName
Returns:-
DB_OUT Branch Names Date
0.00 2 Mgorogoro NULL
50.00 1 Main 2014/11/26
20.00 3 Arusha 2014/11/27
In all the JOIN combinations that I try, I cannot get the branches to show ALL the branches for each date that is in the debits table.
Is there a fundamental concept that I have completely missed? I need have a query that can be run in a view that returns the same data as the cursor statement. Is this possible?
The idea is to generate possible combinations of Branches and dates first:
-- Demo: build the two tables, list every branch for every date that has debits
-- (showing 0 where a branch had none on that date), then clean up.
create table exBranches(
    BranchNo int,
    BranchName varchar(20)
)
create table Debits_OUT(
    txId int,
    Narrative varchar(20),
    Amount decimal (6,2),
    [Date] date,
    BranchNo int
)
insert into exBranches values (1, 'Main'), (2, 'Mgorogoro'), (3, 'Arusha')
insert into Debits_OUT values (1, 'Test 1', 50.00, '20141126', 1), (2, 'Test 2', 20.00, '20141127', 3);

with debit_dates as (
    -- every date on which at least one debit exists
    select distinct [Date]
    from Debits_OUT
),
branch_totals as (
    -- debit total per branch and date
    select
        BranchNo,
        [Date],
        sum(Amount) as DebitOutCount
    from Debits_OUT
    group by
        BranchNo, [Date]
)
select
    isnull(bt.DebitOutCount, 0),
    cast(br.BranchNo as varchar(10)) + ' ' + br.BranchName as BranchName,
    dd.[Date]
from exBranches br
cross join debit_dates dd          -- all branch/date combinations
left join branch_totals bt
    on bt.BranchNo = br.BranchNo
    and bt.[Date] = dd.[Date]
order by dd.[Date], br.BranchNo asc

drop table exBranches
drop table Debits_OUT
Try this; it works:
-- NOTE(review): the columns date1 and BNo do not appear in the Debits_OUT definition shown
-- in the question (which has [Date] and BranchNo); presumably they come from the linked
-- SQL Fiddle schema - confirm before use.
-- Step 1: pair every branch with every distinct (date1, amount) combination in Debits_OUT.
select BranchName,amount,date1,BranchNo into #temp from exBranches
cross join (select distinct date1,amount from Debits_OUT)a
-- Step 2: left join the pairs back to Debits_OUT so that branch/date pairs without a
-- matching debit row show 0.
select isnull(t.amount,0),a.BranchName,a.date1 from #temp a
left join Debits_OUT t on t.BNo=a.BranchNo and a.date1=t.date1
order by date1
view here..
http://sqlfiddle.com/#!3/ad815/1

How to Calculate Median?

I need to calculate a median on a set of data, so I created a temp table and have tried to follow some articles online with zero success, here is what I am working with:
-- Collect, per quiz profile of partner 16 with exactly 5 answers, the answer count and
-- the total score; then report how many profiles there are and their average total.
CREATE TABLE #QuizTemp (QuizProfileID INT, Cnt INT, TotalScore INT)

INSERT INTO #QuizTemp
SELECT
    qa.QuizProfileID,
    COUNT(qa.QuizProfileID) AS Cnt,
    SUM(qa.AnsweredYes)     AS TotalScore
FROM QuizAnswers AS qa
INNER JOIN Quizzes AS qz
    ON qa.QuizID = qz.QuizID
WHERE (qz.PartnerID = 16)
GROUP BY qa.QuizProfileID
HAVING COUNT(qa.QuizProfileID) = 5

SELECT
    COUNT(*)        AS CNT,
    Avg(TotalScore) AS AvgTotalScore
FROM #QuizTemp

DROP TABLE #QuizTemp
The average works great and now I need the Median.
try capturing the row count on INSERT and then select the row that is in the middle using ROW_NUMBER():
-- Median of TotalScore: capture the number of inserted rows, number the rows by score
-- and return the middle one. For an even row count this returns the lower of the two
-- middle rows (no averaging).
CREATE TABLE #QuizTemp (QuizProfileID INT, Cnt INT, TotalScore INT);

DECLARE @Rows int;   -- declared once; the original declared it twice

INSERT INTO #QuizTemp (QuizProfileID, Cnt, TotalScore)
SELECT QuizAnswers.QuizProfileID,
       COUNT(QuizAnswers.QuizProfileID) AS Cnt,
       SUM(QuizAnswers.AnsweredYes) AS TotalScore
FROM QuizAnswers
INNER JOIN Quizzes ON QuizAnswers.QuizID = Quizzes.QuizID
WHERE (Quizzes.PartnerID = 16)
GROUP BY QuizAnswers.QuizProfileID
HAVING COUNT(QuizAnswers.QuizProfileID) = 5;

-- read the row count straight after the INSERT, before another statement changes it
SELECT @Rows = @@ROWCOUNT;

WITH allrows AS
(
    SELECT TotalScore,
           ROW_NUMBER() OVER (ORDER BY TotalScore) AS RowNumber
    FROM #QuizTemp
)
SELECT @Rows AS CNT, TotalScore AS MedianScore
FROM allrows
-- (@Rows + 1) / 2 is the middle row for odd counts (7 -> row 4) and still yields a row
-- when there is only one; plain @Rows / 2 gave row 3 of 7 and row 0 of 1
WHERE RowNumber = (@Rows + 1) / 2;

DROP TABLE #QuizTemp;
EDIT
Here is a solution without a temp table:
-- Median without a temp table: number the rows by score, work out the middle row number
-- (half of the row count, rounded) and return that row together with count and average.
DECLARE @YourTable table (TotalScore int);

INSERT INTO @YourTable (TotalScore)
VALUES (1), (2), (3), (40), (50), (60), (70);

WITH allrows AS
(
    SELECT TotalScore,
           ROW_NUMBER() OVER (ORDER BY TotalScore) AS RowNumber
    FROM @YourTable
),
MaxRows AS
(
    -- CNT = number of rows; Middle = CNT / 2.0 rounded to a whole row number
    SELECT MAX(RowNumber) AS CNT,
           CONVERT(int, ROUND(MAX(RowNumber) / 2.0, 0)) AS Middle
    FROM allrows
)
SELECT
    m.CNT,
    (SELECT AVG(TotalScore) FROM allrows) AS AvgTotalScore,   -- integer average of an int column
    a.TotalScore AS Median,
    m.Middle AS MedianRowNumber
FROM allrows a
CROSS JOIN MaxRows m
WHERE a.RowNumber = m.Middle;
OUTPUT:
CNT AvgTotalScore Median MedianRowNumber
-------------------- -------------------- ----------- --------------------
7 32 40 4
(1 row(s) affected)
if you edit the first CTE to be:
;with allrows as
(
-- One row per quiz profile, numbered by total score. The window ORDER BY repeats the
-- SUM(...) expression because the TotalScore alias is not visible inside the same SELECT list.
SELECT QuizAnswers.QuizProfileID, COUNT(QuizAnswers.QuizProfileID) AS Cnt, SUM(QuizAnswers.AnsweredYes) As TotalScore
, ROW_NUMBER() OVER (ORDER BY SUM(QuizAnswers.AnsweredYes)) AS RowNumber
FROM QuizAnswers INNER JOIN
Quizzes ON QuizAnswers.QuizID = Quizzes.QuizID
WHERE (Quizzes.PartnerID = 16)
GROUP BY QuizAnswers.QuizProfileID
HAVING COUNT(QuizAnswers.QuizProfileID)= 5
)
it should work for your query
Computing the median of numeric values is often overthought. Just use this example:
-- Median via an identity column: copy the values in ascending order into a table whose
-- ID numbers them 1..n, then average the row(s) whose ID lies within [n/2, n/2 + 1].
-- For an odd n that is the single middle row, for an even n the two middle rows.
DECLARE @testTable TABLE
(
    VALUE INT
);

-- Uncomment one of the following to try it out.
-- Even test (14 values):
--INSERT INTO @testTable (VALUE)
--VALUES (3),(5),(7),(12),(13),(14),(21),(23),(23),(23),(23),(29),(40),(56);
-- Odd test (15 values):
--INSERT INTO @testTable (VALUE)
--VALUES (3),(5),(7),(12),(13),(14),(21),(23),(23),(23),(23),(29),(39),(40),(56);

DECLARE @RowAsc TABLE
(
    ID INT IDENTITY,
    Amount INT
);

-- NOTE(review): relies on identity values being handed out in the ORDER BY order of the
-- INSERT ... SELECT - confirm against the SQL Server documentation for your version.
INSERT INTO @RowAsc (Amount)
SELECT VALUE
FROM @testTable
ORDER BY VALUE ASC;

-- 1.0 * Amount keeps the average of two middle values from being truncated to an
-- integer (e.g. the median of 3 and 4 is 3.5, not 3).
SELECT AVG(1.0 * ra.Amount)
FROM @RowAsc ra
WHERE ra.ID - (SELECT MAX(ID) / 2.0 FROM @RowAsc) BETWEEN 0 AND 1;

Get a list of dates between two dates using a function

My question is similar to this MySQL question, but intended for SQL Server:
Is there a function or a query that will return a list of days between two dates? For example, lets say there is a function called ExplodeDates:
-- Hypothetical call: ExplodeDates is the function being asked for, not an existing one.
SELECT ExplodeDates('2010-01-01', '2010-01-13');
This would return a single column table with the values:
2010-01-01
2010-01-02
2010-01-03
2010-01-04
2010-01-05
2010-01-06
2010-01-07
2010-01-08
2010-01-09
2010-01-10
2010-01-11
2010-01-12
2010-01-13
I'm thinking that a calendar/numbers table might be able to help me here.
Update
I decided to have a look at the three code answers provided, and the results of the execution - as a % of the total batch - are:
Rob Farley's answer : 18%
StingyJack's answer : 41%
KM's answer : 41%
Lower is better
I have accepted Rob Farley's answer, as it was the fastest, even though numbers table solutions (used by both KM and StingyJack in their answers) are something of a favourite of mine. Rob Farley's was two-thirds faster.
Update 2
Alivia's answer is much more succinct. I have changed the accepted answer.
These few lines are a simple answer to this question in SQL Server.
-- Recursive CTE producing one row per day from the start date through the end date,
-- both inclusive. MAXRECURSION 0 lifts the default limit of 100 recursion levels.
WITH mycte AS
(
    SELECT CAST('2011-01-01' AS DATETIME) AS DateValue
    UNION ALL
    SELECT DATEADD(DAY, 1, DateValue)
    FROM mycte
    -- <= so that the end date itself is part of the list ('<' stopped one day early)
    WHERE DATEADD(DAY, 1, DateValue) <= '2021-12-31'
)
SELECT DateValue
FROM mycte
OPTION (MAXRECURSION 0)
Try something like this:
-- dbo.ExplodeDates: inline table-valued function returning one row per day from
-- @startdate through @enddate (inclusive) in column thedate.
-- The N0..N6 CTEs square the row count at each level (2, 4, 16, 256, ...), giving a
-- row source that is numbered by ROW_NUMBER() and cut off by the WHERE clause.
CREATE FUNCTION dbo.ExplodeDates(@startdate datetime, @enddate datetime)
returns table as
return (
with
 N0 as (SELECT 1 as n UNION ALL SELECT 1)
,N1 as (SELECT 1 as n FROM N0 t1 CROSS JOIN N0 t2)
,N2 as (SELECT 1 as n FROM N1 t1 CROSS JOIN N1 t2)
,N3 as (SELECT 1 as n FROM N2 t1 CROSS JOIN N2 t2)
,N4 as (SELECT 1 as n FROM N3 t1 CROSS JOIN N3 t2)
,N5 as (SELECT 1 as n FROM N4 t1 CROSS JOIN N4 t2)
,N6 as (SELECT 1 as n FROM N5 t1 CROSS JOIN N5 t2)
,nums as (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) as num FROM N6)
SELECT DATEADD(day,num-1,@startdate) as thedate
FROM nums
WHERE num <= DATEDIFF(day,@startdate,@enddate) + 1
);
You then use:
-- Example: every day of April and May 2009.
SELECT d.*
FROM dbo.ExplodeDates('20090401', '20090531') AS d;
Edited (after the acceptance):
Please note... if you already have a sufficiently large nums table then you should use:
-- dbo.ExplodeDates (numbers-table variant): one row per day from @startdate through
-- @enddate (inclusive), driven by an existing table nums(num) holding 1, 2, 3, ...
CREATE FUNCTION dbo.ExplodeDates(@startdate datetime, @enddate datetime)
returns table as
return (
SELECT DATEADD(day,num-1,@startdate) as thedate
FROM nums
WHERE num <= DATEDIFF(day,@startdate,@enddate) + 1
);
And you can create such a table using:
-- Numbers table: seed it with 1, then run a batch 20 times that appends a shifted copy
-- of the current contents, doubling the row count each time (2^20 = 1,048,576 rows).
CREATE TABLE dbo.nums (num int PRIMARY KEY);
INSERT INTO dbo.nums VALUES (1);
GO
INSERT INTO dbo.nums
SELECT n.num + (SELECT COUNT(*) FROM nums)
FROM nums AS n
GO 20
These lines will create a table of numbers containing 1M rows... and far quicker than inserting them one by one.
You should NOT create your ExplodeDates function using a function that involves BEGIN and END, as the Query Optimizer becomes unable to simplify the query at all.
This does exactly what you want, modified from Will's earlier post. No need for helper tables or loops.
-- Recursive CTE listing every day from 2010-01-01 through 2010-01-13 (inclusive).
-- The anchor is written as a plain start date (the original computed the same value as
-- end date minus the day difference). Literals use the unseparated yyyymmdd form.
-- MAXRECURSION 0 removes the default cap of 100 recursion levels, so ranges longer
-- than 101 days no longer fail.
WITH date_range (calc_date) AS (
    SELECT CAST('20100101' AS DATETIME)
    UNION ALL
    SELECT DATEADD(DAY, 1, calc_date)
    FROM date_range
    WHERE DATEADD(DAY, 1, calc_date) <= '20100113')
SELECT calc_date
FROM date_range
OPTION (MAXRECURSION 0);
-- One row per day: TOP limits the cross join to (day difference + 1) rows and
-- ROW_NUMBER() - 1 is the day offset added to @MinDate (its time part is carried along).
DECLARE @MinDate DATETIME = '2012-09-23 00:02:00.000',
        @MaxDate DATETIME = '2012-09-25 00:00:00.000';

SELECT TOP (DATEDIFF(DAY, @MinDate, @MaxDate) + 1)
       Dates = DATEADD(DAY, ROW_NUMBER() OVER (ORDER BY a.object_id) - 1, @MinDate)
FROM sys.all_objects a
CROSS JOIN sys.all_objects b;
I'm an Oracle guy, but I believe MS SQL Server has support for the connect by clause (note: it does not; CONNECT BY is Oracle syntax, and the SQL Server equivalent is a recursive CTE):
-- Oracle: CONNECT BY LEVEL <= 10 generates LEVEL values 1..10 from the single row of DUAL.
SELECT SYSDATE + LEVEL
FROM dual
CONNECT BY LEVEL <= 10 ;
The output is:
SYSDATE+LEVEL
05-SEP-09
06-SEP-09
07-SEP-09
08-SEP-09
09-SEP-09
10-SEP-09
11-SEP-09
12-SEP-09
13-SEP-09
14-SEP-09
Dual is just a 'dummy' table that comes with oracle (it contains 1 row and the word 'dummy' as the value of the single column).
A few ideas:
If you need the list dates in order to loop through them, you could have a Start Date and Day Count parameters and do a while loop whilst creating the date and using it?
Use C# CLR Stored Procedures and write the code in C#
Do this outside the database in code
Would all these dates be in the database already or do you just want to know the days between the two dates? If it's the first you could use the BETWEEN or <= >= to find the dates between
EXAMPLE:
-- Template: BETWEEN includes both value1 and value2.
SELECT column_name(s)
FROM table_name
WHERE column_name
BETWEEN value1 AND value2
OR
-- Template: the same inclusive range written with explicit comparison operators.
SELECT column_name(s)
FROM table_name
WHERE column_name >= value1
AND column_name <= value2
All you have to do is just change the hard coded value in the code provided below
-- Loop version: inserts one row per day from @firstDate through @secondDate (inclusive)
-- into #temp and prints the dates in style 102 (yyyy.mm.dd).
DECLARE @firstDate  datetime;
DECLARE @secondDate datetime;
DECLARE @totalDays  INT;
DECLARE @index      INT;

SELECT @firstDate  = getDate() - 30;
SELECT @secondDate = getDate();
SELECT @index      = 0;
SELECT @totalDays  = datediff(day, @firstDate, @secondDate);

CREATE TABLE #temp
(
     ID INT NOT NULL IDENTITY(1,1)
    ,CommonDate DATETIME NULL
);

-- <= so that the last day (@secondDate) is included; '<' stopped one day short
WHILE @index <= @totalDays
BEGIN
    INSERT INTO #temp (CommonDate) VALUES (DATEADD(Day, @index, @firstDate));
    SELECT @index = @index + 1;
END

SELECT CONVERT(VARCHAR(10), CommonDate, 102) as [Date Between] FROM #temp;

DROP TABLE #temp;
A Bit late to the party, but I like this solution quite a bit.
-- ExplodeDates: inline table-valued function returning one row per day from @startDate
-- through @endDate (inclusive) in column DATE. sys.all_objects cross joined with itself
-- is only used as a row source; TOP limits it to the number of days required.
CREATE FUNCTION ExplodeDates(@startDate DateTime, @endDate DateTime)
RETURNS table as
return (
SELECT TOP (DATEDIFF(DAY, @startDate, @endDate) + 1)
DATEADD(DAY, ROW_NUMBER() OVER(ORDER BY a.object_id) - 1, @startDate) AS [DATE]
FROM sys.all_objects a
CROSS JOIN sys.all_objects b
)
Before you use my function, you need to set up a "helper" table, you only need to do this one time per database:
-- One-time helper: table Numbers holding the integers 1..8000.
CREATE TABLE Numbers
(Number int NOT NULL,
CONSTRAINT PK_Numbers PRIMARY KEY CLUSTERED (Number ASC)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]

DECLARE @x int
SET @x=0
-- @x is incremented before the insert, so the values stored are 1 through 8000
WHILE @x<8000
BEGIN
    SET @x=@x+1
    INSERT INTO Numbers VALUES (@x)
END
here is the function:
-- dbo.ListDates: multi-statement table-valued function returning one row per day from
-- @StartDate through @EndDate (inclusive), using the Numbers helper table.
--   @StartDate, @EndDate : dates passed as char(10) strings
--   returns              : table (Date datetime); empty when either input fails ISDATE()
-- The range is limited by the size of Numbers.
CREATE FUNCTION dbo.ListDates
(
     @StartDate char(10)
    ,@EndDate char(10)
)
RETURNS
@DateList table
(
    Date datetime
)
AS
BEGIN
    -- invalid input: return the empty table instead of raising a conversion error
    IF ISDATE(@StartDate)!=1 OR ISDATE(@EndDate)!=1
    BEGIN
        RETURN
    END

    INSERT INTO @DateList
        (Date)
    SELECT
        CONVERT(datetime,@StartDate)+n.Number-1
    FROM Numbers n
    -- the +1 on the end date makes the range inclusive
    WHERE Number<=DATEDIFF(day,@StartDate,CONVERT(datetime,@EndDate)+1)

    RETURN
END --Function
use this:
-- Example: every day from 1 to 13 January 2010.
SELECT d.*
FROM dbo.ListDates('2010-01-01', '2010-01-13') AS d
output:
Date
-----------------------
2010-01-01 00:00:00.000
2010-01-02 00:00:00.000
2010-01-03 00:00:00.000
2010-01-04 00:00:00.000
2010-01-05 00:00:00.000
2010-01-06 00:00:00.000
2010-01-07 00:00:00.000
2010-01-08 00:00:00.000
2010-01-09 00:00:00.000
2010-01-10 00:00:00.000
2010-01-11 00:00:00.000
2010-01-12 00:00:00.000
2010-01-13 00:00:00.000
(13 row(s) affected)
Perhaps if you wish to go an easier way, this should do it.
-- Recursive CTE: midnight of the day six days ago, then one row per following day for
-- as long as the next day is still earlier than the current timestamp.
WITH recent_days (day_start) AS (
    SELECT DATEADD(DAY, DATEDIFF(DAY, 0, CURRENT_TIMESTAMP) - 6, 0)

    UNION ALL

    SELECT DATEADD(DAY, 1, day_start)
    FROM recent_days
    WHERE DATEADD(DAY, 1, day_start) < CURRENT_TIMESTAMP
)
SELECT day_start AS calc_date
FROM recent_days;
But the temporary table is a very good approach also. Perhaps shall you also consider a populated calendar table.
Definitely a numbers table, though you may want to use Mark Redman's idea of a CLR proc/assembly if you really need the performance.
How to create the table of dates (and a super fast way to create a numbers table)
/*Gets a list of integers into a temp table (Jeff Moden's idea from SqlServerCentral.com)*/
SELECT TOP 10950 /*30 years of days*/
    IDENTITY(INT,1,1) as N
INTO #Numbers
FROM Master.dbo.SysColumns sc1
CROSS JOIN Master.dbo.SysColumns sc2   /*cross join is only a row source*/

/*Create the dates table*/
CREATE TABLE [TableOfDates](
    [fld_date] [datetime] NOT NULL,
    CONSTRAINT [PK_TableOfDates] PRIMARY KEY CLUSTERED
    (
        [fld_date] ASC
    )WITH FILLFACTOR = 99 ON [PRIMARY]
) ON [PRIMARY]

/*fill the table with dates*/
DECLARE @daysFromFirstDateInTheTable int
DECLARE @firstDateInTheTable DATETIME
SET @firstDateInTheTable = '19980101'   /*1 Jan 1998, written in the unseparated yyyymmdd form*/
SET @daysFromFirstDateInTheTable = (SELECT (DATEDIFF(dd, @firstDateInTheTable ,GETDATE()) + 1))

/*N = 1 maps to @firstDateInTheTable; each following N is one day later.
  CAST(FLOOR(CAST(GETDATE() as FLOAT)) as DateTime) strips the time part from today.*/
INSERT INTO
    TableOfDates (fld_date)
SELECT
    DATEADD(dd,nums.n - @daysFromFirstDateInTheTable, CAST(FLOOR(CAST(GETDATE() as FLOAT)) as DateTime)) as FLD_Date
FROM #Numbers nums
Now that you have a table of dates, you can use a function (NOT A PROC) like KM's to get the table of them.
-- dbo.ListDates (calendar-table variant): returns the rows of TableOfDates whose
-- FLD_Date lies between @StartDate and @EndDate, both inclusive.
--   returns : table (Date datetime)
CREATE FUNCTION dbo.ListDates
(
     @StartDate DATETIME
    ,@EndDate DATETIME
)
RETURNS
@DateList table
(
    Date datetime
)
AS
BEGIN
    /*add some validation logic of your own to make sure that the inputs are sound.Adjust the rest as needed*/
    INSERT INTO
        @DateList (Date)
    SELECT FLD_Date
    FROM TableOfDates WITH (NOLOCK)   -- same hint as before, in the WITH (...) form
    WHERE FLD_Date >= @StartDate AND FLD_Date <= @EndDate
    RETURN
END
-- Loop version: fills the table variable @calender with one row per day from @date1
-- through @date2 (inclusive).
Declare @date1 date = '2016-01-01'
       ,@date2 date = '2016-03-31'
       ,@date_index date
Declare @calender table (D date)

SET @date_index = @date1
-- the WHILE condition alone ends the loop; the extra IF ... BREAK ELSE CONTINUE at the
-- end of the body repeated the same test and has been dropped
WHILE @date_index<=@date2
BEGIN
    INSERT INTO @calender
    SELECT @date_index
    SET @date_index = dateadd(day,1,@date_index)
END
-- ### Six of one half dozen of another. Another method assuming MsSql
-- Prints every day from @MonthStart through @MonthEnd (inclusive) in mm/dd/yyyy form.
-- Style 101 on the input conversions makes the literals read as mm/dd/yyyy regardless
-- of the session's date format.
Declare @MonthStart datetime = convert(DateTime,'07/01/2016',101)
Declare @MonthEnd datetime = convert(DateTime,'07/31/2016',101)
Declare @DayCount_int Int = 0
Declare @WhileCount_int Int = 0

set @DayCount_int = DATEDIFF(DAY, @MonthStart, @MonthEnd)

-- returns the starting counter value as a result set (kept from the original)
select @WhileCount_int

WHILE @WhileCount_int < @DayCount_int + 1
BEGIN
    print convert(Varchar(24),DateAdd(day,@WhileCount_int,@MonthStart),101)
    SET @WhileCount_int = @WhileCount_int + 1;
END;
In case you want to print years starting from a particular year till current date. Just altered the accepted answer.
-- Recursive CTE listing every year from 2006 up to and including the current year.
WITH year_list (yr) AS
(
    SELECT YEAR(CONVERT(DATE, '2006-01-01', 102))

    UNION ALL

    SELECT yr + 1
    FROM year_list
    WHERE yr + 1 <= YEAR(GETDATE())
)
SELECT yr AS DateValue
FROM year_list
OPTION (MAXRECURSION 0)
This query works on Microsoft SQL Server.
-- Generates dates from 2010-01-01 onwards without any helper table or variables.
-- a.v runs from 0 to 9999; a.v / 10 is integer division, so each day offset (0..999)
-- occurs ten times and DISTINCT collapses the duplicates.
select distinct format( cast('2010-01-01' as datetime) + ( a.v / 10 ), 'yyyy-MM-dd' ) as aDate
from (
-- four digit tables cross joined: ones + 10*tens + 100*hundreds + 1000*thousands
SELECT ones.n + 10 * tens.n + 100 * hundreds.n + 1000 * thousands.n as v
FROM (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) ones(n),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) tens(n),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) hundreds(n),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) thousands(n)
) a
-- strict '<': the end date 2010-01-13 itself is not returned
where format( cast('2010-01-01' as datetime) + ( a.v / 10 ), 'yyyy-MM-dd' ) < cast('2010-01-13' as datetime)
order by aDate asc;
Now let's look at how it works.
The inner query merely returns a list of integers from 0 to 9999. It will give us a range of 10,000 values for calculating dates. You can get more dates by adding rows for ten_thousands and hundred_thousands and so forth.
SELECT ones.n + 10 * tens.n + 100 * hundreds.n + 1000 * thousands.n as v
FROM (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) ones(n),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) tens(n),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) hundreds(n),
(VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) thousands(n)
) a;
This part converts the string to a date and adds a number to it from the inner query.
cast('2010-01-01' as datetime) + ( a.v / 10 )
Then we convert the result into the format you want. This is also the column name!
format( cast('2010-01-01' as datetime) + ( a.v / 10 ), 'yyyy-MM-dd' )
Next we extract only the distinct values and give the column name an alias of aDate.
distinct format( cast('2010-01-01' as datetime) + ( a.v / 10 ), 'yyyy-MM-dd' ) as aDate
We use the where clause to filter in only dates within the range you want. Notice that we use the column name here since SQL Server does not accept the column alias, aDate, within the where clause.
where format( cast('2010-01-01' as datetime) + ( a.v / 10 ), 'yyyy-MM-dd' ) < cast('2010-01-13' as datetime)
Lastly, we sort the results.
order by aDate asc;
if you're in a situation like me where procedures and functions are prohibited, and your sql user does not have permissions for insert, therefore insert not allowed, also "set/declare temporary variables like #c is not allowed", but you want to generate a list of dates in a specific period, say current year to do some aggregation, use this
-- MySQL: builds the numbers 0..99999 from five cross-joined digit tables, turns each
-- into a date counted from 1970-01-01, and keeps the dates that fall in 2017.
select v.*
from (
    select adddate('1970-01-01', t4*10000 + t3*1000 + t2*100 + t1*10 + t0) gen_date
    from
        (select 0 t0 union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) d0,
        (select 0 t1 union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) d1,
        (select 0 t2 union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) d2,
        (select 0 t3 union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) d3,
        (select 0 t4 union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) d4
) v
where v.gen_date between '2017-01-01' and '2017-12-31'
-- Recursive CTE listing every day from @FECHAINI through @FECHAFIN (inclusive).
-- DIA is the 1-based day counter, SIGUIENTE_DIA the date of that day.
-- @FECHAINI / @FECHAFIN are expected to be declared by the surrounding batch.
WITH TEMP (DIA, SIGUIENTE_DIA) AS
(
    SELECT
        1,
        CAST(@FECHAINI AS DATE)
    FROM
        DUAL   -- any one-row table works here
    UNION ALL
    SELECT
        DIA + 1,                          -- the counter now advances (it used to stay at 1)
        DATEADD(DAY, 1, SIGUIENTE_DIA)
    FROM
        TEMP
    WHERE
        -- the date bound alone ends the recursion; the old "DIA < DATEDIFF(...)" test,
        -- with DIA stuck at 1, returned only the first day of a two-day range
        DATEADD(DAY, 1, SIGUIENTE_DIA) <= CAST(@FECHAFIN AS DATE)
)
SELECT
    SIGUIENTE_DIA AS CALENDARIO
FROM
    TEMP
ORDER BY
    SIGUIENTE_DIA
OPTION (MAXRECURSION 0)   -- allow ranges longer than the default 100 recursion levels
The one detail is the table DUAL; if you exchange it for a dummy (one-row) table, this works.
-- Builds #TEMP1 with one row per day from 2013-09-07 through 2013-09-13.
-- The day offsets come from colorder - 1 of the rows of one object in master..syscolumns.
-- NOTE(review): id = -519536829 is a hard-coded object id; which object it refers to (and
-- whether it exists on a given server) cannot be told from here - verify before use.
SELECT dateadd(dd,DAYS,'2013-09-07 00:00:00') DATES
INTO #TEMP1
FROM
(SELECT TOP 365 colorder - 1 AS DAYS from master..syscolumns
WHERE id = -519536829 order by colorder) a
-- both predicates express the same upper bound (date <= 2013-09-13)
WHERE datediff(dd,dateadd(dd,DAYS,'2013-09-07 00:00:00'),'2013-09-13 00:00:00' ) >= 0
AND dateadd(dd,DAYS,'2013-09-07 00:00:00') <= '2013-09-13 00:00:00'
SELECT * FROM #TEMP1
The answer is available here:
How to list all dates between two dates
-- SelectDates: returns one row per day starting at @fromDate and ending the day BEFORE
-- @toDate (the end date is exclusive).
-- Day offsets are the number values of the type 'P' rows in master..spt_values.
-- NOTE(review): the longest range is limited by how many type 'P' rows that table holds.
Create Procedure SelectDates(@fromDate Date, @toDate Date)
AS
BEGIN
    SELECT DATEADD(DAY,number,@fromDate) [Date]
    FROM master..spt_values
    WHERE type = 'P'
    AND DATEADD(DAY,number,@fromDate) < @toDate
END
-- One row per day from @StartDate through @EndDate (inclusive): the innermost query
-- numbers as many rows as there are days, the middle one turns each number into a date.
DECLARE @StartDate DATE = '2017-09-13', @EndDate DATE = '2017-09-16'

SELECT date
FROM (
    SELECT DATE = DATEADD(DAY, rn - 1, @StartDate)
    FROM (
        -- adding a day to @EndDate makes the day count include the end date itself
        SELECT TOP (DATEDIFF(DAY, @StartDate, DATEADD(DAY,1,@EndDate)))
            rn = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
        FROM sys.all_objects AS s1
        CROSS JOIN sys.all_objects AS s2
        ORDER BY s1.[object_id]
    ) AS x
) AS y
Result:
2017-09-13
2017-09-14
2017-09-15
2017-09-16

Resources