/* Data Setup
| One row per job held by a group. A NULL EndDate marks a job that is still ongoing.
*/
DROP TABLE IF EXISTS #DaysPerJob;

CREATE TABLE #DaysPerJob
(
    GroupID   INT,
    JobDesc   VARCHAR(100),
    StartDate DATE,
    EndDate   DATE
);

INSERT INTO #DaysPerJob (GroupID, JobDesc, StartDate, EndDate)
VALUES
    (23293, 'Food Prep', '2017-03-01', '2017-07-17'),
    (23293, 'Finisher', '2021-11-19', NULL),
    (23293, 'Cashier', '2021-12-06', '2021-12-10'),
    (26208, '3rd SHift Stocker', '2019-09-25', '2020-11-05'),
    (26208, 'Order Fulfillment Assoc', '2020-08-05', '2021-04-16'),
    (26208, 'Customer Service Rep', '2021-05-10', '2021-10-15'),
    (26208, 'Delivery Driver', '2021-11-15', NULL),
    (26208, 'Another Job', '2022-02-23', '2022-03-02'),
    (26208, 'Same Day Job Start as Prev Job End', '2022-03-01', NULL);

--SELECT * FROM #DaysPerJob dpj ORDER BY dpj.GroupID, dpj.StartDate, dpj.EndDate

/* Days Per Job Calculations - Attempts
| Shows every job next to the end date of the job that precedes it in the same group,
| plus the naive per-job day count (open jobs are measured up to the current date).
| Days covered by overlapping jobs are still counted once per job here.
*/
SELECT
    job.GroupID,
    job.JobDesc,
    job.StartDate,
    job.EndDate,
    LAG(job.EndDate) OVER (PARTITION BY job.GroupID ORDER BY job.StartDate, job.EndDate) AS PreviousJobEndDate,
    DATEDIFF(DAY, job.StartDate, ISNULL(job.EndDate, GETDATE())) AS daysPerJob
FROM #DaysPerJob AS job
ORDER BY job.GroupID, job.StartDate, job.EndDate;
How do I obtain a SUM of the unique days employed per group?
The SQL Above will give you a table of Job Records. Each Job has a Start Date but not all jobs have an End Date which means they are still employed at that job.
The issue I have been struggling with is how to count the unique days employed. It is VERY easy to simply calculate the number of days per job using the DATEDIFF function however I am not currently able to account for other jobs within the same range as it would count those days twice.
I am ordering by the Start Date and then, using LAG, I compare the last job's End Date to the next job's Start Date. If the current job's Start Date is <= the last job's End Date, we instead calculate the next job's days using the last job's End Date to the current job's End Date...
However, the above condition had issues... what if my last job did not have an End Date, or what if the last job's End Date was also > the current job's End Date? This would mean that the entire current job falls within the same range as the last job, so we should NOT count ANY days: the day count would become 0 so that, when the total SUM of days is calculated, it would not count the days in that job. It was THIS last issue that I could not figure out, which has now led me to posting this question here on Stack Overflow.
/* Some SQL below of some things I have tried */
/* Days Per Job Calculations - Attempts
| PrevJob  : fetches the previous job's EndDate (same group, ordered by StartDate, EndDate) once per row.
| Adjusted : flags rows where that previous EndDate lies between the current job's StartDate and EndDate,
|            and for those rows swaps the StartDate for the previous EndDate (StartDateForSet).
| The final SELECT is the field I eventually want to SUM per group: DaysEmployedWithinSet counts from
| StartDateForSet to the job's EndDate (current date when the job is still open).
*/
;WITH PrevJob AS
(
    SELECT
        dj.GroupID,
        dj.JobDesc,
        dj.StartDate,
        dj.EndDate,
        LAG(dj.EndDate) OVER (PARTITION BY dj.GroupID ORDER BY dj.StartDate, dj.EndDate) AS PreviousJobEndDate
    FROM #DaysPerJob AS dj
)
, Flagged AS
(
    SELECT
        pj.GroupID,
        pj.JobDesc,
        pj.StartDate,
        pj.EndDate,
        pj.PreviousJobEndDate,
        /* 1 when the previous job ended inside the current job's date range */
        CASE WHEN pj.PreviousJobEndDate BETWEEN pj.StartDate AND pj.EndDate THEN 1 ELSE 0 END AS PrevEndsInsideJob
    FROM PrevJob AS pj
)
, Adjusted AS
(
    SELECT
        f.GroupID,
        f.JobDesc,
        f.StartDate,
        f.EndDate,
        f.PreviousJobEndDate,
        f.PrevEndsInsideJob,
        CASE WHEN f.PrevEndsInsideJob = 1
             THEN ISNULL(f.PreviousJobEndDate, GETDATE())
             ELSE f.StartDate
        END AS StartDateForSet
    FROM Flagged AS f
)
SELECT
    a.GroupID,
    a.JobDesc,
    a.StartDate,
    a.EndDate,
    a.PreviousJobEndDate,
    a.StartDateForSet,
    /* Same test as StartDateForSet, shown as a label */
    CASE WHEN a.PrevEndsInsideJob = 1 THEN 'InRange' ELSE 'NewSet' END AS withinRangeCheck,
    DATEDIFF(DAY, a.StartDate, ISNULL(a.EndDate, GETDATE())) AS daysPerJob,
    DATEDIFF(DAY, a.StartDateForSet, ISNULL(a.EndDate, GETDATE())) AS DaysEmployedWithinSet
FROM Adjusted AS a
ORDER BY a.GroupID, a.StartDate, a.EndDate;
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
The Solution to this problem is Below based on the Chosen correct posted answer
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
I really thought there would be more answers to this question however this isn't an easy one... at least it wasn't for me nor was it something my coworkers were able to answer. Regardless there were two answers posted to this question. One post, however close it came, did not produce accurate counts of the days employed. I triple checked the data as well as checking the calculations in Excel and based on the dataset provided in this example the totals should look as they do below in the SQL Server version of using a Recursive CTE to create a dates table.
/* SUM Unique Days in Multiple Date Range Records (SQL Server).sql
| SQL Server Example
| Desc: Shows how to obtain the unique days employed, i.e. a day is never counted
|       twice when an individual is employed at more than one job at the same time.
*/
/* Data Setup: one row per job; a NULL EndDate is treated as "still employed" by the query that follows */
DROP TABLE IF EXISTS #DaysPerJob;

CREATE TABLE #DaysPerJob
(
    GroupID   INT,
    JobDesc   VARCHAR(100),
    StartDate DATE,
    EndDate   DATE
);

INSERT INTO #DaysPerJob (GroupID, JobDesc, StartDate, EndDate)
VALUES
    (23293, 'Food Prep', '2017-03-01', '2017-07-17'),
    (23293, 'Finisher', '2021-11-19', NULL),
    (23293, 'Starter', '2021-11-21', '2021-12-13'),
    (23293, 'Cashier', '2021-12-06', '2021-12-10'),
    (26208, '3rd SHift Stocker', '2019-09-25', '2020-11-05'),
    (26208, 'Order Fulfillment Assoc', '2020-08-05', '2021-04-16'),
    (26208, 'Customer Service Rep', '2021-05-10', '2021-10-15'),
    (26208, 'Delivery Driver', '2021-11-15', NULL),
    (26208, 'Another Job', '2022-02-23', '2022-03-02'),
    (26208, 'Same Day Job Start as Prev Job End', '2022-03-01', NULL)
;
/* Unique days employed per GroupID.
| Dates  : recursive CTE producing one row per calendar day, from the earliest StartDate
|          in #DaysPerJob through today.
| ranked : one row per (job, day the job covers); ranker numbers the jobs covering the same
|          day within a group so that only ranker = 1 is counted and overlapping jobs do not
|          count a day twice.
| Output : daysEmployed, first/last employed day, the inclusive length of that span and the
|          number of days inside the span with no job.
*/
WITH Dates(date) AS
(
    SELECT MIN(StartDate) AS date
    FROM #DaysPerJob
    UNION ALL
    SELECT DATEADD(DAY, 1, date)
    FROM Dates
    /* Compare against today's DATE: comparing against the DATETIME GetDate() also generated a row for tomorrow that no job can match */
    WHERE date < CAST(GetDate() AS DATE)
)
, ranked AS
( /* Needing to rank each job record in order to later remove the overlapping days when employed at more than one job at one time. */
    SELECT j.GroupID, j.JobDesc, j.StartDate, j.EndDate, d.date
    , ROW_NUMBER() OVER (PARTITION BY j.GroupID, d.date ORDER BY j.StartDate, IsNull(j.EndDate, GetDate())) AS ranker
    FROM Dates d
    /* INNER JOIN keeps only days covered by at least one job; open jobs (NULL EndDate) run through today */
    INNER JOIN #DaysPerJob j ON j.StartDate <= d.date
        AND IsNull(j.EndDate, GetDate()) >= d.date
    WHERE j.GroupID IS NOT NULL /* jobs without a GroupID cannot be attributed to a group */
    --AND j.GroupID = 26208 --23293
)
/* Non Aggregate Data - UnComment to view */
/*
SELECT * FROM ranked r
ORDER BY r.date, r.StartDate, IsNull(r.EndDate, GetDate()), r.GroupID
OPTION (MaxRecursion 0)
*/
/* Aggregated Data */
SELECT r.GroupID, COUNT(*) AS daysEmployed, MIN(r.date) AS minStartDate, MAX(r.date) AS maxEndDate
/* + 1 because daysEmployed counts both the first and the last day, while DATEDIFF excludes one boundary */
, DATEDIFF(DAY, MIN(r.date), MAX(r.date)) + 1 AS TotalDaysInRange
/* To get total number of days NOT employed we simply take the TotalDaysInRange and subtract the daysEmployed */
, DATEDIFF(DAY, MIN(r.date), MAX(r.date)) + 1 - COUNT(*) AS unEmployedDays
FROM ranked r
WHERE r.ranker = 1
GROUP BY r.GroupID
ORDER BY r.GroupID
OPTION (MaxRecursion 0) /* The default MaxRecursion setting is 100. Generating more than 100 dates using this method will require the Option (MaxRecursion N) segment of the query, where N is the desired MaxRecursion setting. Setting this to 0 will remove the MaxRecursion limitation altogether */
Screenshot of totals grouped by GroupID:
Based on the screenshot as of today's date as of this posting 06.02.22 the totals are:
GroupID 23293 : 335 Days Employed
GroupID 26208 : 929 Days Employed
This SO Post has excellent examples of how to populate a dates table and some of the answers accomplish this feat without needing to use Option (MaxRecursion)
Get a list of dates between two dates using a function
I didn't have access to a SqlServer instance to test this on, so this is SQLite syntax, but I don't think it should be hard to convert this.
The approach I took was to basically use a "Dates" table and then join the DaysPerJob table to it so you get records for each day a GroupId was active. Then you just rank based on the individual day and groupId to use to filter out "overlapped" days of jobs.
/* Recursive CTE that builds a calendar: one row per day from the earliest StartDate through today */
/* An existing date table could be used in its place */
WITH RECURSIVE calendar(day) AS (
    SELECT MIN(StartDate)
    FROM DaysPerJob
    UNION ALL
    SELECT DATE(day, '+1 day')
    FROM calendar
    WHERE day < DATE()
)
/* Each (GroupID, day) pair on which at least one job was active; DISTINCT collapses overlapping jobs into a single day */
, employed_days AS (
    SELECT DISTINCT
        j.GroupID,
        c.day
    FROM calendar AS c
    INNER JOIN DaysPerJob AS j
        ON DATE(j.StartDate) <= DATE(c.day)
        AND IFNULL(j.EndDate, DATE()) >= DATE(c.day)
    WHERE j.GroupID IS NOT NULL
)
SELECT
    COUNT(*) AS days_worked,
    GroupID
FROM employed_days
GROUP BY GroupID;
Here is another answer, derived after taking some time to wrangle the data. Please forgive me, I put this into a formatting that was easier to work with. This should work.
/* Data Setup: job date ranges per group (EndDate is NULL for jobs without an end date) */
DROP TABLE IF EXISTS #DaysPerJob;

CREATE TABLE #DaysPerJob
(
    GroupID   INT
  , JobDesc   VARCHAR(100)
  , StartDate DATE
  , EndDate   DATE
);

INSERT INTO #DaysPerJob (GroupID, JobDesc, StartDate, EndDate)
VALUES
    (23293, 'Food Prep', '2017-03-01', '2017-07-17')
  , (23293, 'Finisher', '2021-11-19', NULL)
  , (23293, 'Cashier', '2021-12-06', '2021-12-10')
  , (26208, '3rd SHift Stocker', '2019-09-25', '2020-11-05')
  , (26208, 'Order Fulfillment Assoc', '2020-08-05', '2021-04-16')
  , (26208, 'Customer Service Rep', '2021-05-10', '2021-10-15')
  , (26208, 'Delivery Driver', '2021-11-15', NULL)
  , (26208, 'Another Job', '2022-02-23', '2022-03-02')
  , (26208, 'Same Day Job Start as Prev Job End', '2022-03-01', NULL);

--SELECT * FROM #DaysPerJob dpj ORDER BY dpj.GroupID, dpj.StartDate, dpj.EndDate
/* Days Per Job Calculations - Attempts */
--Produces one DayCalc value per job row (not a per-group total).
--DayCalc is only computed for rows that come before the first open-ended job (EndDate IS NULL) of their group; later rows get NULL.
--NOTE(review): the surrounding discussion reports that one of the posted answers did not give accurate unique-day counts - verify the results before relying on this query.
;WITH GapsMarked AS
(
--Flag open-ended jobs (EndDate IS NULL) with Island = 1 and number the rows of each GroupID by StartDate, EndDate for the window function and self-join below
SELECT
GroupID, JobDesc,StartDate, EndDate,
Island = CASE WHEN EndDate IS NULL THEN 1 ELSE 0 END,
RowInGroup=ROW_NUMBER() OVER(PARTITION BY GroupID ORDER BY StartDate, EndDate)
FROM
#DaysPerJob
)
,VirtualGroups AS
(
--IsIsland is the running total of Island within the group: 0 until the first open-ended job appears, >= 1 for that row and every later row
SELECT
*,
IsIsland = SUM(Island) OVER (PARTITION BY GroupID ORDER BY RowInGroup ROWS UNBOUNDED PRECEDING)
FROM
GapsMarked
)
,MinEndDateInIsland AS
(
--For each row V1, take the smallest EndDate among the rows V2 of the same group at or before it that have IsIsland = 0.
--When that smallest EndDate is on or after V1.StartDate it becomes the row's effective end date; otherwise V1's own EndDate is kept.
SELECT
V1.GroupID, V1.RowInGroup,
EndDateOrMinOverlapped=CASE WHEN MIN(V2.EndDate) >= V1.StartDate THEN MIN(V2.EndDate) ELSE V1.EndDate END
FROM
VirtualGroups V1
LEFT OUTER JOIN VirtualGroups V2 ON V2.GroupID = V1.GroupID AND V2.RowInGroup <= V1.RowInGroup AND V2.IsIsland=0
GROUP BY
V1.GroupID, V1.RowInGroup,V1.StartDate, V1.EndDate
)
--Final output: join the effective end date back to the job rows and measure StartDate -> effective end date in days
SELECT
G.GroupID, G.JobDesc, G.StartDate, G.EndDate,
DayCalc=CASE WHEN G.IsIsland=0 THEN DATEDIFF(DAY, G.StartDate,N.EndDateOrMinOverlapped) ELSE NULL END
FROM
MinEndDateInIsland N
INNER JOIN VirtualGroups G ON G.GroupID = N.GroupID AND G.RowInGroup= N.RowInGroup
ORDER BY
G.GroupID, G.RowInGroup
I have this query:
-- Per period (YrPr) within 201901-201912: distinct order count plus the DAm and IncCst sums
SELECT
    COUNT(DISTINCT pt.OrdNo) AS Orders,
    pt.YrPr                  AS Period,
    SUM(pt.DAm)              AS Total,
    SUM(pt.IncCst)           AS Cost
FROM ProdTr AS pt
WHERE pt.TrTp = 1
    AND pt.CustNo <> 0
    AND pt.YrPr >= 201901
    AND pt.YrPr <= 201912
GROUP BY pt.YrPr
ORDER BY pt.YrPr;
And it works well. It yields the expected result, sales data from the date period 2019-01 to 2019-12. Result:
I would like to add an extra column that shows the same data - but from last year. For period 2019-01 it should show sales data for 2018-01 (1 year back). I managed to do this with a subquery, but it is slow - and seems like a bad idea.
Are there any better ways to achieve this? Database version is MSSQL 2016.
Thank you very much for your time.
You can do it with conditional aggregation:
-- One row per month number: 2019 figures next to the 2018 total for the same month.
-- The year and month parts of YrPr are split once (parts.Yr / parts.Mo) and reused by every aggregate.
SELECT
    COUNT(DISTINCT CASE WHEN parts.Yr = '2019' THEN pt.OrdNo END) AS Orders2019,
    '2019' + parts.Mo                                             AS Period2019,
    SUM(CASE WHEN parts.Yr = '2019' THEN pt.DAm END)              AS Total2019,
    SUM(CASE WHEN parts.Yr = '2019' THEN pt.IncCst END)           AS Cost2019,
    SUM(CASE WHEN parts.Yr = '2018' THEN pt.DAm END)              AS Total2018
FROM ProdTr AS pt
CROSS APPLY (VALUES (LEFT(pt.YrPr, 4), RIGHT(pt.YrPr, 2))) AS parts (Yr, Mo)
WHERE pt.TrTp = 1
    AND pt.CustNo <> 0
    AND pt.YrPr BETWEEN 201801 AND 201912
GROUP BY parts.Mo
ORDER BY Period2019;
You could do it like this:
-- PeriodTotals aggregates both years once; ThisYear / LastYear slice it, and the FULL JOIN
-- pairs each 2019 period with the period exactly 100 lower (same month, previous year).
WITH PeriodTotals AS
(
    SELECT
        COUNT(DISTINCT pt.OrdNo) AS Orders,
        pt.YrPr                  AS Period,
        SUM(pt.DAm)              AS Total,
        SUM(pt.IncCst)           AS Cost
    FROM ProdTr AS pt
    WHERE pt.TrTp = 1
        AND pt.CustNo <> 0
        AND pt.YrPr >= 201801
        AND pt.YrPr <= 201912
    GROUP BY pt.YrPr
),
ThisYear AS
(
    SELECT Orders, Period, Total, Cost
    FROM PeriodTotals
    WHERE Period >= 201901
),
LastYear AS
(
    SELECT Orders, Period, Total, Cost
    FROM PeriodTotals
    WHERE Period < 201901
)
SELECT
    cur.Orders,
    cur.Period,
    cur.Total,
    cur.Cost,
    prev.Orders AS PrevOrders,
    prev.Period AS PrevPeriod,
    prev.Total  AS PrevTotal,
    prev.Cost   AS PrevCost
FROM ThisYear AS cur
FULL OUTER JOIN LastYear AS prev
    ON prev.Period + 100 = cur.Period
-- A month present in only one of the two years still sorts by its 2019 period number
ORDER BY COALESCE(cur.Period, prev.Period + 100);
In SQL I've got a table with students:
-- Students and their activity window; ActiveUntil is nullable
CREATE TABLE [dbo].[Student]
(
    [Id]          BIGINT IDENTITY(1, 1) NOT NULL
        CONSTRAINT [PK_Student] PRIMARY KEY NONCLUSTERED,
    [ActiveFrom]  DATETIME NOT NULL,
    [ActiveUntil] DATETIME NULL
) ON [PRIMARY];
Now I want to show a bar chart how many students have been active in each month of the year. A student is active in a month if [ActiveFrom] is before or in that month and [ActiveUntil] is null or later or in that month.
I guess I need some kind of group by, but since a student can be active for months or years I got no idea how to get those numbers in one SQL command.
Sample input
-- Four sample students, inserted one statement at a time
INSERT INTO Student (ActiveFrom, ActiveUntil)
VALUES ('20181001', '20181231');

INSERT INTO Student (ActiveFrom, ActiveUntil)
VALUES ('20181101', '20190131');

INSERT INTO Student (ActiveFrom, ActiveUntil)
VALUES ('20181201', '20181231');

INSERT INTO Student (ActiveFrom, ActiveUntil)
VALUES ('20190101', '20190430');
Expected output
Month, Activecount
2018-10, 1
2018-11, 2
2018-12, 3
2019-01, 2
2019-02, 1
2019-03, 1
2019-04, 1
/* Active students per calendar month of the report range.
| MonthCounter : recursive counter 1..N, one value per month from @ReportStartDate through
|                the month that contains @ReportEndDate.
| Months       : first and last day of each of those months.
| Final SELECT : counts the students whose [ActiveFrom, ActiveUntil] window touches the month;
|                months without any active student are kept with a count of 0 (LEFT JOIN).
| Assumes @ReportStartDate is the first day of a month; EndDate is derived from it.
*/
DECLARE @ReportStartDate DATE = '20180101'
      , @ReportEndDate   DATE = '20191231';

WITH MonthCounter AS
(
    SELECT 1 AS i
    UNION ALL
    SELECT i + 1
    FROM MonthCounter
    /* <= (not <): DATEDIFF counts month boundaries, so N months need N - 1 boundaries plus the anchor row */
    WHERE i <= DATEDIFF(MONTH, @ReportStartDate, @ReportEndDate)
)
, Months AS
(
    SELECT DATEADD(MONTH, i - 1, @ReportStartDate)                AS StartDate
         , DATEADD(DAY, -1, DATEADD(MONTH, i, @ReportStartDate))  AS EndDate
    FROM MonthCounter
)
SELECT mo.StartDate
     , mo.EndDate
     , COUNT(st.[Id]) AS ActiveStudents /* counts matched students only, so an unmatched month yields 0 */
FROM Months mo
/* Active in the month = started on or before the month's last day AND (still active OR ended on or after the month's first day) */
LEFT JOIN Student st ON DATEDIFF(DAY, st.ActiveFrom, mo.EndDate) >= 0
    AND (st.ActiveUntil IS NULL OR DATEDIFF(DAY, mo.StartDate, st.ActiveUntil) >= 0)
GROUP BY mo.StartDate
       , mo.EndDate
ORDER BY mo.StartDate
OPTION (MAXRECURSION 0);
Apologies for the convoluted month generating code, but I really tried to make it happen in a single SELECT query, and I couldn't find a much better method than the recursive CTE.
Pay attention to the comparison. To determine whether a student is active in a month, the record's ActiveFrom must fall on or BEFORE the END of the month, and its ActiveUntil must either be NULL or be some date on or AFTER the BEGINNING of the month.
I have a table with a Date column I want to group in 5 minutes intervals, and the sum of Volume for each interval. I also want the average of this sum column - how do I do it?
-- Raw rows for a single ticker
SELECT
    s.Ticker,
    s.[Date],
    s.Volume
FROM share AS s
WHERE s.Ticker = 'divya';
Here's my attempt, except I don't want the average volume, I want the average value for the sum of the volume:
-- First attempt: groups on the 5-minute slot of the minute-of-hour (DATEPART(MINUTE) / 5)
-- and reports the plain average of Volume alongside the sum.
SELECT
    s.Ticker,
    MIN(s.[Date]) AS [Time],
    SUM(s.Volume) AS SumVolume,
    AVG(s.Volume) AS AverageSumVolume
FROM share AS s
WHERE s.Ticker = 'divya'
GROUP BY
    s.Ticker,
    DATEPART(MINUTE, s.[Date]) / 5;
Another attempt:
-- Second attempt: open/high/low/close, volume sum and open interest per 5-minute group for one ticker.
-- NOTE(review): as written this does not compile - see the note on avg(SumVolume) below.
select Ticker,
MIN(Date)as Time,
-- The top 1 subqueries are not tied to the current group and have no ORDER BY, so they do not pick the group's first/last row
(select top 1 [Open] from share where ticker = 'divya' ) as OpenValue,-- need first value of 5 mins data
Max(High) as Max,
Min(low) as Low,
(select top 1 [Close] from share where ticker = 'divya') as Closevalue,-- need last value of 5 mins data
sum(Volume) as SumVolume,
-- SumVolume is an alias defined in this same SELECT list and cannot be referenced here
avg(SumVolume) as SumAverageVolume,-- average of SumVolume
(select top 1 [Open Interest] from share where ticker = 'divya') as OpenInterest
from share where Ticker = 'divya'
-- Groups on the minute-of-hour slot only, so the same slot in different hours lands in one group
GROUP BY (DATEPART(MINUTE, Date)/5 ),Ticker
Try this:
-- Volume per 5-minute bucket for one ticker.
-- The bucket start is the number of whole minutes since date 0, rounded down to a multiple
-- of 5 by integer division, then converted back to a datetime.
SELECT
    Ticker
    , DATEADD(MINUTE, DATEDIFF(MINUTE, 0, Date) / 5 * 5, 0) AS [Time] -- named so callers can reference the bucket start
    , SUM(Volume) AS SumVolume
    , AVG(Volume) AS AverageSumVolume -- average of the rows inside the bucket, not of the bucket sums
FROM share
WHERE Ticker = 'divya'
GROUP BY Ticker, DATEADD(MINUTE, DATEDIFF(MINUTE, 0, Date) / 5 * 5, 0);
Please read the following so that you can understand what is going on:
DATEADD: https://learn.microsoft.com/en-us/sql/t-sql/functions/dateadd-transact-sql
DATEDIFF: https://learn.microsoft.com/en-us/sql/t-sql/functions/datediff-transact-sql
UPDATE:
This would give you what you're looking for if I understood your comment correctly.
-- FiveMinuteVolume: one row per ticker and 5-minute bucket with the summed Volume.
-- The outer query repeats each bucket and adds the average of all bucket sums for the ticker.
WITH FiveMinuteVolume AS
(
    SELECT
        s.Ticker,
        DATEADD(MINUTE, DATEDIFF(MINUTE, 0, s.[Date]) / 5 * 5, 0) AS [Time],
        SUM(s.Volume) AS SumVolume
    FROM share AS s
    WHERE s.Ticker = 'divya'
    GROUP BY
        s.Ticker,
        DATEADD(MINUTE, DATEDIFF(MINUTE, 0, s.[Date]) / 5 * 5, 0)
)
SELECT
    v.Ticker AS ticker,
    v.[Time] AS [time],
    v.SumVolume,
    AVG(v.SumVolume) OVER (PARTITION BY v.Ticker) AS AverageSumVolume
FROM FiveMinuteVolume AS v;
I am using SQL Server 2012 and I know it is quite simple to calculate moving averages.
But what I need is to get the mode and the median for a defined window frame like so (with a window of 2 preceding to current row; month unique):
MONTH | CODE | MEDIAN | MODE
1 0 0 0
2 3 1.5 0
3 2 2 0
4 2 2 2
5 2 2 2
6 5 2 2
7 3 3 2
If several values qualify as mode, then pick the first.
I commented my code thoroughly. Read my comments on my Mode calculations and let me know if it needs tweaking. Overall, it's a relatively simple query. It just has a lot of ugly subqueries and it has a lot of comments. Check it out:
--Demo data in a table variable: one Code per Month
DECLARE @Table TABLE ([Month] INT, [Code] INT);
INSERT INTO @Table ([Month], [Code])
VALUES (1,0),
(2,3),
(3,2),
(4,2), --Try commenting this out to test my special mode thingymajig
(5,2),
(6,5),
(7,3);
--Number the rows by Month and cast Code to FLOAT so the averages below are not integer-truncated
WITH CTE
AS
(
SELECT ROW_NUMBER() OVER (ORDER BY [Month]) row_num,
[Month],
CAST(Code AS FLOAT) Code
FROM @Table
)
SELECT A.[Month],
A.Code,
--Median over the current row and the two rows before it
ISNULL((
SELECT CASE
--When there is only one previous value at row_num = 2, find Mean of first two codes
WHEN A.row_num = 2 THEN (LAG(B.Code,1) OVER (ORDER BY B.[Code]) + B.Code)/2.0
--Else find middle code value of current and previous two rows
ELSE B.Code
END
FROM CTE B
--How subquery relates to outer query
WHERE B.row_num BETWEEN A.row_num - 2 AND A.row_num
ORDER BY B.[Code]
--Order by code and offset by 1 so don't select the lowest value, but fetch the one above the lowest value
OFFSET 1 ROW FETCH NEXT 1 ROW ONLY),
--The subquery finds no row when the window holds a single row (first month): the median of one value is that value
A.Code) AS Median,
--I did mode a little different
--Instead of Avg(D.Code) you could list the values because with mode,
--If there's a tie with more than one of each number, you have multiple modes
--Instead of doing that, I simply return the mean of the tied modes
--When there's one, it doesn't change anything.
--If you were to delete the month 4, then your number of Codes 2 and number of Codes 3 would be the same in the last row.
--Proper mode would be 2,3. I instead average them out to be 2.5.
--NOTE(review): the mode looks at every row up to the current one (C.row_num <= A.row_num), not only the last three,
--and falls back to 0 when no code repeats - confirm this matches the intended window definition.
ISNULL((
SELECT AVG(D.Code)
FROM (
SELECT C.Code,
COUNT(*) cnt,
DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) dnse_rank
FROM CTE C
WHERE C.row_num <= A.row_num
GROUP BY C.Code
HAVING COUNT(*) > 1) D
WHERE D.dnse_rank = 1),
0) AS Mode
FROM CTE A
Results:
Month Code Median Mode
----------- ---------------------- ---------------------- ----------------------
1 0 0 0
2 3 1.5 0
3 2 2 0
4 2 2 2
5 2 2 2
6 5 2 2
7 3 3 2
If I understood your requirements correctly, your source table contains MONTH and CODE columns, and you want to calculate MEDIAN and MODE.
The query below calculates MEDIAN and MODE over a moving window of at most 3 months ("2 preceding to current row") and returns results matching your example.
-----------------------------------------------------
--Demo data: one Code per Month, Month is the clustered primary key
-----------------------------------------------------
CREATE TABLE #Data
(
    [Month] INT NOT NULL CONSTRAINT [PK_Data] PRIMARY KEY CLUSTERED,
    [Code]  INT NOT NULL
);

INSERT INTO #Data ([Month], [Code])
VALUES
    (1, 0),
    (2, 3),
    (3, 2),
    (4, 2),
    (5, 2),
    (6, 5),
    (7, 3);
-----------------------------------------------------
--Query: moving median and mode over the current row plus up to
--@PrecedingRowsLimit preceding rows, with rows ordered by Month
-----------------------------------------------------
DECLARE @PrecedingRowsLimit INT = 2;

WITH [Numbered] AS
(
    --Position of every row when ordered by Month. Windows are defined on these positions,
    --so the query also works when Month values are not exactly 1..n.
    SELECT [Month]
        , [Code]
        , ROW_NUMBER() OVER(ORDER BY [Month]) AS [RowNumber]
    FROM #Data
)
, [MPos] AS
(
    --One row per (window owner [Month], member row of that window):
    --  [SubId]                - Month of the member row
    --  [RowNumberInPartition] - rank of the member's Code inside the window
    --  [MedianPosition]       - middle rank when the window size is odd
    --  [MedianPosition1/2]    - the two middle ranks when the window size is even
    SELECT [R].[Month]
        , [RB].[Month] AS [SubId]
        , [RB].[Code]
        , ROW_NUMBER() OVER(PARTITION BY [R].[Month] ORDER BY [RB].[Code]) AS [RowNumberInPartition]
        , CASE
            WHEN [R].[Count] % 2 = 1 THEN ([R].[Count] + 1) / 2
            ELSE NULL
          END AS [MedianPosition]
        , CASE
            WHEN [R].[Count] % 2 = 0 THEN [R].[Count] / 2
            ELSE NULL
          END AS [MedianPosition1]
        , CASE
            WHEN [R].[Count] % 2 = 0 THEN [R].[Count] / 2 + 1
            ELSE NULL
          END AS [MedianPosition2]
    FROM
    (
        --[Count] is the window size: the row's position, capped at @PrecedingRowsLimit + 1
        SELECT [N].[Month]
            , [N].[RowNumber]
            , CASE WHEN [N].[RowNumber] > @PrecedingRowsLimit + 1 THEN @PrecedingRowsLimit + 1 ELSE [N].[RowNumber] END AS [Count]
        FROM [Numbered] [N]
    ) [R]
    INNER JOIN [Numbered] [RB]
        --Window membership is decided on row positions on both sides (not Month against RowNumber)
        ON [RB].[RowNumber] BETWEEN [R].[RowNumber] - @PrecedingRowsLimit AND [R].[RowNumber]
)
--DISTINCT collapses the one-row-per-window-member fan-out back to one row per Month
SELECT DISTINCT [M].[Month]
    , [ORIG].[Code]
    , COALESCE([ME].[Code], ([M1].[Code] + [M2].[Code]) / 2.0) AS [Median]
    , [MOD].[Mode]
FROM [MPos] [M]
--Odd window size: the single middle value
LEFT JOIN [MPos] [ME]
    ON [M].[Month] = [ME].[Month]
    AND [M].[MedianPosition] = [ME].[RowNumberInPartition]
--Even window size: the two middle values, averaged in the SELECT list
LEFT JOIN [MPos] [M1]
    ON [M].[Month] = [M1].[Month]
    AND [M].[MedianPosition1] = [M1].[RowNumberInPartition]
LEFT JOIN [MPos] [M2]
    ON [M].[Month] = [M2].[Month]
    AND [M].[MedianPosition2] = [M2].[RowNumberInPartition]
INNER JOIN
(
    --Mode per window: the Code with the highest count; ties go to the Code that appears first (lowest SubId)
    SELECT [MG].[Month]
        , FIRST_VALUE([MG].[Code]) OVER (PARTITION BY [MG].[Month] ORDER BY [MG].[Count] DESC, [MG].[SubId] ASC) AS [Mode]
    FROM
    (
        SELECT [Month], MIN([SubId]) AS [SubId], [Code], COUNT(1) AS [Count]
        FROM [MPos]
        GROUP BY [Month], [Code]
    ) [MG]
) [MOD]
    ON [M].[Month] = [MOD].[Month]
INNER JOIN #Data [ORIG]
    ON [ORIG].[Month] = [M].[Month]
ORDER BY [M].[Month];