I have some data by date and need to summarize it by month. I think I can use the PIVOT function in SQL Server 2012, but I can't get it right.
For simplicity purpose tables are called details and summary, details has visitor counts by month and gate. Summary table has Gate, Visitor type and 12 columns for each month.
-- Source data: one row per gate per month-end date.
-- DECLARE ... TABLE only accepts @-prefixed table variables, so these are
-- created as session-scoped temp tables; that keeps the "#tblDetails" and
-- "#tblSumm" names used by the statements that follow valid.
CREATE TABLE #tblDetails (
    Gate        varchar(15),
    EOM         date,            -- month-end date of the reporting month
    TotVisitors decimal(10,2),
    TotKids     decimal(10,2)
);

-- Target shape: one row per gate and category, one column per calendar month.
CREATE TABLE #tblSumm (
    Gate    varchar(15),
    CatDesc varchar(50),
    Jan decimal(10,2), Feb decimal(10,2), Mar decimal(10,2),
    Apr decimal(10,2), May decimal(10,2), Jun decimal(10,2),
    Jul decimal(10,2), Aug decimal(10,2), Sep decimal(10,2),
    Oct decimal(10,2), Nov decimal(10,2), Dec decimal(10,2)
);
-- Sample rows: (gate, month-end date, total visitors, total children).
-- East and West cover all of 2018; North covers May-Sep; South covers Jan-Mar.
INSERT INTO #tblDetails (Gate, EOM, TotVisitors, TotKids)
VALUES
    ('East',  '1/31/2018',  1000, 350),
    ('East',  '2/28/2018',  990,  225),
    ('East',  '3/31/2018',  970,  390),
    ('East',  '4/30/2018',  977,  290),
    ('East',  '5/31/2018',  960,  375),
    ('East',  '6/30/2018',  1020, 425),
    ('East',  '7/31/2018',  1117, 450),
    ('East',  '8/31/2018',  1090, 443),
    ('East',  '9/30/2018',  980,  210),
    ('East',  '10/31/2018', 960,  190),
    ('East',  '11/30/2018', 990,  195),
    ('East',  '12/31/2018', 1020, 330),
    ('West',  '1/31/2018',  992,  333),
    ('West',  '2/28/2018',  980,  265),
    ('West',  '3/31/2018',  1005, 397),
    ('West',  '4/30/2018',  960,  265),
    ('West',  '5/31/2018',  982,  344),
    ('West',  '6/30/2018',  1017, 399),
    ('West',  '7/31/2018',  1080, 442),
    ('West',  '8/31/2018',  1045, 413),
    ('West',  '9/30/2018',  940,  217),
    ('West',  '10/31/2018', 925,  302),
    ('West',  '11/30/2018', 937,  287),
    ('West',  '12/31/2018', 958,  271),
    ('North', '5/31/2018',  780,  380),
    ('North', '6/30/2018',  810,  400),
    ('North', '7/31/2018',  835,  411),
    ('North', '8/31/2018',  809,  415),
    ('North', '9/30/2018',  730,  390),
    ('South', '1/31/2018',  630,  210),
    ('South', '2/28/2018',  550,  190),
    ('South', '3/31/2018',  607,  215);
This is what I want in my summary table:
Gate CatDesc Jan Feb Mar Apr May Jun July Aug Sep Oct Nov Dec
East Number of Visitors 1000 990 970 977 960 1020 1117 1090 980 960 990 1020
East Number of Children 350 225 390 290 375 425 450 443 210 190 195 330
East % of Children 35.00 22.73 40.21 29.68 39.06 41.67 40.29 40.64 21.43 19.79 19.70 32.35
West Number of Visitors 992 980 1005 960 982 1017 1080 1045 940 925 937 958
West Number of Children 333 265 397 265 344 399 442 413 217 302 287 271
West % of Children 33.57 27.04 39.50 27.60 35.03 39.23 40.93 39.52 23.09 32.65 30.63 28.29
North Number of Visitors 0 0 0 0 0 810 835 809 730 0 0 0
North Number of Children 0 0 0 0 0 400 411 415 390 0 0 0
North % of Children 0 0 0 0 0 49.38 49.22 51.30 53.42 0 0 0
South Number of Visitors 630 550 607 0 0 0 0 0 0 0 0 0
South Number of Children 210 190 215 0 0 0 0 0 0 0 0 0
South % of Children 33.33 34.55 35.42 0 0 0 0 0 0 0 0 0
I personally prefer using a cross tab to using the PIVOT operator. You need to use a VALUES clause to get your description rows, and then you can use the cross tab to get the values:
-- Cross tab (conditional aggregation) over #tblDetails.
-- Each detail row is unpivoted into three KPI rows by the VALUES constructor
-- (visitors, children, % children); the KPIs are then spread into one column
-- per month by matching EOM against the month-end date of each month of @Year.
DECLARE @Year int = 2018;   -- reporting year; scalar variables need the @ prefix

SELECT D.Gate,
       V.CatDesc,
       -- MAX() picks the single matching row per gate/category/month;
       -- ISNULL turns "no row for that month" into 0.
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year,  1, 1)) THEN V.KPI END), 0) AS [Jan],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year,  2, 1)) THEN V.KPI END), 0) AS [Feb],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year,  3, 1)) THEN V.KPI END), 0) AS [Mar],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year,  4, 1)) THEN V.KPI END), 0) AS [Apr],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year,  5, 1)) THEN V.KPI END), 0) AS [May],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year,  6, 1)) THEN V.KPI END), 0) AS [Jun],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year,  7, 1)) THEN V.KPI END), 0) AS [Jul],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year,  8, 1)) THEN V.KPI END), 0) AS [Aug],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year,  9, 1)) THEN V.KPI END), 0) AS [Sep],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 10, 1)) THEN V.KPI END), 0) AS [Oct],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 11, 1)) THEN V.KPI END), 0) AS [Nov],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 12, 1)) THEN V.KPI END), 0) AS [Dec]
FROM #tblDetails D
     CROSS APPLY (VALUES (1, 'Number of Visitors', D.TotVisitors),
                         (2, 'Number of Children', D.TotKids),
                         -- NULLIF avoids a divide-by-zero error when a gate reports
                         -- zero visitors; the NULL then surfaces as 0 via ISNULL above.
                         (3, '% of Children', CONVERT(decimal(8,2), D.TotKids / NULLIF(D.TotVisitors * 1.0, 0) * 100))
                 ) V (Ordering, CatDesc, KPI)
GROUP BY D.Gate,
         V.CatDesc,
         V.Ordering
ORDER BY D.Gate,
         V.Ordering;   -- Ordering keeps Visitors / Children / % in that sequence per gate
Note that a column in SQL Server can only be one data type, so all values returned are a decimal(8,2).
Related
Looking for ways to specify the start time of a PARTITION BY statement in SQL Server.
Partitioning a years worth of data into 60 minute segments. The data is 10 minute collections from an IOT device.
Would like the partitions to start at 6AM each day.
How do I accomplish that fixed start time every day?
Here's some sample data. Want the windowing (partition) to start on the hour:
Sample data, 10 minute data sampling:
code
datetimePDT
data
AA01
12/15/2021 05:44 AM
0100
AA02
12/15/2021 05:54 AM
0105
AA03
12/15/2021 06:04 AM
0103
AA04
12/15/2021 06:14 AM
0109
AA05
12/15/2021 06:24 AM
0112
AA06
12/15/2021 06:34 AM
0115
AA07
12/15/2021 06:44 AM
0119
AA08
12/15/2021 06:54 AM
0125
AA09
12/15/2021 07:04 AM
0135
AA10
12/15/2021 07:14 AM
0155
AA11
12/15/2021 07:24 AM
0195
In a stored procedure -
Ranking by minute:
dense_rank() over (order by datepart(day,datetimePDT), datepart(hour,datetimePDT), datepart(minute,datetimePDT)) minuteRank
Grouping minutes into hours:
CEILING((minuteRank-1)/10) hourGroup
Then doing things like pulling out the average:
avg(data) over (partition by hourGroup) as GroupAVG
Prefer the hourGroup to start at 6 AM, so my GroupAVG is over the rows from 6:04 to 6:54, and the next partition is from 7-8 AM.
To make this more complicated, there may be missing data, so I can't rely on the data collection period being 10 minutes.
Want to get here:
code
datetimePDT
data
minuteRank
hourGroup
AA01
12/15/2021 05:44 AM
0100
01
NULL
AA02
12/15/2021 05:54 AM
0105
02
NULL
AA03
12/15/2021 06:04 AM
0103
03
0001
AA04
12/15/2021 06:14 AM
0109
04
0001
AA05
12/15/2021 06:24 AM
0112
05
0001
AA06
12/15/2021 06:34 AM
0115
06
0001
AA07
12/15/2021 06:44 AM
0119
07
0001
AA08
12/15/2021 06:54 AM
0125
08
0001
AA09
12/15/2021 07:04 AM
0135
09
0002
AA10
12/15/2021 07:14 AM
0155
10
0002
AA11
12/15/2021 07:24 AM
0195
11
0002
Maybe something like this?
-- Demo: number the rows inside each "day" when a day starts at 09:00 rather
-- than at midnight. Table variables must use the @ prefix.
DECLARE @testTable TABLE (MyTestDate datetime);

INSERT INTO @testTable (MyTestDate)
VALUES ('2022-02-12 04:06:57.683')
     , ('2022-02-12 05:06:57.683')
     , ('2022-02-12 06:06:57.683')
     , ('2022-02-12 07:06:57.683')
     , ('2022-02-12 08:06:57.683')
     , ('2022-02-12 09:06:57.683')
     , ('2022-02-12 10:06:57.683')
     , ('2022-02-12 11:06:57.683')
     , ('2022-02-12 12:06:57.683')
     , ('2022-02-12 13:06:57.683')
     , ('2022-02-12 14:06:57.683')
     , ('2022-02-12 15:06:57.683')
     , ('2022-02-12 16:06:57.683')
     , ('2022-02-12 17:06:57.683')
     , ('2022-02-12 18:06:57.683')
     , ('2022-02-12 19:06:57.683')
     , ('2022-02-12 20:06:57.683')
     , ('2022-02-12 12:06:57.683')
     , ('2022-02-13 04:06:57.683')
     , ('2022-02-13 05:06:57.683')
     , ('2022-02-13 06:06:57.683')
     , ('2022-02-13 07:06:57.683')
     , ('2022-02-13 08:06:57.683')
     , ('2022-02-13 09:06:57.683')
     , ('2022-02-13 10:06:57.683')
     , ('2022-02-13 11:06:57.683')
     , ('2022-02-13 12:06:57.683')
     , ('2022-02-13 13:06:57.683')
     , ('2022-02-13 14:06:57.683')
     , ('2022-02-13 15:06:57.683')
     , ('2022-02-13 16:06:57.683')
     , ('2022-02-13 17:06:57.683')
     , ('2022-02-13 18:06:57.683')
     , ('2022-02-13 19:06:57.683')
     , ('2022-02-13 20:06:57.683')
     , ('2022-02-13 12:06:57.683');

-- start_date is the 09:00 boundary that opens the period a row belongs to:
--   * '09:00' converts to 1900-01-01 09:00, so DATEDIFF(day, ...) counts whole
--     days from that base and DATEADD puts 09:00 back on the row's own date;
--   * rows before 09:00 belong to the previous day's period, hence the "- 1".
SELECT tt.MyTestDate
     , t.start_date
     , ROW_NUMBER() OVER (PARTITION BY t.start_date ORDER BY tt.MyTestDate) AS row_in_period
FROM @testTable tt
CROSS APPLY (VALUES (DATEADD(day,
                             DATEDIFF(day, '09:00', tt.MyTestDate)
                               - IIF(DATEPART(hour, tt.MyTestDate) < 9, 1, 0),
                             '09:00'))) AS t (start_date);
What is the reasoning for PARTITION BY instead of just GROUP BY on the HOUR? You could play around with it obviously and put the aggregation into a CTE if you still want to see the individual values.
EDIT: Added a CTE and CASE expression in final select to partition as noted by Jeff.
-- Sample readings: device code, reading timestamp, reading value.
-- Table variables must use the @ prefix.
DECLARE @Table TABLE (code VARCHAR(10), datetimePDT DATETIME, [data] INT);

INSERT INTO @Table (code, datetimePDT, [data]) VALUES
('AA01','12/15/2021 05:44 AM', 0100),
('AA02','12/15/2021 05:54 AM', 0105),
('AA03','12/15/2021 06:04 AM', 0103),
('AA04','12/15/2021 06:14 AM', 0109),
('AA05','12/15/2021 06:24 AM', 0112),
('AA06','12/15/2021 06:34 AM', 0115),
('AA07','12/15/2021 06:44 AM', 0119),
('AA08','12/15/2021 06:54 AM', 0125),
('AA09','12/15/2021 07:04 AM', 0135),
('AA10','12/15/2021 07:14 AM', 0155),
('AA11','12/15/2021 07:24 AM', 0195);

-- DataAgg: one row per clock hour (year/month/day/hour) with the first and
-- last code seen, the row count and the average reading for that hour.
-- AVG over an INT column returns an INT, so the average is truncated.
WITH DataAgg
AS
(
    SELECT MIN(tt.code)                   AS FirstOfGroup,
           MAX(tt.code)                   AS LastOfGroup,
           COUNT(tt.code)                 AS NumberInGroup,
           DATEPART(YEAR,  tt.datetimePDT) AS [DataYear],
           DATEPART(MONTH, tt.datetimePDT) AS [DataMonth],
           DATEPART(DAY,   tt.datetimePDT) AS [DataDay],
           DATEPART(HOUR,  tt.datetimePDT) AS [DataHour],
           AVG(tt.[data])                 AS AvgData
    FROM @Table tt
    GROUP BY DATEPART(YEAR,  tt.datetimePDT),
             DATEPART(MONTH, tt.datetimePDT),
             DATEPART(DAY,   tt.datetimePDT),
             DATEPART(HOUR,  tt.datetimePDT)
)
SELECT t.code,
       t.datetimePDT,
       t.[data],
       d.AvgData AS HourAvg,
       -- The reporting day starts at 06:00: anything earlier (including the
       -- midnight hour, hour 0) is attributed to the previous calendar date.
       CASE WHEN DATEPART(HOUR, t.datetimePDT) >= 6
            THEN CAST(t.datetimePDT AS DATE)
            ELSE CAST(DATEADD(DAY, -1, t.datetimePDT) AS DATE)
       END AS DataDate
FROM @Table t
-- Join each reading to its own hour bucket on the grouping keys; this does not
-- depend on codes being issued in timestamp order.
LEFT JOIN DataAgg d
       ON  d.DataYear  = DATEPART(YEAR,  t.datetimePDT)
       AND d.DataMonth = DATEPART(MONTH, t.datetimePDT)
       AND d.DataDay   = DATEPART(DAY,   t.datetimePDT)
       AND d.DataHour  = DATEPART(HOUR,  t.datetimePDT)
ORDER BY t.datetimePDT;
FirstOfGroup
LastOfGroup
NumberInGroup
DataYear
DataMonth
DataDay
DataHour
AvgData
AA01
AA02
2
2021
12
15
5
102
AA03
AA08
6
2021
12
15
6
113
AA09
AA11
3
2021
12
15
7
161
code
datetimePDT
data
HourAvg
DataDate
AA01
2021-12-15 05:44:00.000
100
102
2021-12-14
AA02
2021-12-15 05:54:00.000
105
102
2021-12-14
AA03
2021-12-15 06:04:00.000
103
113
2021-12-15
AA04
2021-12-15 06:14:00.000
109
113
2021-12-15
AA05
2021-12-15 06:24:00.000
112
113
2021-12-15
AA06
2021-12-15 06:34:00.000
115
113
2021-12-15
AA07
2021-12-15 06:44:00.000
119
113
2021-12-15
AA08
2021-12-15 06:54:00.000
125
113
2021-12-15
AA09
2021-12-15 07:04:00.000
135
161
2021-12-15
AA10
2021-12-15 07:14:00.000
155
161
2021-12-15
AA11
2021-12-15 07:24:00.000
195
161
2021-12-15
I have a business need to project when a specific task needs to be done based on the usage of a task.
For example, you need to change the oil in your car every 3000 miles. Some days you drive 300 miles, and other days you drive 500 miles. When you hit 3000, you change the oil, and restart the counter. Based on a projected usage table, return a set of all the oil change dates.
I could do this in a table-valued function or some other 'coded' solution.
But I thought I could do it in one statement, a recursive cte perhaps.
I'm having difficulties 'joining' the next date into the WHERE of the recursive part.
And SQL doesn't like 'TOP 1' in a recursive CTE at all. :)
I would like a set like this:
This is what I've got:
-- Asker's attempt at chaining oil-change dates with a recursive CTE.
-- Intent: the anchor finds the first date whose running mileage reaches 3000;
-- each recursive step restarts the running total after the previous result.
-- NOTE(review): SQL Server does not allow TOP in the recursive member of a CTE,
-- so this statement is rejected as written (see the surrounding discussion).
WITH cte_MilesMX (RateDate,RunningRateMiles)
AS
(
-- Initial query: first date after 2020-01-01 whose running total is >= 3000.
-- NOTE(review): TOP 1 has no ORDER BY, so "first" is not guaranteed.
SELECT TOP 1 *
FROM (
SELECT
RateDate,
SUM(RateMiles) OVER (ORDER BY RateDate) AS RunningRateMiles
FROM dbo.RatesbyDay
WHERE RateDate > '2020-01-01') q1
WHERE q1.RunningRateMiles >= 3000
UNION ALL
-- Recursive part: same search, restarted after the date found by the previous step.
SELECT TOP 1 *
FROM (
SELECT
rbd.RateDate,
SUM(RateMiles) OVER (ORDER BY rbd.RateDate) AS RunningRateMiles
FROM dbo.RatesbyDay rbd
JOIN cte_MilesMX cte
-- ON 1 = 1 makes this a cross join; the real correlation is the WHERE below.
ON 1 = 1
WHERE rbd.RateDate > cte.RateDate) q1
WHERE q1.RunningRateMiles >= 3000
)
SELECT *
FROM cte_MilesMX
If you want to fool with this, here is the example:
Any help would be greatly appreciated.
Thanks.
-- Projected usage: miles driven per calendar day for January 2020.
CREATE TABLE RatesbyDay (
    RateDate  DATE,
    RateMiles INT
);

INSERT INTO RatesbyDay (RateDate, RateMiles)
VALUES
    ('2020-01-01', 600),
    ('2020-01-02', 450),
    ('2020-01-03', 370),
    ('2020-01-04', 700),
    ('2020-01-05', 100),
    ('2020-01-06', 480),
    ('2020-01-07', 430),
    ('2020-01-08', 200),
    ('2020-01-09', 590),
    ('2020-01-10', 380),
    ('2020-01-11', 220),
    ('2020-01-12', 320),
    ('2020-01-13', 360),
    ('2020-01-14', 600),
    ('2020-01-15', 450),
    ('2020-01-16', 475),
    ('2020-01-17', 300),
    ('2020-01-18', 190),
    ('2020-01-19', 435),
    ('2020-01-20', 285),
    ('2020-01-21', 350),
    ('2020-01-22', 410),
    ('2020-01-23', 250),
    ('2020-01-24', 300),
    ('2020-01-25', 250),
    ('2020-01-26', 650),
    ('2020-01-27', 180),
    ('2020-01-28', 280),
    ('2020-01-29', 200),
    ('2020-01-30', 100),
    ('2020-01-31', 100);
-- Manual walk-through of the oil-change dates. Each query sums the miles
-- driven after the previous change date (exclusive) and returns the first
-- date on which that running total reaches 3000.
-- TOP (1) is paired with ORDER BY so the earliest qualifying date is returned
-- deterministically; without it, any qualifying row could legally come back.

-- this returns the 1st oil change assuming we just changed it on 1-1-2020
SELECT TOP (1) q1.RateDate, q1.RunningRateMiles
FROM (
    SELECT RateDate,
           SUM(RateMiles) OVER (ORDER BY RateDate) AS RunningRateMiles
    FROM dbo.RatesbyDay
    WHERE RateDate > '2020-01-01'
) q1
WHERE q1.RunningRateMiles >= 3000
ORDER BY q1.RateDate;

-- the above query returned 1-9-2020 as the oil change, so when is the next one.
SELECT TOP (1) q1.RateDate, q1.RunningRateMiles
FROM (
    SELECT RateDate,
           SUM(RateMiles) OVER (ORDER BY RateDate) AS RunningRateMiles
    FROM dbo.RatesbyDay
    WHERE RateDate > '2020-01-09'
) q1
WHERE q1.RunningRateMiles >= 3000
ORDER BY q1.RateDate;

-- etc. etc.
SELECT TOP (1) q1.RateDate, q1.RunningRateMiles
FROM (
    SELECT RateDate,
           SUM(RateMiles) OVER (ORDER BY RateDate) AS RunningRateMiles
    FROM dbo.RatesbyDay
    WHERE RateDate > '2020-01-17'
) q1
WHERE q1.RunningRateMiles >= 3000
ORDER BY q1.RateDate;

SELECT TOP (1) q1.RateDate, q1.RunningRateMiles
FROM (
    SELECT RateDate,
           SUM(RateMiles) OVER (ORDER BY RateDate) AS RunningRateMiles
    FROM dbo.RatesbyDay
    WHERE RateDate > '2020-01-26'
) q1
WHERE q1.RunningRateMiles >= 3000
ORDER BY q1.RateDate;
This isn't a recursive CTE, but it does do what you're trying to do. The technique goes by a couple of different names, usually either "Quirky Update" or "Ordered Update".
First, notice that I added two new columns to your table and a clustered index. They are in fact necessary, but if you are unwilling or unable to modify the existing table, this works just as well with a #TempTable.
For more detailed information, see Solving the Running Total and Ordinal Rank Problems (Rewritten)
Also, fair warning: this technique isn't without its detractors, because Microsoft doesn't guarantee that it will work as expected.
-- "Quirky update" demo: rebuilds RatesByDay in tempdb with two extra columns,
-- then fills them in a single ordered UPDATE that carries a running total in
-- a variable and resets it after every 3000-mile threshold is crossed.
USE tempdb;
GO
IF OBJECT_ID('tempdb.dbo.RatesByDay', 'U') IS NOT NULL
BEGIN DROP TABLE tempdb.dbo.RatesByDay; END;
GO
CREATE TABLE tempdb.dbo.RatesByDay (
    RateDate date NOT NULL
        CONSTRAINT pk_RatesByDay PRIMARY KEY CLUSTERED (RateDate), -- clustered index is needed to control the direction of the update.
    RateMiles int NOT NULL,
    IsChangeDay bit NULL,               -- 1 on the day the 3000-mile threshold is reached
    MilesSinceLastChange int NULL       -- running miles since the previous change day
    );
GO
INSERT tempdb.dbo.RatesByDay (RateDate, RateMiles) VALUES
    ('2020-01-01',600),('2020-01-02',450),('2020-01-03',370),('2020-01-04',700),('2020-01-05',100),('2020-01-06',480),
    ('2020-01-07',430),('2020-01-08',200),('2020-01-09',590),('2020-01-10',380),('2020-01-11',220),('2020-01-12',320),
    ('2020-01-13',360),('2020-01-14',600),('2020-01-15',450),('2020-01-16',475),('2020-01-17',300),('2020-01-18',190),
    ('2020-01-19',435),('2020-01-20',285),('2020-01-21',350),('2020-01-22',410),('2020-01-23',250),('2020-01-24',300),
    ('2020-01-25',250),('2020-01-26',650),('2020-01-27',180),('2020-01-28',280),('2020-01-29',200),('2020-01-30',100),
    ('2020-01-31',100);

--=====================================================================================================================

-- Local variables must use the @ prefix (a # prefix denotes a temp table name).
DECLARE
    @RunningMiles int = 0,   -- miles accumulated since the last change day
    @Anchor date;            -- NOTE(review): assigned but never read here; presumably the
                             -- "anchor" column the quirky-update write-ups recommend - confirm

UPDATE rbd SET
    -- Carry the running total forward, restarting from 0 on the row after a
    -- change day, and store it in both the variable and the column.
    @RunningMiles = rbd.MilesSinceLastChange = CASE WHEN @RunningMiles < 3000 THEN @RunningMiles ELSE 0 END + rbd.RateMiles,
    -- Reads @RunningMiles after the assignment above (the posted results show
    -- the flag set on the row whose total first reaches 3000).
    rbd.IsChangeDay = CASE WHEN @RunningMiles < 3000 THEN 0 ELSE 1 END,
    @Anchor = rbd.RateDate
FROM
    dbo.RatesByDay rbd WITH (TABLOCKX, INDEX (1))   -- exclusive lock + force the clustered index
WHERE 1 = 1
    AND rbd.RateDate > '2020-01-01'                 -- the 2020-01-01 row is left untouched (NULLs)
OPTION (MAXDOP 1);                                  -- single-threaded

-------------------------------------

SELECT * FROM dbo.RatesByDay rbd;
And the results...
RateDate RateMiles IsChangeDay MilesSinceLastChange
---------- ----------- ----------- --------------------
2020-01-01 600 NULL NULL
2020-01-02 450 0 450
2020-01-03 370 0 820
2020-01-04 700 0 1520
2020-01-05 100 0 1620
2020-01-06 480 0 2100
2020-01-07 430 0 2530
2020-01-08 200 0 2730
2020-01-09 590 1 3320
2020-01-10 380 0 380
2020-01-11 220 0 600
2020-01-12 320 0 920
2020-01-13 360 0 1280
2020-01-14 600 0 1880
2020-01-15 450 0 2330
2020-01-16 475 0 2805
2020-01-17 300 1 3105
2020-01-18 190 0 190
2020-01-19 435 0 625
2020-01-20 285 0 910
2020-01-21 350 0 1260
2020-01-22 410 0 1670
2020-01-23 250 0 1920
2020-01-24 300 0 2220
2020-01-25 250 0 2470
2020-01-26 650 1 3120
2020-01-27 180 0 180
2020-01-28 280 0 460
2020-01-29 200 0 660
2020-01-30 100 0 760
2020-01-31 100 0 860
You can do this with a recursive query:
-- Walks the days in date order, keeping a running mileage that is reset to 0
-- every time it reaches 3000; the rows where the reset happened are the
-- oil-change dates.
--   total    = running miles including the current day (before any reset)
--   newtotal = value carried to the next day (0 right after a change)
with
    data as (
        -- number the days so the recursion can step from rn to rn + 1
        select r.*, row_number() over (order by ratedate) rn
        from ratesbyday r
    ),
    cte as (
        -- anchor: first day; reset immediately if that single day already
        -- reaches the threshold, otherwise the counter would never reset
        select d.*,
               ratemiles total,
               case when ratemiles >= 3000 then 0 else ratemiles end newtotal
        from data d
        where rn = 1
        union all
        select d.*,
               c.newtotal + d.ratemiles,
               case when c.newtotal < 3000 and c.newtotal + d.ratemiles >= 3000
                    then 0
                    else c.newtotal + d.ratemiles
               end
        from cte c
        inner join data d on d.rn = c.rn + 1
    )
select ratedate, ratemiles, total
from cte
where newtotal = 0
order by ratedate
-- The recursion runs once per row, so lift the default limit of 100 levels;
-- it still terminates because rn + 1 eventually finds no matching row.
option (maxrecursion 0);
The query starts by enumerating the rows. Then, it iteratively walks them, starting from the "first" one; everytime we exceed the 3000 miles threshold, we reset the running miles count. We can then filter on "reset" rows.
Demo on DB Fiddle:
ratedate | ratemiles | total
:--------- | --------: | ----:
2020-01-07 | 430 | 3130
2020-01-15 | 450 | 3120
2020-01-25 | 250 | 3245
If there may be more than 100 rows in your dataset, you need to add option (maxrecursion 0) at the very end of the query.
In this instance I would use a rolling agg and then use the mod operator to find the points where it hits the 3000 interval.
Using the table desc and inserts above here is an example:
-- Flags the days on which the cumulative mileage crosses a multiple of 3000.
-- The number of completed 3000-mile intervals (integer division) is compared
-- with the previous day's; an increase means a threshold was crossed. Unlike
-- comparing the remainders, this also catches a single day that adds 3000+
-- miles, and LAG's default of 0 lets the first row be flagged if it qualifies.
-- Note: thresholds are fixed at 3000, 6000, 9000, ... of the cumulative total;
-- the counter is not restarted from 0 on the change day itself.
SELECT
    agg.RateDate
    ,agg.RateMiles
    ,agg.MilesAgg
    ,agg.MilesAgg % 3000 AS ModValue
    ,CASE WHEN agg.MilesAgg / 3000
               > LAG(agg.MilesAgg, 1, 0) OVER (ORDER BY agg.RateDate) / 3000
          THEN 'Due'
          ELSE 'NotDue'
     END AS IsDue
FROM
(
    --Get the rolling total of miles
    SELECT
        rbd.RateDate
        ,rbd.RateMiles
        ,SUM(rbd.RateMiles) OVER (ORDER BY rbd.RateDate ROWS UNBOUNDED PRECEDING) AS MilesAgg
    FROM #RatesByDay rbd
) agg
ORDER BY agg.RateDate;
Results, first day is counting the 600 miles as being AFTER the oil change
RateDate Mi MiAgg Mod IsDue?
--------------------------------------
2020-01-01 600 600 600 NotDue
2020-01-02 450 1050 1050 NotDue
2020-01-03 370 1420 1420 NotDue
2020-01-04 700 2120 2120 NotDue
2020-01-05 100 2220 2220 NotDue
2020-01-06 480 2700 2700 NotDue
2020-01-07 430 3130 130 Due
2020-01-08 200 3330 330 NotDue
2020-01-09 590 3920 920 NotDue
2020-01-10 380 4300 1300 NotDue
2020-01-11 220 4520 1520 NotDue
2020-01-12 320 4840 1840 NotDue
2020-01-13 360 5200 2200 NotDue
2020-01-14 600 5800 2800 NotDue
2020-01-15 450 6250 250 Due
2020-01-16 475 6725 725 NotDue
2020-01-17 300 7025 1025 NotDue
2020-01-18 190 7215 1215 NotDue
2020-01-19 435 7650 1650 NotDue
2020-01-20 285 7935 1935 NotDue
2020-01-21 350 8285 2285 NotDue
2020-01-22 410 8695 2695 NotDue
2020-01-23 250 8945 2945 NotDue
2020-01-24 300 9245 245 Due
2020-01-25 250 9495 495 NotDue
2020-01-26 650 10145 1145 NotDue
2020-01-27 180 10325 1325 NotDue
2020-01-28 280 10605 1605 NotDue
2020-01-29 200 10805 1805 NotDue
2020-01-30 100 10905 1905 NotDue
2020-01-31 100 11005 2005 NotDue
I have not written any SQL for an age and am struggling with the final stage of a data cleanup script. An example output of my existing script is:
MRN ID ADTM SDTM WardDays WardMins
45 45_1 2016-03-24 06:28:00.000 2016-03-24 18:15:00.000 0 707
45 45_2 2016-03-24 11:07:00.000 2016-03-24 18:15:00.000 0 428
MRN ID ADTM SDTM TDays Tminutes
381 381_1 2016-01-30 00:25:00.000 2016-01-31 16:53:00.000 0 1415
381 381_1 2016-01-31 00:00:00.000 2016-01-31 16:53:00.000 0 1013
381 381_2 2016-01-31 11:30:00.000 2016-01-31 16:53:00.000 0 323
381 381_3 2016-01-31 16:53:00.000 2016-02-01 17:50:00.000 0 427
381 381_3 2016-02-01 00:00:00.000 2016-02-01 17:50:00.000 0 1070
The problem is the overlapping dates for the same [non-unique] [ID] field. For the first case, the output I want (with corrections in italics) is:
MRN ID ADTM SDTM WardDays WardMins
45 45_1 2016-03-24 06:28:00.000 _2016-03-24 11:07:00.000_ 0 335
45 45_2 2016-03-24 11:07:00.000 2016-03-24 18:15:00.000 0 428
and for the second set of records:
MRN ID ADTM SDTM TDays Tminutes
381 381_1 2016-01-30 00:25:00.000 _2016-01-31 00:00:00.000_ 0 1415
381 381_1 2016-01-31 00:00:00.000 _2016-01-31 11:30:00.000_ 0 690
381 381_2 2016-01-31 11:30:00.000 2016-01-31 16:53:00.000 0 323
381 381_3 2016-01-31 16:53:00.000 _2016-02-01 00:00:00.000_ 0 427
381 381_3 2016-02-01 00:00:00.000 2016-02-01 17:50:00.000 0 1070
So you see that I don't want the end date-time [SDTM] of any two records to overlap with the next records start date-time [ADTM]. I see this being done in two stages:
Updates the dates according to the logic outlined by the data set above.
Update the TDays and TMinutes for each record.
To setup the data set, please use:
-- Sample stays: ADTM is the start and SDTM the end of each record; several
-- records for the same MRN overlap. The date/time columns are typed as
-- datetime2(3) rather than varchar so comparisons and DATEDIFF work on real
-- date values; the 'YYYY-MM-DD hh:mm:ss.mmm' literals convert unambiguously.
CREATE TABLE T (
    MRN      int,
    ID       varchar(5),
    ADTM     datetime2(3),
    SDTM     datetime2(3),
    TDays    int,
    TMinutes int
);

INSERT INTO T
    (MRN, ID, ADTM, SDTM, TDays, TMinutes)
VALUES
    (45, '45_1', '2016-03-24 06:28:00.000', '2016-03-24 18:15:00.000', 0, 707),
    (45, '45_2', '2016-03-24 11:07:00.000', '2016-03-24 18:15:00.000', 0, 428),
    (381, '381_1', '2016-01-30 00:25:00.000', '2016-01-31 16:53:00.000', 0, 1415),
    (381, '381_1', '2016-01-31 00:00:00.000', '2016-01-31 16:53:00.000', 0, 1013),
    (381, '381_3', '2016-01-31 16:53:00.000', '2016-02-01 17:50:00.000', 0, 427),
    (381, '381_3', '2016-02-01 00:00:00.000', '2016-02-01 17:50:00.000', 0, 1070),
    (381, '381_2', '2016-01-31 11:30:00.000', '2016-01-31 16:53:00.000', 0, 323);
For part 1. I have been toying with a CTE query, but this is merely merging overlapping records. I need to query the preceding record to check for the required conditions and I am getting lost big-time.
-- Merges runs of records per ID that overlap or lie within one day of each other.
-- RunStarts keeps the start times that are not covered (within one day) by an
-- earlier record of the same ID; RunEnds keeps the end times that are not
-- covered by a later-ending record. Pairing the n-th start with the n-th end
-- of each ID yields one merged interval per run.
;WITH RunStarts AS
(
    SELECT cur.ID,
           cur.ADTM,
           ROW_NUMBER() OVER (PARTITION BY cur.ID ORDER BY cur.ADTM) AS Rn
    FROM WD AS cur
    WHERE NOT EXISTS
          (
              SELECT 1
              FROM WD AS other
              WHERE other.ID = cur.ID
                AND other.ADTM < cur.ADTM
                AND cur.ADTM <= DATEADD(day, 1, other.SDTM)
          )
),
RunEnds AS
(
    SELECT cur.ID,
           cur.SDTM,
           ROW_NUMBER() OVER (PARTITION BY cur.ID ORDER BY cur.SDTM) AS Rn
    FROM WD AS cur
    WHERE NOT EXISTS
          (
              SELECT 1
              FROM WD AS other
              WHERE other.ID = cur.ID
                AND DATEADD(day, -1, other.ADTM) <= cur.SDTM
                AND cur.SDTM < other.SDTM
          )
)
SELECT rs.ID,
       rs.ADTM,
       re.SDTM
FROM RunStarts AS rs
INNER JOIN RunEnds AS re
        ON re.ID = rs.ID
       AND re.Rn = rs.Rn;
Can someone give me any advice about how this can be done?
Thanks for your time.
This case is not getting fixed with the accepted answer:
MRN ID ADTM SDTM TDays Tminutes
381 381_1 2016-01-30 00:25:00.000 2016-01-31 00:00:00.000 0 1415
381 381_2 2016-01-31 11:30:00.000 2016-02-01 00:00:00.000 0 323
381 381_3 2016-01-31 16:53:00.000 2016-02-01 00:00:00.000 0 1070
New table is:
-- Second test case: same table shape, with records for MRN 381 whose end
-- times already sit on midnight boundaries. The date/time columns are typed
-- as datetime2(3) rather than varchar so comparisons and DATEDIFF work on real
-- date values; the 'YYYY-MM-DD hh:mm:ss.mmm' literals convert unambiguously.
CREATE TABLE T (
    MRN      int,
    ID       varchar(5),
    ADTM     datetime2(3),
    SDTM     datetime2(3),
    TDays    int,
    TMinutes int
);

INSERT INTO T
    (MRN, ID, ADTM, SDTM, TDays, TMinutes)
VALUES
    (45, '45_1', '2016-03-24 06:28:00.000', '2016-03-24 18:15:00.000', 0, 707),
    (45, '45_2', '2016-03-24 11:07:00.000', '2016-03-24 18:15:00.000', 0, 428),
    (381, '381_1', '2016-01-30 00:25:00.000', '2016-01-31 00:00:00.000', 0, 1415),
    (381, '381_2', '2016-01-31 11:30:00.000', '2016-02-01 00:00:00.000', 0, 323),
    (381, '381_3', '2016-01-31 16:53:00.000', '2016-02-01 00:00:00.000', 0, 427);
this should get you what you want in sql 2008
-- Trims each record's end time (SDTM) so it does not overlap the next record
-- of the same MRN, then recomputes the duration in minutes.
-- The lookup only considers records that start strictly inside the current
-- interval (after ADTM, before SDTM). A later record that starts at or after
-- the current end is not an overlap and must not move the end time later.
SELECT t1.ID,
       t1.ADTM,
       COALESCE(nxt.ADTM, t1.SDTM) AS SDTM,
       DATEDIFF(MINUTE, t1.ADTM, COALESCE(nxt.ADTM, t1.SDTM)) AS Tminutes
FROM T t1
OUTER APPLY (
    -- earliest record of the same MRN that starts while t1 is still open
    SELECT TOP (1) t2.ADTM
    FROM T t2
    WHERE t2.MRN = t1.MRN
      AND t2.ADTM > t1.ADTM
      AND t2.ADTM < t1.SDTM
    ORDER BY t2.ADTM
) nxt
ORDER BY t1.ID,
         t1.ADTM;   -- IDs repeat, so ADTM keeps the output order stable
This seems to be the right way to start:
-- Self-contained demo: trims each record's end time to the start of the next
-- record of the same MRN when the two overlap. Table variables must use the
-- @ prefix.
declare @T table ( MRN int, ID varchar(5), ADTM varchar(23), SDTM varchar(23),
                   TDays int, TMinutes int);

INSERT INTO @T (MRN, ID, ADTM, SDTM, TDays, TMinutes) VALUES
(45, '45_1', '2016-03-24 06:28:00.000', '2016-03-24 18:15:00.000', 0, 707),
(45, '45_2', '2016-03-24 11:07:00.000', '2016-03-24 18:15:00.000', 0, 428),
(381, '381_1', '2016-01-30 00:25:00.000', '2016-01-31 16:53:00.000', 0, 1415),
(381, '381_1', '2016-01-31 00:00:00.000', '2016-01-31 16:53:00.000', 0, 1013),
(381, '381_3', '2016-01-31 16:53:00.000', '2016-02-01 17:50:00.000', 0, 427),
(381, '381_3', '2016-02-01 00:00:00.000', '2016-02-01 17:50:00.000', 0, 1070),
(381, '381_2', '2016-01-31 11:30:00.000', '2016-01-31 16:53:00.000', 0, 323);

-- Ordered: records numbered by start time within each MRN (ID breaks ties so
--          the numbering is repeatable).
-- Ends:    each record paired with the one that follows it; the end time
--          becomes the earlier of its own SDTM and the next record's ADTM.
--          The last record of an MRN has no successor and keeps its SDTM.
;With Ordered as (
    select
        *,
        ROW_NUMBER() OVER (PARTITION BY MRN order by ADTM, ID) as rn
    from
        @T
), Ends as (
    select
        o1.MRN,
        o1.ID,
        o1.ADTM,
        CASE WHEN o2.ADTM < o1.SDTM THEN o2.ADTM ELSE o1.SDTM END as SDTM
    from
        Ordered o1
            left join
        Ordered o2
            on
                o1.MRN = o2.MRN and
                o1.rn = o2.rn - 1
)
select
    *,
    DATEDIFF(minute, ADTM, SDTM) as TMinutes
from Ends;
Results:
MRN ID ADTM SDTM TMinutes
----------- ----- ----------------------- ----------------------- -----------
45 45_1 2016-03-24 06:28:00.000 2016-03-24 11:07:00.000 279
45 45_2 2016-03-24 11:07:00.000 2016-03-24 18:15:00.000 428
381 381_1 2016-01-30 00:25:00.000 2016-01-31 00:00:00.000 1415
381 381_1 2016-01-31 00:00:00.000 2016-01-31 11:30:00.000 690
381 381_2 2016-01-31 11:30:00.000 2016-01-31 16:53:00.000 323
381 381_3 2016-01-31 16:53:00.000 2016-02-01 00:00:00.000 427
381 381_3 2016-02-01 00:00:00.000 2016-02-01 17:50:00.000 1070
Unless your sample data is incomplete or I'm missing something, we just always match each row with the next row after (just sorting them by ADTM) and then either take the current SDTM or the next rows ADTM, whichever comes earlier (via the CASE).
I have a problem with creating a query with distinct and top. What i want is a top 25 with maximum values and a distinct by the parameter column.
My query now is:
-- Top 25 rows by [max] whose start and end both fall inside the window.
-- DISTINCT applies to the whole selected row, so the same parameter can
-- still appear more than once.
SELECT DISTINCT TOP (25)
       m.startDate,
       m.parameter,
       m.[min],
       m.[max],
       m.[avg],
       m.amount_called
FROM VisualisatieData.dbo.metric_data_by_day_parameter AS m
WHERE m.startDate >= '2013-05-30 08:46'
  AND m.startDate <= '2013-05-31 16:00'
  AND m.endDate   >= '2013-05-30 08:46'
  AND m.endDate   <= '2013-05-31 16:00'
ORDER BY m.[max] DESC
This returns the following:
2013-05-31 01:08:26.000 P1 0 318386 1662 795
2013-05-31 00:01:36.000 P2 0 312325 1554 806
2013-05-31 00:01:36.000 P3 0 124827 25877 14
2013-05-30 08:49:19.000 P4 0 91992 11381 54
2013-05-31 01:05:54.000 P5 47 42410 497 499
2013-05-31 01:05:54.000 P6 16 42395 469 499
2013-05-31 01:05:55.000 P7 0 41380 244 498
2013-05-31 00:01:36.000 P8 328 35225 5305 8
2013-05-31 05:34:10.000 P4 16 12137 1208 17
2013-05-31 03:50:18.000 P9 0 11137 4687 23
2013-05-31 01:23:41.000 P10 391 8013 3237 95
2013-05-31 01:23:41.000 P11 375 7998 3174 98
2013-05-31 01:19:55.000 P12 453 7263 2437 58
2013-05-31 07:57:05.000 P13 2343 5639 3991 2
2013-05-31 03:32:21.000 P14 1687 5077 2993 9
2013-05-30 08:48:57.000 P15 984 5061 2419 12
2013-05-30 08:48:57.000 P16 984 5061 2419 12
2013-05-31 01:40:37.000 P15 1281 5045 2619 10
2013-05-31 01:40:37.000 P16 1281 5045 2619 10
2013-05-31 03:08:51.000 P17 562 4608 1302 18
2013-05-30 16:59:05.000 P18 4202 4202 4202 1
2013-05-30 16:59:05.000 P19 4202 4202 4202 1
2013-05-31 03:37:30.000 P20 875 4139 2681 18
2013-05-31 03:08:51.000 P21 547 3999 1203 18
2013-05-31 03:19:17.000 P22 31 3702 1399 5
This time there are 3 duplicate parameters what i dont want. Depending on the time selection there are more duplicates. I think this doesnt work because the distinct must be applied on only the parameter column.
I tried the following:
-- Second attempt: a per-parameter row number ("subselect") is computed in the
-- derived table, but the outer query never filters on it, so the result is
-- the same as the plain DISTINCT TOP 25 query.
SELECT DISTINCT TOP (25)
       A.startDate,
       A.parameter,
       A.[min],
       A.[max],
       A.[avg],
       A.amount_called
FROM (
        SELECT src.startDate,
               src.endDate,
               src.parameter,
               src.[min],
               src.[max],
               src.[avg],
               src.amount_called,
               ROW_NUMBER() OVER (PARTITION BY src.parameter ORDER BY src.[max] DESC) AS subselect
        FROM VisualisatieData.dbo.metric_data_by_day_parameter AS src
     ) AS A
WHERE A.startDate >= '2013-05-30 08:46'
  AND A.startDate <= '2013-05-31 16:00'
  AND A.endDate   >= '2013-05-30 08:46'
  AND A.endDate   <= '2013-05-31 16:00'
ORDER BY A.[max] DESC
But this doesnt work either, it returns the same as the first query.
I hope i described my problem clearly, if you want more information ask me.
How can i change my query so i get a top 25 with maximum values and no duplicate parameters? Suggestions are appreciated!
Thanks in advance!
Try to
-- Top 25 parameters by their highest [max] inside the time window, exactly
-- one row per parameter.
-- Each parameter's in-window rows are ranked by [max] descending and only the
-- top-ranked row is kept. Matching on "max = (select max(max) ...)" would
-- return every row that ties for a parameter's maximum, so duplicates could
-- still appear; here startDate breaks such ties (earliest wins).
;WITH ranked AS (
    SELECT startDate,
           parameter,
           [min],
           [max],
           [avg],
           amount_called,
           ROW_NUMBER() OVER (PARTITION BY parameter
                              ORDER BY [max] DESC, startDate) AS rn
    FROM VisualisatieData.dbo.metric_data_by_day_parameter
    WHERE startDate BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
      AND endDate   BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
)
SELECT TOP (25)
       startDate, parameter, [min], [max], [avg], amount_called
FROM ranked
WHERE rn = 1
ORDER BY [max] DESC;
then the same parameter will be removed
try this
-- Top 25 parameters by their highest [max] inside the time window, exactly
-- one row per parameter (SQL Server syntax).
--  * The time filter sits inside the derived table so the row number ranks
--    only rows that are in range.
--  * "subselect = 1" keeps the best row of each parameter; computing the row
--    number without filtering on it has no effect.
--  * TOP replaces LIMIT, and the ORDER BY moves to the outer query: SQL Server
--    has no LIMIT and rejects ORDER BY in a derived table without TOP.
SELECT TOP (25)
       A.startDate, A.parameter, A.[min], A.[max], A.[avg], A.amount_called
FROM (
        SELECT startDate, endDate, parameter, [min], [max], [avg], amount_called,
               ROW_NUMBER() OVER (PARTITION BY parameter
                                  ORDER BY [max] DESC, startDate) AS subselect
        FROM VisualisatieData.dbo.metric_data_by_day_parameter
        WHERE startDate BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
          AND endDate   BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
     ) AS A
WHERE A.subselect = 1
ORDER BY A.[max] DESC;
I am working on query where I need next nth no of row. My table structure is like
ID StockName StockDate DayOpen DayHigh DayLow DayClose
--------------------------------------------------------------------
60 IDBI 2014-01-01 66.50 67.80 66.50 67.60
197 IDBI 2014-01-02 67.55 69.20 65.25 65.60
334 IDBI 2014-01-03 65.00 66.40 64.35 66.15
471 IDBI 2014-01-06 66.15 66.35 65.10 65.55
608 IDBI 2014-01-07 66.10 66.15 63.85 64.25
745 IDBI 2014-01-08 64.00 67.10 63.10 66.80
882 IDBI 2014-01-09 66.60 67.80 64.50 64.75
1019 IDBI 2014-01-10 65.00 65.90 63.75 64.10
1156 IDBI 2014-01-13 63.85 65.00 63.25 64.20
1293 IDBI 2014-01-14 64.00 64.95 63.80 64.05
What I want from output is column name which will give me next 5th row date
E.g. For 1st row new column should return value of next 5th row date ie 2014-01-08 same for 2nd row it should return 2014-01-09 date.
And I can't use datediff with -5 day count as these are working day data excluding weekend days
How do I get this value without using while loop or cursor?
Use a CTE to return the base table plus ROW_NUMBER so that you can LEFT JOIN the CTE to itself on the ROW_NUMBER of the new [Next5thDate] column being 5 rows ahead of current row:
-- Self-contained demo: for every trading day, return the date of the row five
-- positions later in StockDate order (NULL when fewer than five rows follow).
-- Counting rows rather than calendar days skips weekends, because non-trading
-- days simply have no row. Table variables must use the @ prefix.
SET NOCOUNT ON;
SET ANSI_NULLS ON;

DECLARE @Data TABLE (
    ID        INT NOT NULL PRIMARY KEY CLUSTERED,
    StockName VARCHAR(50) NOT NULL,
    StockDate DATE NOT NULL,
    DayOpen   MONEY NOT NULL,
    DayHigh   MONEY NOT NULL,
    DayLow    MONEY NOT NULL,
    DayClose  MONEY NOT NULL,
    UNIQUE (StockDate)   -- one row per date, so the row numbering below is unambiguous
);

INSERT INTO @Data VALUES (60, 'IDBI', '2014-01-01', 66.50, 67.80, 66.50, 67.60);
INSERT INTO @Data VALUES (197, 'IDBI', '2014-01-02', 67.55, 69.20, 65.25, 65.60);
INSERT INTO @Data VALUES (334, 'IDBI', '2014-01-03', 65.00, 66.40, 64.35, 66.15);
INSERT INTO @Data VALUES (471, 'IDBI', '2014-01-06', 66.15, 66.35, 65.10, 65.55);
INSERT INTO @Data VALUES (608, 'IDBI', '2014-01-07', 66.10, 66.15, 63.85, 64.25);
INSERT INTO @Data VALUES (745, 'IDBI', '2014-01-08', 64.00, 67.10, 63.10, 66.80);
INSERT INTO @Data VALUES (882, 'IDBI', '2014-01-09', 66.60, 67.80, 64.50, 64.75);
INSERT INTO @Data VALUES (1019, 'IDBI', '2014-01-10', 65.00, 65.90, 63.75, 64.10);
INSERT INTO @Data VALUES (1156, 'IDBI', '2014-01-13', 63.85, 65.00, 63.25, 64.20);
INSERT INTO @Data VALUES (1293, 'IDBI', '2014-01-14', 64.00, 64.95, 63.80, 64.05);

;WITH cte AS
(
    -- position of each row in date order
    SELECT d.*, ROW_NUMBER() OVER (ORDER BY d.StockDate ASC) AS [RowNum]
    FROM @Data d
)
SELECT d1.ID, d1.StockName, d1.StockDate, d1.DayOpen, d1.DayHigh,
       d1.DayLow, d1.DayClose, d2.StockDate AS [Next5thDate]
FROM cte d1
LEFT JOIN cte d2
       ON d2.RowNum = (d1.RowNum + 5)   -- the row five positions ahead, if any
ORDER BY d1.StockDate;                  -- make the output order explicit
Results:
ID StockName StockDate DayOpen DayHigh DayLow DayClose Next5thDate
60 IDBI 2014-01-01 66.50 67.80 66.50 67.60 2014-01-08
197 IDBI 2014-01-02 67.55 69.20 65.25 65.60 2014-01-09
334 IDBI 2014-01-03 65.00 66.40 64.35 66.15 2014-01-10
471 IDBI 2014-01-06 66.15 66.35 65.10 65.55 2014-01-13
608 IDBI 2014-01-07 66.10 66.15 63.85 64.25 2014-01-14
745 IDBI 2014-01-08 64.00 67.10 63.10 66.80 NULL
882 IDBI 2014-01-09 66.60 67.80 64.50 64.75 NULL
1019 IDBI 2014-01-10 65.00 65.90 63.75 64.10 NULL
1156 IDBI 2014-01-13 63.85 65.00 63.25 64.20 NULL
1293 IDBI 2014-01-14 64.00 64.95 63.80 64.05 NULL
-- Skeleton of the same idea: rank the rows by date, then join the ranked set
-- to itself so each row is paired with the row five ranks later.
With RnkedItems As
(
Select Id, StockName, StockDate, DayOpen, DayHigh, DayLow, DayClose
-- Id is a secondary sort key, so rows sharing a StockDate still rank deterministically
, Row_Number() Over ( Order By StockDate, Id ) As Rnk
From MyTable
)
-- "..." is a placeholder for the column list (columns of Original plus the
-- desired columns of the joined RnkedItems row); it is not valid SQL as written.
Select ...
From RnkedItems As Original
-- Left join keeps rows that have no row five ranks ahead (joined columns are NULL)
Left Join RnkedItems
On RnkedItems.Rnk = Original.Rnk + 5
I'm assuming you want to return both the target row and the row five entries after the target row on the same line.
Try this:
-- Returns every stock row together with the date of the row five entries
-- after it (in StockDate order) on the same line; Next5thDate is NULL when
-- fewer than five rows follow. Filtering on "rn > 5" alone would only drop
-- the first five rows instead of pairing each row with its 5th successor.
;with EnumeratedStocks as (
    select rn = row_number() over (order by StockDate), *
    from Stocks
)
select es.*,
       nxt.StockDate as Next5thDate
from EnumeratedStocks es
left join EnumeratedStocks nxt
       on nxt.rn = es.rn + 5
order by es.rn
You can account for the next 5th date falling on a weekend by taking the day name of the date five days ahead, then skipping two extra days if it is a Saturday and one extra day if it is a Sunday.
-- Adds five calendar days to stockdate and, when that lands on a weekend,
-- pushes the result to the following Monday:
--   Saturday -> 5 + 2 = 7 days, Sunday -> 5 + 1 = 6 days, otherwise 5 days.
-- DATENAME(weekday, ...) is the SQL Server way to get the day name; the names
-- it returns follow the session language, so 'Saturday'/'Sunday' assume English.
-- NOTE(review): this only moves the landing date off a weekend; it does not
-- count weekend days (or holidays) that fall inside the five-day span, so the
-- result can differ from a "5th following row" lookup.
Select *,
       Case DATENAME(weekday, DATEADD(day, 5, stockdate))
            When 'Saturday' Then DATEADD(day, 7, stockdate)
            When 'Sunday'   Then DATEADD(day, 6, stockdate)
            Else DATEADD(day, 5, stockdate)
       End As newStockDate
From tableName