I have not written any SQL for an age and am struggling with the final stage of a data cleanup script. An example output of my existing script is:
MRN ID ADTM SDTM WardDays WardMins
45 45_1 2016-03-24 06:28:00.000 2016-03-24 18:15:00.000 0 707
45 45_2 2016-03-24 11:07:00.000 2016-03-24 18:15:00.000 0 428
MRN ID ADTM SDTM TDays Tminutes
381 381_1 2016-01-30 00:25:00.000 2016-01-31 16:53:00.000 0 1415
381 381_1 2016-01-31 00:00:00.000 2016-01-31 16:53:00.000 0 1013
381 381_2 2016-01-31 11:30:00.000 2016-01-31 16:53:00.000 0 323
381 381_3 2016-01-31 16:53:00.000 2016-02-01 17:50:00.000 0 427
381 381_3 2016-02-01 00:00:00.000 2016-02-01 17:50:00.000 0 1070
The problem is the overlapping dates for the same [non-unique] [ID] field. For the first case, the output I want (with corrections in italics) is:
MRN ID ADTM SDTM WardDays WardMins
45 45_1 2016-03-24 06:28:00.000 _2016-03-24 11:07:00.000_ 0 335
45 45_2 2016-03-24 11:07:00.000 2016-03-24 18:15:00.000 0 428
and for the second set of records:
MRN ID ADTM SDTM TDays Tminutes
381 381_1 2016-01-30 00:25:00.000 _2016-01-31 00:00:00.000_ 0 1415
381 381_1 2016-01-31 00:00:00.000 _2016-01-31 11:30:00.000_ 0 690
381 381_2 2016-01-31 11:30:00.000 2016-01-31 16:53:00.000 0 323
381 381_3 2016-01-31 16:53:00.000 _2016-02-01 00:00:00.000_ 0 427
381 381_3 2016-02-01 00:00:00.000 2016-02-01 17:50:00.000 0 1070
So you see that I don't want the end date-time [SDTM] of any two records to overlap with the next records start date-time [ADTM]. I see this being done in two stages:
Updates the dates according to the logic outlined by the data set above.
Update the TDays and TMinutes for each record.
To setup the data set, please use:
-- Sample data for the overlap clean-up: one row per stay.
-- ADTM is the start date-time and SDTM the end date-time, both kept as text.
CREATE TABLE T
(
    MRN      int,
    ID       varchar(5),
    ADTM     varchar(23),
    SDTM     varchar(23),
    TDays    int,
    TMinutes int
);

INSERT INTO T (MRN, ID, ADTM, SDTM, TDays, TMinutes)
VALUES
    (45,  '45_1',  '2016-03-24 06:28:00.000', '2016-03-24 18:15:00.000', 0, 707),
    (45,  '45_2',  '2016-03-24 11:07:00.000', '2016-03-24 18:15:00.000', 0, 428),
    (381, '381_1', '2016-01-30 00:25:00.000', '2016-01-31 16:53:00.000', 0, 1415),
    (381, '381_1', '2016-01-31 00:00:00.000', '2016-01-31 16:53:00.000', 0, 1013),
    (381, '381_3', '2016-01-31 16:53:00.000', '2016-02-01 17:50:00.000', 0, 427),
    (381, '381_3', '2016-02-01 00:00:00.000', '2016-02-01 17:50:00.000', 0, 1070),
    (381, '381_2', '2016-01-31 11:30:00.000', '2016-01-31 16:53:00.000', 0, 323);
For part 1. I have been toying with a CTE query, but this is merely merging overlapping records. I need to query the preceding record to check for the required conditions and I am getting lost big-time.
-- Pairs, per ID, the n-th "chain start" with the n-th "chain end".
;WITH StartD AS
(
    -- Rows that have no earlier-starting row of the same ID whose SDTM
    -- (plus one day) reaches this row's ADTM.
    SELECT
        cur.ID,
        cur.ADTM,
        ROW_NUMBER() OVER (PARTITION BY cur.ID ORDER BY cur.ADTM) AS Rn
    FROM WD AS cur
    WHERE NOT EXISTS
          (
              SELECT 1
              FROM WD AS prev
              WHERE prev.ID = cur.ID
                AND prev.ADTM < cur.ADTM
                AND cur.ADTM <= DATEADD(day, 1, prev.SDTM)
          )
),
EndD AS
(
    -- Rows that have no later-ending row of the same ID whose ADTM
    -- (minus one day) is at or before this row's SDTM.
    SELECT
        cur.ID,
        cur.SDTM,
        ROW_NUMBER() OVER (PARTITION BY cur.ID ORDER BY cur.SDTM) AS Rn
    FROM WD AS cur
    WHERE NOT EXISTS
          (
              SELECT 1
              FROM WD AS nxt
              WHERE nxt.ID = cur.ID
                AND DATEADD(day, -1, nxt.ADTM) <= cur.SDTM
                AND cur.SDTM < nxt.SDTM
          )
)
SELECT
    s.ID,
    s.ADTM,
    e.SDTM
FROM StartD AS s
INNER JOIN EndD AS e
    ON  e.ID = s.ID
    AND e.Rn = s.Rn;
Can someone give me any advice about how this can be done?
Thanks for your time.
This case is not getting fixed with the accepted answer:
MRN ID ADTM SDTM TDays Tminutes
381 381_1 2016-01-30 00:25:00.000 2016-01-31 00:00:00.000 0 1415
381 381_2 2016-01-31 11:30:00.000 2016-02-01 00:00:00.000 0 323
381 381_3 2016-01-31 16:53:00.000 2016-02-01 00:00:00.000 0 1070
New table is:
-- Second sample data set: the case the accepted answer does not handle.
-- ADTM is the start date-time and SDTM the end date-time, both kept as text.
CREATE TABLE T
(
    MRN      int,
    ID       varchar(5),
    ADTM     varchar(23),
    SDTM     varchar(23),
    TDays    int,
    TMinutes int
);

INSERT INTO T (MRN, ID, ADTM, SDTM, TDays, TMinutes)
VALUES
    (45,  '45_1',  '2016-03-24 06:28:00.000', '2016-03-24 18:15:00.000', 0, 707),
    (45,  '45_2',  '2016-03-24 11:07:00.000', '2016-03-24 18:15:00.000', 0, 428),
    (381, '381_1', '2016-01-30 00:25:00.000', '2016-01-31 00:00:00.000', 0, 1415),
    (381, '381_2', '2016-01-31 11:30:00.000', '2016-02-01 00:00:00.000', 0, 323),
    (381, '381_3', '2016-01-31 16:53:00.000', '2016-02-01 00:00:00.000', 0, 427);
this should get you what you want in sql 2008
-- Trim each stay's end (SDTM) back to the start (ADTM) of the next stay of the
-- same patient (MRN), but only when that next stay begins before this one ends.
-- A stay that does not overlap its successor keeps its own SDTM, so gaps
-- between stays are never filled in.
--
-- ADTM/SDTM are text in T; the < and > comparisons rely on the fixed-width
-- 'yyyy-mm-dd hh:mm:ss.mmm' layout, which sorts the same as the date-times.
SELECT
    cur.ID,
    cur.ADTM,
    COALESCE(nxt.ADTM, cur.SDTM) AS SDTM,
    DATEDIFF(MINUTE, cur.ADTM, COALESCE(nxt.ADTM, cur.SDTM)) AS Tminutes
FROM T AS cur
OUTER APPLY
(
    -- Earliest later-starting stay of the same patient that begins
    -- strictly inside the current stay.
    SELECT TOP 1
        o.ADTM
    FROM T AS o
    WHERE o.MRN = cur.MRN
      AND o.ADTM > cur.ADTM
      AND o.ADTM < cur.SDTM
    ORDER BY o.ADTM
) AS nxt
ORDER BY
    cur.ID,
    cur.ADTM;
This seems to be the right way to start:
-- Sample data in a table variable (table variables must be named with @).
DECLARE @T TABLE
(
    MRN      int,
    ID       varchar(5),
    ADTM     varchar(23),
    SDTM     varchar(23),
    TDays    int,
    TMinutes int
);

INSERT INTO @T (MRN, ID, ADTM, SDTM, TDays, TMinutes)
VALUES
    (45,  '45_1',  '2016-03-24 06:28:00.000', '2016-03-24 18:15:00.000', 0, 707),
    (45,  '45_2',  '2016-03-24 11:07:00.000', '2016-03-24 18:15:00.000', 0, 428),
    (381, '381_1', '2016-01-30 00:25:00.000', '2016-01-31 16:53:00.000', 0, 1415),
    (381, '381_1', '2016-01-31 00:00:00.000', '2016-01-31 16:53:00.000', 0, 1013),
    (381, '381_3', '2016-01-31 16:53:00.000', '2016-02-01 17:50:00.000', 0, 427),
    (381, '381_3', '2016-02-01 00:00:00.000', '2016-02-01 17:50:00.000', 0, 1070),
    (381, '381_2', '2016-01-31 11:30:00.000', '2016-01-31 16:53:00.000', 0, 323);

;WITH Ordered AS
(
    -- Number each patient's rows by start time; ID breaks ties so the
    -- numbering is repeatable.
    SELECT
        MRN,
        ID,
        ADTM,
        SDTM,
        ROW_NUMBER() OVER (PARTITION BY MRN ORDER BY ADTM, ID) AS rn
    FROM @T
),
Ends AS
(
    -- Pair every row with the next row of the same patient and keep whichever
    -- comes first: the row's own SDTM or the next row's ADTM.
    -- The last row of a patient has no successor (o2.ADTM is NULL), so the
    -- CASE falls through to its own SDTM.
    SELECT
        o1.MRN,
        o1.ID,
        o1.ADTM,
        CASE WHEN o2.ADTM < o1.SDTM THEN o2.ADTM ELSE o1.SDTM END AS SDTM
    FROM Ordered AS o1
    LEFT JOIN Ordered AS o2
        ON  o2.MRN = o1.MRN
        AND o2.rn  = o1.rn + 1
)
SELECT
    MRN,
    ID,
    ADTM,
    SDTM,
    DATEDIFF(minute, ADTM, SDTM) AS TMinutes
FROM Ends
ORDER BY
    MRN,
    ADTM;
Results:
MRN ID ADTM SDTM TMinutes
----------- ----- ----------------------- ----------------------- -----------
45 45_1 2016-03-24 06:28:00.000 2016-03-24 11:07:00.000 279
45 45_2 2016-03-24 11:07:00.000 2016-03-24 18:15:00.000 428
381 381_1 2016-01-30 00:25:00.000 2016-01-31 00:00:00.000 1415
381 381_1 2016-01-31 00:00:00.000 2016-01-31 11:30:00.000 690
381 381_2 2016-01-31 11:30:00.000 2016-01-31 16:53:00.000 323
381 381_3 2016-01-31 16:53:00.000 2016-02-01 00:00:00.000 427
381 381_3 2016-02-01 00:00:00.000 2016-02-01 17:50:00.000 1070
Unless your sample data is incomplete or I'm missing something, we just always match each row with the next row after (just sorting them by ADTM) and then either take the current SDTM or the next rows ADTM, whichever comes earlier (via the CASE).
Related
Looking for ways to specify the start time of a PARTITION BY statement in SQL Server.
Partitioning a years worth of data into 60 minute segments. The data is 10 minute collections from an IOT device.
Would like the partitions to start at 6AM each day.
How do I accomplish that fixed start time every day?
Here's some sample data. Want the windowing (partition) to start on the hour:
Sample data, 10 minute data sampling:
code
datetimePDT
data
AA01
12/15/2021 05:44 AM
0100
AA02
12/15/2021 05:54 AM
0105
AA03
12/15/2021 06:04 AM
0103
AA04
12/15/2021 06:14 AM
0109
AA05
12/15/2021 06:24 AM
0112
AA06
12/15/2021 06:34 AM
0115
AA07
12/15/2021 06:44 AM
0119
AA08
12/15/2021 06:54 AM
0125
AA09
12/15/2021 07:04 AM
0135
AA10
12/15/2021 07:14 AM
0155
AA11
12/15/2021 07:24 AM
0195
In a stored procedure -
Ranking by minute:
dense_rank() over (order by datepart(day,datetimePDT), datepart(hour,datetimePDT), datepart(minute,datetimePDT)) minuteRank
Grouping minutes into hours:
CEILING((minuteRank-1)/10) hourGroup
Then doing things like pulling out the average:
avg(data) over (partition by hourGroup) as GroupAVG
Prefer the hourGroup to start at 6 AM, so my GroupAVG is over the rows from 6:04 to 6:54, and the next partition is from 7-8 AM.
To make this more complicated, there may be missing data, so I can't rely on the data collection period being 10 minutes.
Want to get here:
code
datetimePDT
data
minuteRank
hourGroup
AA01
12/15/2021 05:44 AM
0100
01
NULL
AA02
12/15/2021 05:54 AM
0105
02
NULL
AA03
12/15/2021 06:04 AM
0103
03
0001
AA04
12/15/2021 06:14 AM
0109
04
0001
AA05
12/15/2021 06:24 AM
0112
05
0001
AA06
12/15/2021 06:34 AM
0115
06
0001
AA07
12/15/2021 06:44 AM
0119
07
0001
AA08
12/15/2021 06:54 AM
0125
08
0001
AA09
12/15/2021 07:04 AM
0135
09
0002
AA10
12/15/2021 07:14 AM
0155
10
0002
AA11
12/15/2021 07:24 AM
0195
11
0002
Maybe something like this?
-- Demo: number rows inside "days" that start at 09:00 instead of midnight.
-- Table variables must be named with @.
DECLARE @testTable TABLE (MyTestDate datetime);

INSERT INTO @testTable (MyTestDate)
VALUES ('2022-02-12 04:06:57.683')
     , ('2022-02-12 05:06:57.683')
     , ('2022-02-12 06:06:57.683')
     , ('2022-02-12 07:06:57.683')
     , ('2022-02-12 08:06:57.683')
     , ('2022-02-12 09:06:57.683')
     , ('2022-02-12 10:06:57.683')
     , ('2022-02-12 11:06:57.683')
     , ('2022-02-12 12:06:57.683')
     , ('2022-02-12 13:06:57.683')
     , ('2022-02-12 14:06:57.683')
     , ('2022-02-12 15:06:57.683')
     , ('2022-02-12 16:06:57.683')
     , ('2022-02-12 17:06:57.683')
     , ('2022-02-12 18:06:57.683')
     , ('2022-02-12 19:06:57.683')
     , ('2022-02-12 20:06:57.683')
     , ('2022-02-12 12:06:57.683')
     , ('2022-02-13 04:06:57.683')
     , ('2022-02-13 05:06:57.683')
     , ('2022-02-13 06:06:57.683')
     , ('2022-02-13 07:06:57.683')
     , ('2022-02-13 08:06:57.683')
     , ('2022-02-13 09:06:57.683')
     , ('2022-02-13 10:06:57.683')
     , ('2022-02-13 11:06:57.683')
     , ('2022-02-13 12:06:57.683')
     , ('2022-02-13 13:06:57.683')
     , ('2022-02-13 14:06:57.683')
     , ('2022-02-13 15:06:57.683')
     , ('2022-02-13 16:06:57.683')
     , ('2022-02-13 17:06:57.683')
     , ('2022-02-13 18:06:57.683')
     , ('2022-02-13 19:06:57.683')
     , ('2022-02-13 20:06:57.683')
     , ('2022-02-13 12:06:57.683');

-- start_date is the 09:00 boundary at or before MyTestDate: count whole days
-- from the '09:00' anchor, step back one day for times before 09:00, then add
-- that many days back onto the anchor.
SELECT
    tt.MyTestDate,
    t.start_date,
    ROW_NUMBER() OVER (PARTITION BY t.start_date ORDER BY tt.MyTestDate) AS row_in_window
FROM @testTable AS tt
CROSS APPLY
(
    VALUES
    (
        DATEADD(day,
                DATEDIFF(day, '09:00', tt.MyTestDate)
                    - CASE WHEN DATEPART(hour, tt.MyTestDate) < 9 THEN 1 ELSE 0 END,
                '09:00')
    )
) AS t (start_date);
What is the reasoning for PARTITION BY instead of just GROUP BY on the HOUR? You could play around with it obviously and put the aggregation into a CTE if you still want to see the individual values.
EDIT: Added a CTE and CASE expression in final select to partition as noted by Jeff.
-- Sample readings in a table variable (table variables must be named with @).
DECLARE @Table TABLE (code VARCHAR(10), datetimePDT DATETIME, [data] INT);

INSERT INTO @Table (code, datetimePDT, [data])
VALUES
    ('AA01', '12/15/2021 05:44 AM', 100),
    ('AA02', '12/15/2021 05:54 AM', 105),
    ('AA03', '12/15/2021 06:04 AM', 103),
    ('AA04', '12/15/2021 06:14 AM', 109),
    ('AA05', '12/15/2021 06:24 AM', 112),
    ('AA06', '12/15/2021 06:34 AM', 115),
    ('AA07', '12/15/2021 06:44 AM', 119),
    ('AA08', '12/15/2021 06:54 AM', 125),
    ('AA09', '12/15/2021 07:04 AM', 135),
    ('AA10', '12/15/2021 07:14 AM', 155),
    ('AA11', '12/15/2021 07:24 AM', 195);

-- One row per calendar hour: first/last code, row count and average reading.
-- AVG over an INT column returns an INT, so the average is a whole number.
WITH DataAgg AS
(
    SELECT
        MIN(tt.code)                   AS FirstOfGroup,
        MAX(tt.code)                   AS LastOfGroup,
        COUNT(tt.code)                 AS NumberInGroup,
        DATEPART(YEAR,  tt.datetimePDT) AS [DataYear],
        DATEPART(MONTH, tt.datetimePDT) AS [DataMonth],
        DATEPART(DAY,   tt.datetimePDT) AS [DataDay],
        DATEPART(HOUR,  tt.datetimePDT) AS [DataHour],
        AVG(tt.[data])                 AS AvgData
    FROM @Table AS tt
    GROUP BY
        DATEPART(YEAR,  tt.datetimePDT),
        DATEPART(MONTH, tt.datetimePDT),
        DATEPART(DAY,   tt.datetimePDT),
        DATEPART(HOUR,  tt.datetimePDT)
)
SELECT
    t.code,
    t.datetimePDT,
    t.[data],
    d.AvgData AS HourAvg,
    -- The reporting day starts at 06:00: every reading before 06:00, including
    -- the midnight hour, belongs to the previous calendar date.
    CASE
        WHEN DATEPART(HOUR, t.datetimePDT) >= 6 THEN CAST(t.datetimePDT AS DATE)
        ELSE CAST(DATEADD(DAY, -1, t.datetimePDT) AS DATE)
    END AS DataDate
FROM @Table AS t
-- Join on the hour bucket itself rather than on a code range, so the match
-- does not depend on codes being issued in time order.
LEFT JOIN DataAgg AS d
    ON  d.DataYear  = DATEPART(YEAR,  t.datetimePDT)
    AND d.DataMonth = DATEPART(MONTH, t.datetimePDT)
    AND d.DataDay   = DATEPART(DAY,   t.datetimePDT)
    AND d.DataHour  = DATEPART(HOUR,  t.datetimePDT)
ORDER BY
    t.datetimePDT;
FirstOfGroup
LastOfGroup
NumberInGroup
DataYear
DataMonth
DataDay
DataHour
AvgData
AA01
AA02
2
2021
12
15
5
102
AA03
AA08
6
2021
12
15
6
113
AA09
AA11
3
2021
12
15
7
161
code
datetimePDT
data
HourAvg
DataDate
AA01
2021-12-15 05:44:00.000
100
102
2021-12-14
AA02
2021-12-15 05:54:00.000
105
102
2021-12-14
AA03
2021-12-15 06:04:00.000
103
113
2021-12-15
AA04
2021-12-15 06:14:00.000
109
113
2021-12-15
AA05
2021-12-15 06:24:00.000
112
113
2021-12-15
AA06
2021-12-15 06:34:00.000
115
113
2021-12-15
AA07
2021-12-15 06:44:00.000
119
113
2021-12-15
AA08
2021-12-15 06:54:00.000
125
113
2021-12-15
AA09
2021-12-15 07:04:00.000
135
161
2021-12-15
AA10
2021-12-15 07:14:00.000
155
161
2021-12-15
AA11
2021-12-15 07:24:00.000
195
161
2021-12-15
I have some data by date and need to summarize by month. I think I can use PIVOT function in sql server 2012 but I cant get this right.
For simplicity purpose tables are called details and summary, details has visitor counts by month and gate. Summary table has Gate, Visitor type and 12 columns for each month.
-- Monthly visitor counts per gate. EOM is the month-end date.
-- A #-prefixed name is a temp table and is created with CREATE TABLE
-- (DECLARE ... TABLE only accepts @-prefixed table variables).
CREATE TABLE #tblDetails
(
    Gate        varchar(15),
    EOM         date,
    TotVisitors decimal(10,2),
    TotKids     decimal(10,2)
);

-- Target shape of the summary: one row per gate and category, one column per month.
CREATE TABLE #tblSumm
(
    Gate    varchar(15),
    CatDesc varchar(50),
    [Jan] decimal(10,2), [Feb] decimal(10,2), [Mar] decimal(10,2),
    [Apr] decimal(10,2), [May] decimal(10,2), [Jun] decimal(10,2),
    [Jul] decimal(10,2), [Aug] decimal(10,2), [Sep] decimal(10,2),
    [Oct] decimal(10,2), [Nov] decimal(10,2), [Dec] decimal(10,2)
);

-- Dates are written as 'yyyymmdd' so they parse the same under any
-- language / DATEFORMAT setting.
INSERT INTO #tblDetails (Gate, EOM, TotVisitors, TotKids)
VALUES
    ('East',  '20180131', 1000, 350),
    ('East',  '20180228',  990, 225),
    ('East',  '20180331',  970, 390),
    ('East',  '20180430',  977, 290),
    ('East',  '20180531',  960, 375),
    ('East',  '20180630', 1020, 425),
    ('East',  '20180731', 1117, 450),
    ('East',  '20180831', 1090, 443),
    ('East',  '20180930',  980, 210),
    ('East',  '20181031',  960, 190),
    ('East',  '20181130',  990, 195),
    ('East',  '20181231', 1020, 330),
    ('West',  '20180131',  992, 333),
    ('West',  '20180228',  980, 265),
    ('West',  '20180331', 1005, 397),
    ('West',  '20180430',  960, 265),
    ('West',  '20180531',  982, 344),
    ('West',  '20180630', 1017, 399),
    ('West',  '20180731', 1080, 442),
    ('West',  '20180831', 1045, 413),
    ('West',  '20180930',  940, 217),
    ('West',  '20181031',  925, 302),
    ('West',  '20181130',  937, 287),
    ('West',  '20181231',  958, 271),
    ('North', '20180531',  780, 380),
    ('North', '20180630',  810, 400),
    ('North', '20180731',  835, 411),
    ('North', '20180831',  809, 415),
    ('North', '20180930',  730, 390),
    ('South', '20180131',  630, 210),
    ('South', '20180228',  550, 190),
    ('South', '20180331',  607, 215);
This is what I want in my summary table:
Gate CatDesc Jan Feb Mar Apr May Jun July Aug Sep Oct Nov Dec
East Number of Visitors 1000 990 970 977 960 1020 1117 1090 980 960 990 1020
East Number of Children 350 225 390 290 375 425 450 443 210 190 195 330
East % of Children 35.00 22.73 40.21 29.68 39.06 41.67 40.29 40.64 21.43 19.79 19.70 32.35
West Number of Visitors 992 980 1005 960 982 1017 1080 1045 940 925 937 958
West Number of Children 333 265 397 265 344 399 442 413 217 302 287 271
West % of Children 33.57 27.04 39.50 27.60 35.03 39.23 40.93 39.52 23.09 32.65 30.63 28.29
North Number of Visitors 0 0 0 0 0 810 835 809 730 0 0 0
North Number of Children 0 0 0 0 0 400 411 415 390 0 0 0
North % of Children 0 0 0 0 0 49.38 49.22 51.30 53.42 0 0 0
South Number of Visitors 630 550 607 0 0 0 0 0 0 0 0 0
South Number of Children 210 190 215 0 0 0 0 0 0 0 0 0
South % of Children 33.33 34.55 35.42 0 0 0 0 0 0 0 0 0
I personally prefer using a Cross Tab to using the PIVOT operator. You need to use a VALUES clause to get your Description, and then you can use the Cross Tab to get the values:
-- Cross tab: one row per gate and KPI, one column per month of @Year.
-- Reads the per-gate, per-month-end rows from #tblDetails.
DECLARE @Year int = 2018;   -- local variables must be named with @

SELECT D.Gate,
       V.CatDesc,
       -- Each month column picks the KPI whose EOM is that month's last day;
       -- a gate with no row for the month shows 0.
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 1,1)) THEN V.KPI END),0) AS [Jan],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 2,1)) THEN V.KPI END),0) AS [Feb],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 3,1)) THEN V.KPI END),0) AS [Mar],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 4,1)) THEN V.KPI END),0) AS [Apr],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 5,1)) THEN V.KPI END),0) AS [May],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 6,1)) THEN V.KPI END),0) AS [Jun],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 7,1)) THEN V.KPI END),0) AS [Jul],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 8,1)) THEN V.KPI END),0) AS [Aug],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 9,1)) THEN V.KPI END),0) AS [Sep],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 10,1)) THEN V.KPI END),0) AS [Oct],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 11,1)) THEN V.KPI END),0) AS [Nov],
       ISNULL(MAX(CASE WHEN D.EOM = EOMONTH(DATEFROMPARTS(@Year, 12,1)) THEN V.KPI END),0) AS [Dec]
FROM #tblDetails D
     -- Unpivot each detail row into three KPI rows; Ordering fixes their display order.
     -- NULLIF turns a zero visitor count into NULL so the percentage is NULL
     -- (shown as 0 by the ISNULL above) instead of raising a divide-by-zero error.
     CROSS APPLY(VALUES(1,'Number of Visitors',D.TotVisitors),
                       (2,'Number of Children',D.TotKids),
                       (3,'% of Children',CONVERT(decimal(8,2),D.TotKids / NULLIF(D.TotVisitors *1.0, 0)*100)))V(Ordering,CatDesc,KPI)
GROUP BY D.Gate,
         V.CatDesc,
         V.Ordering
ORDER BY D.Gate,
         V.Ordering;
Note that a column in SQL Server can only be one data type, so all values returned are a decimal(8,2).
I have a time punch program that outputs the data set below. RECTYPE_43 holds the (1) in and (2) out punches. I need a query that looks at LOGINDATE_43, LOGINTIME_43 and RECTYPE_43 and gets the difference between the 1 and 2 punches.
I thought this would be easier than it has proven to be.
empid_43 RECTYPE_43 LOGINDATE_43 LOGINTIME_43
------------------------------------------------------------
127 1 2016-10-21 00:00:00.000 0558
127 2 2016-10-21 00:00:00.000 1430
127 2 2016-10-21 00:00:00.000 1201
127 1 2016-10-21 00:00:00.000 1228
127 1 2016-10-24 00:00:00.000 0557
127 2 2016-10-24 00:00:00.000 1200
127 1 2016-10-24 00:00:00.000 1228
127 2 2016-10-24 00:00:00.000 1430
2589 2 2016-10-21 00:00:00.000 1431
2589 1 2016-10-21 00:00:00.000 0556
2589 1 2016-10-24 00:00:00.000 0550
2589 2 2016-10-24 00:00:00.000 1431
2589 2 2016-10-24 00:00:00.000 1201
2589 1 2016-10-24 00:00:00.000 1226
69 1 2016-10-24 00:00:00.000 1229
69 2 2016-10-24 00:00:00.000 1430
69 1 2016-10-24 00:00:00.000 0555
69 2 2016-10-24 00:00:00.000 1200
You can use a CTE to get all the punch-ins and then a subquery to find the first punch out that comes after that time...
-- For every punch-in, look up the earliest punch-out of the same employee
-- on the same date that is later than the punch-in time.
;WITH ctePunchIn AS
(
    -- All punch-in rows (RECTYPE_43 = 1).
    SELECT
        p.empid_43,
        p.LOGINDATE_43 AS Date_In,
        p.LOGINTIME_43 AS Time_In
    FROM #Table1 AS p
    WHERE p.[RECTYPE_43] = 1
)
SELECT
    pin.empid_43,
    pin.Date_In,
    pin.Time_In,
    (
        -- Time_Out is NULL when no later punch-out exists on that date.
        SELECT TOP 1
            pout.LOGINTIME_43
        FROM #Table1 AS pout
        WHERE pout.RECTYPE_43   = 2
          AND pout.empid_43     = pin.empid_43
          AND pout.LOGINDATE_43 = pin.Date_In
          AND pout.LOGINTIME_43 > pin.Time_In
        ORDER BY pout.LOGINTIME_43
    ) AS Time_Out
FROM ctePunchIn AS pin
Dazedandconfused's answer works if the logout Time is the same date as the login time, but if the user logs out on a different day to logging in, it will not work.
e.g.
-- Employee 15 punches in late on one day and out shortly after midnight on the next.
INSERT INTO Punch
    (empId_43, RecType_43, LoginDate_43, LoginTime_43)
VALUES
    (15, 1, '2016-01-01', '2305'),
    (15, 2, '2016-01-02', '0005');
In order to accommodate this, you need to know what the next item in the table is for that employee. With that, you can ensure that the next item is also a logout event. This will help capture situations where someone has forgotten to punch out.
Extending the CTE can provide a more complete solution:
-- Pair each punch-in with the very next punch of the same employee,
-- but only when that next punch is a punch-out.
WITH Data AS
(
    -- Number every punch per employee in date/time order.
    SELECT
        p.empId_43,
        p.RecType_43,
        p.LoginDate_43,
        p.LoginTime_43,
        ROW_NUMBER() OVER (PARTITION BY p.empId_43
                           ORDER BY p.LoginDate_43, p.LoginTime_43) AS RowNum
    FROM Punch AS p
)
SELECT
    PIn.empId_43      AS [Employee],
    PIn.LoginDate_43  AS [LoginDate],
    PIn.LoginTime_43  AS [LoginTime],
    POut.LoginDate_43 AS [LogoutDate],
    POut.LoginTime_43 AS [LogoutTime]
FROM Data AS PIn
-- Logout columns stay NULL when the following punch is not a punch-out.
LEFT JOIN Data AS POut
    ON  POut.empId_43   = PIn.empId_43
    AND POut.RowNum     = PIn.RowNum + 1
    AND POut.RecType_43 = 2
WHERE PIn.RecType_43 = 1
ORDER BY
    PIn.empId_43,
    PIn.LoginDate_43,
    PIn.LoginTime_43;
However, Row_Number can be inefficient. Doing this is best when looking at a small subset (e.g. a particular date range, etc).
slightly different way of doing it:
-- Pair the n-th punch-in of each employee with that employee's n-th punch-out.
-- Returns one row per punch-in; dateTime_out is NULL when there is no
-- punch-out with the same ordinal.
;WITH Stamped AS
(
    SELECT
        p.empId_43,
        p.RecType_43,
        -- Build a full date-time from the date column plus the punch time.
        -- Assumes LoginTime_43 is a 4-character 'HHmm' string as in the sample
        -- data (e.g. '0558') -- TODO confirm against the real column type.
        DATEADD(minute, CAST(RIGHT(p.LoginTime_43, 2) AS int),
            DATEADD(hour, CAST(LEFT(p.LoginTime_43, 2) AS int),
                p.LoginDate_43)) AS login,
        -- Ins and outs are numbered separately for each employee.
        ROW_NUMBER() OVER (PARTITION BY p.empId_43, p.RecType_43
                           ORDER BY p.LoginDate_43, p.LoginTime_43) AS RowNum
    FROM Punch AS p
    WHERE p.RecType_43 IN (1, 2)
)
SELECT
    punchIn.empId_43 AS empid_43,
    punchIn.login    AS dateTime_in,
    punchOut.login   AS dateTime_out
FROM Stamped AS punchIn
LEFT JOIN Stamped AS punchOut
    ON  punchOut.empId_43   = punchIn.empId_43
    AND punchOut.RowNum     = punchIn.RowNum
    AND punchOut.RecType_43 = 2
WHERE punchIn.RecType_43 = 1
assuming all punchin rows have a corresponding punchout row
I have a problem creating a query with DISTINCT and TOP. What I want is a top 25 by maximum value, distinct on the parameter column.
My query now is:
-- Top 25 distinct rows by max whose start and end both fall inside the window.
SELECT DISTINCT TOP 25
    startDate,
    parameter,
    min,
    max,
    avg,
    amount_called
FROM VisualisatieData.dbo.metric_data_by_day_parameter
WHERE startDate BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
  AND endDate   BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
ORDER BY max DESC
This returns the following:
2013-05-31 01:08:26.000 P1 0 318386 1662 795
2013-05-31 00:01:36.000 P2 0 312325 1554 806
2013-05-31 00:01:36.000 P3 0 124827 25877 14
2013-05-30 08:49:19.000 P4 0 91992 11381 54
2013-05-31 01:05:54.000 P5 47 42410 497 499
2013-05-31 01:05:54.000 P6 16 42395 469 499
2013-05-31 01:05:55.000 P7 0 41380 244 498
2013-05-31 00:01:36.000 P8 328 35225 5305 8
2013-05-31 05:34:10.000 P4 16 12137 1208 17
2013-05-31 03:50:18.000 P9 0 11137 4687 23
2013-05-31 01:23:41.000 P10 391 8013 3237 95
2013-05-31 01:23:41.000 P11 375 7998 3174 98
2013-05-31 01:19:55.000 P12 453 7263 2437 58
2013-05-31 07:57:05.000 P13 2343 5639 3991 2
2013-05-31 03:32:21.000 P14 1687 5077 2993 9
2013-05-30 08:48:57.000 P15 984 5061 2419 12
2013-05-30 08:48:57.000 P16 984 5061 2419 12
2013-05-31 01:40:37.000 P15 1281 5045 2619 10
2013-05-31 01:40:37.000 P16 1281 5045 2619 10
2013-05-31 03:08:51.000 P17 562 4608 1302 18
2013-05-30 16:59:05.000 P18 4202 4202 4202 1
2013-05-30 16:59:05.000 P19 4202 4202 4202 1
2013-05-31 03:37:30.000 P20 875 4139 2681 18
2013-05-31 03:08:51.000 P21 547 3999 1203 18
2013-05-31 03:19:17.000 P22 31 3702 1399 5
This time there are 3 duplicate parameters, which I don't want. Depending on the time selection there are more duplicates. I think this doesn't work because the DISTINCT must be applied to the parameter column only.
I tried the following:
-- Same top-25 query, reading from a derived table that also computes a
-- per-parameter row number (subselect); that number is not used by the outer query.
SELECT DISTINCT TOP 25
    A.startDate,
    A.parameter,
    A.min,
    A.max,
    A.avg,
    A.amount_called
FROM
(
    SELECT
        startDate,
        endDate,
        parameter,
        min,
        max,
        avg,
        amount_called,
        ROW_NUMBER() OVER (PARTITION BY parameter ORDER BY max DESC) AS subselect
    FROM VisualisatieData.dbo.metric_data_by_day_parameter
) AS A
WHERE A.startDate BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
  AND A.endDate   BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
ORDER BY A.max DESC
But this doesnt work either, it returns the same as the first query.
I hope i described my problem clearly, if you want more information ask me.
How can i change my query so i get a top 25 with maximum values and no duplicate parameters? Suggestions are appreciated!
Thanks in advance!
Try to
-- Top 25 parameters by their highest max inside the window, one row per parameter.
-- ROW_NUMBER keeps exactly one row per parameter even when two rows of the
-- same parameter share the same max, and the window filter is written once.
SELECT TOP 25
    ranked.startDate,
    ranked.parameter,
    ranked.[min],
    ranked.[max],
    ranked.[avg],
    ranked.amount_called
FROM
(
    SELECT
        tb.startDate,
        tb.parameter,
        tb.[min],
        tb.[max],
        tb.[avg],
        tb.amount_called,
        -- rn = 1 is the row with the largest max for the parameter;
        -- the latest startDate wins a tie.
        ROW_NUMBER() OVER (PARTITION BY tb.parameter
                           ORDER BY tb.[max] DESC, tb.startDate DESC) AS rn
    FROM VisualisatieData.dbo.metric_data_by_day_parameter AS tb
    WHERE tb.startDate BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
      AND tb.endDate   BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
) AS ranked
WHERE ranked.rn = 1
ORDER BY ranked.[max] DESC
then the same parameter will be removed
try this
-- One row per parameter (the one with the highest max inside the window),
-- then the 25 largest of those.
-- SQL Server has no LIMIT and does not allow ORDER BY inside a derived table
-- without TOP, so TOP and ORDER BY are applied on the outer query.
SELECT TOP 25
    mytop.startDate,
    mytop.parameter,
    mytop.[min],
    mytop.[max],
    mytop.[avg],
    mytop.amount_called
FROM
(
    SELECT
        startDate,
        parameter,
        [min],
        [max],
        [avg],
        amount_called,
        -- The date filter sits inside this query so rows outside the window
        -- cannot take a parameter's rank 1.
        ROW_NUMBER() OVER (PARTITION BY parameter ORDER BY [max] DESC) AS subselect
    FROM VisualisatieData.dbo.metric_data_by_day_parameter
    WHERE startDate BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
      AND endDate   BETWEEN '2013-05-30 08:46' AND '2013-05-31 16:00'
) AS mytop
WHERE mytop.subselect = 1
ORDER BY mytop.[max] DESC;
I am working on query where I need next nth no of row. My table structure is like
ID StockName StockDate DayOpen DayHigh DayLow DayClose
--------------------------------------------------------------------
60 IDBI 2014-01-01 66.50 67.80 66.50 67.60
197 IDBI 2014-01-02 67.55 69.20 65.25 65.60
334 IDBI 2014-01-03 65.00 66.40 64.35 66.15
471 IDBI 2014-01-06 66.15 66.35 65.10 65.55
608 IDBI 2014-01-07 66.10 66.15 63.85 64.25
745 IDBI 2014-01-08 64.00 67.10 63.10 66.80
882 IDBI 2014-01-09 66.60 67.80 64.50 64.75
1019 IDBI 2014-01-10 65.00 65.90 63.75 64.10
1156 IDBI 2014-01-13 63.85 65.00 63.25 64.20
1293 IDBI 2014-01-14 64.00 64.95 63.80 64.05
What I want from output is column name which will give me next 5th row date
E.g. For 1st row new column should return value of next 5th row date ie 2014-01-08 same for 2nd row it should return 2014-01-09 date.
And I can't use datediff with -5 day count as these are working day data excluding weekend days
How do I get this value without using while loop or cursor?
Use a CTE to return the base table plus ROW_NUMBER so that you can LEFT JOIN the CTE to itself on the ROW_NUMBER of the new [Next5thDate] column being 5 rows ahead of current row:
SET NOCOUNT ON;
SET ANSI_NULLS ON;

-- Sample data in a table variable (table variables must be named with @).
DECLARE @Data TABLE (
    ID INT NOT NULL PRIMARY KEY CLUSTERED,
    StockName VARCHAR(50) NOT NULL,
    StockDate DATE NOT NULL,
    DayOpen MONEY NOT NULL,
    DayHigh MONEY NOT NULL,
    DayLow MONEY NOT NULL,
    DayClose MONEY NOT NULL,
    UNIQUE(StockDate)
);

INSERT INTO @Data (ID, StockName, StockDate, DayOpen, DayHigh, DayLow, DayClose)
VALUES
    (60,   'IDBI', '2014-01-01', 66.50, 67.80, 66.50, 67.60),
    (197,  'IDBI', '2014-01-02', 67.55, 69.20, 65.25, 65.60),
    (334,  'IDBI', '2014-01-03', 65.00, 66.40, 64.35, 66.15),
    (471,  'IDBI', '2014-01-06', 66.15, 66.35, 65.10, 65.55),
    (608,  'IDBI', '2014-01-07', 66.10, 66.15, 63.85, 64.25),
    (745,  'IDBI', '2014-01-08', 64.00, 67.10, 63.10, 66.80),
    (882,  'IDBI', '2014-01-09', 66.60, 67.80, 64.50, 64.75),
    (1019, 'IDBI', '2014-01-10', 65.00, 65.90, 63.75, 64.10),
    (1156, 'IDBI', '2014-01-13', 63.85, 65.00, 63.25, 64.20),
    (1293, 'IDBI', '2014-01-14', 64.00, 64.95, 63.80, 64.05);

-- Number the rows by date, then join each row to the row numbered five higher.
-- The offset counts rows, not calendar days, so dates missing from the table
-- (weekends here) are skipped automatically.
;WITH cte AS
(
    SELECT d.ID, d.StockName, d.StockDate, d.DayOpen, d.DayHigh, d.DayLow, d.DayClose,
           ROW_NUMBER() OVER (ORDER BY d.StockDate ASC) AS [RowNum]
    FROM   @Data d
)
SELECT d1.ID, d1.StockName, d1.StockDate, d1.DayOpen, d1.DayHigh,
       d1.DayLow, d1.DayClose,
       d2.StockDate AS [Next5thDate]   -- NULL for the last five rows
FROM   cte d1
LEFT JOIN cte d2
        ON d2.RowNum = (d1.RowNum + 5)
ORDER BY d1.StockDate;
Results:
ID StockName StockDate DayOpen DayHigh DayLow DayClose Next5thDate
60 IDBI 2014-01-01 66.50 67.80 66.50 67.60 2014-01-08
197 IDBI 2014-01-02 67.55 69.20 65.25 65.60 2014-01-09
334 IDBI 2014-01-03 65.00 66.40 64.35 66.15 2014-01-10
471 IDBI 2014-01-06 66.15 66.35 65.10 65.55 2014-01-13
608 IDBI 2014-01-07 66.10 66.15 63.85 64.25 2014-01-14
745 IDBI 2014-01-08 64.00 67.10 63.10 66.80 NULL
882 IDBI 2014-01-09 66.60 67.80 64.50 64.75 NULL
1019 IDBI 2014-01-10 65.00 65.90 63.75 64.10 NULL
1156 IDBI 2014-01-13 63.85 65.00 63.25 64.20 NULL
1293 IDBI 2014-01-14 64.00 64.95 63.80 64.05 NULL
-- Rank the rows by date (Id breaks ties), then put each row next to the row
-- ranked five places after it.
;With RnkedItems As
    (
    Select Id, StockName, StockDate, DayOpen, DayHigh, DayLow, DayClose
        , Row_Number() Over ( Order By StockDate, Id ) As Rnk
    From MyTable
    )
Select Original.Id, Original.StockName, Original.StockDate
    , Original.DayOpen, Original.DayHigh, Original.DayLow, Original.DayClose
    , Ahead.StockDate As Next5thDate    -- NULL when fewer than five rows follow
From RnkedItems As Original
    Left Join RnkedItems As Ahead
        On Ahead.Rnk = Original.Rnk + 5
Order By Original.Rnk;
I'm assuming you want to return both the target row and the row five entries after the target row on the same line.
Try this:
-- Number the rows by date and show, next to every row, the date of the row
-- five positions later. Filtering on rn > 5 only drops the first five rows;
-- the self-join on rn + 5 is what returns the 5th-next date.
;with EnumeratedStocks as (
  select rn = row_number() over(order by StockDate), *
  from Stocks
)
select es.*, ahead.StockDate as Next5thDate   -- NULL when fewer than five rows follow
from EnumeratedStocks es
left join EnumeratedStocks ahead
  on ahead.rn = es.rn + 5
order by es.rn
You can account for next 5th date falling on weekends by extracting name of day from date that falls on next 5th day by skipping date by two extra more days if its Saturday and by one extra day if its Sunday.
-- Calendar-only estimate of the 5th next trading day, in T-SQL.
-- Five working days after a Monday-Friday date always crosses exactly one
-- weekend, so the offset is 7 calendar days; from a Saturday it is 6 and
-- from a Sunday 5.
-- The weekday is taken as days since 1900-01-01 (a Monday) modulo 7
-- (0 = Monday ... 5 = Saturday, 6 = Sunday), which does not depend on the
-- session's language or DATEFIRST setting.
-- NOTE(review): this ignores market holidays; offsetting by row number
-- against the stored trading dates handles those as well.
Select s.*
    , dateadd(day,
              Case datediff(day, '19000101', s.stockdate) % 7
                  When 5 Then 6
                  When 6 Then 5
                  Else 7
              End,
              s.stockdate) As newStockDate
From tableName As s