Need help in SQL Query 5 - sql-server

I am using SQL Server 2008. I have data by each employee for each day. Below is the sample data.
-- Sample attendance data: one row per employee per calendar day of January 2015.
-- ShiftCode 'WO' is a weekly off, 'FS' a first-shift working day.
WITH RawData (EmpNo, AttendanceDate, ShiftCode) AS
(
    SELECT v.EmpNo, v.AttendanceDate, v.ShiftCode
    FROM (VALUES
        -- employee 10001: a single weekly off every seventh day
        ('10001', '2015-01-01', 'FS'),
        ('10001', '2015-01-02', 'WO'),
        ('10001', '2015-01-03', 'FS'),
        ('10001', '2015-01-04', 'FS'),
        ('10001', '2015-01-05', 'FS'),
        ('10001', '2015-01-06', 'FS'),
        ('10001', '2015-01-07', 'FS'),
        ('10001', '2015-01-08', 'FS'),
        ('10001', '2015-01-09', 'WO'),
        ('10001', '2015-01-10', 'FS'),
        ('10001', '2015-01-11', 'FS'),
        ('10001', '2015-01-12', 'FS'),
        ('10001', '2015-01-13', 'FS'),
        ('10001', '2015-01-14', 'FS'),
        ('10001', '2015-01-15', 'FS'),
        ('10001', '2015-01-16', 'WO'),
        ('10001', '2015-01-17', 'FS'),
        ('10001', '2015-01-18', 'FS'),
        ('10001', '2015-01-19', 'FS'),
        ('10001', '2015-01-20', 'FS'),
        ('10001', '2015-01-21', 'FS'),
        ('10001', '2015-01-22', 'FS'),
        ('10001', '2015-01-23', 'WO'),
        ('10001', '2015-01-24', 'FS'),
        ('10001', '2015-01-25', 'FS'),
        ('10001', '2015-01-26', 'FS'),
        ('10001', '2015-01-27', 'FS'),
        ('10001', '2015-01-28', 'FS'),
        ('10001', '2015-01-29', 'FS'),
        ('10001', '2015-01-30', 'WO'),
        ('10001', '2015-01-31', 'FS'),
        -- employee 10002: two consecutive weekly offs per week
        ('10002', '2015-01-01', 'FS'),
        ('10002', '2015-01-02', 'WO'),
        ('10002', '2015-01-03', 'WO'),
        ('10002', '2015-01-04', 'FS'),
        ('10002', '2015-01-05', 'FS'),
        ('10002', '2015-01-06', 'FS'),
        ('10002', '2015-01-07', 'FS'),
        ('10002', '2015-01-08', 'FS'),
        ('10002', '2015-01-09', 'WO'),
        ('10002', '2015-01-10', 'WO'),
        ('10002', '2015-01-11', 'FS'),
        ('10002', '2015-01-12', 'FS'),
        ('10002', '2015-01-13', 'FS'),
        ('10002', '2015-01-14', 'FS'),
        ('10002', '2015-01-15', 'FS'),
        ('10002', '2015-01-16', 'WO'),
        ('10002', '2015-01-17', 'WO'),
        ('10002', '2015-01-18', 'FS'),
        ('10002', '2015-01-19', 'FS'),
        ('10002', '2015-01-20', 'FS'),
        ('10002', '2015-01-21', 'FS'),
        ('10002', '2015-01-22', 'FS'),
        ('10002', '2015-01-23', 'WO'),
        ('10002', '2015-01-24', 'WO'),
        ('10002', '2015-01-25', 'FS'),
        ('10002', '2015-01-26', 'FS'),
        ('10002', '2015-01-27', 'FS'),
        ('10002', '2015-01-28', 'FS'),
        ('10002', '2015-01-29', 'FS'),
        ('10002', '2015-01-30', 'WO'),
        ('10002', '2015-01-31', 'WO')
    ) AS v (EmpNo, AttendanceDate, ShiftCode)
)
SELECT *
FROM RawData
ORDER BY EmpNo, AttendanceDate
How can I write a SQL query that produces the following output from this sample data? Each employee's work week starts on the day after a weekly off, which can fall on any day (Mon, Tue, etc.). The shift codes are WO: weekly off, FS: first shift, SS: second shift.
EmpNo WeekFrom WeekTo
10001 2015-01-01 2015-01-02
10001 2015-01-03 2015-01-09
10001 2015-01-10 2015-01-16
10001 2015-01-17 2015-01-23
10001 2015-01-24 2015-01-30
10001 2015-01-31 2015-01-31
10002 2015-01-01 2015-01-03
10002 2015-01-04 2015-01-10
10002 2015-01-11 2015-01-17
10002 2015-01-18 2015-01-24
10002 2015-01-25 2015-01-31
I found a solution, but it takes quite a long time on the live table with 1 million rows. Have I done something wrong in the query, or is there a better way of doing this?
-- Numbers each employee's work weeks by walking the rows in (EmpNo, AttendanceDate)
-- order: a new week starts whenever a working day follows a weekly off ('WO').
-- Output: one row per (EmpNo, WeekNo) with the first and last date of that week.
WITH RawData as
(
-- Insert above data here.
)
,ProcessData AS (
    -- RowID gives every row a gap-free position so the recursion can step
    -- from one row to the next with RowID + 1.
    SELECT EmpNo, AttendanceDate, ShiftCode,
           RowID = ROW_NUMBER() OVER (ORDER BY EmpNo, AttendanceDate)
    FROM RawData
)
,FinalData AS (
    -- Anchor: the very first row opens week 1.
    SELECT EmpNo, AttendanceDate, ShiftCode, RowID, WeekNo = 1
    FROM ProcessData DA
    WHERE RowID = 1
    UNION ALL
    -- Step: restart at week 1 when the employee changes; otherwise keep the
    -- previous week number and add 1 only on a 'WO' -> working-day transition.
    SELECT DA.EmpNo, DA.AttendanceDate, DA.ShiftCode, DA.RowID,
           WeekNo = CASE
                        WHEN FinalData.EmpNo <> DA.EmpNo THEN 1
                        ELSE FinalData.WeekNo
                             + CASE WHEN FinalData.ShiftCode = 'WO' AND DA.ShiftCode <> 'WO' THEN 1 ELSE 0 END
                    END
    FROM FinalData
    INNER JOIN ProcessData DA ON DA.RowID = FinalData.RowID + 1
)
SELECT EmpNo, MIN(AttendanceDate) AS StartDate, MAX(AttendanceDate) AS EndDate, WeekNo
FROM FinalData
GROUP BY EmpNo, WeekNo
ORDER BY EmpNo, WeekNo
-- The recursion uses one level per input row. The default limit is 100 levels,
-- so without this hint any input longer than 101 rows aborts with error 530.
OPTION (MAXRECURSION 0)

Try this:
SQL Fiddle
-- Returns one row per employee work week (EmpNo, WeekFrom, WeekTo).
-- A closed week ends on the last day of a run of weekly offs ('WO'); an
-- employee's trailing, still-open week ends on the employee's last row.
;WITH RawData AS (
-- Your insert statements here
),
Cte AS(
    -- Gaps-and-islands over the WO rows only: consecutive WO days of one
    -- employee get the same grp value. RN = 1 marks the latest day of each run.
    SELECT *,
        RN = ROW_NUMBER() OVER(PARTITION BY EmpNo, grp ORDER BY AttendanceDate DESC)
    FROM (
        SELECT *,
            grp = DATEADD(DAY, -ROW_NUMBER() OVER(PARTITION BY EmpNo ORDER BY AttendanceDate), AttendanceDate)
        FROM RawData
        WHERE ShiftCode = 'WO'
    )t
),
CteWeekOff AS(
    -- The closing weekly-off day of every week.
    SELECT EmpNo, AttendanceDate, ShiftCode FROM Cte WHERE RN = 1
),
CteFinal AS(
    -- Same island technique over the working days: each run of consecutive
    -- non-WO days becomes one row with its first and last date.
    SELECT
        EmpNo,
        WeekFrom = MIN(AttendanceDate),
        Weekto = MAX(AttendanceDate)
    FROM (
        SELECT *,
            grp = DATEADD(DAY, - ROW_NUMBER() OVER(PARTITION BY EmpNo ORDER BY AttendanceDate), AttendanceDate)
        FROM RawData
        WHERE ShiftCode <> 'WO'
    )t
    GROUP BY EmpNo, grp
)
-- Closed weeks: start = latest working-day run that began on or before the
-- closing weekly off.
SELECT
    w.EmpNo,
    WeekFrom = x.WeekFrom,
    WeekTo = w.AttendanceDate
FROM CteWeekOff w
CROSS APPLY(
    SELECT TOP 1 r.WeekFrom
    FROM CteFinal r
    WHERE
        r.EmpNo = w.EmpNo
        AND r.WeekFrom <= w.AttendanceDate
    ORDER BY r.WeekFrom DESC
)x(WeekFrom)
UNION ALL
-- Trailing open week: only when the employee's last row is a working day.
SELECT
    t.EmpNo,
    WeekFrom = x.WeekFrom,
    WeekTo = t.AttendanceDate
FROM (
    SELECT *, RN = ROW_NUMBER() OVER(PARTITION BY EmpNo ORDER BY AttendanceDate DESC)
    FROM RawData
)t
CROSS APPLY(
    -- Must return r.WeekFrom. CteFinal has no AttendanceDate column, so an
    -- unqualified AttendanceDate here would bind to the outer row t and always
    -- report the last day as the week start. "<=" keeps a one-day trailing
    -- week (its run starts on the last day itself).
    SELECT TOP 1 r.WeekFrom
    FROM CteFinal r
    WHERE
        r.EmpNo = t.EmpNo
        AND r.WeekFrom <= t.AttendanceDate
    ORDER BY r.WeekFrom DESC
)x(WeekFrom)
WHERE
    t.RN = 1
    AND t.ShiftCode <> 'WO'
ORDER BY EmpNo, WeekFrom

Finally this worked. 5 seconds on 230,000 records. I will go ahead with my solution. Thanks for your time. Hope this solution helps someone.
-- Step 1 : Save it to temp table
-- RowID is the gap-free position of each row in (EmpNo, AttendanceDate) order.
SELECT EmpNo, AttendanceDate, ShiftCode,
       RowID = ROW_NUMBER() OVER (ORDER BY EmpNo, AttendanceDate),
       WeekNo = 1
INTO #RawData
FROM -- My table

-- Every recursion step below looks up exactly one row by RowID. Without an
-- index that lookup scans the whole temp table once per row.
CREATE UNIQUE CLUSTERED INDEX IX_RawData_RowID ON #RawData (RowID);

-- Step 2 : Use temp table
;WITH FinalData
AS (
    -- Anchor: the first row opens week 1.
    SELECT EmpNo, AttendanceDate, ShiftCode, RowID, WeekNo = 1
    FROM #RawData DA
    WHERE RowID = 1
    UNION ALL
    -- Step: restart at week 1 for a new employee; otherwise carry the week
    -- number forward and add 1 when a working day follows a weekly off.
    SELECT DA.EmpNo, DA.AttendanceDate, DA.ShiftCode, DA.RowID,
           WeekNo = CASE
                        WHEN FinalData.EmpNo <> DA.EmpNo THEN 1
                        ELSE FinalData.WeekNo
                             + CASE WHEN FinalData.ShiftCode = 'WO' AND DA.ShiftCode <> 'WO' THEN 1 ELSE 0 END
                    END
    FROM FinalData
    INNER JOIN #RawData DA ON DA.RowID = FinalData.RowID + 1
)
SELECT EmpNo, MIN(AttendanceDate) AS StartDate, MAX(AttendanceDate) AS EndDate, WeekNo
FROM FinalData
GROUP BY EmpNo, WeekNo
ORDER BY EmpNo, WeekNo
-- One recursion level per row: lift the default limit of 100 levels.
OPTION (MAXRECURSION 0)

Related

Find Non Consecutive date in SQL Server

I want to find the missing, non-consecutive dates between two consecutive dates.
I am posting my SQL query and temp tables to find out the results.
But I am not getting the proper results
Here is my SQL Query
-- Rebuild the sample table. The OBJECT_ID guard lets the script run in a fresh
-- session, where an unconditional DROP would raise "table does not exist".
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp
create table #temp(an varchar(20),dt date)
insert into #temp (an, dt)
select '2133783715' , '2016-10-16' union all
select '5107537880' , '2016-10-15' union all
select '6619324250' , '2016-10-15' union all
select '7146586717' , '2016-10-15' union all
select '7472381321' , '2016-10-12' union all
select '7472381321' , '2016-10-13' union all
select '7472381321' , '2016-10-14' union all
select '7472381321' , '2016-10-24' union all
select '8186056340' , '2016-10-15' union all
select '9099457123' , '2016-10-12' union all
select '9099457123' , '2016-10-13' union all
select '9099457123' , '2016-10-14' union all
select '9099457123' , '2016-10-23' union all
select '9099457123' , '2016-11-01' union all
select '9099457123' , '2016-11-02' union all
select '9099457123' , '2016-11-03' union all
select '9165074784' , '2016-10-16'
IF OBJECT_ID('tempdb..#final') IS NOT NULL
    DROP TABLE #final
-- Gaps-and-islands: within one an, consecutive dates minus their row number
-- yield the same Diff value, so grouping by (an, Diff) gives one row per run
-- of consecutive usage days.
SELECT an,MIN(dt) AS MinDate,MAX(dt) AS MaxDate, COUNT(*) AS ConsecutiveUsage
--DateDiff(Day,LAG(MAX(dt)) OVER (partition by an ORDER BY an),MAX(dt)) nonusageDate
into #final
FROM(
    SELECT an,dt,
        DATEDIFF(D, ROW_NUMBER() OVER(partition by an ORDER BY dt),dt) AS Diff
    FROM #temp c
)P
GROUP BY an,diff
-- Order by column names instead of ordinal position; MinDate makes the order
-- of several runs of the same an deterministic.
select * from #final order by an, MinDate
an MinDate MaxDate ConsecutiveUsage
2133783715 2016-10-16 2016-10-16 1
5107537880 2016-10-15 2016-10-15 1
6619324250 2016-10-15 2016-10-15 1
7146586717 2016-10-15 2016-10-15 1
7472381321 2016-10-12 2016-10-14 3
7472381321 2016-10-24 2016-10-24 1
7472381321 2016-10-27 2016-10-28 1
8186056340 2016-10-15 2016-10-15 1
9099457123 2016-10-12 2016-10-14 3
9099457123 2016-10-23 2016-10-23 1
9165074784 2016-10-16 2016-10-16 1
But I want results of non-usage date.
I want to get those AN values that have not been used for 10 consecutive days.
So here output should be like this:-
an minusagesdate maxusagedate ConsecutiveNotUseddays
7472381321 2016-10-15 2016-10-23 9
7472381321 2016-10-25 2016-10-26 2
9099457123 2016-10-15 2016-10-22 8
So I just want to find out only consecutive not used dates count and their min and max dates .
try this :
-- Lists every gap between two successive usage dates of the same an:
-- the first unused day, the last unused day, and the number of unused days.
with ranked as (
    -- Number ALL usage dates per an. No row may be filtered out here: each
    -- date has to be able to find its immediate predecessor (rang - 1), and
    -- dropping early dates of a run would hide the gap that follows them.
    select f1.an, f1.dt,
           ROW_NUMBER() over(partition by f1.an order by f1.dt) rang
    from #temp f1
)
select cur.an,
       DATEADD(DAY, 1, prev.dt)            as minusagesdate,
       DATEADD(DAY, -1, cur.dt)            as maxusagesdate,
       datediff(day, prev.dt, cur.dt) - 1  as ConsecutiveNotUseddays
from ranked cur
inner join ranked prev
    on prev.an = cur.an
   and prev.rang + 1 = cur.rang
-- adjacent dates (difference of one day) leave no unused day in between
where datediff(day, prev.dt, cur.dt) - 1 > 0
or like this
-- Same result as a self-join on rang, written with OUTER APPLY: one row per
-- gap between two successive usage dates of the same an.
with ranked as (
    -- Number ALL usage dates per an; filtering rows here would break the
    -- "previous date" lookup below and silently lose gaps.
    select f1.an, f1.dt,
           ROW_NUMBER() over(partition by f1.an order by f1.dt) rang
    from #temp f1
)
select f1.an,
       DATEADD(DAY, 1, f3.dtbefore)            minusagesdate ,
       DATEADD(DAY, -1, f1.dt)                 maxusagesdate ,
       datediff(day, f3.dtbefore, f1.dt) - 1   ConsecutiveNotUseddays
from ranked f1
outer apply
(
    -- rang is unique per an, so at most one predecessor row matches
    select top 1 f2.dt as dtbefore
    from ranked f2
    where f1.an = f2.an and f2.rang + 1 = f1.rang
) f3
-- rows without a predecessor have dtbefore = NULL and are removed here too
where datediff(day, f3.dtbefore, f1.dt) - 1 > 0
It looks like you're trying to count the number of days not used between the mindate and the maxdate for each an. If that's the case, then this should do the trick:
-- Per an: first and last usage date, number of distinct days used, and the
-- number of days inside [min_dt, max_dt] on which the an was not used.
select an, min(dt) as min_dt, max(dt) as max_dt
, count(distinct dt) as daysused --this counts each day used, but only once
-- DATEDIFF counts day boundaries crossed, so the inclusive number of calendar
-- days from min_dt to max_dt is DATEDIFF + 1 (e.g. 12th..24th = 13 days)
, datediff(day,min(dt),max(dt)) + 1 as totaldays
, datediff(day,min(dt),max(dt)) + 1 - count(distinct dt) as daysnotused
--This takes total days - used days to give non-used days
from #temp c
group by an
having datediff(day,min(dt),max(dt)) + 1 - count(distinct dt) >= 10
As I understood you need this:
-- Pairs each usage date with the next usage date of the same an and keeps
-- the pairs that lie at least 10 days apart.
;WITH numbered AS (
    SELECT t.an,
           t.dt,
           ROW_NUMBER() OVER (PARTITION BY t.an ORDER BY t.dt) AS seq
    FROM #temp AS t
)
SELECT cur.an,
       cur.dt AS MinDate,
       nxt.dt AS MaxDate,
       DATEDIFF(day, cur.dt, nxt.dt) AS ConsecutiveNotUseddays
FROM numbered AS cur
INNER JOIN numbered AS nxt
    ON nxt.an = cur.an
   AND nxt.seq = cur.seq + 1
WHERE DATEDIFF(day, cur.dt, nxt.dt) >= 10
Output:
an MinDate MaxDate ConsecutiveNotUseddays
7472381321 2016-10-14 2016-10-24 10
For 9099457123 I got two rows with 9 in ConsecutiveNotUseddays. You can check results removing WHERE statement.
On any newer version of SQL Server this should be easy:
-- Attaches the previous usage date of the same an to every row via LAG and
-- keeps the rows that are at least 10 days after that previous date.
WITH with_previous AS (
    SELECT t.*,
           LAG(t.dt) OVER (PARTITION BY t.an ORDER BY t.dt) AS dt_lag
    FROM #temp AS t
)
SELECT p.*,
       DATEDIFF(day, p.dt_lag, p.dt)
FROM with_previous AS p
WHERE DATEDIFF(day, p.dt_lag, p.dt) >= 10

Repeat the first date within a group

I would like the first date of each group to repeat for the rest of the rows within each group.
You could use window expressions and grouping;
FIRST_VALUE (Transact-SQL)
You would need to partition by your first column to get the split of A and B.
For example;
-- Demo: every row shows the earliest [Date] found within its own [Code].
WITH cteTempData AS
(
    -- the first SELECT names the columns and fixes the [Date] type
    SELECT 'A' AS [Code], CAST('2015-9-4' AS date) AS [Date]
    UNION ALL SELECT 'A', '2015-9-4'
    UNION ALL SELECT 'A', '2015-9-4'
    UNION ALL SELECT 'A', '2015-9-16'
    UNION ALL SELECT 'B', '2015-9-16'
    UNION ALL SELECT 'B', '2015-9-22'
    UNION ALL SELECT 'B', '2015-9-22'
    UNION ALL SELECT 'B', '2015-10-26'
    UNION ALL SELECT 'B', '2015-10-30'
)
SELECT
    src.[Code],
    src.[Date],
    FIRST_VALUE(src.[Date]) OVER (PARTITION BY src.[Code] ORDER BY src.[Date]) AS [First_Date]
FROM cteTempData AS src
Using the first_value syntax also allows you to work with other columns in that ordered record....
-- Demo: FIRST_VALUE can return any column of the first row of each [Code]
-- partition (ordered by [Date]), here both the date and its comment.
with cteTempData
(
[Code]
, [Date]
, [Comment]
)
as
(
-- the cast on the first row makes [Date] a date column for the whole UNION ALL
select 'A',cast('2015-9-4' as date),'One'
union all select 'A','2015-9-4','Two'
union all select 'A','2015-9-4','Three'
union all select 'A','2015-9-16','Four'
union all select 'B','2015-9-16','Five'
union all select 'B','2015-9-22','Six'
union all select 'B','2015-9-22','Seven'
union all select 'B','2015-10-26','Eight'
union all select 'B','2015-10-30','Nine'
)
select
[Code]
, [Date]
, FIRST_VALUE([Date]) over (partition by [Code] order by [Date]) as [First_Date]
-- NOTE(review): the three 'A' rows share the same earliest [Date] and the
-- window orders by [Date] only, so this ORDER BY does not pin down which of
-- 'One'/'Two'/'Three' counts as the first comment; add a tie-breaker column
-- if a specific one is required.
, FIRST_VALUE([Comment]) over (partition by [Code] order by [Date]) as [First_Comment]
from cteTempData
Use MIN() Over ()
-- DECLARE ... TABLE creates a table variable, and variable names must start
-- with @ (the # prefix is only valid for CREATE TABLE temp tables).
Declare @Table table (Grp varchar(25),Date date)
Insert into @Table (Grp, Date) values
('A','2015-09-04'),
('A','2015-09-05'),
('A','2015-09-10'),
('B','2015-10-04'),
('B','2015-10-05'),
('B','2015-10-10')
-- GrpDate repeats the earliest Date of the row's Grp on every row of that Grp
Select *
,GrpDate = min(Date) over (Partition By Grp)
From @Table
Returns
Grp Date GrpDate
A 2015-09-04 2015-09-04
A 2015-09-05 2015-09-04
A 2015-09-10 2015-09-04
B 2015-10-04 2015-10-04
B 2015-10-05 2015-10-04
B 2015-10-10 2015-10-04
You could use MIN with the OVER-clause
-- Template: each row keeps its ColumnA and shows the earliest DateCol found
-- among all rows with the same ColumnA.
SELECT
    src.ColumnA,
    MIN(src.DateCol) OVER (PARTITION BY src.ColumnA) AS DateCol,
    OtherColumns
FROM dbo.TableName AS src
you can go with a CROSS JOIN or FIRST_VALUE.
-- DECLARE ... TABLE creates a table variable, and variable names must start
-- with @ (the # prefix is only valid for CREATE TABLE temp tables).
Declare @Yourtable table (groupCol varchar(25),firstDate date)
Insert into @Yourtable (groupCol, firstDate) values
('A','2015-09-04'),
('A','2015-09-05'),
('A','2015-09-10'),
('B','2015-10-04'),
('B','2015-10-05'),
('B','2015-10-10')
-- Variant 1: CROSS JOIN against the per-group minimum, restricted to the
-- row's own group.
SELECT a.*,b.firstDate
FROM @Yourtable a
CROSS JOIN (SELECT groupCol,MIN(firstDate) firstDate
            FROM @Yourtable b
            GROUP BY groupCol)b
WHERE a.groupCol =b.groupCol
-- OR
-- Variant 2: FIRST_VALUE. The window must be ordered by the date itself;
-- ordering by the partition column leaves every row of a group tied, so the
-- "first" row would be arbitrary.
SELECT a.*,FIRST_VALUE(a.firstDate) OVER (PARTITION BY a.groupCol ORDER BY a.firstDate ASC) AS firstDate
FROM @Yourtable a

Trying to pivot event dates in t-sql without using a cursor

I have the following table:
What I want is to get to this:
EventTypeId 1 and 3 are valid start events and EventTypeId of 2 is the only valid end event.
I have tried to do a pivot, but I don't believe a pivot will get me the multiple events for a person in the result set.
-- Pivot attempt: one row per PersonId with the number of EventDate values
-- for EventTypeId 1, 3 and 2 in separate columns.
SELECT pvt.PersonId, pvt.[1], pvt.[3], pvt.[2]
FROM
(
    SELECT pe.PersonId, pe.EventTypeId, pe.EventDate
    FROM #PersonEvent AS pe
) AS src
PIVOT
(
    COUNT(EventDate)
    FOR EventTypeId IN ([1], [3], [2])
) AS pvt
-- Conditional aggregation attempt: earliest start event (type 1 or 3) and
-- earliest end event (type 2) per person, i.e. a single row per person.
SELECT
    pe.PersonID,
    MIN(CASE WHEN pe.EventTypeId = 1 OR pe.EventTypeId = 3 THEN pe.EventDate END) AS StartDate,
    MIN(CASE WHEN pe.EventTypeId = 2 THEN pe.EventDate END) AS EndDate
FROM #PersonEvent AS pe
GROUP BY pe.PersonID
I can do a cursor, but my original table is over 90,000 rows, and this is to be for a report, so I don't think I can use that option. Any other thoughts that I might be missing?
Assuming the table is called [dbo].[PersonEventRecords] this will work...
-- Pairs every start event (EventTypeId 1 or 3) with the end event
-- (EventTypeId 2) that directly follows it in EventSequence. The full outer
-- join keeps starts without an end and ends without a start, with NULLs on
-- the missing side.
WITH starts AS
(
    SELECT *
    FROM [dbo].[PersonEventRecords]
    WHERE EventTypeId = 1 OR EventTypeId = 3
),
ends AS
(
    SELECT *
    FROM [dbo].[PersonEventRecords]
    WHERE EventTypeId = 2
)
SELECT
    ISNULL(s.PersonId, e.PersonId) AS PersonId,
    s.EventTypeId AS StartEventTypeId,
    s.EventDate AS StartEventDate,
    e.EventTypeId AS EndEventTypeId,
    e.EventDate AS EndEventDate
FROM starts AS s
FULL OUTER JOIN ends AS e
    ON e.PersonId = s.PersonId
   AND e.EventSequence = s.EventSequence + 1
ORDER BY
    ISNULL(s.PersonId, e.PersonId),
    ISNULL(s.EventDate, e.EventDate);
/**** TEST DATA ****/
-- Recreates [dbo].[PersonEventRecords] and loads 14 sample events for four
-- persons; EventSequence numbers each person's events in date order.
IF OBJECT_ID('[dbo].[PersonEventRecords]') IS NOT NULL
    DROP TABLE [dbo].[PersonEventRecords];
CREATE TABLE [dbo].[PersonEventRecords]
(
    PersonId Int,
    EventTypeId Int,
    EventDate Date,
    EventSequence Int
);
INSERT INTO [dbo].[PersonEventRecords] (PersonId, EventTypeId, EventDate, EventSequence)
VALUES
    (1, 1, '2012-10-13', 1),
    (1, 2, '2012-10-20', 2),
    (1, 1, '2012-11-01', 3),
    (1, 2, '2012-11-13', 4),
    (2, 1, '2012-05-07', 1),
    (2, 2, '2012-06-01', 2),
    (2, 3, '2012-07-01', 3),
    (2, 2, '2012-08-30', 4),
    (3, 2, '2012-04-05', 1),
    (3, 1, '2012-05-04', 2),
    (3, 2, '2012-05-24', 3),
    (4, 1, '2013-01-03', 1),
    (4, 1, '2013-02-20', 2),
    (4, 2, '2013-03-20', 3);
Try this
-- First branch: every start event (EventTypeId 1 or 3) with the end event
-- (EventTypeId 2) that directly follows it in EventSequence, or NULLs when
-- the next event is not an end event.
SELECT E1.PersonId, E1.EventTypeId, E1.EventDate, E2.EventTypeId, E2.EventDate
FROM PersonEvent AS E1
OUTER APPLY(
    SELECT TOP 1 nxt.EventTypeId, nxt.EventDate
    FROM PersonEvent AS nxt
    WHERE nxt.PersonId = E1.PersonId
      AND nxt.EventSequence = E1.EventSequence + 1
      AND nxt.EventTypeId = 2
) AS E2
WHERE E1.EventTypeId IN (1, 3)
-- The two branches cannot produce the same row (start columns are non-NULL
-- in the first and NULL in the second), so no de-duplication is needed.
UNION ALL
-- Second branch: end events that were not paired above, i.e. whose directly
-- preceding event is not a start event. The EventTypeId test matters: an end
-- event that follows another end event has a predecessor but no start.
SELECT E3.PersonId, NULL, NULL, E3.EventTypeId, E3.EventDate
FROM PersonEvent E3
WHERE E3.EventTypeId = 2
  AND NOT EXISTS(
        SELECT *
        FROM PersonEvent AS prev
        WHERE prev.PersonId = E3.PersonId
          AND prev.EventSequence = E3.EventSequence - 1
          AND prev.EventTypeId IN (1, 3))
It is not completely clear how you want the result to be ordered – add an ORDER BY as needed.

T-SQL Cumulative Count Reset per Month

I need to count cumulative count in T-SQL. This can be done as:
-- Running total over the whole data set: each row is joined to itself and to
-- every earlier row (by ROWNUM) and the joined counts are summed.
WITH DATASET AS (
    SELECT '2014-01-28' AS [DATE], 1 AS [COUNT]
    UNION SELECT '2014-01-29', 5
    UNION SELECT '2014-01-30', 15
    UNION SELECT '2014-01-31', 4
    UNION SELECT '2014-02-01', 7
    UNION SELECT '2014-02-02', 1
),
CTE AS (
    SELECT d.*,
           ROW_NUMBER() OVER (ORDER BY d.[DATE]) AS ROWNUM
    FROM DATASET AS d
)
SELECT
    cur.[DATE],
    cur.[COUNT],
    SUM(upto.[COUNT]) AS CUM_CNT
FROM CTE AS cur
INNER JOIN CTE AS upto
    ON upto.ROWNUM <= cur.ROWNUM
GROUP BY
    cur.[DATE],
    cur.[COUNT]
That returns:
DATE COUNT CUM_CNT
2014-01-28 1 1
2014-01-29 5 6
2014-01-30 15 21
2014-01-31 4 25
2014-02-01 7 32
2014-02-02 1 33
But I want to reset the cumulative count per each month so the data returned should be:
DATE COUNT CUM_CNT
2014-01-28 1 1
2014-01-29 5 6
2014-01-30 15 21
2014-01-31 4 25
2014-02-01 7 7
2014-02-02 1 8
Is it possible to achieve this in T-SQL? How?
As of SQL Server 2012+ you can use window version of SUM to calculate running totals. You just have to PARTITION by YEAR([DATE]), MONTH([DATE]) to get the expected result:
-- Running total that restarts every calendar month: the window is partitioned
-- by year and month of [DATE] and accumulates in [DATE] order.
WITH DATASET ([DATE], [COUNT]) AS (
    SELECT '2014-01-28', 1
    UNION
    SELECT '2014-01-29', 5
    UNION
    SELECT '2014-01-30', 15
    UNION
    SELECT '2014-01-31', 4
    UNION
    SELECT '2014-02-01', 7
    UNION
    SELECT '2014-02-02', 1
)
SELECT
    d.[DATE],
    d.[COUNT],
    SUM(d.[COUNT]) OVER (
        PARTITION BY YEAR(d.[DATE]), MONTH(d.[DATE])
        ORDER BY d.[DATE]
    ) AS CUM_CNT
FROM DATASET AS d
SQL Fiddle Demo
-- Monthly running total without window functions: each row C2 is joined to
-- all rows C1 of the same year and month up to and including its own date.
WITH DATASET AS (
SELECT '2014-01-28' AS [DATE], 1 AS [COUNT]
UNION
SELECT '2014-01-29' AS [DATE], 5 AS [COUNT]
UNION
SELECT '2014-01-30' AS [DATE], 15 AS [COUNT]
UNION
SELECT '2014-01-31' AS [DATE], 4 AS [COUNT]
UNION
SELECT '2014-02-01' AS [DATE], 7 AS [COUNT]
UNION
SELECT '2014-02-02' AS [DATE], 1 AS [COUNT]
)
SELECT C2.[DATE],
       C2.[COUNT],                     -- the day's own count
       SUM(C1.[COUNT]) AS CUML_COUNT   -- sum over the month up to this day
FROM DATASET C1
JOIN DATASET C2
  ON YEAR(C1.[DATE]) = YEAR(C2.[DATE])   -- same month of the same year only
 AND MONTH(C1.[DATE]) = MONTH(C2.[DATE])
 AND C1.[DATE] <= C2.[DATE]
GROUP BY C2.[DATE], C2.[COUNT]

How to query Open-high-low-close (OHLC) data from SQL Server

I'm trying to retrieve data for a Open-high-low-close (OHLC) chart directly from the database, it's the kind of chart you see of stocks. Is this possible, and if, how?
I have a table like this (simplified):
Date | Price | PriceType
A record is created for each day, I will report per month / year, not per day as used for stocks.
I would like to query something like this:
SELECT PriceType, MAX(Price) as High, MIN(Price) as Low, [Price of first item of month] as Open, [Price of last item of month] as Close GROUP BY PriceType, Year(Date), Month(Date)
To access the SQL Server I use LLBLGen, so an answer based on that technology would be great, but a generic SQL Server answer will do too!
It's SQL 2005, but 2008 is also an option.
Thanks.
This appears to work. There may well be a less verbose way to do it.
--create test data
-- Two price types (A, B) with identical prices: six January and five
-- February 2009 rows each.
CREATE TABLE #t
(
    priceDate DATETIME,
    price     MONEY,
    priceType CHAR(1)
)
INSERT INTO #t (priceDate, price, priceType)
          SELECT '20090101', 100, 'A'
UNION ALL SELECT '20090102', 500, 'A'
UNION ALL SELECT '20090103', 20 , 'A'
UNION ALL SELECT '20090104', 25 , 'A'
UNION ALL SELECT '20090105', 28 , 'A'
UNION ALL SELECT '20090131', 150, 'A'
UNION ALL SELECT '20090201', 501, 'A'
UNION ALL SELECT '20090203', 21 , 'A'
UNION ALL SELECT '20090204', 26 , 'A'
UNION ALL SELECT '20090205', 29 , 'A'
UNION ALL SELECT '20090228', 151, 'A'
UNION ALL SELECT '20090101', 100, 'B'
UNION ALL SELECT '20090102', 500, 'B'
UNION ALL SELECT '20090103', 20 , 'B'
UNION ALL SELECT '20090104', 25 , 'B'
UNION ALL SELECT '20090105', 28 , 'B'
UNION ALL SELECT '20090131', 150, 'B'
UNION ALL SELECT '20090201', 501, 'B'
UNION ALL SELECT '20090203', 21 , 'B'
UNION ALL SELECT '20090204', 26 , 'B'
UNION ALL SELECT '20090205', 29 , 'B'
UNION ALL SELECT '20090228', 151, 'B'
--query
-- Monthly open/high/low/close per priceType from #t:
--   opening = price on the earliest priceDate of the month
--   closing = price on the latest priceDate of the month
--   high / low = largest / smallest price of the month
;WITH rangeCTE
AS
(
    -- overall date range covered by the data
    SELECT MIN(priceDate) minDate
          ,MAX(priceDate) maxDate
    FROM #t
)
,datelistCTE
AS
(
    -- one row per calendar month, starting with the month of minDate;
    -- CONVERT(CHAR(6), ..., 112) yields 'yyyymm', + '01' is the 1st of the month
    SELECT CAST(CONVERT(CHAR(6),minDate,112) + '01' AS DATETIME) AS monthStart
          ,DATEADD(mm,1,CAST(CONVERT(CHAR(6),minDate,112) + '01' AS DATETIME)) -1 AS monthEnd
          ,1 AS monthID
    FROM rangeCTE
    UNION ALL
    SELECT DATEADD(mm,1,monthStart)
          ,DATEADD(mm,2,monthStart) - 1
          ,monthID + 1
    FROM datelistCTE
    WHERE monthStart <= (SELECT maxDate FROM rangeCTE)
)
,priceOrderCTE
AS
(
    -- rank every price row inside its month and priceType four ways, so that
    -- rank 1 picks the opening, closing, highest and lowest row respectively
    SELECT *
          ,ROW_NUMBER() OVER (PARTITION BY monthID, priceType
                              ORDER BY priceDate
                             ) AS rn1
          ,ROW_NUMBER() OVER (PARTITION BY monthID, priceType
                              ORDER BY priceDate DESC
                             ) AS rn2
          ,ROW_NUMBER() OVER (PARTITION BY monthID, priceType
                              ORDER BY price DESC
                             ) AS rn3
          ,ROW_NUMBER() OVER (PARTITION BY monthID, priceType
                              ORDER BY price
                             ) AS rn4
    FROM datelistCTE AS d
    JOIN #t AS t
    -- Half-open month range. monthEnd is midnight at the START of the last
    -- day, so "BETWEEN monthStart AND monthEnd" would drop every priceDate
    -- with a time of day on the last day of a month.
    ON  t.priceDate >= d.monthStart
    AND t.priceDate <  DATEADD(mm,1,d.monthStart)
    WHERE monthStart <= (SELECT maxDate FROM rangeCTE)
)
SELECT o.monthStart
      ,o.priceType
      ,o.price AS opening
      ,c.price AS closing
      ,h.price AS high
      ,l.price AS low
FROM priceOrderCTE AS o
JOIN priceOrderCTE AS c
ON  c.priceType = o.priceType
AND c.monthID   = o.monthID
JOIN priceOrderCTE AS h
ON  h.priceType = o.priceType
AND h.monthID   = o.monthID
JOIN priceOrderCTE AS l
ON  l.priceType = o.priceType
AND l.monthID   = o.monthID
WHERE o.rn1 = 1
AND   c.rn2 = 1
AND   h.rn3 = 1
AND   l.rn4 = 1
-- datelistCTE recurses once per month; the default limit of 100 levels would
-- abort the query for data spanning more than about eight years. The
-- recursion always terminates because monthStart grows on every step.
OPTION (MAXRECURSION 0)
This is a little query I wrote that seems to work nicely for one time span at a time. All you need to do is comment the select DATEPARTS in order to get to the timespan you are looking for. Or you could just make multiple views for different timespans. Also the underlying data table uses Bid Ask tick style data. If you are using mids or last prices you could eliminate the case statements from the selects.
-- Open/high/low/close per CurveName, Period and time bucket from BidAsk rows.
-- The derived table tmp aggregates each bucket (year, month, week, day, hour);
-- rf is the BidAsk row at the bucket's latest CurveDateTime (the close) and
-- tmp2 is the BidAsk row at the bucket's earliest CurveDateTime (the open).
-- Price used throughout: (Bid+Ask)/2 when both are present; when either is
-- NULL, the sum of the two with NULL treated as 0.
Select
tmp.num,
rf.CurveName,
rf.Period as Period,
-- open: price of the row at tmp.OpenDate
CASE WHEN (tmp2.Bid is null or tmp2.Ask is null) then isnull(tmp2.Bid,0)+isnull(tmp2.Ask,0) else (tmp2.Bid+tmp2.Ask)/2 end as [Open],
tmp.Hi,
tmp.Lo,
-- close: price of the row at tmp.CloseDate
CASE WHEN (rf.Bid is null or Rf.Ask is null) then isnull(rf.Bid,0)+isnull(rf.Ask,0) else (rf.Bid+rf.Ask)/2 end as [Close],
tmp.OpenDate,
tmp.CloseDate,
tmp.yr,
tmp.mth,
tmp.wk,
tmp.dy,
tmp.hr
from BidAsk rf inner join
-- tmp: one row per CurveName/Period/bucket with row count, high, low and the
-- first and last CurveDateTime of the bucket
(SELECT count(CurveName)as num,CurveName,
Period,
max(CASE WHEN (Bid is null or Ask is null) then isnull(Bid,0)+isnull(Ask,0) else (Bid+Ask)/2 end) as Hi,
min(CASE WHEN (Bid is null or Ask is null) then isnull(Bid,0)+isnull(Ask,0) else (Bid+Ask)/2 end) as Lo,
max(CurveDateTime) as CloseDate, min(CurveDateTime) as OpenDate,
-- the DATEPART columns define the bucket; they must match the GROUP BY list
DATEPART(year, CurveDateTime) As yr,
DATEPART(month, CurveDateTime) As mth,
DATEPART(week, CurveDateTime) As wk,
DATEPART(Day, CurveDateTime) as dy,
DATEPART(Hour, CurveDateTime) as hr
--DATEPART(minute, CurveDateTime) as mnt
FROM
BidAsk
GROUP BY
CurveName,Period,
DATEPART(year, CurveDateTime),
DATEPART(month, CurveDateTime),
DATEPART(week, CurveDateTime),
DATEPART(Day, CurveDateTime) ,
DATEPART(Hour, CurveDateTime)
--DATEPART(minute, CurveDateTime)
) tmp on
-- rf = row carrying the bucket's last timestamp
tmp.CurveName=rf.CurveName and
tmp.CloseDate=rf.CurveDateTime and
tmp.Period=rf.Period
-- tmp2 = row carrying the bucket's first timestamp
inner join BidAsk tmp2 on
tmp2.CurveName=rf.CurveName and
tmp2.CurveDateTime=tmp.Opendate and
tmp2.Period=rf.Period
ORDER BY
CurveName,Period,tmp.yr,tmp.mth
--DATEPART(year, CurveDateTime),
--DATEPART(month, CurveDateTime)
--DATEPART(day, CurveDateTime),
--DATEPART(Hour, CurveDateTime),
--DATEPART(minute, CurveDateTime) )

Resources