I have a table with 200.000 rows in a SQL Server 2014 database looking like this:
-- Sample schema: one row per validity period of a Sector within a Contract.
-- Periods of different sectors may overlap within the same contract.
CREATE TABLE DateRanges
(
    Contract  VARCHAR(8),
    Sector    VARCHAR(8),
    StartDate DATE,
    EndDate   DATE
);

-- Date literals use the unseparated YYYYMMDD form so that they are parsed the
-- same way whatever the session's DATEFORMAT / language setting is
-- (the original mm-dd-yyyy literals fail or swap day/month under e.g. DMY).
INSERT INTO DateRanges (Contract, Sector, StartDate, EndDate)
VALUES
    ('111', '999', '20140101', '20140331'),
    ('111', '999', '20140401', '20140630'),
    ('111', '999', '20140701', '20140930'),
    ('111', '999', '20141001', '20141231'),
    ('111', '888', '20140801', '20140831'),
    ('111', '777', '20140815', '20140831'),
    ('222', '999', '20140101', '20140331'),
    ('222', '999', '20140401', '20140630'),
    ('222', '999', '20140701', '20140930'),
    ('222', '999', '20141001', '20141231'),
    ('222', '666', '20141101', '20141130'),
    ('222', '555', '20141115', '20141130');
As you can see there can be multiple overlaps for each contract and what I would like to have is the result like this
Contract Sector StartDate EndDate
---------------------------------------------
111 999 01-01-2014 07-31-2014
111 888 08-01-2014 08-14-2014
111 777 08-15-2014 08-31-2014
111 999 09-01-2014 12-31-2014
222 999 01-01-2014 10-31-2014
222 666 11-01-2014 11-14-2014
222 555 11-15-2014 11-30-2014
222 999 12-01-2014 12-31-2014
I cannot figure out how this can be done, and the examples I have seen on this site do not quite fit my problem.
This answer makes use of a few different techniques. The first is a recursive-cte that creates a table with every relevant cal_date which then gets cross apply'd with unique Contract values to get every combination of both values. The second is window-functions such as lag and row_number to determine a variety of things detailed in the comments below. Lastly, and probably most importantly, gaps-and-islands to determine when one Contract/Sector combination ends and the next begins.
Answer:
--determine the range of dates covered by the source data
--(T-SQL variables are prefixed with @; a leading # denotes a temp table)
declare @bgn_dt date = (select min(StartDate) from DateRanges)
, @end_dt date = (select max(EndDate) from DateRanges);

--make the script re-runnable within the same session
if object_id('tempdb..#contract_dates') is not null
    drop table #contract_dates;

--use a recursive CTE to create a record for each day / Contract
with dates as
(
    select @bgn_dt as cal_date
    union all
    select dateadd(d, 1, a.cal_date) as cal_date
    from dates as a
    where a.cal_date < @end_dt
)
select d.cal_date
, c.Contract
into #contract_dates
from dates as d
cross apply (select distinct Contract from DateRanges) as c
option (maxrecursion 0); --lift the default limit of 100 recursion levels

--Final Select: collapse each island of consecutive days back into one range
select f.Contract
, f.Sector
, min(f.cal_date) as StartDate
, max(f.cal_date) as EndDate
from (
    --Use the running sum of the island-start flags to obtain the Island Numbers
    select dr.Contract
    , dr.Sector
    , dr.cal_date
    , sum(dr.IslandBegin) over (partition by dr.Contract
                                order by dr.cal_date asc
                                rows unbounded preceding) as IslandNbr
    from (
        --Determine if the record is the start of a new Island: the Sector
        --changed, or the previous covered day is not the day before
        --(i.e. there is a gap with no DateRanges row for this Contract)
        select a.Contract
        , a.Sector
        , a.cal_date
        , case when lag(a.Sector) over (partition by a.Contract order by a.cal_date asc) = a.Sector
                and lag(a.cal_date) over (partition by a.Contract order by a.cal_date asc) = dateadd(d, -1, a.cal_date)
               then 0 else 1 end as IslandBegin
        from (
            --Keep only Contract/Date combinations covered by a range, and rank
            --the Sectors in effect so the most recently started one wins
            select cd.cal_date
            , dr.Contract
            , dr.Sector
            , row_number() over (partition by dr.Contract, cd.cal_date order by dr.StartDate desc) as ContractSectorRnk
            from #contract_dates as cd
            inner join DateRanges as dr on cd.Contract = dr.Contract
                and cd.cal_date between dr.StartDate and dr.EndDate
        ) as a
        where a.ContractSectorRnk = 1
    ) as dr
) as f
group by f.Contract
, f.Sector
, f.IslandNbr
order by f.Contract asc
, min(f.cal_date) asc;
Output:
+----------+--------+------------+------------+
| Contract | Sector | StartDate | EndDate |
+----------+--------+------------+------------+
| 111 | 999 | 2014-01-01 | 2014-07-31 |
| 111 | 888 | 2014-08-01 | 2014-08-14 |
| 111 | 777 | 2014-08-15 | 2014-08-31 |
| 111 | 999 | 2014-09-01 | 2014-12-31 |
| 222 | 999 | 2014-01-01 | 2014-10-31 |
| 222 | 666 | 2014-11-01 | 2014-11-14 |
| 222 | 555 | 2014-11-15 | 2014-11-30 |
| 222 | 999 | 2014-12-01 | 2014-12-31 |
+----------+--------+------------+------------+
Related
Mathew earns $600 over three days. The total should be split evenly across the days, producing one row per day (three rows here).
RDBMS is SQL Server.
id name start_date end_date Total_Dollars
---------------------------------------------------
1 Mathew 01/01/2021 03/01/2021 600
Output should be
id name start_date end_date Total_Dollars
--------------------------------------------------
1 Mathew 01/01/2021 01/01/2021 200
1 Mathew 02/01/2021 02/01/2021 200
1 Mathew 03/01/2021 03/01/2021 200
If you have a calendar table, use that:
WITH
-- need a calendar table with one row per calendar date
cal (dt) AS (
            SELECT DATE '2021-01-01'
  UNION ALL SELECT DATE '2021-01-02'
  UNION ALL SELECT DATE '2021-01-03'
  UNION ALL SELECT DATE '2021-01-04'
  UNION ALL SELECT DATE '2021-01-05'
  UNION ALL SELECT DATE '2021-01-06'
  UNION ALL SELECT DATE '2021-01-07'
)
,
-- your input ...
indata(id,nam,start_date,end_date,total_dollars) AS (
  SELECT 1,'Mathew',DATE '2021-01-01',DATE '2021-01-03',600
)
-- real query starts here, replace following comma with "WITH" ...
,
-- number of calendar days covered by each input row; grouped per id so that
-- several input rows do not all divide by one global day count
-- (assumes id identifies one input row - TODO confirm against the real table)
daycount(id, daycount) AS (
  SELECT indata.id, COUNT(*)
  FROM cal
  JOIN indata ON dt BETWEEN start_date AND end_date
  GROUP BY indata.id
)
SELECT
  indata.id
, nam
, dt AS start_date
, dt AS end_date
, total_dollars / dc.daycount AS total_dollars
FROM cal
JOIN indata ON dt BETWEEN start_date AND end_date
JOIN daycount AS dc ON dc.id = indata.id;
-- out id | nam | start_date | end_date | total_dollars
-- out ----+--------+------------+------------+---------------
-- out 1 | Mathew | 2021-01-01 | 2021-01-01 | 200
-- out 1 | Mathew | 2021-01-02 | 2021-01-02 | 200
-- out 1 | Mathew | 2021-01-03 | 2021-01-03 | 200
Or, also:
-- One output row per calendar day inside each input row's date range; the
-- amount is divided by the number of such days for the same id, counted with
-- a window function.
-- `cal` and `indata` are not defined in this snippet; they have to be supplied
-- by a preceding WITH clause or by real tables.
-- NOTE(review): `//` is kept exactly as in the original; it is not a T-SQL
-- operator - confirm the target engine.
SELECT
    src.id,
    src.nam,
    days.dt AS start_date,
    days.dt AS end_date,
    src.total_dollars // COUNT(*) OVER (PARTITION BY src.id) AS total_dollars
FROM cal AS days
JOIN indata AS src
    ON days.dt BETWEEN src.start_date AND src.end_date
-- out id | nam | start_date | end_date | total_dollars
-- out ----+--------+------------+------------+---------------
-- out 1 | Mathew | 2021-01-01 | 2021-01-01 | 200
-- out 1 | Mathew | 2021-01-02 | 2021-01-02 | 200
-- out 1 | Mathew | 2021-01-03 | 2021-01-03 | 200
You may use a recursive query as the following:
-- Expand every row of T into one row per day between start_date and end_date
-- (inclusive) and spread Total_Dollars evenly over those days.
WITH CTE AS
(
    -- anchor: the first day of each range
    SELECT id, name, start_date SDT, end_date, Total_Dollars
    FROM T
    UNION ALL
    -- recursive step: advance one day while still inside the range
    SELECT id, name, DATEADD(DAY, 1, SDT), end_date, Total_Dollars
    FROM CTE
    WHERE DATEADD(DAY, 1, SDT) <= end_date
)
SELECT id, name, SDT start_date, SDT end_date,
       -- * 1.00 forces decimal division instead of integer division
       Total_Dollars * 1.00 / COUNT(*) OVER (PARTITION BY id) Total_Dollars
FROM CTE
ORDER BY id, SDT
-- ranges longer than 100 days would otherwise hit the default recursion limit
OPTION (MAXRECURSION 0);
See a demo.
This is for SQL Server 2012: a subset of the data in my CTE looks like this:
Employee | OrderID | OrderType
---------+---------+----------
Kala | 321111 | 953
Paul | 321222 | 1026
Don | 321333 | 1026
Don | 321333 | 953
Kala | 321444 | 953
I'd like the following result:
Employee | 953_Order_Count | 1026_Order_Count
---------+-----------------+-----------------
Kala | 2 | 0
Don | 1 | 1
Paul | 0 | 1
To validate that what I want is possible, I ran:
-- Distinct orders per employee and order type (one row per combination).
SELECT Employee
     , OrderType
     , COUNT(DISTINCT OrderID) AS [Count]
FROM CTE
GROUP BY employee
       , ordertype
The following result is returned:
Employee | OrderType | Count
---------+-----------+------
Kala | 953 | 1
Paul | 1026 | 1
Don | 1026 | 1
Don | 953 | 1
Close, but not close enough. So I run:
-- One row per employee: total distinct orders plus distinct orders per type.
-- The CASE expressions yield NULL for other order types, which COUNT ignores.
SELECT Employee
     , COUNT(DISTINCT OrderID) AS [Total_Orders]
     , COUNT(DISTINCT CASE WHEN OrderType = 1026 THEN OrderID END) AS [1026_Order_Count]
     , COUNT(DISTINCT CASE WHEN OrderType = 953 THEN OrderID END) AS [953_Order_Count]
FROM CTE
GROUP BY Employee
The result is an accurate first "count," but the rest return 0. If this were not a CTE, I'd use a recursive statement.
Any help is appreciated!
Just use conditional aggregation:
-- Pivot the order types into columns with conditional aggregation:
-- each COUNT only sees a non-NULL value for rows of its own order type.
SELECT
    Employee
    , COUNT(
        CASE
            WHEN OrderType = 953 THEN 1
        END
      ) AS [953_Order_Count]
    , COUNT(
        CASE
            WHEN OrderType = 1026 THEN 1
        END
      ) AS [1026_Order_Count]
FROM CTE
GROUP BY Employee;
Demo
The 953 count, for example, works above by counting 1 when the order type is 953 and NULL (the implicit ELSE value) when the order type is not 953. COUNT ignores NULL by default, so it only counts the 953 orders.
Tim's answer looks fine. You could also use a PIVOT:
-- Sample rows supplied inline, then pivoted: one output row per Employee with
-- a row count for each of the listed OrderType values.
; with cte (Employee, OrderID, OrderType) as
(
    select v.Employee, v.OrderID, v.OrderType
    from (values
          ('Kala', 321111, 953)
        , ('Paul', 321222, 1026)
        , ('Don',  321333, 1026)
        , ('Don',  321333, 953)
        , ('Kala', 321444, 953)
    ) as v (Employee, OrderID, OrderType)
)
select
    Employee
    , [953]  as [953_Order_Count]
    , [1026] as [1026_Order_Count]
from
(
    -- only the grouping column and the pivoted column are passed to PIVOT
    select Employee, OrderType
    from cte
) as sourceData
pivot
(
    count(OrderType) for OrderType in ([953], [1026])
) as myPivot
If you want to have dynamic columns based on the set of available values in the OrderType column, you can build the query dynamically. See @Taryn's answer to "Understanding PIVOT function in T-SQL" for an example.
I have a table with ClaimNumber, NoteCreateDate, NoteType
I want to find the claims that have notes like 'Review sent' back to back, with no gap in the NoteCreateDate.
eg.
+-------------+----------------+--------------------+----------+
| ClaimNumber | NoteCreateDate | Notes | NoteType |
+-------------+----------------+--------------------+----------+
| 12121 | 12/01/2017 | ReviewSent | Subject |
| 12121 | 12/05/2017 | PackagesenttoABC | Details |
| 12121 | 12/07/2017 | ReviewSent | Subject |
| 10005 | 05/06/2018 | ReviewSent | Subject |
| 10005 | 05/07/2018 | ReviewSent | Subject |
| 10005 | 05/08/2018 | ReviewSent | Subject |
| 10005 | 05/12/2018 | Fieldinvestigation | SIU |
+-------------+----------------+--------------------+----------+
Expected
From this example I wanted only the claim number 10005 since it the claim that has the notes 'Review Sent' back to back with no gap in the date(consecutive Dates). For instance, I wanted to find the claims that has the note with phrase 'ReviewSent' created today and the following note should also be the same 'Review sent', no matter when it was created, May be the next day or even 10 days later .. Thanks in Advance
My current MS SQL query.
-- Claims with their notes, filtered on the text 'Review Sent'.
-- The original had an unbalanced bracket (NoteType]) which is a syntax error.
-- NOTE(review): Body is unqualified; presumably a column of Note - confirm.
-- NOTE(review): if Body belongs to Note or NoteType, the WHERE filter discards
-- the NULL-extended rows and the LEFT JOINs behave like inner joins - confirm
-- that this is intended.
select cm.ClaimNum, a.NoteCreateDate, a.Notes, a.NoteType
from CMaster cm
left join Note a on cm.ClaimNum = a.PARENTREF
left join [NoteType] b on b.ID = a.TYPECODE
where Body like '%Review Sent%'
So if you want to find the ClaimNumber(s) where the ReviewSent notes are sent back-2-back then this query will get them
-- Sample table holding the notes of each claim.
CREATE TABLE T1
(
    [ClaimNumber]    int,
    [NoteCreateDate] datetime,
    [Notes]          varchar(18),
    [NoteType]       varchar(7)
);

-- Seed rows taken from the example in the question.
INSERT INTO T1 ([ClaimNumber], [NoteCreateDate], [Notes], [NoteType])
VALUES
      (12121, '2017-12-01 00:00:00', 'ReviewSent',         'Subject')
    , (12121, '2017-12-05 00:00:00', 'PackagesenttoABC',   'Details')
    , (12121, '2017-12-07 00:00:00', 'ReviewSent',         'Subject')
    , (10005, '2018-05-06 00:00:00', 'ReviewSent',         'Subject')
    , (10005, '2018-05-07 00:00:00', 'ReviewSent',         'Subject')
    , (10005, '2018-05-08 00:00:00', 'ReviewSent',         'Subject')
    , (10005, '2018-05-12 00:00:00', 'Fieldinvestigation', 'SIU')
;
-- Claims in which a 'ReviewSent' note is immediately followed (in
-- NoteCreateDate order within the claim) by another 'ReviewSent' note.
;WITH NotesWithNext AS
(
    SELECT
        ClaimNumber,
        Notes,
        -- text of the next note of the same claim; NULL for the last note
        LEAD(Notes) OVER (PARTITION BY ClaimNumber ORDER BY NoteCreateDate ASC) AS NextNotes
    FROM dbo.T1
)
SELECT DISTINCT n.ClaimNumber
FROM NotesWithNext AS n
WHERE n.Notes = 'ReviewSent'
  AND n.NextNotes = 'ReviewSent';

-- remove the sample table again
DROP TABLE dbo.T1
Result
ClaimNumber
10005
/* sample data; NoteCreateDate is converted from mm/dd/yyyy text (style 101)
   to a real date so ordering and DATEDIFF do not rely on implicit,
   language-dependent string conversion */
Select t.ClaimNumber
     , CONVERT(date, t.NoteCreateDate, 101) as NoteCreateDate
     , t.Notes
     , t.NoteType
into #tmp
from
(
    Select 12121 as ClaimNumber, '12/01/2017' as NoteCreateDate, 'ReviewSent' as Notes, 'Subject' as NoteType
    union all
    Select 12121, '12/05/2017', 'PackagesenttoABC', 'Details'
    union all
    Select 12121, '12/07/2017', 'ReviewSent', 'Subject'
    union all
    Select 10005, '05/06/2018', 'ReviewSent', 'Subject'
    union all
    Select 10005, '05/07/2018', 'ReviewSent', 'Subject'
    union all
    Select 10005, '05/08/2018', 'ReviewSent', 'Subject'
    union all
    Select 10005, '05/12/2018', 'Fieldinvestigation', 'SIU'
) t;

/* create another temp table to simplify the query: number the notes of each
   claim chronologically. The original ordered by ClaimNumber, i.e. by the
   partition key itself, which leaves the numbering arbitrary. */
Select *
     , ROW_NUMBER() OVER (PARTITION BY ClaimNumber ORDER BY NoteCreateDate) as ClaimCounter
into #tmp2
from #tmp;

/** finally get the claimnos that have 'ReviewSent' notes on consecutive
    NoteCreate dates: pair each note with the next one of the same claim **/
SELECT distinct t1.ClaimNumber
FROM #tmp2 t1
inner join #tmp2 t2
    on t1.ClaimCounter = t2.ClaimCounter - 1
   and t1.ClaimNumber = t2.ClaimNumber
where DATEDIFF(DAY, t1.NoteCreateDate, t2.NoteCreateDate) = 1
  and t1.Notes = 'ReviewSent'
  and t2.Notes = 'ReviewSent';
The audit table looks like this:
Audit ID VendorID PaymentType CreateDateUTC
999 8048 2 2017-10-30-08:84:24
1000 1234 5 2017-10-31-01:17:34
1001 8048 7 2017-10-31-01:17:45
1002 1234 5 2017-10-31-01:17:53
1003 1234 7 2017-10-31-01:18:23
1004 1234 5 2017-11-01-01:18:45
In this example, you can see that say - VendorID 1234 started as PaymentType 5, then had another entry where it's still 5 (the audit table records additional changes not relevant to my query), then it changes to 7, but then back to 5.
Say I'd want to answer the question: 'Between now and date X, these VendorIDs had a change in PaymentType'. A bonus would be - this was the previous PaymentType.
Expected Results:
VendorID PaymentType Prev_PaymentType
8048 7 2
So if I queried between now and 2017-10-31 01:00:00, I'd want it to return VendorID 8048 as having changed (and, as a bonus, that its previous PaymentType was 2), but VendorID 1234 shouldn't show up, since at 2017-10-31 01:00:00 it was a 5 and is still a 5 now, despite the intermediate changes.
How would one go about querying the VendorIDs whose payment type changed between 2 dates?
Thanks!
Here is an alternative approach that may prove useful, using OUTER APPLY. Note that the AuditID column is used as a tie-breaker, mostly because the sample data does not have datetime values.
SQL Fiddle
-- Sample audit log: one row per recorded change of a vendor.
CREATE TABLE AuditTable
(
    AuditID       int,
    VendorID      int,
    PaymentType   int,
    CreateDateUTC date
);

-- Seed rows from the question (time of day dropped).
INSERT INTO AuditTable (AuditID, VendorID, PaymentType, CreateDateUTC)
VALUES
    (999,  8048, 2, '2017-10-30'),
    (1000, 1234, 5, '2017-10-31'),
    (1001, 8048, 7, '2017-10-31'),
    (1002, 1234, 5, '2017-10-31'),
    (1003, 1234, 7, '2017-10-31'),
    (1004, 1234, 5, '2017-11-01');
Query 1:
-- Every audit row together with the payment type and date of the preceding
-- audit row of the same vendor (NULLs when there is none).
select
    a.AuditID
    , a.VendorID
    , a.PaymentType
    , a.CreateDateUTC
    , oa.PrevPaymentType
    , oa.PrevDate
from AuditTable a
outer apply (
    -- latest earlier row of the same vendor; AuditID breaks ties between
    -- rows that share a CreateDateUTC
    select top(1) t.PaymentType, t.CreateDateUTC
    from AuditTable t
    where a.VendorID = t.VendorID
    and a.CreateDateUTC >= t.CreateDateUTC
    and a.AuditID > t.AuditID
    order by t.CreateDateUTC DESC, t.AuditID DESC
) oa (PrevPaymentType, PrevDate)
order by
    a.VendorID
    , a.CreateDateUTC
    , a.AuditID   -- tie-breaker: keeps the row order deterministic within a day
Results:
| AuditID | VendorID | PaymentType | CreateDateUTC | PrevPaymentType | PrevDate |
|---------|----------|-------------|---------------|-----------------|------------|
| 1000 | 1234 | 5 | 2017-10-31 | (null) | (null) |
| 1002 | 1234 | 5 | 2017-10-31 | 5 | 2017-10-31 |
| 1003 | 1234 | 7 | 2017-10-31 | 5 | 2017-10-31 |
| 1004 | 1234 | 5 | 2017-11-01 | 7 | 2017-10-31 |
| 999 | 8048 | 2 | 2017-10-30 | (null) | (null) |
| 1001 | 8048 | 7 | 2017-10-31 | 2 | 2017-10-30 |
-- Audit sample table used by the query that follows.
CREATE TABLE AuditTable
(
      AuditID       INT
    , VendorID      INT
    , PaymentType   INT
    , CreateDateUTC DATE
);

-- Columns are listed explicitly; values are the same six sample rows.
INSERT INTO AuditTable (AuditID, VendorID, PaymentType, CreateDateUTC)
VALUES
      (999,  8048, 2, '2017-10-30')
    , (1000, 1234, 5, '2017-10-31')
    , (1001, 8048, 7, '2017-10-31')
    , (1002, 1234, 5, '2017-10-31')
    , (1003, 1234, 7, '2017-10-31')
    , (1004, 1234, 5, '2017-11-01');
-- Vendors whose last payment type inside the date window differs from their
-- first payment type inside the window.
-- The original numbered rows per CreateDateUTC (not per vendor) and used
-- TOP 1 without ORDER BY, so which vendor it returned was arbitrary.
WITH InWindow AS (
    SELECT VendorID,
           PaymentType,
           -- 1 = earliest audit row of the vendor in the window
           ROW_NUMBER() OVER (PARTITION BY VendorID
                              ORDER BY CreateDateUTC ASC, AuditID ASC) AS FirstRank,
           -- 1 = latest audit row of the vendor in the window
           ROW_NUMBER() OVER (PARTITION BY VendorID
                              ORDER BY CreateDateUTC DESC, AuditID DESC) AS LastRank
    FROM AuditTable
    WHERE CreateDateUTC <= '2017-11-02' AND CreateDateUTC >= '2017-10-01'
)
SELECT latest.VendorID,
       latest.PaymentType   AS PaymentType,
       earliest.PaymentType AS Prev_PaymentType
FROM InWindow AS latest
JOIN InWindow AS earliest
    ON earliest.VendorID = latest.VendorID
   AND earliest.FirstRank = 1
WHERE latest.LastRank = 1
  -- intermediate changes that were reverted do not count as a change
  AND latest.PaymentType <> earliest.PaymentType
ORDER BY latest.VendorID;
Result:
+----------+-------------+------------------+
| VendorID | PaymentType | Prev_PaymentType |
+----------+-------------+------------------+
| 8048 | 7 | 2 |
+----------+-------------+------------------+
Demo
Here is a variant without using LEAD() or LAG() but does use ROW_NUMBER and COUNT() OVER().
See this version work at: SQL Fiddle
-- Vendor audit sample data for the ROW_NUMBER / COUNT() OVER variant.
CREATE TABLE AuditTable
(
    AuditID int,
    VendorID int,
    PaymentType int,
    CreateDateUTC date
);

INSERT INTO AuditTable (AuditID, VendorID, PaymentType, CreateDateUTC)
VALUES (999, 8048, 2, '2017-10-30')
     , (1000, 1234, 5, '2017-10-31')
     , (1001, 8048, 7, '2017-10-31')
     , (1002, 1234, 5, '2017-10-31')
     , (1003, 1234, 7, '2017-10-31')
     , (1004, 1234, 5, '2017-11-01');
Query 1:
-- Reports audit rows whose payment type differs from that of the vendor's
-- next audit row, restricted to rows whose time span contains a fixed
-- date ('20171031') or today.
WITH
-- rowz: number each vendor's audit rows in (CreateDateUTC, AuditID) order
rowz AS (
SELECT *
, ROW_NUMBER() OVER (PARTITION BY VendorID
ORDER BY CreateDateUTC, AuditID) AS lagno
FROM AuditTable
),
-- cte: pair every row (r1) with the vendor's next row (r2) and rank the
-- pairs within each (VendorID, CreateDateUTC) group
cte AS (
SELECT *
-- rn = 1 picks, per vendor and date, the pair with the highest c,
-- then the earliest span_dt
, ROW_NUMBER() OVER (PARTITION BY VendorID, CreateDateUTC
ORDER BY c DESC, span_dt) rn
FROM (
SELECT r1.AuditID, r1.VendorID, r1.CreateDateUTC
-- the current row's type is exposed as prevpaymenttype ...
, r1.PaymentType AS prevpaymenttype
-- ... and the next row's type as PaymentType (NULL when there is no next row)
, r2.PaymentType
-- end of the span: date of the next row, or today when there is none
, COALESCE(r2.CreateDateUTC, CAST(GETDATE() AS date)) span_dt
-- c: number of joined rows sharing vendor, date and r1 payment type
, COUNT(*) OVER (PARTITION BY r1.VendorID, r1.CreateDateUTC, r1.PaymentType) c
FROM rowz r1
-- r2 is the row whose lagno is one higher, i.e. the vendor's next row
LEFT JOIN rowz r2 ON r1.VendorID = r2.VendorID
AND r1.lagno = r2.lagno - 1
) d
)
SELECT
AuditID, VendorID, PrevPaymentType, PaymentType, CreateDateUTC
FROM (
SELECT
*
FROM cte
-- keep the top-ranked pair whose span contains the fixed date ...
WHERE ('20171031' BETWEEN CreateDateUTC AND span_dt AND rn = 1)
-- ... or contains today
OR (CAST(GETDATE() AS date) BETWEEN CreateDateUTC AND span_dt AND rn = 1)
) d
-- only pairs where the type changed; a NULL PaymentType (no next row)
-- never satisfies <> and is therefore dropped
WHERE PaymentType <> PrevPaymentType
Results:
| AuditID | VendorID | PrevPaymentType | PaymentType | CreateDateUTC |
|---------|----------|-----------------|-------------|---------------|
| 999 | 8048 | 2 | 7 | 2017-10-30 |
I currently have the following table:
+-----+-----------------------------+------------------------------+
| ID | StartDate | EndDate |
+-----+-----------------------------+------------------------------|
| 1 | 2017-07-24 08:00:00.000 | 2017-07-29 08:00:00.000 |
| 2 | 2017-07-25 08:00:00.000 | 2017-07-28 08:00:00.000 |
| 3 | 2017-07-25 08:00:00.000 | 2017-07-26 08:00:00.000 |
+-----+-----------------------------+------------------------------+
I would like to know the count of the ID's that were not Closed on each date.
So, for example, I want to know the count of open IDs on 2017-07-26 00:00:00.000. This would be all 3 in this case.
Another example: I want to know the count of open IDs on 2017-07-29 00:00:00.000, which would be 1. Only ID=1 is not yet closed on that date.
I have tried using another solution here on StackOverflow, but I can't quite figure why it is giving me false results.
-- Count, for each day of a 100-day window starting 7 days ago, the support
-- calls that are open on that day.
-- (T-SQL variables need the @ prefix; "declare #dt" is not valid.)
declare @dt date, @dtEnd date
set @dt = dateadd(day, -7, getdate())
set @dtEnd = dateadd(day, 100, @dt);

WITH CTEt1 (SupportCallID, StartDate, EndDate, Onhold)
as
(
    SELECT SupportCallID
         , OpenDate
         -- date of the last change into one of the listed statuses; NULL when
         -- the call never reached one of them
         -- NOTE(review): presumably these are the "closed" statuses - confirm
         , MAX(CASE WHEN StatusID IN ('19381771-8E81-40C5-8E36-62A7DB0A2A99', '95C7A5FB-2389-4D14-9DAE-A08BFCC3B09A', 'D5429790-3B43-4462-9E1E-2466EA29AC74') then CONVERT(DATE, LastChangeDate) end) EndDate
         , OnHold
    FROM [ClienteleITSM_Prod_Application].[dbo].[SupportCall]
    group by SupportCallID, OpenDate, OnHold
)
SELECT dates.myDate,
       (SELECT COUNT(*)
        FROM CTEt1
        -- compare whole days, so a call opened later on myDate still counts
        WHERE dates.myDate >= CONVERT(DATE, StartDate)
          -- a NULL EndDate means "not closed yet"; BETWEEN would silently
          -- drop those rows because a comparison with NULL is never true
          AND (EndDate IS NULL OR dates.myDate <= EndDate)
       ) AS OpenCalls
FROM
(
    -- one row per day: @dt plus each distinct number from spt_values
    select dateadd(day, number, @dt) mydate
    from
    (
        select distinct number
        from master.dbo.spt_values
        where name is null
    ) n
    where dateadd(day, number, @dt) < @dtEnd
) dates
If you use a cte to create a table of dates that span the range of dates in your source table, you can easily left join from that to your source table and count up the rows returned:
-- Sample data held in a table variable (table variables and scalar variables
-- are prefixed with @ in T-SQL).
declare @t table(ID int,StartDate datetime,EndDate datetime);
insert into @t (ID, StartDate, EndDate)
values (1,'2017-07-24 08:00:00.000','2017-07-29 08:00:00.000')
      ,(2,'2017-07-25 08:00:00.000','2017-07-28 08:00:00.000')
      ,(3,'2017-07-25 08:00:00.000','2017-07-26 08:00:00.000');

-- Range of dates that has to be generated
declare @StartDate datetime = (select min(StartDate) from @t);
declare @EndDate datetime = (select max(EndDate) from @t);

-- Table with 10 rows in to be joined together to create a large tally table (10 * 10 * 10 * etc)
with t(t) as (select t from (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1))t(t))
-- Add the row_number of the tally table to your start date to generate all dates within your data range
-- (three cross-joined copies give up to 1000 rows, i.e. at most 1000 days)
,d(d) as (
    select top(datediff(d,@StartDate,@EndDate)+1)
           dateadd(d,row_number() over (order by (select null))-1,@StartDate)
    from t t1
    cross join t t2
    cross join t t3
)
-- count(t.ID) ignores the NULLs produced by the left join, so days without
-- any open ID report 0
select d.d
      ,count(t.ID) as OpenIDs
from d
left join @t as t
    on(d.d between cast(t.StartDate as date) and t.EndDate)
group by d.d
order by d.d;
Output:
+-------------------------+---------+
| d | OpenIDs |
+-------------------------+---------+
| 2017-07-24 08:00:00.000 | 1 |
| 2017-07-25 08:00:00.000 | 3 |
| 2017-07-26 08:00:00.000 | 3 |
| 2017-07-27 08:00:00.000 | 2 |
| 2017-07-28 08:00:00.000 | 2 |
| 2017-07-29 08:00:00.000 | 1 |
+-------------------------+---------+