Filtering consecutive dates in SQL Server - sql-server

I would like to filter consecutive dates from my holiday table if any of the consecutive date is yesterday. Below is my code to get consecutive dates with row numbers.
SELECT
RW = ROW_NUMBER() OVER( PARTITION BY GRP ORDER BY HolidayDate),
HolidayDate
FROM
(SELECT
HolidayDate,
DATEDIFF(Day, '1900-01-01', HolidayDate) - ROW_NUMBER() OVER (ORDER BY HolidayDate) AS GRP,
HolidayType
FROM
Holiday) A
ORDER BY
HolidayDate
For example, yesterday's date is 03/14/2017. I need to get data from the Holiday table if there are any consecutive holidays involving this date.
Output should be null if there is no match else it should display dates as below
03/12/2017
03/13/2017
03/14/2017

Well, you are almost there!
Define "yesterday" as a parameter, since it is easy to find.
CREATE TABLE Holiday(
HolidayDate SMALLDATETIME PRIMARY KEY,
HolidayType VARCHAR(20)
);
INSERT INTO Holiday VALUES
('2016-12-25', 'A'), ('2016-12-26', 'A'), ('2017-01-01', 'A'),
('2017-04-12', 'A'), ('2017-04-13', 'A'), ('2017-04-14', 'A');
DECLARE #yesterday SMALLDATETIME;
SET #yesterday = '2017-04-13'; -- belongs to a group of consecutive holidays
-- SET #yesterday = '2017-01-01'; -- isolated holiday
-- SET #yesterday = '2017-02-10'; -- not a holiday
; WITH groups as
(
SELECT
HolidayDate,
DATEDIFF(Day, '1900-01-01', HolidayDate)
- ROW_NUMBER() OVER (ORDER BY HolidayDate) AS GRP,
HolidayType
FROM Holiday
),
consecutive as (
SELECT GRP FROM groups GROUP BY GRP HAVING COUNT(*) > 1
)
SELECT DISTINCT g2.HolidayDate, g2.HolidayType
FROM consecutive c
JOIN groups g1 ON c.GRP = g1.GRP
JOIN groups g2 ON g1.GRP = g2.GRP
WHERE g1.HolidayDate = #yesterday;

Related

Build timeline from start and end dates

I have a subscription table with a user ID, a subscription start date and a subscription end date. I also have a calendar table with a datestamp field, that is every single date starting from the first subscription date in my subscription table.
I am trying to write something that would give me a table with a date column and three numbers: number of total active (on that day), number of new subscribers, number of unsubscribers.
(N.B. I tried to insert sample tables using the suggested GitHub Flavoured Markdown but it just all goes into one row.)
Currently I am playing with a query that creates multiple joins between the two tables, one for each number:
select a.datestamp
,count(distinct case when b_sub.UserID is not null then b_sub.UserID end) as total_w_subscription
,count(distinct case when b_in.UserID is not null then b_in.UserID end) as total_subscribed
,count(distinct case when b_out.UserID is not null then b_out.UserID end) as total_unsubscribed
from Calendar as a
left join Subscription as b_sub -- all those with subscription on given date
on b_sub.sub_dt <= a.datestamp
and (b_sub.unsub_dt > a.datestamp or b_sub.unsub_dt is null)
left join Subscription as b_in -- all those that subscribed on given date
on b_in.sub_dt = a.datestamp
left join Subscription as b_out -- all those that unsubscribed on given date
on b_out.unsub_dt = a.datestamp
where a.datestamp > '2021-06-10'
group by a.datestamp
order by datestamp asc
;
I have indexed the date fields in both tables. If I only look at one day, it runs in 3 seconds. Two days already takes forever. The Sub table is over 2.6M records and ideally I'll need my timeline to begin sometime in 2012.
What would be the most time efficient way to do this?
You're on the right track. I created some table variables and assumed a data structure that has each subscription include a start and end date.
--Create #dates table variable for calendar
DECLARE #startDate DATETIME = '2018-01-01'
DECLARE #endDate DATETIME = '2021-06-18'
DECLARE #dates TABLE
(
reportingdate DATETIME
)
WHILE #startDate <= #endDate
BEGIN
INSERT INTO #dates SELECT #startDate
SET #startDate += 1
END
--Create #subscriptions table variable for subcriptions to join onto calendar
DECLARE #subscriptions TABLE
(
id INT
,startDate DATETIME
,endDate DATETIME
)
INSERT INTO #subscriptions
VALUES
(1,'2018-01-01 00:00:00.000','2019-10-07 00:00:00.000')
,(2,'2018-01-11 00:00:00.000','2019-12-21 00:00:00.000')
,(3,'2019-04-21 00:00:00.000','2020-03-19 00:00:00.000')
,(4,'2019-12-09 00:00:00.000','2020-05-14 00:00:00.000')
,(5,'2020-04-26 00:00:00.000','2020-07-06 00:00:00.000')
,(6,'2020-05-02 00:00:00.000',NULL)
,(7,'2020-08-31 00:00:00.000','2020-10-29 00:00:00.000')
,(8,'2020-12-13 00:00:00.000','2021-01-13 00:00:00.000')
,(9,'2021-02-12 00:00:00.000','2021-04-19 00:00:00.000')
,(10,'2021-06-10 00:00:00.000',NULL)
;
Then I join the subscription onto the calendar table.
--CTE to join subscription onto calendar and use ROW_NUMBER functions
WITH cte AS (
SELECT
s.id AS SubID
,d.ReportingDate
,ROW_NUMBER() OVER (PARTITION BY s.id ORDER BY d.ReportingDate) AS asc_rn --used to identify 1st
,ROW_NUMBER() OVER (PARTITION BY s.id ORDER BY d.ReportingDate DESC) AS desc_rn --used to identify last
,CASE WHEN s.endDate IS NULL THEN 1 ELSE 0 END AS ActiveSub
FROM #subscriptions s
LEFT JOIN #dates d ON
d.reportingdate BETWEEN s.startDate AND ISNULL(s.endDate,'9999-12-31')
)
I used ROW_NUMBER to identify the first and last date rows of the subscription, as well as checking if the subscription endDate is NULL (still active). I then query the CTE to count subscriptions grouped by day, as well as summing new and terminated subscriptions grouped by day.
--Query CTE using asc_rn, desc_rn, and ActiveSub to identify new subscribers and unsubscribers.
SELECT
ReportingDate
,COUNT(*) AS TotalSubscribers
,SUM(CASE WHEN asc_rn = 1 THEN 1 ELSE 0 END) AS NewSubscribers
,SUM(CASE WHEN desc_rn = 1 AND ActiveSub = 0 THEN 1 ELSE 0 END) AS UnSubscribers
FROM cte
GROUP BY ReportingDate
ORDER BY ReportingDate

Multi - Columns OVERLAPPING DATES

;with cte as (
select Domain_Id, Starting_Date, End_Date
from Que_Date
union all
select t.Domain_Id, cte.Starting_Date, t.End_Date
from cte
join Que_Date t on cte.Domain_Id = t.Domain_Id and cte.End_Date = t.Starting_Date),
cte2 as (
select *, rn = row_number() over (partition by Domain_Id, End_Date order by Domain_Id)
from cte
)
select DISTINCT Domain_Id, Starting_Date, max(End_Date) enddate
from cte2
where rn=1
group by Domain_Id, Starting_Date
order by Domain_Id, Starting_Date;
select * from Que_Date
This is the code that I have wrote but i am getting an extra row i.e 2nd row is extra, the expected output should have only 1st, 3rd and 4th row as output so please help me with it.
I have attached an image showing Input, Excepted Output, and the output that I am getting.
You've got so many results in your first cte. Your first cte has consisting domains. So you cannot filter domains based on your cte. So you query has unnecessary rows.
Try this solution. Cte ConsistentDomains has just consistent domains. So based on this cte, we can get not overlapped results.
Create and fill data:
CREATE TABLE FooTable
(
Domain_ID INT,
Starting_Date DATE,
End_Date Date
)
INSERT INTO dbo.FooTable
(
Domain_ID,
Starting_Date,
End_Date
)
VALUES
( 1, -- Domain_ID - int
CONVERT(datetime,'01-01-2011',103), -- Starting_Date - date
CONVERT(datetime,'05-01-2011',103) -- End_Date - date
)
, (1, CONVERT(datetime,'05-01-2011',103), CONVERT(datetime,'07-01-2011',103))
, (1, CONVERT(datetime,'07-01-2011',103), CONVERT(datetime,'15-01-2011',103))
, (2, CONVERT(datetime,'11-05-2011',103), CONVERT(datetime,'12-05-2011',103))
, (2, CONVERT(datetime,'13-05-2011',103), CONVERT(datetime,'14-05-2011',103))
Query to find not overlapping results:
DECLARE #startDate varchar(50) = '2011-01-01';
WITH ConsistentDomains AS
(
SELECT
f.Domain_ID
, f.Starting_Date
, f.End_Date
FROM FooTable f
WHERE f.Starting_Date = #startDate
UNION ALL
SELECT
s.Domain_ID
, s.Starting_Date
, s.End_Date
FROM FooTable s
INNER JOIN ConsistentDomains cd
ON s.Domain_ID = cd.Domain_ID
AND s.Starting_Date = cd.End_Date
), ConsistentDomainsRownumber AS
(
SELECT
cd.Domain_ID
, cd.Starting_Date
, cd.End_Date
, ROW_NUMBER() OVER (PARTITION BY cd.Domain_ID ORDER BY cd.Starting_Date,
cd.End_Date) RN
FROM ConsistentDomains cd
)
SELECT cd.Domain_ID
, convert(varchar, cd.Starting_Date, 105) Starting_Date
, convert(varchar, cd.End_Date, 105) End_Date
FROM ConsistentDomainsRownumber cd WHERE cd.RN = 1
UNION ALL
SELECT
ft.Domain_ID
, convert(varchar, ft.Starting_Date, 105) Starting_Date
, convert(varchar, ft.End_Date, 105) End_Date
FROM dbo.FooTable ft WHERE ft.Domain_ID NOT IN (SELECT cd.Domain_ID FROM
ConsistentDomainsRownumber cd)
Output:
I used the same table creating script as provided by #stepup, but you can also get your outcome in this way.
CREATE TABLE testtbl
(
Domain_ID INT,
Starting_Date DATE,
End_Date Date
)
INSERT INTO testtbl
VALUES
(1, convert(date, '01-01-2011' ,103), convert(date, '05-01-2011',103) )
,(1, convert(date, '05-01-2011' ,103), convert(date, '07-01-2011',103) )
,(1, convert(date, '07-01-2011' ,103), convert(date, '15-01-2011',103) )
,(2, convert(date, '11-05-2011' ,103), convert(date, '12-05-2011',103) )
,(2, convert(date, '13-05-2011' ,103), convert(date, '14-05-2011',103) )
You can make use of self join and Firs_value and last value within the group to make sure that you are comparing within the same ID and overlapping dates.
select distinct t.Domain_ID,
case when lag(t1.starting_date)over (partition by t.Domain_id order by
t.starting_date) is not null
then first_value(t.Starting_Date) over (partition by t.domain_id order by
t.starting_date)
else t.Starting_Date end StartingDate,
case when lead(t.domain_id) over (partition by t.domain_id order by t.starting_date) =
t1.Domain_ID then isnull(last_value(t.End_Date) over (partition by t.domain_id order by t.end_date rows between unbounded preceding and unbounded following),t.End_Date)
else t.End_Date end end_date
from testtbl t
left join testtbl t1 on t.Domain_ID = t1.Domain_ID
and t.End_Date = t1.Starting_Date
and t.Starting_Date < t1.Starting_Date
Output:
Domain_ID StartingDate end_date
1 2011-01-01 2011-01-15
2 2011-05-11 2011-05-12
2 2011-05-13 2011-05-14

How To Count Rows Based on Values of Two Variables in SSIS

I am fairly new to SSIS, and now I have this requirement to exclude weekends in order to do a performance management. Now I have created a calendar and marked the weekends; what I am trying to do, using SSIS, is get the start and end date of every status and count how many weekends are there. I am kind of struggling to know which component to use to achieve this task.
So I have mainly two tables:
1- Table Calendar
2- Table History-Log
Calendar has the following columns:
1- ID
2- date
3- year
4- month
5- day of week
6- isweekend
History-Log has the following:
1- ID
2- Status
3- startdate
4- enddate
Your help is really appreciated.
I'm not an SSIS user, so apologies if this answer does not help, but if I wanted to get the result you describe, based on some test data:
DECLARE #Calendar TABLE (
ID INT,
[Date] DATETIME,
[Year] INT,
[Month] INT,
[DayOfWeek] VARCHAR(10),
IsWeekend BIT
)
DECLARE #HistoryLog TABLE (
ID INT,
[Status] INT,
StartDate DATETIME,
EndDate DATETIME
)
DECLARE #StartDate DATE = '20100101', #NumberOfYears INT = 10
DECLARE #CutoffDate DATE = DATEADD(YEAR, #NumberOfYears, #StartDate);
INSERT INTO #Calendar
SELECT ROW_NUMBER() OVER (ORDER BY d) AS ID,
d AS [Date],
DATEPART(YEAR,d) AS [Year],
DATEPART(MONTH,d) AS [Month],
DATENAME(WEEKDAY,d) AS [DayOfWeek],
CASE WHEN DATENAME(WEEKDAY,d) IN ('Saturday','Sunday') THEN 1 ELSE 0 END AS IsWeekend
FROM
(
SELECT d = DATEADD(DAY, rn - 1, #StartDate)
FROM
(
SELECT TOP (DATEDIFF(DAY, #StartDate, #CutoffDate))
rn = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
FROM sys.all_objects AS s1
CROSS JOIN sys.all_objects AS s2
ORDER BY s1.[object_id]
) AS x
) AS y;
INSERT INTO #HistoryLog
SELECT 1, 3, '2016-01-05', '2016-01-20'
UNION
SELECT 2, 7, '2016-01-08', '2016-01-25'
UNION
SELECT 3, 4, '2016-01-01', '2016-02-03'
UNION
SELECT 4, 3, '2016-02-09', '2016-02-10'
I would use a query like this to return all of the HistoryLog records with a count of the number of weekend days between their StartDate and EndDate:
SELECT h.ID,
h.[Status],
h.StartDate,
h.EndDate,
COUNT(c.ID) AS WeekendDays
FROM #HistoryLog h
LEFT JOIN #Calendar c ON c.[Date] >= h.StartDate AND c.[Date] <= h.EndDate AND c.IsWeekend = 1
GROUP BY h.ID, h.[Status], h.StartDate, h.EndDate
ORDER BY 1
If you wanted to know the number of weekends, rather than the number of weekend days, we'd need to slightly amend this logic (and define how a range containing only one weekend day - or one starting on a Sunday and ending on a Saturday inclusive - should be handled). Assuming you just want to know how many distinct weekends are at least partially within the date range, you could do:
SELECT h.ID,
h.[Status],
h.StartDate,
h.EndDate,
COUNT(weekends.ID) AS Weekends
FROM #HistoryLog h
LEFT JOIN
(
SELECT c.ID,
c.[Date] AS SatDate,
DATEADD(DAY,1,c.[Date]) AS SunDate
FROM #Calendar c
WHERE c.[DayOfWeek] = 'Saturday'
) weekends ON h.StartDate BETWEEN weekends.SatDate AND weekends.SunDate
OR h.EndDate BETWEEN weekends.SatDate AND weekends.SunDate
OR (h.StartDate <= weekends.SatDate AND h.EndDate >= weekends.SunDate)
GROUP BY h.ID, h.[Status], h.StartDate, h.EndDate

Count the number of times a date is contained between 2 date columns

I have a table that looks like this
ID start_dt end_dt
--------------------------
1 1951-12-05 1951-12-21
2 1951-12-19 1951-12-31
3 1957-12-05 1957-12-19
4 1995-12-06 1995-12-20
5 1996-06-24 1996-07-08
6 1997-05-12 1997-05-26
7 1997-10-07 1997-10-21
8 1997-12-25 1998-01-08
9 1998-01-19 1998-02-02
10 1998-08-05 1998-08-19
I'd like to know how many times each individual date is contained between start_dt and end_dt.
From my example, the result set should look something like this
date count
------------------
1951-12-05 1
1951-12-06 1
...
1951-12-19 2
1951-12-20 2
1951-12-21 2
...
1998-08-19 1
What would be the best way to do this?
EDIT: To clarify, I need each date that appears at least once in a date range (between start_dt and end_dt) to get a row in my result set and I want the number of ranges that this date fits in next to it
hope this helps
When you need to turn 2 values (a range) into a series of rows you can use a number table (see Aaron Bertrand's The SQL Server Numbers Table article if you aren't familiar with the idea).
I've used shorter and simpler data but you should get the idea.
declare #dates table (id int not null, start_dt date not null, end_dt date not null)
insert #dates values (1, '20160601', '20160603'),
(2, '20160603', '20160605'),
(3, '20160610', '20160612')
;with cte as (
select
row_number() over (order by so1.object_id) - 1 as n
from
sys.objects so1
cross join sys.objects so2
)
select
dateadd(d, c.n, d.start_dt) as [date],
count(*)
from
#dates d
join cte c on dateadd(d, c.n, d.start_dt) <= d.end_dt
group by
dateadd(d, c.n, d.start_dt)
order by
dateadd(d, c.n, d.start_dt)
If there are no more than a few days (< 80 or so, depending in your sys.objects table) between start_dt and end_dt, you can use this approach (inspired on Rhys').
DECLARE #dates TABLE (id int not null, start_dt date not null, end_dt date not null)
INSERT #dates VALUES
(1, '1951-12-05', '1951-12-21'),
(2, '1951-12-19', '1951-12-31'),
(3, '1957-12-05', '1957-12-19'),
(4, '1995-12-06', '1995-12-20'),
(5, '1996-06-24', '1996-07-08'),
(6, '1997-05-12', '1997-05-26'),
(7, '1997-10-07', '1997-10-21'),
(8, '1997-12-25', '1998-01-08'),
(9, '1998-01-19', '1998-02-02'),
(10, '1998-08-05', '1998-08-19');
WITH RawData AS (
SELECT
DATEADD(d, n.n, d.start_dt) AS [date]
FROM #dates d
INNER JOIN (
SELECT ROW_NUMBER() OVER (ORDER BY object_id) - 1 AS n FROM sys.objects
) n ON DATEADD(d, n.n, d.start_dt) <= d.end_dt
)
SELECT [date], COUNT(*) [count]
FROM RawData
GROUP BY [date]
ORDER BY [date]
I don't think this could take long even with 1000 date ranges. Perhaps you are using a table with more fields and even missing some index?
You could use a CTE
WITH CTE AS(SELECT start_dt AS dates FROM Table
UNION ALL
SELECT end_dt AS dates FROM Table)
SELECT CAST(dates as DATE) as Date, COUNT(dates) AS Count
FROM CTE c
GROUP BY c.dates
order by Count desc
Or perhaps you need something broader if your columns are of DATETIME data type. This way will GROUP BY the whole day:
WITH CTE AS(SELECT CAST(start_dt AS DATE) AS dates FROM Table
UNION ALL
SELECT CAST(end_dt AS DATE) AS dates FROM Table)
SELECT Dates as Date, COUNT(Dates) AS Count
FROM CTE c
GROUP BY c.dates
order by Count desc

Select count with 0 count

Lets say I have following query:
SELECT top (5) CAST(Created AS DATE) as DateField,
Count(id) as Counted
FROM Table
GROUP BY CAST(Created AS DATE)
order by DateField desc
Lets say it will return following data set
DateField Counted
2016-01-18 34
2016-01-17 99
2016-01-14 1
2015-12-28 1
2015-12-27 6
But when I have Counted = 0 for certain Date I would like to get that in result set. So for example it should look like following
DateField Counted
2016-01-18 34
2016-01-17 99
2016-01-16 0
2016-01-15 0
2016-01-14 1
Thank you!
Expanding upon KM's answer, you need a date table which is like a numbers table.
There are many examples on the web but here's a simple one.
CREATE TABLE DateList (
DateValue DATE,
CONSTRAINT PK_DateList PRIMARY KEY CLUSTERED (DateValue)
)
GO
-- Insert dates from 01/01/2015 and 12/31/2015
DECLARE #StartDate DATE = '01/01/2015'
DECLARE #EndDatePlus1 DATE = '01/01/2016'
DECLARE #CurrentDate DATE = #StartDate
WHILE #EndDatePlus1 > #CurrentDate
BEGIN
INSERT INTO DateList VALUES (#CurrentDate)
SET #CurrentDate = DATEADD(dd,1,#CurrentDate)
END
Now you have a table
then you can rewrite your query as follows:
SELECT top (5) DateValue, isnull(Count(id),0) as Counted
FROM DateList
LEFT OUTER JOIN Table
on DateValue = CAST(Created AS DATE)
GROUP BY DateValue
order by DateValue desc
Two notes:
You'll need a where clause to specify your range.
A join on a cast isn't ideal. The type in your date table should match the type in your regular table.
One more solution as a single query:
;WITH dates AS
(
SELECT CAST(DATEADD(DAY, ROW_NUMBER() OVER (ORDER BY [object_id]) - 1, '2016-01-14') as date) 'date'
FROM sys.all_objects
)
SELECT TOP 5
[date] AS 'DateField',
SUM(CASE WHEN Created IS NULL THEN 0 ELSE 1 END) AS 'Counted'
FROM dates
LEFT JOIN Table ON [date]=CAST(Created as date)
GROUP BY [date]
ORDER BY [date]
For a more edgy solution, you could use a recursive common table expression to create the date list. PLEASE NOTE: do not use recursive common table expressions in your day job! They are dangerous because it is easy to create one that never terminates.
DECLARE #StartDate date = '1/1/2016';
DECLARE #EndDate date = '1/15/2016';
WITH DateList(DateValue)
AS
(
SELECT DATEADD(DAY, 1, #StartDate)
UNION ALL
SELECT DATEADD(DAY, 1, DateValue)
FROM DateList
WHERE DateList.DateValue < #EndDate
)
SELECT DateValue, isnull(Count(id),0) as Counted
FROM DateList
LEFT OUTER JOIN [Table]
ON DateValue = CAST(Created AS DATE)
GROUP BY DateValue
ORDER BY DateValue DESC

Resources