GROUP BY DAY, CUMULATIVE SUM - sql-server

I have a table in MSSQL with the following structure:
PersonId
StartDate
EndDate
I need to be able to show the number of distinct people in the table within a date range or at a given date.
As an example i need to show on a daily basis the totals per day, e.g. if we have 2 entries on the 1st June, 3 on the 2nd June and 1 on the 3rd June the system should show the following result:
1st June: 2
2nd June: 5
3rd June: 6
If however e.g. on of the entries on the 2nd June also has an end date that is 2nd June then the 3rd June result would show just 5.
Would someone be able to assist with this.
Thanks
UPDATE
This is what i have so far which seems to work. Is there a better solution though as my solution only gets me employed figures. I also need unemployed on another column - unemployed would mean either no entry in the table or date not between and no other entry as employed.
CREATE TABLE #Temp(CountTotal int NOT NULL, CountDate datetime NOT NULL);
DECLARE #StartDT DATETIME
SET #StartDT = '2015-01-01 00:00:00'
WHILE #StartDT < '2015-08-31 00:00:00'
BEGIN
INSERT INTO #Temp(CountTotal, CountDate)
SELECT COUNT(DISTINCT PERSON.Id) AS CountTotal, #StartDT AS CountDate FROM PERSON
INNER JOIN DATA_INPUT_CHANGE_LOG ON PERSON.DataInputTypeId = DATA_INPUT_CHANGE_LOG.DataInputTypeId AND PERSON.Id = DATA_INPUT_CHANGE_LOG.DataItemId
LEFT OUTER JOIN PERSON_EMPLOYMENT ON PERSON.Id = PERSON_EMPLOYMENT.PersonId
WHERE PERSON.Id > 0 AND DATA_INPUT_CHANGE_LOG.Hidden = '0' AND DATA_INPUT_CHANGE_LOG.Approved = '1'
AND ((PERSON_EMPLOYMENT.StartDate <= DATEADD(MONTH,1,#StartDT) AND PERSON_EMPLOYMENT.EndDate IS NULL)
OR (#StartDT BETWEEN PERSON_EMPLOYMENT.StartDate AND PERSON_EMPLOYMENT.EndDate) AND PERSON_EMPLOYMENT.EndDate IS NOT NULL)
SET #StartDT = DATEADD(MONTH,1,#StartDT)
END
select * from #Temp
drop TABLE #Temp

You can use the following query. The cte part is to generate a set of serial dates between the start date and end date.
DECLARE #ViewStartDate DATETIME
DECLARE #ViewEndDate DATETIME
SET #ViewStartDate = '2015-01-01 00:00:00.000';
SET #ViewEndDate = '2015-02-25 00:00:00.000';
;WITH Dates([Date])
AS
(
SELECT #ViewStartDate
UNION ALL
SELECT DATEADD(DAY, 1,Date)
FROM Dates
WHERE DATEADD(DAY, 1,Date) <= #ViewEndDate
)
SELECT [Date], COUNT(*)
FROM Dates
LEFT JOIN PersonData ON Dates.Date >= PersonData.StartDate
AND Dates.Date <= PersonData.EndDate
GROUP By [Date]
Replace the PersonData with your table name
If startdate and enddate columns can be null, then you need to add
addditional conditions to the join
It assumes one person has only one record in the same date range

You could do this by creating data where every start date is a +1 event and end date is -1 and then calculate a running total on top of that.
For example if your data is something like this
PersonId StartDate EndDate
1 20150101 20150201
2 20150102 20150115
3 20150101
You first create a data set that looks like this:
EventDate ChangeValue
20150101 +2
20150102 +1
20150115 -1
20150201 -1
And if you use running total, you'll get this:
EventDate Total
2015-01-01 2
2015-01-02 3
2015-01-15 2
2015-02-01 1
You can get it with something like this:
select
p.eventdate,
sum(p.changevalue) over (order by p.eventdate asc) as total
from
(
select startdate as eventdate, sum(1) as changevalue from personnel group by startdate
union all
select enddate, sum(-1) from personnel where enddate is not null group by enddate
) p
order by p.eventdate asc
Having window function with sum() requires SQL Server 2012. If you're using older version, you can check other options for running totals.
My example in SQL Fiddle
If you have dates that don't have any events and you need to show those too, then the best option is probably to create a separate table of dates for the whole range you'll ever need, for example 1.1.2000 - 31.12.2099.
-- Edit --
To get count for a specific day, it's possible use the same logic, but just sum everything up to that day:
declare #eventdate date
set #eventdate = '20150117'
select
sum(p.changevalue)
from
(
select startdate as eventdate, 1 as changevalue from personnel
where startdate <= #eventdate
union all
select enddate, -1 from personnel
where enddate < #eventdate
) p
Hopefully this is ok, can't test since SQL Fiddle seems to be unavailable.

Related

Is it possible to use the SQL DATEADD function but exclude dates from a table in the calculation?

Is it possible to use the DATEADD function but exclude dates from a table?
We already have a table with all dates we need to exclude. Basically, I need to add number of days to a date but exclude dates within a table.
Example: Add 5 days to 01/08/2021. Dates 03/08/2021 and 04/08/2021 exist in the exclusion table. So, resultant date should be: 08/08/2021.
Thank you
A bit of a "wonky" solution, but it works. Firstly we use a tally to create a Calendar table of dates, that exclude your dates in the table, then we get the nth row, where n is the number of days to add:
DECLARE #DaysToAdd int = 5,
#StartDate date = '20210801';
WITH N AS(
SELECT N
FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
Tally AS(
SELECT 0 AS I
UNION ALL
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS I
FROM N N1, N N2, N N3), --Up to 1,000
Calendar AS(
SELECT DATEADD(DAY,T.I, #StartDate) AS D,
ROW_NUMBER() OVER (ORDER BY T.I) AS I
FROM Tally T
WHERE NOT EXISTS (SELECT 1
FROM dbo.DatesTable DT
WHERE DT.YourDate = DATEADD(DAY,T.I, #StartDate)))
SELECT D
FROM Calendar
WHERE I = #DaysToAdd+1;
A best solution is probably a calendar table.
But if you're willing to traverse through every date, then a recursive CTE can work. It would require tracking the total iterations and another column to substract if any traversed date was in the table. The exit condition uses the total difference.
An example dataset would be:
CREATE TABLE mytable(mydate date); INSERT INTO mytable VALUES ('20210803'), ('20210804');
And an example function run in it's own batch:
ALTER FUNCTION dbo.fn_getDays (#mydate date, #daysadd int)
RETURNS date
AS
BEGIN
DECLARE #newdate date;
WITH CTE(num, diff, mydate) AS (
SELECT 0 AS [num]
,0 AS [diff]
,DATEADD(DAY, 0, #mydate) [mydate]
UNION ALL
SELECT num + 1 AS [num]
,CTE.diff +
CASE WHEN DATEADD(DAY, num+1, #mydate) IN (SELECT mydate FROM mytable)
THEN 0 ELSE 1 END
AS [diff]
,DATEADD(DAY, num+1, #mydate) [mydate]
FROM CTE
WHERE (CTE.diff +
CASE WHEN DATEADD(DAY, num+1, #mydate) IN (SELECT mydate FROM mytable)
THEN 0 ELSE 1 END) <= #daysadd
)
SELECT #newdate = (SELECT MAX(mydate) AS [mydate] FROM CTE);
RETURN #newdate;
END
Running the function:
SELECT dbo.fn_getDays('20210801', 5)
Produces output, which is the MAX(mydate) from the function:
----------
2021-08-08
For reference the MAX(mydate) is taken from this dataset:
n diff mydate
----------- ----------- ----------
0 0 2021-08-01
1 1 2021-08-02
2 1 2021-08-03
3 1 2021-08-04
4 2 2021-08-05
5 3 2021-08-06
6 4 2021-08-07
7 5 2021-08-08
You can use the IN clause.
To perform the test, I used a W3Schools Test DB
SELECT DATE_ADD(BirthDate, INTERVAL 10 DAY) FROM Employees WHERE FirstName NOT IN (Select FirstName FROM Employees WHERE FirstName LIKE 'N%')
This query shows all the birth dates + 10 days except for the only employee with name starting with N (Nancy)

SQL-Server Get data of each day that falls between two dates

I have a table with job schedules :
job_id [unique ID]
pref_start [date]
spec_duration [time in seconds]
I can calculate the end date from the preferred start and duration. The pref_start is not fixed, and can be changed at whim by the engineers.
I need to report activity in any given week, so if I have data similar to:
jid start end
J1 01/01/yyyy 15/02/yyyy
J2 07/01/yyyy 08/02/yyyy
J3 09/02/yyyy 21/03/yyyy
How would I query "tell me the job id's that occur on each day of the week 07/02/yyyy to 12/02/yyyy"
First find the matching intervals between your jobs and your filtering interval, then the amount of days for the filter interval and the overlapping intervals must match:
DECLARE #Jobs TABLE (
ID INT IDENTITY,
StartDate DATE,
EndDate DATE)
INSERT INTO #Jobs (
StartDate,
EndDate)
VALUES
('2019-01-01', '2019-02-15'),
('2019-01-07', '2019-02-08'),
('2019-02-09', '2019-03-21')
DECLARE #FilterStartDate DATE = '2019-02-07'
DECLARE #FilterEndDate DATE = '2019-02-12'
;WITH AtLeast1DayOverlappingJobs AS
(
SELECT
J.ID,
J.StartDate,
J.EndDate,
OverlappingStartDate = CASE
WHEN J.StartDate > #FilterStartDate THEN J.StartDate ELSE #FilterStartDate END, -- Highest of 2
OverlappingEndDate = CASE
WHEN J.EndDate < #FilterEndDate THEN J.EndDate ELSE #FilterEndDate END -- Lowest of 2
FROM
#Jobs AS J
WHERE
-- They share at least 1 day
#FilterStartDate <= J.EndDate AND #FilterEndDate >= J.StartDate
)
SELECT
T.*
FROM
AtLeast1DayOverlappingJobs AS T
WHERE
-- Amount of days must match between filter and overlapping periods
DATEDIFF(DAY, #FilterStartDate, #FilterEndDate) = DATEDIFF(DAY, T.OverlappingStartDate, T.OverlappingEndDate)
Results:
ID StartDate EndDate OverlappingStartDate OverlappingEndDate
1 2019-01-01 2019-02-15 2019-02-07 2019-02-12

SQL - Counting incidents at time period (done) but including another variable

I'm newish to SQL so sorry if the code is a little scruffy.
Basically I am creating a count of fire engines in use on every hour, which I have done, and that bit works. So I have a count of this for the past five years. Sorted.
But now I want to run it for a specific group of incidents (about 300 of them), showing how many engines were at that incident, every hour, and how many others were in use at the same time, but somewhere else.
My basic working code (that I modified from https://stackoverflow.com/a/43337534/5880512) is as follows. It just counts all P1 and P2 mobilisations at the defined time.
DECLARE #startdate datetime = '2018-05-03 00:00:00'
DECLARE #enddate datetime = '2018-05-05 00:00:00'
;with cte as
(
select #startdate startdate
union all
select DATEADD(minute, 60, startdate)
FROM cte
WHERE DATEADD(minute, 60, startdate) < #enddate
)
select convert(varchar(20), startdate, 120) as CreationTime, (select count(*) FROM MB_MOBILISATIONS WHERE MB_SEND < startdate and MB_LEAVE > startdate And (MB_CALL_SIGN Like '%P1' Or MB_CALL_SIGN Like '%P2')) as Count
from cte
option (maxrecursion 0)
To split these up for a particular incident, I can put the incident ref into the where clause, one as = so it will give me engines at that incident, and one as <> so it gives me the rest. This bit works too.
select convert(varchar(20), startdate, 120) as CreationTime, (select count(*) FROM MB_MOBILISATIONS WHERE MB_SEND < startdate and MB_LEAVE > startdate And (MB_CALL_SIGN Like '%P1' Or MB_CALL_SIGN Like '%P2') and MB_IN_REF = 1704009991) as 'At Incident'
, select convert(varchar(20), startdate, 120) as CreationTime, (select count(*) FROM MB_MOBILISATIONS WHERE MB_SEND < startdate and MB_LEAVE > startdate And (MB_CALL_SIGN Like '%P1' Or MB_CALL_SIGN Like '%P2') and MB_IN_REF <> 1704009991) as 'Other Incident'
The bit I can't work out to do, is to make this work for multiple incidents, without having to change the incident reference manually in the where clause for all 300.
The incident references I want to use will be stored in a temporary table. Ideally, I would like it to pick an ID, set the variables #startdate and #enddate, from the start and end of that incident, then do the hourly count for the duration of that incident.
Hopefully the results would look something like this
IncidentRef DateTime At Incident Other Incident
A 2018-05-03 1:00 4 2
A 2018-05-03 2:00 7 3
A 2018-05-03 3:00 5 3
A 2018-05-03 4:00 2 4
B 2017-03-01 9:00 7 2
B 2017-03-01 10:00 8 3
B 2017-03-01 11:00 6 1
B 2017-03-01 12:00 4 2
I hope that makes sense.
Thanks :)
Use something like this to limit the scope of your search to a smaller list. I've just added and referenced another CTE with a filter. If you're looking to parameterize the list you'll need a different approach like storing those id values in another table first.
with cte as (
select #startdate startdate
union all
select dateadd(minute, 60, startdate)
from cte
where dateadd(minute, 60, startdate) < #enddate
), mobi as (
select * from MB_MOBILISATIONS
where MB_IN_REF in (<insert list here>)
)
select convert(varchar(20), startdate, 120) as CreationTime, m."Count"
from cte cross apply (
select count(*) as "Count" from mobi
where MB_SEND < startdate and MB_LEAVE > startdate and
(MB_CALL_SIGN like '%P1' or MB_CALL_SIGN like '%P2')
) m;
I went ahead and rewrote your scalar subquery but I guess that's just a personal preference.

SQL query with function

I have a table of data which i am using a count statement to get the amount of records for the submission date
example
AuditId Date Crew Shift Cast ObservedBy 2ndObserver AuditType Product
16 2017-06-27 3 Day B1974, B1975 Glen Mason NULL Identification Billet
20 2017-06-29 1 Day 9879 Corey Lundy NULL Identification Billet
21 2017-06-29 4 Day T9627, T9625 Joshua Dwyer NULL ShippingPad Tee
22 2017-06-29 4 Day NULL Joshua Dwyer NULL Identification Billet
23 2017-06-29 4 Day S9874 Joshua Dwyer NULL ShippingPad Slab
24 2017-06-29 4 Day Bay 40 Joshua Dwyer NULL Identification Billet
Basically I am using the following code to get my results
SELECT YEAR([Date]) as YEAR, CAST([Date] as nvarchar(25)) AS [Date], COUNT(*) as "Audit Count"
FROM AuditResults
where AuditType = 'Identification' AND Product = 'Billet'
group by Date
this returns example
YEAR Date Audit Count
2017 2017-06-27 1
2017 2017-06-29 3
Now I want to be able to retrieve all dates even if blank
so I would like the return to be
YEAR Date Audit Count
2017 2017-06-27 1
2017 2017-06-28 0
2017 2017-06-29 3
I have the following function I am trying to use:
ALTER FUNCTION [dbo].[fnGetDatesInRange]
(
#FromDate datetime,
#ToDate datetime
)
RETURNS #DateList TABLE (Dt date)
AS
BEGIN
DECLARE #TotalDays int, #DaysCount int
SET #TotalDays = DATEDIFF(dd,#FromDate,#ToDate)
SET #DaysCount = 0
WHILE #TotalDays >= #DaysCount
BEGIN
INSERT INTO #DateList
SELECT (#ToDate - #DaysCount) AS DAT
SET #DaysCount = #DaysCount + 1
END
RETURN
END
How do I use my select statement with this function? or is there a better way?
cheers
Try this;
ALTER FUNCTION [dbo].[fnGetDatesInRange]
(
#FromDate datetime,
#ToDate datetime
)
RETURNS #YourData TABLE ([Year] int, DateText nvarchar(25),[Audit Count] int)
AS
begin
insert into #YourData
SELECT
YEAR(allDates.[Date]) as YEAR,
CAST(allDates.[Date] as nvarchar(25)) AS [Date],
COUNT(r.Product) as "Audit Count"
from
(
SELECT
[date]=convert(datetime, CONVERT(float,d.Seq))
FROM
(
select top 100000 row_number() over(partition by 1 order by A.name) as Seq
from syscolumns A, syscolumns B
)d
)allDates
left join
AuditResults r on r.[Date]=allDates.[date] and r.AuditType = 'Identification' AND r.Product = 'Billet'
where
allDates.[Date]>=#FromDate and allDates.[Date]<=#ToDate
group by
allDates.[Date]
return
end
The key is the 'allDates' section ;
SELECT
[date]=convert(datetime, CONVERT(float,d.Seq))
FROM
(
select top 100000 row_number() over(partition by 1 order by A.name) as Seq
from syscolumns A, syscolumns B
)d
This will return all dates between 1900 and 2173 (in this example). Limit that as you need but a nice option. A ton of different ways to approach this clearly
you have to create another table calendar as (Mysql)- idea is the same on all RDBMS-
CREATE TABLE `calendar` (
`dt` DATE NOT NULL,
UNIQUE INDEX `calendar_dt_unique` (`dt`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
;
and fill with date data.
more details

Checking a range of dates in SQL

Following on from a question I put on yesterday, I need to return a range of "available" dates for a laptop rollout "booking system". I want to populate a table of possible available dates a user can book a slot on by checking for each date the total possible number of slots, and subtracting the number of slots already booked.
The logic is as follows:
A technician can build 3 laptops per day.
On any day there may be 1, 2 or 3 technicians available.
A table holds the bookings made
I don't want a table of all possible dates, I want to calculate it on the fly
Relevant tables are:
tl_sb_slotBooking
This contains the bookings already made
tl_sb_availabilityPeriods
This is used to calculate the total number of available slots on a given day
I can bring back a list of dates with a fixed maximum number (in this case 3) of slots:
DECLARE #startDate DATE
DECLARE #endDate DATE
SET #startDate = GETDATE()
SET #endDate = DATEADD(m,3,#startDate)
;
WITH dates(Date) AS
(
SELECT #startdate as Date
UNION ALL
SELECT DATEADD(d,1,[Date])
FROM dates
WHERE DATE < #enddate
)
SELECT Date
FROM dates
EXCEPT
SELECT date
FROM tl_sb_booking
GROUP BY date
HAVING COUNT(date) >= 3
However, the maximum won't always be 3, it changes for each day.
I can find the maximum possible slots for a given day:
DECLARE #myDate DATETIME = '2013-06-22'
SELECT SUM(laptopsPerDay) AS totalSlots
FROM tl_sb_technicianAvailability
WHERE startDate <= #myDate AND endDate >= #myDate
AND availabiltyStateID=3
it will bring back 6 as the total number of slots available for 2013-06-22. (The availabilityStateID field is used to store available/unavailable etc.)
So, the bit I am stuck on is combining the two.
What I want is for each possible date, if the number of slots already booked is less than the number of possible slots for that day, add it to the table being returned (otherwise don't).
Firstly, althought you are only generating a small list, using a CTE to generate a sequential list performs terribly and is best avoided.
For the sake of this I will use the system table Master..spt_values for a sequential list of numbers, but if you are worried about using undocumented system tables then there are other methods in the link above.
The first thing I would do is split the technician availability dates into a row per day, this will allow for technicians who are available for only part of the peiod required (e.g. from your sceen shot of the table if you wanted to query from 18th June to 26th June none of the technicians show would appear as available using the query you have posted):
SELECT Date = DATEADD(DAY, spt.Number, ta.StartDate),
ta.TechnicianID,
ta.LapTopsPerDay
FROM tl_sb_technicianAvailability ta
INNER JOIN Master..spt_values spt
ON spt.Type = 'P'
AND spt.Number BETWEEN 0 AND DATEDIFF(DAY, ta.startDate, ta.EndDate)
This would simply turn:
TechnicianID StartDate EndDate LapTopsPerDay
1 20130620 20130624 3
into
Date TechnicianID LapTopsPerDay
20130620 1 3
20130621 1 3
20130622 1 3
20130623 1 3
20130624 1 3
You can then limit this list to the date range required, and sum up the total laptops than can be done as this is not needed on a technicial level:
WITH ExplodedAvailability AS
( SELECT Date = DATEADD(DAY, spt.Number, ta.StartDate),
ta.TechnicianID,
ta.LapTopsPerDay
FROM tl_sb_technicianAvailability ta
INNER JOIN Master..spt_values spt
ON spt.Type = 'P'
AND spt.Number BETWEEN 0 AND DATEDIFF(DAY, ta.startDate, ta.EndDate)
)
SELECT Date, TotalLaptops = SUM(LapTopsPerDay)
FROM ExplodedAvailability
WHERE Date >= #StartDate
AND Date < #EndDate
GROUP BY Date;
Finally you can LEFT JOIN to the bookings table to get the available slots per day
WITH ExplodedAvailability AS
( SELECT Date = DATEADD(DAY, spt.Number, ta.StartDate),
ta.TechnicianID,
ta.LapTopsPerDay
FROM tl_sb_technicianAvailability ta
INNER JOIN Master..spt_values spt
ON spt.Type = 'P'
AND spt.Number BETWEEN 0 AND DATEDIFF(DAY, ta.startDate, ta.EndDate)
), Availability AS
( SELECT Date, TotalLaptops = SUM(LapTopsPerDay)
FROM ExplodedAvailability
WHERE Date >= #StartDate
AND Date < #EndDate
GROUP BY Date
), Bookings AS
( SELECT Date, SlotsBooked = COUNT(*)
FROM tl_sb_booking
GROUP BY Date
)
SELECT Availability.Date,
Availability.TotalLaptops,
RemainingSlots = Availability.TotalLaptops - ISNULL(Bookings.SlotsBooked, 0)
FROM Availability
LEFT JOIN Bookings
ON Bookings.Date = Availability.Date;
I think what you are after is to add a booking to the next available day, so the query to do this would be:
DECLARE #UserID INT = 1;
WITH ExplodedAvailability AS
( SELECT Date = DATEADD(DAY, spt.Number, ta.StartDate),
ta.TechnicianID,
ta.LapTopsPerDay
FROM tl_sb_technicianAvailability ta
INNER JOIN Master..spt_values spt
ON spt.Type = 'P'
AND spt.Number BETWEEN 0 AND DATEDIFF(DAY, ta.startDate, ta.EndDate)
), Availability AS
( SELECT Date, TotalLaptops = SUM(LapTopsPerDay)
FROM ExplodedAvailability
WHERE Date >= CAST(GETDATE() AS DATE)
GROUP BY Date
), Bookings AS
( SELECT Date, SlotsBooked = COUNT(*)
FROM tl_sb_booking
GROUP BY Date
)
INSERT tl_sb_slotBooking (UserID, Date)
SELECT #UserID, MIN(Availability.Date)
FROM Availability
LEFT JOIN Bookings
ON Bookings.Date = Availability.Date
WHERE Availability.TotalLaptops > ISNULL(Bookings.SlotsBooked, 0)
Should this be of use to anyone, this is the way I ultimately did it:
DECLARE #startDate DATE
DECLARE #endDate DATE
SET #startDate = GETDATE()
SET #endDate = DATEADD(m,3,#startDate)
;
WITH dates(currentDate) AS
(
SELECT #startdate as currentDate
UNION ALL
SELECT DATEADD(d,1,[currentDate])
FROM dates
WHERE currentDate < #enddate
)
SELECT currentDate
FROM dates
WHERE /* slots booked for date */
(
SELECT count([date])
FROM tl_sb_booking
where [date] = currentDate
)
<
/* total slots available */
(
SELECT SUM(laptopsPerDay) AS totalSlots
FROM tl_sb_technicianAvailability
WHERE startDate <= currentDate AND endDate >= currentDate
AND availabiltyStateID=3
)

Resources