SQL counts with left join

SQL counts with left join - sql-server

On my Microsoft SQL Server 2016 database I'm trying to determine how many labs (Lab_Space table) have had an assessment (EHS_Assessment_Audit table) done within the last year, grouped by location (Locations table). It's common to have more than one assessment done per year per lab.
Everything I've tried so far results in more "done" counts than labs. For example:
-- Per-location lab counts: total labs vs. labs audited within the last year.
-- cte: one row per lab that has at least one audit dated within the past year.
WITH cte AS
(
SELECT DISTINCT
Lab_Space_Id
FROM
EHS_Assessment_Audit
WHERE
Audit_Date >= DATEADD(year, -1, GETDATE())
)
SELECT
l.Site_Name, l.Campus_Name,
-- DISTINCT collapses any lab rows that the joins below repeat.
COUNT(DISTINCT s.id) Total,
-- Adds 1 per joined row, not per lab, so any repeated lab row inflates this figure.
-- NOTE(review): presumably Locations has several rows per Building_Code, which would
-- explain Audited exceeding Total; COUNT(DISTINCT a.Lab_Space_Id) would be immune -- confirm.
SUM(CASE WHEN a.Lab_Space_ID IS NOT NULL THEN 1 ELSE 0 END) Audited
FROM
Lab_Space s
-- LEFT join keeps labs without a recent audit (a.Lab_Space_Id is NULL for them).
LEFT OUTER JOIN
cte a ON s.id = a.Lab_Space_Id
JOIN
Locations l ON l.Building_Code = s.Building_Code
GROUP BY
l.Site_Name, l.Campus_Name
ORDER BY
l.Site_Name, l.Campus_Name
The cte there should get me a unique list of labs that have had an assessment done, and then I'm trying to count that grouped by location. I'm ending up with output though where it'll say there are 178 total and 1080 audited for a single site/campus combo.

I think using a CTE in this case is going to be more trouble than it's worth. A subquery is going to be easier to read and modify. For example:
-- For every Locations row: how many audits its building had in the last year
-- and how many distinct labs those audits covered. Locations whose building
-- has no recent audit are kept and reported as 0 / 0.
SELECT
    loc.Site_Name,
    loc.Campus_Name,
    COALESCE(recent.NumAudits, 0) AS NumTotalAudits,
    COALESCE(recent.NumLabs, 0) AS AuditedLabs
FROM Locations AS loc
LEFT JOIN (
    -- One row per building that has at least one audit in the last year.
    SELECT
        lab.Building_Code,
        COUNT(*) AS NumAudits,
        COUNT(DISTINCT lab.Lab_Space_Id) AS NumLabs
    FROM Lab_Space AS lab
    INNER JOIN EHS_Assessment_Audit AS aud
        ON lab.Lab_Space_Id = aud.Lab_Space_Id
    WHERE aud.Audit_Date >= DATEADD(year, -1, GETDATE())
    GROUP BY lab.Building_Code
) AS recent
    ON loc.Building_Code = recent.Building_Code
With overly simplistic temp tables and example data:
-- Self-contained demo of the per-location audit counts using temp tables.
CREATE TABLE #EHS_Assessment_Audit (Lab_Space_Id int, Audit_Date datetime)
CREATE TABLE #Lab_Space (Lab_Space_Id int, Building_Code int)
CREATE TABLE #Locations (Location_Id int, Building_Code int, Site_Name nvarchar(30), Campus_Name nvarchar(30))

INSERT INTO #Locations (Location_Id, Building_Code, Site_Name, Campus_Name)
VALUES (1, 1, 'Location1', 'Campus1'), (2, 2, 'Location2', 'Campus2')

INSERT INTO #Lab_Space (Lab_Space_Id, Building_Code)
VALUES (1, 1), (2, 1), (3, 2), (4, 2)

-- Audit dates are expressed relative to today so the "last year" filter below
-- keeps matching the same rows no matter when the script is run:
-- lab 1 has two recent audits, lab 2 has one, lab 3 only an old one, lab 4 none.
INSERT INTO #EHS_Assessment_Audit (Lab_Space_Id, Audit_Date)
VALUES
    (1, DATEADD(day, -10, GETDATE())),
    (1, DATEADD(day, -40, GETDATE())),
    (2, DATEADD(day, -10, GETDATE())),
    (3, DATEADD(year, -3, GETDATE()))

-- Dump the sample data for inspection.
SELECT * FROM #Locations
SELECT * FROM #Lab_Space
SELECT * FROM #EHS_Assessment_Audit

-- Expected result: Location1/Campus1 -> 3 audits, 2 labs; Location2/Campus2 -> 0, 0.
SELECT
    l.Site_Name,
    l.Campus_Name,
    COALESCE(b.NumAudits, 0) AS NumTotalAudits,
    COALESCE(b.NumLabs, 0) AS AuditedLabs
FROM #Locations AS l
LEFT JOIN (
    -- One row per building with at least one audit in the last year.
    SELECT
        s.Building_Code,
        COUNT(*) AS NumAudits,
        COUNT(DISTINCT s.Lab_Space_Id) AS NumLabs
    FROM #Lab_Space AS s
    INNER JOIN #EHS_Assessment_Audit AS a
        ON s.Lab_Space_Id = a.Lab_Space_Id
    WHERE a.Audit_Date >= DATEADD(year, -1, GETDATE())
    GROUP BY s.Building_Code
) AS b
    ON l.Building_Code = b.Building_Code
ORDER BY l.Site_Name, l.Campus_Name

Related

How to count number of report runs last 7 days? Year to date? All time?

I'm trying to create an SSRS report that looks similar to the table below:
Report
Earliest Run
Recent Run
Runs Last 7 days
Runs YTD
Runs All Time
Report 1
3/3/19 1:30
7/8/22 2:45
8
86
233
I know how to query the last 3 columns individually, but is it possible to get all 3 columns using one query? I have tried the query below to show my line of thinking, but it's not working as desired.
-- Attempt at one row per report with first run, last run and a 7-day run count.
SELECT Report
,Min(TimeStart) AS EarliestRun
,Max(TimeStart) AS RecentRun
-- TimeStart is tested here outside any aggregate while the query groups by Report only;
-- the condition needs to sit inside the aggregate (e.g. SUM(CASE ...)) to be evaluated per row.
,CASE WHEN TimeStart BETWEEN GETDATE()-7 AND GETDATE() THEN COUNT(Report) END AS RunsLast7Days
FROM ReportHistory
-- This filter discards every run before 2019, so no "all time" figure can come from this query.
WHERE TimeStart BETWEEN '1/1/2019 00:00' AND GETDATE()
GROUP BY Report

Yes - use conditional aggregation. Don't filter the query at all since you need an "all time" value. Instead, use sum with a conditional expression for the periods of interest.
-- One row per report: first run, latest run and run counts for three periods.
-- No WHERE clause on purpose: the "all time" column needs every row, so the
-- shorter periods are counted with a conditional SUM instead of a filter.
select
    Report,
    min(TimeStart) as [Earliest Run],
    max(TimeStart) as [Recent Run],
    -- runs whose start falls within the last 7 days
    sum(case when TimeStart >= dateadd(day, -7, getdate()) then 1 else 0 end) as [Runs Last 7 days],
    -- runs since 1 January of the current year
    sum(case when TimeStart >= datefromparts(year(getdate()), 1, 1) then 1 else 0 end) as [Runs YTD],
    count(*) as [Runs All Time]
from dbo.ReportHistory
group by Report
order by Report;

I was going to propose using CROSS APPLY but SMor has done it with less code
-- Demo data: one report and its run history.
CREATE TABLE #Reports (
    ReportId INT NOT NULL,
    ReportName VARCHAR(20) NOT NULL
);
INSERT INTO #Reports (ReportId, ReportName)
VALUES (1, 'Report 1');

CREATE TABLE #ReportRun (
    ReportId INT,
    RunDateTime DATETIME2(2)
);
INSERT INTO #ReportRun (ReportId, RunDateTime)
VALUES
    (1, '20220508 10:00:00'),
    (1, '20220502 10:00:00'),
    (1, '20220101 10:00:00'),
    (1, '20210501 10:00:00'),
    (1, '20210209 10:00:00'),
    (1, '20200509 10:00:00'),
    (1, '20190509 10:00:00');
GO

-- One row per report with run counts for the last 7 days, year to date and all time.
-- Each APPLY is a scalar aggregate WITHOUT GROUP BY: COUNT(*) then always returns
-- exactly one row (0 when nothing matches). With GROUP BY an empty match yields no
-- row at all and CROSS APPLY would silently drop the whole report from the output.
SELECT
    R.ReportName,
    B.RunLast7Days,
    C.RunYearToDate,
    D.RunAllTime
FROM #Reports AS R
CROSS APPLY (
    SELECT COUNT(*) AS RunLast7Days
    FROM #ReportRun
    WHERE ReportId = R.ReportId
      -- window starts at midnight seven days ago rather than at the current time of day
      AND RunDateTime >= DATEADD(day, -7, CONVERT(date, GETDATE()))
) AS B
CROSS APPLY (
    SELECT COUNT(*) AS RunYearToDate
    FROM #ReportRun
    WHERE ReportId = R.ReportId
      -- first instant of the current year
      AND RunDateTime >= DATEADD(yy, DATEDIFF(yy, 0, GETDATE()), 0)
) AS C
CROSS APPLY (
    SELECT COUNT(*) AS RunAllTime
    FROM #ReportRun
    WHERE ReportId = R.ReportId
) AS D
ORDER BY R.ReportName

CTE - LEFT OUTER JOIN Performance Problem

Using SQL Server 2017.
SQL FIDDLE: LINK
-- TABLE_1: one row per plan with the date its first period starts.
CREATE TABLE [TABLE_1]
(
PLAN_NR decimal(28,6) NULL,
START_DATE datetime NULL,
);
-- TABLE_2: (plan, period number) pairs; the sample data below contains repeats and gaps.
CREATE TABLE [TABLE_2]
(
PLAN_NR decimal(28,6) NULL,
PERIOD_NR decimal(28,6) NULL,
);
INSERT INTO TABLE_1 (PLAN_NR, START_DATE)
VALUES (1, '2020-05-01'), (2, '2020-08-05');
INSERT INTO TABLE_2 (PLAN_NR, PERIOD_NR)
VALUES (1, 1), (1, 2), (1, 5), (1, 6), (1, 5), (1, 6), (1, 17),
(2, 2), (2, 3), (2, 5), (2, 2), (2, 17), (2, 28);
-- ALL_PERIODS: the first 100 period start dates of every plan.
CREATE VIEW ALL_PERIODS
AS
-- rec_cte: consecutive 7-day periods per plan. The anchor row is period 1 starting
-- at the plan's START_DATE; each recursive step starts at the previous next_date and
-- recursion stops once period_nr reaches 100.
WITH rec_cte AS
(
SELECT
PLAN_NR, START_DATE,
1 period_nr, DATEADD(day, 7, START_DATE) next_date
FROM
TABLE_1
UNION ALL
SELECT
PLAN_NR, next_date,
period_nr + 1, DATEADD(day, 7, next_date)
FROM
rec_cte
WHERE
period_nr < 100
),
-- cte1: every weekly period start, plus one extra start row for each weekly period
-- whose START_DATE and next_date fall in different months.
cte1 AS
(
SELECT
PLAN_NR, period_nr, START_DATE
FROM
rec_cte
UNION ALL
SELECT
-- EOMONTH(next_date, -1) is the last day of the month before next_date;
-- adding one day gives the first day of next_date's month.
PLAN_NR, period_nr, DATEADD(DAY, 1, EOMONTH(next_date, -1))
FROM
rec_cte
WHERE
MONTH(START_DATE) <> MONTH(next_date)
),
-- cte2: renumber all start dates per plan in chronological order.
cte2 AS (
SELECT *, ROW_NUMBER() OVER (PARTITION BY PLAN_NR ORDER BY START_DATE) rn
FROM cte1
)
-- The chronological row number becomes the published PERIOD_NR; only the first 100 are kept.
SELECT PLAN_NR, rn PERIOD_NR, START_DATE
FROM cte2
WHERE rn <= 100
Table_1 lists plans (PLAN_NR) and their start date (START_DATE).
Table_2 lists plan numbers (PLAN_NR) and periods (1 - X). Per plan number periods can appear several times but can also be missing.
A period lasts seven days, unless the period includes a change of month. Then the period is divided into a part before the end of the month and a part after the end of the month.
The view ALL_PERIODS lists 100 periods per plan according to this system.
My problem is the performance of the following select which I would like to use in a view:
-- Look up the start date of every (plan, period) pair listed in TABLE_2.
-- Pairs without a matching row in ALL_PERIODS are kept with a NULL START_DATE.
SELECT
    t2.PLAN_NR,
    t2.PERIOD_NR,
    a_p.START_DATE
FROM TABLE_2 AS t2
LEFT JOIN ALL_PERIODS AS a_p
    ON a_p.PLAN_NR = t2.PLAN_NR
   AND a_p.PERIOD_NR = t2.PERIOD_NR
From about 4000 entries in TABLE_2 the select becomes incredibly slow.
The join itself does not yet slow down the query. Only with the additional select a_p.START_DATE everything becomes incredibly slow.
I read the view into a temporary table and did the join over that and got no performance issues. (2 seconds for the 4000 entries).
So I assume that the CTE used in the view is the reason for the slow performance.
Unfortunately I can't use temporary tables in views and I would hate to write the data to a normal table.
Is there a way in SQL Server to improve the CTE lag?

Instead of a recursive CTE, generate ALL_PERIODS with a CROSS JOIN between the plan table and a "number table", either persisted or built as a non-recursive CTE.
EG
-- N: the integers 1..100, built without recursion from a 10 x 10 cross join of value lists.
WITH N As
(
select top 100 row_number() over (order by (select null)) i
from (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10) ) v1(i)
cross join (values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10) ) v2(i)
),
-- plan_period: same columns as the recursive rec_cte it replaces.
-- Period i starts 7*(i-1) days after the plan start and ends 7*i days after it,
-- so period 1 starts on the plan's own START_DATE.
plan_period AS
(
SELECT
PLAN_NR,
DATEADD(day, 7*(N.i - 1), START_DATE) START_DATE,
N.i period_nr,
DATEADD(day, 7*N.i, START_DATE) next_date
FROM TABLE_1 CROSS JOIN N
),

If you are able to modify the view, I would recommend the following:
Add a table containing the numbers from 0 up to whatever you think you will need in the database. You can create it with the command below:
-- Persisted numbers table: one row per integer from 0 to 2000.
create table numbers (id int not null primary key)
go
;with cte as (
    select 0 as num
    union all
    select num + 1
    from cte
    where num < 2000 -- change this
)
insert into numbers (id)
select num
from cte
-- lift the default limit of 100 recursion levels, which would abort the fill early
option (maxrecursion 0)
change the first cte in the view to this :
-- rec_cte rebuilt without recursion: one row per plan and per number 0..99 from the
-- persisted numbers table. Number n yields period n+1, starting 7*n days after the
-- plan start and ending 7*(n+1) days after it.
WITH rec_cte AS
(
SELECT
PLAN_NR
, DATEADD(DAY, 7 * i.id, START_DATE) START_DATE
, i.id + 1 period_nr
, DATEADD(DAY, 7 * (i.id + 1), START_DATE) next_date
FROM
TABLE_1 t
CROSS JOIN numbers i
WHERE i.id < 100
),...
Also consider using a temp table instead of a CTE; it might help.

Join for tally table. Need for each day each Cust_ID

Can you help me figure out how to produce the output table shown in the picture below? This is part of some tricky membership/gap processing I need. I could not figure out how to make EACH Cust_ID have an entry for each tally date.
Sample code (the last SELECT needs to be improved):
-- Enrollment spans per customer and contract.
CREATE TABLE #test
(
    Cust_ID VARCHAR(14),
    Contr_ID INT,
    ENR_START DATE,
    ENR_END DATE
)
INSERT INTO #test (Cust_ID, Contr_ID, ENR_START, ENR_END)
VALUES ('1', 1, '2018-01-02', '2018-01-05'),
       ('1', 2, '2018-01-07', '2018-01-08'),
       ('2', 1, '2018-01-06', '2019-01-10')

-- Reporting window for the tally.
-- NOTE(review): the boundaries were not shown in the original post; a 10-day window
-- starting 2018-01-01 is assumed from its "10 days" remark -- confirm.
DECLARE @Period_Start DATE = '2018-01-01',
        @Period_End DATE = '2018-01-10';

-- Tally table #c: one row per calendar day in the window, both ends included.
-- master..spt_values is used only as a convenient source of enough rows to number.
SELECT TOP (DATEDIFF(DAY, @Period_Start, @Period_End) + 1)
    DATEADD(dd, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1, @Period_Start) AS dt
INTO #c
FROM master..spt_values

-- Every tally day with the enrollment rows covering it. Days covered by no
-- enrollment appear once with NULL customer columns; this does NOT produce a row
-- per customer per day.
SELECT
    t.*,
    c.dt
FROM #c AS c
LEFT JOIN #test AS t
    ON c.dt BETWEEN t.ENR_START AND t.ENR_END
ORDER BY t.Cust_ID, c.dt

I think this is the logic that you want:
-- Build every customer/day combination for 2018-01-01 .. 2018-01-10, then attach
-- the enrollment row (if any) that covers that day for that customer.
select
    cu.cust_id,
    c.dt,
    t.*
from (select distinct cust_id from test) cu
join c
    on c.dt between '2018-01-01' and '2018-01-10'
left join test t
    on t.cust_id = cu.cust_id
   and c.dt between enr_start and enr_end
order by
    cu.cust_id,
    c.dt;
The idea is to generate all cust_id/date combinations and then left join to the original data to get any matches.
Here is a db<>fiddle.

Count the number of times a date is contained between 2 date columns

I have a table that looks like this
ID start_dt end_dt
--------------------------
1 1951-12-05 1951-12-21
2 1951-12-19 1951-12-31
3 1957-12-05 1957-12-19
4 1995-12-06 1995-12-20
5 1996-06-24 1996-07-08
6 1997-05-12 1997-05-26
7 1997-10-07 1997-10-21
8 1997-12-25 1998-01-08
9 1998-01-19 1998-02-02
10 1998-08-05 1998-08-19
I'd like to know how many times each individual date is contained between start_dt and end_dt.
From my example, the result set should look something like this
date count
------------------
1951-12-05 1
1951-12-06 1
...
1951-12-19 2
1951-12-20 2
1951-12-21 2
...
1998-08-19 1
What would be the best way to do this?
EDIT: To clarify, I need each date that appears at least once in a date range (between start_dt and end_dt) to get a row in my result set and I want the number of ranges that this date fits in next to it
hope this helps

When you need to turn 2 values (a range) into a series of rows you can use a number table (see Aaron Bertrand's The SQL Server Numbers Table article if you aren't familiar with the idea).
I've used shorter and simpler data but you should get the idea.
-- Sample ranges held in a table variable.
declare @dates table (id int not null, start_dt date not null, end_dt date not null)
insert @dates (id, start_dt, end_dt)
values (1, '20160601', '20160603'),
       (2, '20160603', '20160605'),
       (3, '20160610', '20160612')

-- cte: ad-hoc numbers table 0, 1, 2, ... produced by numbering the rows of
-- sys.objects cross-joined with itself.
;with cte as (
    select
        row_number() over (order by so1.object_id) - 1 as n
    from sys.objects so1
    cross join sys.objects so2
)
-- Expand each range into one row per day (start_dt + n while it is <= end_dt),
-- then count how many ranges contain each day.
select
    dateadd(d, c.n, d.start_dt) as [date],
    count(*) as [count]
from @dates d
join cte c
    on dateadd(d, c.n, d.start_dt) <= d.end_dt
group by
    dateadd(d, c.n, d.start_dt)
order by
    dateadd(d, c.n, d.start_dt)

If there are no more than a few days (< 80 or so, depending on the size of your sys.objects table) between start_dt and end_dt, you can use this approach (inspired by Rhys's answer).
-- Sample ranges from the question, held in a table variable.
DECLARE @dates TABLE (id int not null, start_dt date not null, end_dt date not null)
INSERT @dates (id, start_dt, end_dt) VALUES
(1, '1951-12-05', '1951-12-21'),
(2, '1951-12-19', '1951-12-31'),
(3, '1957-12-05', '1957-12-19'),
(4, '1995-12-06', '1995-12-20'),
(5, '1996-06-24', '1996-07-08'),
(6, '1997-05-12', '1997-05-26'),
(7, '1997-10-07', '1997-10-21'),
(8, '1997-12-25', '1998-01-08'),
(9, '1998-01-19', '1998-02-02'),
(10, '1998-08-05', '1998-08-19');

-- RawData: one row per day of every range. The numbers 0, 1, 2, ... come from
-- numbering the rows of sys.objects, so the longest range that can be expanded
-- is limited by how many rows that catalog view holds.
WITH RawData AS (
    SELECT
        DATEADD(d, n.n, d.start_dt) AS [date]
    FROM @dates d
    INNER JOIN (
        SELECT ROW_NUMBER() OVER (ORDER BY object_id) - 1 AS n
        FROM sys.objects
    ) n
        ON DATEADD(d, n.n, d.start_dt) <= d.end_dt
)
-- Number of ranges that contain each day.
SELECT [date], COUNT(*) AS [count]
FROM RawData
GROUP BY [date]
ORDER BY [date]
I don't think this could take long even with 1000 date ranges. Perhaps you are using a table with more fields and even missing some index?

You could use a CTE
-- Stacks every start_dt and end_dt value into a single column, then counts how
-- often each value occurs, most frequent first.
-- NOTE(review): only the range endpoints are counted; days strictly inside a range
-- get no row -- confirm this matches the requirement.
-- NOTE(review): "Table" looks like a placeholder for the real table name.
WITH CTE AS(SELECT start_dt AS dates FROM Table
UNION ALL
SELECT end_dt AS dates FROM Table)
SELECT CAST(dates as DATE) as Date, COUNT(dates) AS Count
FROM CTE c
-- Grouping is on the raw value while the output shows it cast to DATE.
GROUP BY c.dates
order by Count desc
Or perhaps you need something broader if your columns are of DATETIME data type. This way will GROUP BY the whole day:
-- Same endpoint count as a per-day variant: both columns are cast to DATE before
-- they are stacked, so values that differ only in time of day fall in one group.
-- NOTE(review): "Table" looks like a placeholder for the real table name.
WITH CTE AS(SELECT CAST(start_dt AS DATE) AS dates FROM Table
UNION ALL
SELECT CAST(end_dt AS DATE) AS dates FROM Table)
SELECT Dates as Date, COUNT(Dates) AS Count
FROM CTE c
GROUP BY c.dates
order by Count desc

SQL - Filter on dates X number of days apart from the previous

I have a table containing orders. I would like to select those orders that are a certain number of days apart for a specific client. For example, in the table below I would like to select all of the orders for CustomerID = 10 that are at least 30 days apart from the previous instance. With the starting point to be the first occurrence (07/05/2014 in this data).
OrderID | CustomerID | OrderDate
==========================================
1 10 07/05/2014
2 10 07/15/2014
3 11 07/20/2014
4 11 08/20/2014
5 11 09/21/2014
6 10 09/23/2014
7 10 10/15/2014
8 10 10/30/2014
I would want to select OrderIDs (1,6,8) since they are 30 days apart from each other and all from CustomerID = 10. OrderIDs 2 and 7 would not be included as they are within 30 days of the previous order for that customer.
What confuses me is how to set the "checkpoint" to the last valid date. Here is a little "pseudo" SQL.
-- Pseudo-SQL only: LastValidOrderDate stands for the date of the most recently
-- selected order, which is the part the question does not know how to express.
SELECT OrderID
FROM Orders
WHERE CustomerID = 10
AND OrderDate > LastValidOrderDate + 30

I came here and saw that @SveinFidjestøl had already posted an answer, but after trying for so long I could not stop myself from posting mine as well.
With the help of LAG and LEAD we can compare values within the same column.
According to your question you are looking for orders 1, 6 and 8, so this might be helpful.
SQL SERVER 2012 and after
-- Sample orders held in a table variable that both queries below read.
declare @temp table
(orderid int,
customerid int,
orderDate date
);
insert into @temp (orderid, customerid, orderDate) values (1, 10, '07/05/2014')
insert into @temp (orderid, customerid, orderDate) values (2, 10, '07/15/2014')
insert into @temp (orderid, customerid, orderDate) values (3, 11, '07/20/2014')
insert into @temp (orderid, customerid, orderDate) values (4, 11, '08/20/2014')
insert into @temp (orderid, customerid, orderDate) values (5, 11, '09/21/2014')
insert into @temp (orderid, customerid, orderDate) values (6, 10, '09/23/2014')
insert into @temp (orderid, customerid, orderDate) values (7, 10, '10/15/2014')
insert into @temp (orderid, customerid, orderDate) values (8, 10, '10/30/2014');

-- SQL Server 2012 and later: LAG/LEAD expose the neighbouring order dates of customer 10.
-- Returned rows: orders more than 30 days after the IMMEDIATELY preceding order, plus
-- the first order (no predecessor) and the last order (no successor).
-- Note: the comparison is against the previous order, not against the last order
-- that was actually selected.
with cte as
(SELECT orderid, customerid, orderDate,
    LAG(orderDate) OVER (ORDER BY orderid) AS PreviousValue,
    LEAD(orderDate) OVER (ORDER BY orderid) AS NextValue
FROM @temp
WHERE customerid = 10)
select orderid, customerid, orderDate
from cte
where DATEDIFF(day, PreviousValue, orderDate) > 30
   or PreviousValue is null
   or NextValue is null;

-- SQL Server 2005 and later: same logic without LAG/LEAD. Orders are numbered and the
-- numbered set is joined to itself at rownum - 1 (previous) and rownum + 1 (next).
WITH CTE AS (
    SELECT
        rownum = ROW_NUMBER() OVER (ORDER BY p.orderid),
        p.orderid,
        p.customerid,
        p.orderDate
    FROM @temp p
    where p.customerid = 10)
SELECT CTE.orderid, CTE.customerid, CTE.orderDate,
    prev.orderDate AS PreviousValue,
    nex.orderDate AS NextValue
FROM CTE
LEFT JOIN CTE prev ON prev.rownum = CTE.rownum - 1
LEFT JOIN CTE nex ON nex.rownum = CTE.rownum + 1
-- Parentheses make the intended grouping explicit: AND binds tighter than OR.
where CTE.customerid = 10
  and (DATEDIFF(day, prev.orderDate, CTE.orderDate) > 30
       or prev.orderDate is null
       or nex.orderDate is null)
GO

You can use the LAG() function, available in SQL Server 2012, together with a Common Table Expression. You calculate the days between the customer's current order and the customer's previous order and then query the Common Table Expression using the filter >= 30
-- gaps: every order together with the number of days since the same customer's
-- previous order (NULL for a customer's first order).
with gaps as (
    select
        OrderId,
        CustomerId,
        datediff(
            d,
            lag(OrderDate) over (partition by CustomerId order by OrderDate),
            OrderDate
        ) as DaysSinceLastOrder
    from Orders
)
-- Keep first orders and orders placed at least 30 days after the previous one.
select
    OrderId,
    CustomerId,
    DaysSinceLastOrder
from gaps
where DaysSinceLastOrder is null
   or DaysSinceLastOrder >= 30
Results:
OrderId CustomerId DaysSinceLastOrder
1 10 NULL
6 10 70
3 11 NULL
4 11 31
5 11 32
(Note that 1970-01-01 is chosen arbitrarily, you may choose any date)

Update
A slightly more reliable way of doing it involves a temporary table, while the original table tbl is left unchanged. See here:
-- Collects, one at a time, the orders of customer 10 that lie more than 30 days
-- after the previously collected order.
CREATE TABLE #tmp (id int); -- ids of the selected orders
INSERT INTO #tmp (id) VALUES (1); -- plant "seed": first OrderId
-- @@ROWCOUNT holds the row count of the previous statement, so the loop ends as
-- soon as an iteration finds no further qualifying order (inserts 0 rows).
WHILE (@@ROWCOUNT > 0)
    INSERT INTO #tmp (id)
    SELECT TOP 1 o.OrderId
    FROM tbl AS o
    WHERE o.CustomerId = 10
      -- strictly more than 30 days after the latest order selected so far
      AND o.OrderDate > (
            SELECT DATEADD(day, 30, MAX(sel.OrderDate))
            FROM tbl AS sel
            INNER JOIN #tmp AS picked ON picked.id = sel.OrderId
          )
    ORDER BY o.OrderDate;
-- now list all found entries of tbl:
SELECT * FROM tbl WHERE EXISTS (SELECT 1 FROM #tmp WHERE id = OrderId)

@tinka shows how to use CTEs to do the trick, and the new windowed functions (for 2012 and later) are probably the best answer. There is also the option, assuming you do not have a very large data set, to use a recursive CTE.
Example:
-- Customer whose orders are filtered.
declare @customerid int = 10;
-- Sample orders held in a table variable.
declare @temp table
(orderid int,
customerid int,
orderDate date
);
insert into @temp (orderid, customerid, orderDate) values (1, 10, '07/05/2014')
insert into @temp (orderid, customerid, orderDate) values (2, 10, '07/15/2014')
insert into @temp (orderid, customerid, orderDate) values (3, 11, '07/20/2014')
insert into @temp (orderid, customerid, orderDate) values (4, 11, '08/20/2014')
insert into @temp (orderid, customerid, orderDate) values (5, 11, '09/21/2014')
insert into @temp (orderid, customerid, orderDate) values (6, 10, '09/23/2014')
insert into @temp (orderid, customerid, orderDate) values (7, 10, '10/15/2014')
insert into @temp (orderid, customerid, orderDate) values (8, 10, '10/30/2014');

-- datefilter: the customer's orders numbered by date; FilterDate is the order date
-- plus 30 days, i.e. the date a following order has to exceed.
with datefilter AS
(
    SELECT row_number() OVER (PARTITION BY CustomerId ORDER BY OrderDate) as RowId,
        OrderId,
        CustomerId,
        OrderDate,
        DATEADD(day, 30, OrderDate) as FilterDate
    from @temp
    WHERE CustomerId = @customerid
)
-- firstdate: starts at the customer's earliest order; each recursive step adds the
-- earliest order dated after the FilterDate of the order selected in the step before.
, firstdate as
(
    SELECT RowId, OrderId, CustomerId, OrderDate, FilterDate
    FROM datefilter
    WHERE rowId = 1
    union all
    SELECT datefilter.RowId, datefilter.OrderId, datefilter.CustomerId,
        datefilter.OrderDate, datefilter.FilterDate
    FROM datefilter
    join firstdate
        on datefilter.CustomerId = firstdate.CustomerId
        and datefilter.OrderDate > firstdate.FilterDate
    -- reject a candidate when another order after the FilterDate is dated earlier
    WHERE NOT EXISTS
    (
        SELECT 1 FROM datefilter betweens
        WHERE betweens.CustomerId = firstdate.CustomerId
        AND betweens.orderdate > firstdate.FilterDate
        AND datefilter.orderdate > betweens.orderdate
    )
)
SELECT * FROM firstdate

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

SQL counts with left join - sql-server

Related

How to count number of report runs last 7 days? Year to date? All time?

CTE - LEFT OUTER JOIN Performance Problem

Join for tally table. Need for each day each Cust_ID

Count the number of times a date is contained between 2 date columns

SQL - Filter on dates X number of days apart from the previous

Categories

Resources