SUM with CASE counts duplicate rows in SQL GROUP BY - sql-server

I'm trying to do a SUM against all items which match a certain condition, like so:
-- Per building: number of joined rows dated on/after 2018-09-30 (Validated)
-- and number of distinct lab-space ids (Total).
SELECT l.Building_Name,
-- Adds 1 for every joined row that passes the date test, so an id that shows
-- up on several joined rows is counted once per row.
SUM(CASE WHEN s.Date >= '20180930' THEN 1 ELSE 0 END) Validated,
-- DISTINCT collapses repeated ids, so this count is unaffected by repeated rows.
COUNT(DISTINCT s.id) Total
FROM Lab_Space s
JOIN Locations l ON s.Building_Code = l.Building_Code
GROUP BY l.Building_Name
The COUNT there is correct, and will say something like 20 because I can put the DISTINCT s.id in there. However, my SUM ends up with something like 1500. This is because when I do the JOIN rows are duplicated multiple times, and thus the SUM is counting against each one.
How can I do a SUM/CASE like this but make sure it only applies to distinct rows?
s.id l.building_name s.date
1 JF 2018-11-10
1 JF 2018-11-10
2 JF 2018-12-12
So if I have data like that, I correctly get a Total of 2, but Validated comes out as 3, because the id of 1 appears twice as a result of the JOIN.

You can edit this temp-table code as you see fit.
-- Repro: three lab-space rows but only two distinct space ids. The id column
-- is needed so that distinct spaces (not buildings) can be counted.
create table #temp_Lab_Space
( id int not null
,[Date] date null
,Building_Code int null
);
create table #temp_Locations
( Building_Code int null
,Building_Name varchar(10) null
);
insert into #temp_Lab_Space (id, [Date], Building_Code) values
(1, '2018-11-10', 1)
,(1, '2018-11-10', 1)
,(2, '2018-12-12', 1);
insert into #temp_Locations (Building_Code, Building_Name) values
(1, 'JF');
-- Collapse the joined rows to one row per (building, space id) first, carrying
-- a 0/1 validated flag; the outer query then counts each space exactly once.
-- Expected result for the data above: JF, Validated = 2, Total = 2.
select Building_Name,
SUM(Is_Validated) Validated,
COUNT(*) Total
from (
select l.Building_Name, s.id
,Is_Validated = max(case when s.[Date] >= '20180930' then 1 else 0 end)
FROM #temp_Lab_Space s
JOIN #temp_Locations l ON s.Building_Code = l.Building_Code
group by l.Building_Name, s.id
) a
group by Building_Name;

wild guess
-- For every location row, reduce its lab spaces to one row per space id with a
-- 0/1 validated flag, then count the spaces and add up the flags per building.
SELECT
    loc.Building_Name,
    COUNT(spaces.Id),
    SUM(spaces.Validated)
FROM Locations AS loc
CROSS APPLY (
    SELECT
        ls.Id,
        MAX(CASE WHEN ls.Date >= '20180930' THEN 1 ELSE 0 END) AS Validated
    FROM Lab_Space AS ls
    WHERE ls.Building_Code = loc.Building_Code
    GROUP BY ls.Id
) AS spaces
GROUP BY loc.Building_Name
This should give you one row per distinct Lab_Space id, together with a flag indicating whether that space is validated.

Related

SQL Server Overall Total in a group by

In my SQL Server Query, I am trying to count the number of employees per site. This works, but when I try to add in a percentage of total, it still groups by Site so it is inaccurate.
Is there an easier way to achieve this?
I am using this Query to create a view.
-- Per site name: number of active employees and an "active" ratio.
select Site.SiteName,
sum(case when Employee.ActiveStatus = 'Yes' then 1 else 0 end) as
"NumberOfEmployees",
-- Both the SUM and COUNT(EmployeeID) are evaluated inside the SiteName group,
-- so this is active employees / employees of the same site, not a share of
-- the overall total.
CONVERT(decimal(6,2),(sum(case when Employee.ActiveStatus = 'Yes' then 1
else 0 end))/(convert(decimal(6,2),COUNT(EmployeeID)))) as PercentageOfEmps
from Employee
left join Site
on(Employee.SiteID=Site.SiteID)
GROUP BY Site.SiteName;
GO
You could use a subquery:
-- Active employees per site and each site's share of all employees.
select
Site.SiteName,
NumberOfEmployees = sum(case when Employee.ActiveStatus = 'Yes' then 1 else 0 end),
-- The denominator is an ungrouped scalar subquery, i.e. the overall head count.
-- Multiplying by 1.0 forces decimal division (int / int would truncate to 0);
-- NULLIF turns a zero denominator into NULL instead of a divide-by-zero error.
PercentageOfEmps = CONVERT(decimal(6,2),
sum(case when Employee.ActiveStatus = 'Yes' then 1 else 0 end) * 1.0
/ NULLIF((SELECT COUNT(EmployeeID) FROM Employee), 0))
from Employee
left join Site
on Employee.SiteID=Site.SiteID
GROUP BY Site.SiteName;
I can't provide an answer for your scenario, as I don't have any sample data to use, therefore I've provided a small dataset.
One method is to use a CTE/Subquery to get a total number and then include the total in the GROUP BY. This method avoids 2 scans of the table:
-- Inline sample employees, each tagged with the overall head count through a
-- window COUNT; grouping by site then gives the site's count and its share.
WITH VTE AS (
    SELECT V.EmpID, V.EmpName, V.SiteID
    FROM (VALUES (1, 'Steve', 1),
                 (2, 'Jayne', 1),
                 (3, 'Greg',  2),
                 (4, 'Sarah', 3)) AS V (EmpID, EmpName, SiteID)
),
CTE AS (
    SELECT
        E.EmpID,
        E.EmpName,
        E.SiteID,
        COUNT(E.EmpID) OVER () AS TotalCount  -- same value on every row
    FROM VTE AS E
)
SELECT
    T.SiteID,
    COUNT(T.EmpID) AS Employees,
    COUNT(T.EmpID) / (T.TotalCount * 1.0) AS Perc
FROM CTE AS T
GROUP BY
    T.SiteID,
    T.TotalCount;
This script should help-
-- Active employees per site and their percentage of all active employees.
SELECT
    s.SiteName,
    COUNT(e.EmployeeID) AS [NumberOfEmployees],
    ((COUNT(e.EmployeeID) * 1.0)
        / (SELECT COUNT(*) FROM Employee WHERE ActiveStatus = 'Yes')) * 100.00 AS PercentageOfEmps
FROM Employee AS e
INNER JOIN Site AS s
    ON e.SiteID = s.SiteID
WHERE e.ActiveStatus = 'Yes'
GROUP BY s.SiteName;
Data creation script
-- Sample data held in table variables. Table variables are named with @;
-- a leading # denotes a temp table and is not valid after DECLARE.
declare @Employee Table(EmployeeID int ,ActiveStatus nvarchar(20) ,SiteID int)
declare @Site Table(SiteName nvarchar(20) ,SiteID int)
insert into @Employee (EmployeeID, ActiveStatus, SiteID) values(1,'Yes',101),(2,'Yes',101),(3,'Yes',102),(4,'Yes',102),
(5,'Yes',101)
insert into @Site (SiteName, SiteID) values('Site1',101)
insert into @Site (SiteName, SiteID) values('Site2',102)
-- real script to get the percentage
;with cte as
(
-- active employees per site
select s.SiteName,sum(case when e.ActiveStatus = 'Yes' then 1 else 0 end) as "NumberOfEmployees"
from @Employee e
left join @Site s
on(e.SiteID=s.SiteID)
GROUP BY s.SiteName
),
-- single-row grand total over all sites
cte_sum as
(select sum(NumberOfEmployees) as total from cte )
-- cte_sum has exactly one row, so the CROSS JOIN only attaches the total to
-- every site row; NULLIF guards against a zero total.
select c.*, convert (decimal(6,2),c.NumberOfEmployees)/NULLIF(convert (decimal(6,2),cs.total),0)*100 as PercentageOfEmps
from cte c
cross join cte_sum cs;

How to write this SQL Server query: Add values in unique rows?

I have a query like below. The relation between table are:
Each truck may have multiple drivers. The List table connects each row in the Truck table with rows in the Driver table. Now I want to get the count of unique trucks that meet a certain condition, and the total size of those unique trucks.
Here is what I have:
-- Per day: number of distinct sent trucks that have an internal driver, and a
-- size total for those trucks.
SELECT t.Year AS [Year]
, t.Month AS [Month]
, t.Day AS [Day]
-- Count will not count NULL
, COUNT( DISTINCT (CASE WHEN (t.Sent = 1 AND r.Internal=1) THEN L.TruckId
ELSE NULL
END) ) AS [Count]
-- The joins produce one row per (truck, driver) pair, so a truck's Size is
-- added once for every matching internal driver, not once per truck.
, SUM(CASE WHEN (t.Sent = 1 AND r.Internal = 1) THEN t.Size
END) AS [Size]
FROM Truck t
INNER JOIN List L ON t.Id = L.TruckId
INNER JOIN Driver r ON L.DriverId = r.Id
GROUP BY t.Year, t.Month, t.Day
the COUNT is correct, but the SUM is not.
My question is how to get this SUM? And I do not want to write 2 queries and join them.
Thanks
You can try a query like the one below:
-- Reduce the truck/driver join to one row per qualifying truck, then aggregate.
-- Note: the filter is in WHERE, so days without any qualifying truck produce no
-- row at all (the original query returned them with a count of 0).
; with cte as (
SELECT
DISTINCT
t.Year AS [Year]
, t.Month AS [Month]
, t.Day AS [Day]
, L.TruckId
, t.Size
FROM Truck t
INNER JOIN List L ON t.Id = L.TruckId
INNER JOIN Driver r ON L.DriverId = r.Id
WHERE t.Sent = 1 AND r.Internal=1
)
select
[Year]
, [Month]
, [Day]
, count(TruckId) AS [Count]
, sum(Size) AS [Size]
from cte
group by [Year], [Month], [Day]

how to add left join with CTE , check my query

I have the following query, and I want to LEFT JOIN it with a CTE. How can I do this? Please help: I have the driver id, and for each driver id I want the second-to-last row, which I want to bring in by adding a LEFT JOIN to the CTE.
select d.Id,d.DriverNo,d.DriverName,TransId=dc.Id,dc.FromDate,dc.ToDate,dc.IsPaid,
Active=(case when (dc.weekoff is null or dc.weekoff=0) then 'Active' else 'Off' end),
Rent=(case when (IsNull(dc.CommissionTotal,0))> IsNull(dc.AccJobsTotal,0) then IsNull(dc.CommissionTotal,0)-(IsNull(dc.AccJobsTotal,0)) else 0 end),
BalanceDue=IsNull(dc.OldBalance,0),
AgentCommission=IsNull(dc.AgentFeesTotal,0),
PDA= (case when (dc.weekoff is null or dc.weekoff=0) then (IsNull(dc.PDARent,0)+IsNull(dc.CollectionDeliveryCharges,0)) else 0 end),
Total=(case when (IsNull(dc.CommissionTotal,0))> IsNull(dc.AccJobsTotal,0) then IsNull(dc.CommissionTotal,0)-(IsNull(dc.AccJobsTotal,0)) else 0 end)
+((IsNull(dc.OldBalance,0))
+((IsNull(dc.AgentFeesTotal,0)))
+(case when (dc.weekoff is null or dc.weekoff=0) then (IsNull(dc.PDARent,0)+IsNull(dc.CollectionDeliveryCharges,0)) else 0 end))
from Fleet_Driver d
inner join Fleet_DriverCommision dc
on d.Id=dc.DriverId
where dc.Id in (select Max(Id) from Fleet_DriverCommision
group by DriverId) as T1
left join on
> LEFT JOIN WITH CTE
-- Number each driver's commission rows newest-first and keep the second-newest
-- row, or the only row when the driver has just one.
WITH cte AS (
    SELECT
        AgentFeesTotal,
        DriverId,
        ROW_NUMBER() OVER (PARTITION BY DriverID ORDER BY Transdate DESC) AS Rn,
        COUNT(1) OVER (PARTITION BY DriverID) AS cnt
    FROM Fleet_DriverCommision
)
SELECT
    AgentFeesTotal,
    DriverId
FROM cte
WHERE (Rn = 2 AND cnt > 1)
   OR (Rn = 1 AND cnt = 1)
Here is an example:
-- A CTE is visible only to the single statement that immediately follows it,
-- so the row filter and the LEFT JOIN must both live in that one statement.
;with cte
as
(select AgentFeesTotal,DriverId,Row_Number()over(Partition by DriverID order by Transdate desc) as Rn,
count(1)over(Partition by DriverID) as cnt from Fleet_DriverCommision)
select t2.DriverNo, c.AgentFeesTotal from Fleet_Driver t2
left join
cte c
on c.DriverId=t2.Id
-- Filter in ON (not WHERE) so drivers without any commission row are still returned.
and ((c.Rn = 2 and c.cnt > 1) or (c.Rn = 1 and c.cnt = 1))
It looks like you are struggling with the syntax for using CTEs. The CTE declaration needs to happen before the rest of the query and then behaves like another table. Also note that the WITH statement must be the first statement or follow a semi-colon. This should get you on the right track. Also be sure to check the examples in the MSDN documentation.
--With statement first - must follow ; if there are multiple statements...
;With cte as
(select AgentFeesTotal,DriverId,
Row_Number()over(Partition by DriverID order by Transdate desc) as Rn,
count(1)over(Partition by DriverID) as cnt
from Fleet_DriverCommision
)
-- ...then select statement...
select d.Id,d.DriverNo,d.DriverName,TransId=dc.Id,
dc.FromDate,dc.ToDate,dc.IsPaid,
Active=(case when (dc.weekoff is null or dc.weekoff=0) then 'Active' else 'Off' end),
Rent=(case when (IsNull(dc.CommissionTotal,0))> IsNull(dc.AccJobsTotal,0) then IsNull(dc.CommissionTotal,0)-(IsNull(dc.AccJobsTotal,0)) else 0 end),
BalanceDue=IsNull(dc.OldBalance,0),
AgentCommission=IsNull(dc.AgentFeesTotal,0),
PDA= (case when (dc.weekoff is null or dc.weekoff=0) then (IsNull(dc.PDARent,0)+IsNull(dc.CollectionDeliveryCharges,0)) else 0 end),
Total=(case when (IsNull(dc.CommissionTotal,0))> IsNull(dc.AccJobsTotal,0) then IsNull(dc.CommissionTotal,0)-(IsNull(dc.AccJobsTotal,0)) else 0 end)
+((IsNull(dc.OldBalance,0))
+((IsNull(dc.AgentFeesTotal,0)))
+(case when (dc.weekoff is null or dc.weekoff=0) then (IsNull(dc.PDARent,0)+IsNull(dc.CollectionDeliveryCharges,0)) else 0 end)),
-- AgentFeesTotal of the second-newest commission row (or of the only row)
PrevAgentFeesTotal=cte.AgentFeesTotal
from Fleet_Driver d
inner join Fleet_DriverCommision dc
on d.Id=dc.DriverId
--...join in cte as a normal table. The row filter that used to be a separate
-- SELECT goes into the ON clause: a CTE is only visible to this one statement,
-- and filtering in ON keeps drivers that have no matching cte row.
left join cte
on cte.DriverId=d.Id
and ((cte.Rn = 2 and cte.cnt > 1) or (cte.Rn = 1 and cte.cnt = 1))
-- latest commission row per driver (an IN subquery takes no alias)
where dc.Id in (select Max(Id) from Fleet_DriverCommision
group by DriverId)

Add a WHERE clause in a complex SQL query

I want to pass a ShowRoomId value to the query below. The Employees table has a ShowRoomId column.
How can I do it?
My SQL query is as following:
-- Every employee with three optional per-employee lookups attached; OUTER APPLY
-- keeps the employee row even when a lookup returns nothing.
SELECT *
FROM Employees A
-- most recent EmployeeBasics row for this employee
OUTER APPLY (SELECT TOP 1 *
FROM EmployeeBasics B
WHERE (A.EmployeeID = B.EmployeeID)
ORDER BY B.BasicUpdateDate DESC) AS B
-- number of absences dated 2016-05-01 through 2016-05-30 (both inclusive)
OUTER APPLY (
SELECT C.EmployeeId , count(*) AS TotalAbsent
FROM EmployeeAbsents C
WHERE C.AbsentDate BETWEEN '2016-05-01' AND '2016-05-30' AND A.EmployeeID = C.EmployeeID
GROUP BY C.EmployeeId
) AS C
-- advance amounts summed separately for transaction types 1, 2 and 3
OUTER APPLY (
SELECT EmployeeId,
SUM(CASE WHEN TransctionTypeId = 1 THEN Amount ELSE 0 END) AS Payment,
SUM(CASE WHEN TransctionTypeId = 2 THEN Amount ELSE 0 END) AS RecoverSalary,
SUM(CASE WHEN TransctionTypeId = 3 THEN Amount ELSE 0 END) AS RecoverCash
FROM dbo.EmployeeAdvances D
WHERE A.EmployeeID = D.EmployeeID
GROUP BY EmployeeId
) AS D
Simply use a WHERE clause at the end as following:
... YOUR SELECT ...
WHERE Col = ...YourCondition...
OR
Use WITH keyword to keep your current SELECT-statement in a cte. Then do your query on it.
WITH cte AS
(
... YOUR SELECT ...
)
SELECT *
FROM cte
WHERE Col = ...YourCondition...
OR
You can wrap your SELECT statement in parentheses and give it an alias. Then you can query it as well.
SELECT *
FROM
(
... YOUR SELECT ...
) t
WHERE t.Col = ...YourCondition...
As per Giorgi Nakeuri's advice, I added the WHERE clause at the end of the statement.
And it works for me. Revised code is here:
-- Employees of show room 2, each with their latest basics row, their absence
-- count for the given date range, and advance totals per transaction type.
SELECT *
FROM Employees AS A
OUTER APPLY (
    -- newest EmployeeBasics row of this employee
    SELECT TOP 1 *
    FROM EmployeeBasics AS B
    WHERE A.EmployeeID = B.EmployeeID
    ORDER BY B.BasicUpdateDate DESC
) AS B
OUTER APPLY (
    -- NOTE(review): the range ends on 05-30; confirm 05-31 is meant to be excluded
    SELECT
        C.EmployeeId,
        COUNT(*) AS TotalAbsent
    FROM EmployeeAbsents AS C
    WHERE A.EmployeeID = C.EmployeeID
      AND C.AbsentDate BETWEEN '2016-05-01' AND '2016-05-30'
    GROUP BY C.EmployeeId
) AS C
OUTER APPLY (
    SELECT
        D.EmployeeId,
        SUM(CASE WHEN D.TransctionTypeId = 1 THEN D.Amount ELSE 0 END) AS Payment,
        SUM(CASE WHEN D.TransctionTypeId = 2 THEN D.Amount ELSE 0 END) AS RecoverSalary,
        SUM(CASE WHEN D.TransctionTypeId = 3 THEN D.Amount ELSE 0 END) AS RecoverCash
    FROM dbo.EmployeeAdvances AS D
    WHERE A.EmployeeID = D.EmployeeID
    GROUP BY D.EmployeeId
) AS D
WHERE A.ShowRoomId = 2

Sum of missing data

The below query displays sites against the total orders within last week.
But if there is no order for a given site in the last week, I should still see that site with a sum of zero.
At the moment it only gives me four sites, because no orders have been made in the last week for the other sites.
-- Per site: number of distinct order dates on or after the week cutoff.
select SITE
-- Cutoff: day number -53690 is 1753-01-01 (a Monday, counted from SQL Server's
-- day 0 = 1900-01-01). The integer /7*7 rounds yesterday's date down to a whole
-- number of weeks from that anchor, i.e. the Monday on or before yesterday.
,SUM(Case When OrderDate >= dateadd(dd,(datediff(dd,-53690,getdate()-1)/7)*7,-53690)
Then 1
Else 0
End) as COMPLETED
from
(
-- Only orders that have a PHONEDATA row survive the INNER JOIN, so a site with
-- no such order produces no row here and is missing from the result.
SELECT DISTINCT ORDERS.SITE, ORDERS.ORDERDATE FROM ORDERS
INNER JOIN PHONEDATA AS P
ON ORDERS.RECID = P.OrderID
where SITE IN ('SITE1','SITE2','SITE3','SITE4','SITE5','SITE6','SITE7')
) X
GROUP BY SITE
order by SITE
RESULT:
Site---------------------Completed
SITE1-----------------------2
SITE2-----------------------2
SITE3-----------------------2
SITE4-----------------------2
EXPECTED RESULT:
Site---------------------Completed
SITE1-----------------------2
SITE2-----------------------2
SITE3-----------------------2
SITE4-----------------------2
SITE5-----------------------0
SITE6-----------------------0
SITE7-----------------------0
updated:
-- Per site: number of distinct ORDERDATE values on or after the computed cutoff
-- date, taken from ORDERS alone (no PHONEDATA join).
select SITE
,SUM(Case When OrderDate >= dateadd(dd,(datediff(dd,-53690,getdate()-1)/7)*7,-53690)
Then 1
Else 0
End) as COMPLETED
from
(
-- DISTINCT is on (SITE, ORDERDATE), so every differing ORDERDATE value of a
-- site counts separately.
SELECT DISTINCT ORDERS.SITE, ORDERS.ORDERDATE FROM ORDERS
where SITE IN ('SITE1','SITE2','SITE3','SITE4','SITE5','SITE6','SITE7')
) X
GROUP BY SITE
order by SITE
I have now removed the inner join with the PHONEDATA table, so I now get the missing sites. However, I originally avoided this approach because, if I rely only on the ORDERS table, the order date/time is inserted several times for a given order, and only the final order makes it to the PHONEDATA table. As a result, I now get higher values in the COMPLETED count, whereas it should only consider the latest value for each day for each site.
result of update :
Site---------------------Completed
SITE1-----------------------5
SITE2-----------------------5
SITE3-----------------------5
SITE4-----------------------5
SITE5-----------------------0
SITE6-----------------------0
SITE7-----------------------0
expected
Site---------------------Completed
SITE1-----------------------2
SITE2-----------------------2
SITE3-----------------------2
SITE4-----------------------2
SITE5-----------------------0
SITE6-----------------------0
SITE7-----------------------0
If there are no rows in the table with the sites that have no orders, how can it return any rows to count? Perhaps you have a table with all the possible sites that can be joined to? Or create a temp table with the site values. You could then left join the orders table to this. i.e.
-- One row per site that must appear in the report. Each site needs its own
-- row constructor; a single seven-value tuple does not fit a one-column table.
create table #sites (site varchar(25) not null);
insert into #sites (site) values ('SITE1'),('SITE2'),('SITE3'),('SITE4'),('SITE5'),('SITE6'),('SITE7');

select X.SITE
,SUM(Case When X.ORDERDATE >= dateadd(dd,(datediff(dd,-53690,getdate()-1)/7)*7,-53690)
Then 1
Else 0
End) as COMPLETED
from
(
-- SITE is taken from #sites, so a site without orders still yields a row (with
-- a NULL ORDERDATE that falls into ELSE 0). ORDERS and PHONEDATA are inner
-- joined inside the parentheses first; joining PHONEDATA after the LEFT JOIN
-- would discard the order-less sites again.
SELECT DISTINCT s.site AS SITE, o.ORDERDATE FROM
#sites s left join
(ORDERS o INNER JOIN PHONEDATA AS P ON o.RECID = P.OrderID)
on o.SITE = s.site
) X
GROUP BY X.SITE
order by X.SITE;
Try using a left join instead of the inner join. It is probably not getting rows from the phone data table:
-- Per site: number of distinct order dates on or after the computed cutoff date.
-- PHONEDATA is LEFT JOINed, so orders without a PHONEDATA row are kept.
SELECT
    X.SITE,
    SUM(CASE
            WHEN X.ORDERDATE >= DATEADD(dd, (DATEDIFF(dd, -53690, GETDATE() - 1) / 7) * 7, -53690) THEN 1
            ELSE 0
        END) AS COMPLETED
FROM (
    SELECT DISTINCT
        O.SITE,
        O.ORDERDATE
    FROM ORDERS AS O
    LEFT JOIN PHONEDATA AS P
        ON O.RECID = P.OrderID
    WHERE O.SITE IN ('SITE1','SITE2','SITE3','SITE4','SITE5','SITE6','SITE7')
) AS X
GROUP BY X.SITE
ORDER BY X.SITE
It'd be best to start with a "Site" table and then left join to your results. This example mimics the behavior, and can be used as a hack-workaround.
-- Seed list of every site with a zero baseline, held in a table variable
-- (table variables are named with @, not #).
DECLARE @table TABLE
(
site VARCHAR(10) ,
Completed TINYINT
)
INSERT INTO @table
( site, Completed )
VALUES ( 'SITE1', 0 ),
( 'SITE2', 0 ),
( 'SITE3', 0 ),
( 'SITE4', 0 ),
( 'SITE5', 0 ),
( 'SITE6', 0 ),
( 'SITE7', 0 )
-- WITH must be preceded by a semicolon when it is not the first statement.
;WITH cte
AS ( SELECT X.SITE ,
SUM(CASE WHEN X.ORDERDATE >= DATEADD(dd,( DATEDIFF(dd, -53690, GETDATE() - 1) / 7 ) * 7, -53690)
THEN 1
ELSE 0
END) AS COMPLETED
FROM ( SELECT DISTINCT
ORDERS.SITE ,
ORDERS.ORDERDATE
FROM ORDERS
INNER JOIN PHONEDATA AS P ON ORDERS.RECID = P.OrderID
WHERE SITE IN ( 'SITE1', 'SITE2', 'SITE3',
'SITE4', 'SITE5', 'SITE6',
'SITE7' )
) X -- a derived table requires an alias
GROUP BY X.SITE
)
SELECT t.site ,
-- Sites absent from cte come back as NULL from the LEFT JOIN; ISNULL keeps the
-- sum at the zero baseline instead of NULL.
t.Completed + ISNULL(cte.COMPLETED, 0) AS Completed
FROM @table t
LEFT OUTER JOIN cte ON t.site = cte.SITE
ORDER BY t.site

Resources