SQL Server Overall Total in a group by - sql-server

In my SQL Server Query, I am trying to count the number of employees per site. This works, but when I try to add in a percentage of total, it still groups by Site so it is inaccurate.
Is there an easier way to achieve this?
I am using this Query to create a view.
-- Per-site head count of active employees plus the active share within each site.
-- The denominator is COUNT(EmployeeID) evaluated inside each SiteName group, so the
-- ratio is relative to the site's own rows, not to the overall employee total.
SELECT
    s.SiteName,
    SUM(CASE WHEN e.ActiveStatus = 'Yes' THEN 1 ELSE 0 END) AS "NumberOfEmployees",
    CONVERT(
        decimal(6,2),
        SUM(CASE WHEN e.ActiveStatus = 'Yes' THEN 1 ELSE 0 END)
            / CONVERT(decimal(6,2), COUNT(EmployeeID))
    ) AS PercentageOfEmps
FROM Employee AS e
LEFT JOIN Site AS s
    ON e.SiteID = s.SiteID
GROUP BY s.SiteName;
GO

You could use subquery:
-- Active employees per site and each site's share of ALL employees.
-- The scalar subquery is not affected by the outer GROUP BY, so the denominator
-- is the overall count of Employee rows rather than the per-site count.
SELECT
    Site.SiteName,
    NumberOfEmployees = SUM(CASE WHEN Employee.ActiveStatus = 'Yes' THEN 1 ELSE 0 END),
    PercentageOfEmps = CONVERT(
        decimal(6,2),
        -- Multiply by 1.0 first: int / int would truncate the ratio to 0 (or 1).
        SUM(CASE WHEN Employee.ActiveStatus = 'Yes' THEN 1 ELSE 0 END) * 1.0
            -- NULLIF turns a zero denominator into NULL instead of a divide-by-zero error.
            / NULLIF((SELECT COUNT(EmployeeID) FROM Employee), 0)
    )
FROM Employee
LEFT JOIN Site
    ON Employee.SiteID = Site.SiteID
GROUP BY Site.SiteName;

I can't provide an answer for your scenario, as I don't have any sample data to use, therefore I've provided a small dataset.
One method is to use a CTE/Subquery to get a total number and then include the total in the GROUP BY. This method avoids 2 scans of the table:
-- Inline sample rows standing in for the employee table.
WITH VTE AS (
    SELECT *
    FROM (VALUES (1, 'Steve', 1),
                 (2, 'Jayne', 1),
                 (3, 'Greg', 2),
                 (4, 'Sarah', 3)) V(EmpID, EmpName, SiteID)
),
-- Every row carries the overall employee count via an unpartitioned window.
CTE AS (
    SELECT
        src.EmpID,
        src.EmpName,
        src.SiteID,
        COUNT(src.EmpID) OVER () AS TotalCount
    FROM VTE AS src
)
-- TotalCount is the same on every row, so adding it to GROUP BY does not split
-- the per-site groups; it only makes the column usable in the SELECT list.
SELECT
    grp.SiteID,
    COUNT(grp.EmpID) AS Employees,
    COUNT(grp.EmpID) / (grp.TotalCount * 1.0) AS Perc
FROM CTE AS grp
GROUP BY
    grp.SiteID,
    grp.TotalCount;

This script should help-
-- Active employees per site, and each site's percentage of all active employees.
SELECT
    s.SiteName,
    COUNT(EmployeeID) AS [NumberOfEmployees],
    ((COUNT(EmployeeID) * 1.0)
        / (SELECT COUNT(*) FROM Employee WHERE ActiveStatus = 'Yes')) * 100.00 AS PercentageOfEmps
FROM Employee AS e
INNER JOIN Site AS s
    ON e.SiteID = s.SiteID
WHERE e.ActiveStatus = 'Yes'
GROUP BY s.SiteName;

Data creation script
-- Data creation script: table variables holding a small sample data set.
-- Table variables are declared with the @ prefix; a # prefix names a temp table,
-- which is created with CREATE TABLE rather than DECLARE.
DECLARE @Employee TABLE (EmployeeID int, ActiveStatus nvarchar(20), SiteID int);
DECLARE @Site TABLE (SiteName nvarchar(20), SiteID int);

INSERT INTO @Employee (EmployeeID, ActiveStatus, SiteID)
VALUES (1, 'Yes', 101), (2, 'Yes', 101), (3, 'Yes', 102), (4, 'Yes', 102),
       (5, 'Yes', 101);
INSERT INTO @Site (SiteName, SiteID) VALUES ('Site1', 101);
INSERT INTO @Site (SiteName, SiteID) VALUES ('Site2', 102);

-- Real script to get the percentage.
WITH cte AS
(
    -- Active head count per site.
    SELECT s.SiteName,
           SUM(CASE WHEN e.ActiveStatus = 'Yes' THEN 1 ELSE 0 END) AS "NumberOfEmployees"
    FROM @Employee e
    LEFT JOIN @Site s
        ON e.SiteID = s.SiteID
    GROUP BY s.SiteName
),
cte_sum AS
(
    -- Single-row overall total of the per-site counts.
    SELECT SUM(NumberOfEmployees) AS total FROM cte
)
SELECT c.*,
       -- NULLIF avoids a divide-by-zero error when no employee is active.
       CONVERT(decimal(6,2), c.NumberOfEmployees)
           / NULLIF(CONVERT(decimal(6,2), cs.total), 0) * 100 AS PercentageOfEmps
FROM cte c
CROSS JOIN cte_sum cs;  -- cte_sum has exactly one row, so this attaches the total to every site

Related

SUM with CASE counts duplicate rows in SQL GROUP BY

I'm trying to do a SUM against all items which match a certain condition, like so:
-- Per building: the number of joined rows dated on/after 2018-09-30 (every joined
-- row is counted, duplicates included) and the number of distinct space ids.
SELECT
    loc.Building_Name,
    SUM(CASE WHEN sp.Date >= '20180930' THEN 1 ELSE 0 END) AS Validated,
    COUNT(DISTINCT sp.id) AS Total
FROM Lab_Space AS sp
INNER JOIN Locations AS loc
    ON sp.Building_Code = loc.Building_Code
GROUP BY loc.Building_Name
The COUNT there is correct, and will say something like 20 because I can put the DISTINCT s.id in there. However, my SUM ends up with something like 1500. This is because when I do the JOIN rows are duplicated multiple times, and thus the SUM is counting against each one.
How can I do a SUM/CASE like this but make sure it only applies to distinct rows?
s.id l.building_name s.date
1 JF 2018-11-10
1 JF 2018-11-10
2 JF 2018-12-12
So if I have data like that, I'm going to get my count properly of 2, but validate will say 3 because the id of 1 appears twice due to doing a JOIN
You can edit this code of temp table if you deem fit.
-- Scratch copies of the two tables, loaded with the sample rows.
CREATE TABLE #temp_Lab_Space
(
    [Date]        date NULL,
    Building_Code int  NULL
);
CREATE TABLE #temp_Locations
(
    Building_Code int         NULL,
    Building_Name varchar(10) NULL
);

INSERT INTO #temp_Lab_Space VALUES
    ('2018-11-10', 1),
    ('2018-11-10', 1),
    ('2018-12-12', 1);
INSERT INTO #temp_Locations VALUES
    (1, 'JF');

-- DISTINCT in the derived table collapses identical joined rows before the
-- outer query aggregates per building.
SELECT
    Building_Name,
    SUM(CASE WHEN Date >= '20180930' THEN 1 ELSE 0 END) Validated,
    COUNT(DISTINCT Building_Code) Total
FROM (
    SELECT DISTINCT
        l.Building_Name,
        s.Building_Code,
        s.Date,
        Rank_1 = RANK() OVER (PARTITION BY l.Building_Name ORDER BY s.Date ASC)
    FROM #temp_Lab_Space s
    JOIN #temp_Locations l ON s.Building_Code = l.Building_Code
) a
GROUP BY Building_Name
wild guess
-- The APPLY yields one row per space id within the current building, flagged 1
-- when any of that id's rows is dated on/after 2018-09-30; the outer query then
-- rolls those rows up per building.
SELECT
    l.Building_Name,
    COUNT(s.Id),
    SUM(s.Validated)
FROM Locations AS l
CROSS APPLY (
    SELECT
        s.Id,
        MAX(CASE WHEN s.Date >= '20180930' THEN 1 ELSE 0 END) AS Validated
    FROM Lab_Space AS s
    WHERE s.Building_Code = l.Building_Code
    GROUP BY s.Id
) AS s
GROUP BY l.Building_Name
should give you the distinct space.id and a flag whether it is validated.

Create a stored procedure to aggregate rows

Having a transaction table with the following rows:
Id UserId PlatformId TransactionTypeId
-------------------------------------------------
0 1 3 1
1 1 1 2
2 2 3 2
3 3 2 1
4 2 3 1
How do I write a stored procedure that can aggregate the rows into a new table with the following format?
Id UserId Platforms TransactionTypeId
-------------------------------------------------
0 1 {"p3":1,"p1":1} {"t1":1,"t2":1}
1 2 {"p3":2} {"t2":1,"t1":1}
3 3 {"p2":1} {"t1":1}
So the rows are grouped by user, each platform/transaction type is counted, and the counts are stored as a key/value JSON string.
Ref: My previous related question
You could use GROUP BY and FOR JSON:
-- One row per user: the lowest transaction ID plus two JSON strings holding the
-- per-platform and per-transaction-type counts. Each APPLY returns the same string
-- for every row of a user, so MIN() simply picks that value within the group.
SELECT
    MIN(ID) AS ID,
    UserId,
    MIN(sub.x) AS Platforms,
    MIN(sub2.x) AS Transactions
FROM tab t
OUTER APPLY (
    -- Platform counts for the current user, serialised as JSON.
    SELECT CONCAT('p', platformId) AS platform, cnt
    FROM (
        SELECT PlatformId, COUNT(*) AS cnt
        FROM tab t2
        WHERE t2.UserId = t.UserId
        GROUP BY PlatformId
    ) s
    FOR JSON AUTO
) sub(x)
OUTER APPLY (
    -- Transaction-type counts for the current user, serialised as JSON.
    SELECT CONCAT('t', TransactiontypeId) AS Transactions, cnt
    FROM (
        SELECT TransactiontypeId, COUNT(*) AS cnt
        FROM tab t2
        WHERE t2.UserId = t.UserId
        GROUP BY TransactiontypeId
    ) s
    FOR JSON AUTO
) sub2(x)
GROUP BY UserId;
DBFiddle Demo
Result is a bit different(array of key-value) but please treat it as starting point.
Your sample JSON is not really a json, but since you want it that way:
-- One row per user that has at least one transaction, with two hand-built
-- key/value strings: counts per platform name and counts per transaction type name.
SELECT u.UserId, plt.pValue, ttyp.ttValue
FROM Users AS [u]
-- Platform counts for the current user.
CROSS APPLY (
-- Each inner row becomes a ,"name":count fragment; FOR XML PATH('') concatenates
-- the fragments, STUFF(..., 1, 1, '') removes the leading comma, and the result
-- is wrapped in braces.
SELECT '{'+STUFF( (SELECT ',"'+pn.pName+'":'+LTRIM(STR(pn.pCount))
FROM (SELECT p.Name AS pName, COUNT(*) AS pCount
FROM transactions t
left JOIN Platforms p ON p.PlatformID = t.PlatformId
WHERE t.UserId = u.UserId
GROUP BY p.PlatformId, p.Name
) pn
FOR XML PATH('')),1,1,'')+'}'
) plt(pValue)
-- Transaction-type counts for the current user, built the same way.
CROSS APPLY (
SELECT '{'+STUFF( (SELECT ',"'+tty.ttName+'":'+LTRIM(STR(tty.ttCount))
FROM (SELECT tt.Name AS ttName, COUNT(*) AS ttCount
FROM transactions t
left JOIN dbo.TransactionType tt ON tt.TransactionTypeId = t.TransactionTypeID
WHERE t.UserId = u.UserId
GROUP BY tt.TransactionTypeId, tt.Name
) tty
FOR XML PATH('')),1,1,'')+'}'
) ttyp(ttValue)
-- Skip users without any transaction rows.
WHERE EXISTS (SELECT * FROM transactions t WHERE u.UserId = t.UserId)
ORDER BY UserId;
DBFiddle Sample

Faster execution of non nulls for a column

I need to get the percentage of nulls for a given column in a table. The table currently contains close to 368081344 records, and the number of records will increase by 20 million each day. Below is the query I am using.
-- Percentage of rows in which the column is NOT NULL (COUNT_BIG(column) skips NULLs).
-- Multiplying by 100.0 first forces decimal arithmetic: bigint / bigint would
-- truncate the ratio to 0 (or 1) before the multiplication.
-- NULLIF yields NULL instead of a divide-by-zero error on an empty table.
SELECT 100.0 * COUNT_BIG(column) / NULLIF(COUNT_BIG(*), 0)
from <table>
Then, I perform 100 - above output to fetch the required output
Please let me know best possible solution which can yield faster result
Have you tried the below method :
-- Demo: fill a table variable with 100 NULLs and the values 1..100, then compute
-- total, non-null and null counts plus the null percentage in a single pass.
-- Table variables take the @ prefix; DECLARE #T TABLE is not valid T-SQL.
DECLARE @T TABLE
(
    Id INT
);

-- Recursive CTE producing SeqNo 1..100, each paired with a NULL Val.
WITH CTE
AS
(
    SELECT
        SeqNo = 1,
        NULL "Val"
    UNION ALL
    SELECT
        SeqNo = SeqNo + 1,
        Val
    FROM CTE
    WHERE SeqNo < 100
)
INSERT INTO @T (Id)
SELECT Val FROM CTE
UNION ALL
SELECT SeqNo FROM CTE;

SELECT
    TotCount = COUNT(1),
    ValCount = SUM(CASE WHEN Id IS NULL THEN 0 ELSE 1 END),
    NullCount = SUM(CASE WHEN Id IS NOT NULL THEN 0 ELSE 1 END),
    -- Cast to FLOAT so the division is not truncated to an integer.
    NullPercent = (CAST(SUM(CASE WHEN Id IS NOT NULL THEN 0 ELSE 1 END) AS FLOAT) / CAST(COUNT(1) AS FLOAT)) * 100
FROM @T;
Partial answer only. Not sure how to get the count for a specific column
You can speed up the total row count using this query.
-- Row count read from catalog metadata instead of scanning the table.
-- index_id < 2 keeps only the heap (0) or clustered index (1), i.e. the base data.
-- A partitioned table has one sys.partitions row per partition, so the counts are
-- summed; grouping by object_id still returns one row per matching table.
SELECT SUM(p.rows) AS [ROWS]
FROM sys.objects AS o
INNER JOIN sys.partitions AS p
    ON o.object_id = p.object_id
WHERE o.name = 'PARENT'
  AND p.index_id < 2
GROUP BY o.object_id;

Using max(col) with count in sub-query SQL Server

I am putting together a query in SQL Server but having issues with the sub-query
I wish to use the max(loadid) and count the number of records the query returns.
So for example, my last loadid is 400 and the number of records with loadid 400 is 2300, so my record_count column should display 2300. I have tried the various approaches below but am getting errors.
-- Attempt 1: count the rows whose loadid equals the maximum loadid.
-- NOTE(review): the record_count alias follows the WHERE clause instead of the
-- count expression in the SELECT list -- presumably the source of the reported error.
select count (loadid)
from t1
where loadid = (select max(loadid) from t1) record_count;
-- Attempt 2: a parenthesised TOP 1 query with a trailing alias and no enclosing statement.
-- NOTE(review): it orders by count(*), so it would pick the loadid with the most
-- rows rather than the highest loadid.
(select top 1 LOADID, count(*)
from t1
group by loadid
order by count(*) desc) as Record_Count
Showing loadid and number of matching rows with the use of grouping, ordering by count and limiting the output to 1 row with top.
-- Single row: the loadid that has the most rows, together with that row count.
SELECT TOP 1
    loadid,
    COUNT(*) AS cnt
FROM t1
GROUP BY loadid
ORDER BY cnt DESC
This may be easier to achieve with a window function in the inner query:
-- Rows sharing the highest loadid all receive rank 1; counting them gives the
-- number of records carrying that loadid.
SELECT COUNT(*)
FROM (
    SELECT RANK() OVER (ORDER BY loadid DESC) AS rk
    FROM t1
) AS ranked
WHERE ranked.rk = 1
Another simple way to achieve the result:
SET NOCOUNT ON;

-- Table variables use the @ prefix; DECLARE #Test TABLE is not valid T-SQL.
DECLARE @Test TABLE
(
    Id INT
);
INSERT INTO @Test (Id) VALUES
    (397), (398), (399), (400);

DECLARE @Abc TABLE
(
    Id    INT,
    Value VARCHAR(100)
);
INSERT INTO @Abc (Id, Value) VALUES
    (398, ''),
    (400, ''),
    (397, ''),
    (400, ''),
    (400, '');

-- Count the @Abc rows whose Id equals the highest Id found in @Test.
SELECT a.Id,
       COUNT(a.Value) AS RecordCount
FROM @Abc AS a
INNER JOIN
(
    SELECT MAX(t.Id) AS Id
    FROM @Test AS t
) AS v ON a.Id = v.Id
GROUP BY a.Id;

select top 1 with a group by

I have two columns:
namecode name
050125 chris
050125 tof
050125 tof
050130 chris
050131 tof
I want to group by namecode, and return only the name with the most number of occurrences. In this instance, the result would be
050125 tof
050130 chris
050131 tof
This is with SQL Server 2000
I usually use ROW_NUMBER() to achieve this. Not sure how it performs against various data sets, but we haven't had any performance issues as a result of using ROW_NUMBER.
The PARTITION BY clause specifies which value to "group" the row numbers by, and the ORDER BY clause specifies how the records within each "group" should be sorted. So partition the data set by NameCode, and get all records with a Row Number of 1 (that is, the first record in each partition, ordered by the ORDER BY clause).
-- For each NameCode, return the Name that occurs most often.
SELECT
    i.NameCode,
    i.Name
FROM
(
    SELECT
        -- Rank the names inside each NameCode by how often they occur. Ordering by
        -- t.Name alone would pick the alphabetically first name instead; it is kept
        -- only as a tie-breaker so the result is deterministic.
        RowNumber = ROW_NUMBER() OVER (PARTITION BY t.NameCode ORDER BY COUNT(*) DESC, t.Name),
        t.NameCode,
        t.Name
    FROM
        MyTable t
    GROUP BY
        t.NameCode,
        t.Name
) i
WHERE
    i.RowNumber = 1;
-- For each namecode, return the name that occurs most often.
-- TOP 1 and ORDER BY sit in the same subquery: SQL Server rejects ORDER BY inside
-- a derived table that has no TOP, and a derived table also requires every column
-- (including COUNT(*)) to be named.
select distinct o.namecode
, (
    select top 1 i.name
    from myTable i
    where i.namecode = o.namecode
    group by i.name
    -- i.name breaks ties so the chosen name is deterministic
    order by count(*) desc, i.name
) as name
from myTable o
-- Names whose occurrence count equals the maximum count of their namecode.
-- Names tied for the maximum each produce a row.
SELECT
    top_counts.namecode,
    name_counts.name
FROM (
    -- Highest occurrence count per namecode.
    SELECT per_name.namecode, MAX(per_name.count_name) AS max_count
    FROM (
        SELECT namecode, name, COUNT(name) AS count_name
        FROM mytable
        GROUP BY namecode, name
    ) AS per_name
    GROUP BY per_name.namecode
) AS top_counts
INNER JOIN (
    -- Occurrence count of every (namecode, name) pair.
    SELECT namecode, name, COUNT(name) AS count_name
    FROM mytable
    GROUP BY namecode, name
) AS name_counts
    ON top_counts.namecode = name_counts.namecode
   AND name_counts.count_name = top_counts.max_count
I did not try but this should work,
-- For every namecode, return the name(s) with the highest occurrence count.
-- A plain TOP 1 over the whole result would yield a single row overall, so the
-- maximum is resolved per namecode with a correlated subquery instead.
select c.namecode, c.name
from (
    -- occurrence count of every (namecode, name) pair
    select namecode, name, count(*) as name_count
    from temp
    group by namecode, name
) c
where c.name_count = (
    select max(x.name_count)
    from (
        select namecode, name, count(*) as name_count
        from temp
        group by namecode, name
    ) x
    where x.namecode = c.namecode
)
order by c.namecode
Here are two examples that you could use. The temp table version was more efficient than the view, but this was measured on a small data sample, so you would want to check your own statistics.
-- Creating a view: total quantity sold per store, numbered within each state
-- from the highest total downwards.
GO
CREATE VIEW StateStoreSales AS
SELECT
    t.state,
    t.stor_id,
    t.stor_name,
    SUM(s.qty) AS [TotalSales],
    ROW_NUMBER() OVER (PARTITION BY t.state ORDER BY SUM(s.qty) DESC) AS [Rank]
FROM [dbo].[sales] AS s
INNER JOIN [dbo].[stores] AS t
    ON s.stor_id = t.stor_id
GROUP BY
    t.state,
    t.stor_id,
    t.stor_name
GO
-- Top-selling store of every state, then clean up the demo view.
SELECT *
FROM StateStoreSales
WHERE Rank <= 1
ORDER BY TotalSales DESC;
DROP VIEW StateStoreSales;
-- Using a temp table: materialise the numbered per-store totals into #TEMP,
-- read the top-selling store of every state, then drop the temp table.
SELECT
    t.state,
    t.stor_id,
    t.stor_name,
    SUM(s.qty) AS [TotalSales],
    ROW_NUMBER() OVER (PARTITION BY t.state ORDER BY SUM(s.qty) DESC) AS [Rank]
INTO #TEMP
FROM [dbo].[sales] AS s
INNER JOIN [dbo].[stores] AS t
    ON s.stor_id = t.stor_id
GROUP BY
    t.state,
    t.stor_id,
    t.stor_name;
SELECT *
FROM #TEMP
WHERE Rank <= 1
ORDER BY TotalSales DESC;
DROP TABLE #TEMP;

Resources