sql Server: Rank by sum of points and order by ranking - sql-server

I have a game table with these fields:
ID Name Email Points
----------------------------------
1 John john#aaa.com 120
2 Test bob#aaa.com 100
3 John john#bbb.com 80
4 Bob bob#aaa.com 50
5 John john#aaa.com 80
I want to group them by email (the email identifies a player, so rows 2 and 4 belong to the same player even though their names differ). For each group I also want the sum of points and the last entered name, with the results ranked from the highest sum of points to the lowest.
the Result I want from the sample table is:
Ranking Name Points Games_Played Average_Points
------------------------------------------------------------------------------------------
1 John 200 2 100
2 Bob 150 2 75
3 John 80 1 80
I can already get the ranking, the sum of points, and the average points, but getting the last entered name seems to require joining the table to itself again, which feels a little wrong.
Any ideas how to do this?

Displaying the Name while grouping by email forces you to aggregate it, e.g. with MIN(Name), which can lead to duplicate names in the result.
-- Inner query: one row per email with its total, game count and average;
-- MIN(Name) is used only because some aggregate is required for Name.
-- Outer query: rank those per-email rows by total points, highest first.
SELECT
    RANK() OVER (ORDER BY totals.Points DESC) AS Rank,
    totals.Name,
    totals.Points,
    totals.Games_Played,
    totals.Average_Points
FROM (
    SELECT
        MIN(Name)   AS Name,
        Email,
        SUM(Points) AS Points,
        COUNT(*)    AS Games_Played,
        AVG(Points) AS Average_Points
    FROM #a
    GROUP BY Email
) AS totals
ORDER BY Rank
SQLFiddle
in the Fiddle are two commented lines you should uncomment to see the behavior on identical results.

You can use Ranking Functions from SQL-Server 2005 upwards:
-- Every game row is decorated with its email's total, game count and average
-- via window aggregates; the Rank column then marks exactly one row per email
-- (the game with the most points) so the outer query returns one row per player.
WITH Points
AS (SELECT Sum_Points = Sum(points) OVER (
partition BY email),
Games_Played = Count(ID) OVER (
partition BY email),
Average_Points = AVG(Points) OVER (
partition BY email),
-- ROW_NUMBER instead of DENSE_RANK: two games with the same points for one
-- email would both get rank 1 and the player would be listed twice.
-- ID DESC makes the newest of the tied games win.
Rank = ROW_NUMBER() OVER (
Partition BY email Order By Points DESC, ID DESC),
*
FROM dbo.Game)
-- Players with equal totals share the same Ranking (no gaps).
SELECT Ranking=DENSE_RANK()OVER(ORDER BY Sum_Points DESC),
Name,
Points=Sum_Points,
Games_Played,
Average_Points
FROM Points
WHERE Rank = 1
Order By Sum_Points DESC;
DEMO
Note that the result is different, since I'm showing the name from the row with the highest points when an email has more than one name, so "Test" instead of "Bob".

Below are separate solutions for SQL Server 2012+, 2005 to 2008 R2, and 2000:
2012+
-- Sample data from the question.
CREATE TABLE #PlayerPoints
( ID INT PRIMARY KEY
, Name VARCHAR(10) NOT NULL
, Email VARCHAR(20) NOT NULL
, Points INT NOT NULL);
-- The INSERT must end with a semicolon: a statement that precedes a
-- WITH (common table expression) has to be terminated.
INSERT INTO #PlayerPoints (ID, Name, Email, Points)
VALUES
(1, 'John', 'john#aaa.com', 120)
, (2, 'Test', 'bob#aaa.com', 100)
, (3, 'John', 'john#bbb.com', 80)
, (4, 'Bob', 'bob#aaa.com', 50)
, (5, 'John', 'john#aaa.com', 80);
-- BaseData tags every game row with the name used on the player's most
-- recent row (highest ID for that email).
WITH BaseData
AS
(SELECT ID
, Email
, Points
-- FIRST_VALUE over ID DESC is the name on the highest ID. LAST_VALUE with a
-- frame that ends at the current row would only return each row's own name,
-- and the MAX below would then pick the alphabetically greatest name.
, LastRecordName = FIRST_VALUE(Name) OVER
(PARTITION BY Email
ORDER BY ID DESC
ROWS UNBOUNDED PRECEDING)
FROM #PlayerPoints)
-- One row per email. LastRecordName is constant within an email, so MAX only
-- exists to satisfy GROUP BY.
SELECT Email
, LastRecordName = MAX(LastRecordName)
, Points = SUM(Points)
, Games_Played = COUNT(*)
, Average_Points = AVG(Points)
FROM BaseData
GROUP BY Email
ORDER BY Points DESC;
2005 to 2008 R2
-- Sample data from the question.
CREATE TABLE #PlayerPoints
( ID INT PRIMARY KEY
, Name VARCHAR(10) NOT NULL
, Email VARCHAR(20) NOT NULL
, Points INT NOT NULL);
-- Rows are inserted with SELECT ... UNION ALL because the multi-row VALUES
-- constructor is only available from SQL Server 2008 onwards.
-- The trailing semicolon is required: the next statement starts with WITH.
INSERT INTO #PlayerPoints (ID, Name, Email, Points)
SELECT 1, 'John', 'john#aaa.com', 120
UNION ALL
SELECT 2, 'Test', 'bob#aaa.com', 100
UNION ALL
SELECT 3, 'John', 'john#bbb.com', 80
UNION ALL
SELECT 4, 'Bob', 'bob#aaa.com', 50
UNION ALL
SELECT 5, 'John', 'john#aaa.com', 80;
-- BaseData numbers each email's rows from newest (highest ID) to oldest,
-- so ReverseOrder = 1 is the last entered row for that email.
WITH BaseData
AS
(SELECT ID
, Email
, Name
, ReverseOrder = ROW_NUMBER() OVER
(PARTITION BY Email
ORDER BY ID DESC)
FROM #PlayerPoints)
-- Each game row is joined to the single newest row of its email, so the
-- aggregates still see every game while bd.Name is the last entered name.
SELECT pp.Email
, LastRecordName = MAX(bd.Name)
, Points = SUM(pp.Points)
, Games_Played = COUNT(*)
, Average_Points = AVG(pp.Points)
FROM #PlayerPoints pp
JOIN BaseData bd
ON pp.Email = bd.Email
AND bd.ReverseOrder = 1
GROUP BY pp.Email
ORDER BY Points DESC;
2000
-- Sample data from the question.
CREATE TABLE #PlayerPoints (
    ID     INT         PRIMARY KEY,
    Name   VARCHAR(10) NOT NULL,
    Email  VARCHAR(20) NOT NULL,
    Points INT         NOT NULL
);

INSERT INTO #PlayerPoints (ID, Name, Email, Points)
          SELECT 1, 'John', 'john#aaa.com', 120
UNION ALL SELECT 2, 'Test', 'bob#aaa.com', 100
UNION ALL SELECT 3, 'John', 'john#bbb.com', 80
UNION ALL SELECT 4, 'Bob', 'bob#aaa.com', 50
UNION ALL SELECT 5, 'John', 'john#aaa.com', 80;

-- No window functions here: the newest row per email is found with
-- MAX(ID) ... GROUP BY, joined back to fetch its Name, and that single
-- row per email is then joined to every game row for the aggregates.
SELECT
    games.Email,
    MAX(latest.Name)  AS LastRecordName,
    SUM(games.Points) AS Points,
    COUNT(*)          AS Games_Played,
    AVG(games.Points) AS Average_Points
FROM #PlayerPoints AS games
INNER JOIN (
    SELECT
        candidate.Email,
        candidate.Name
    FROM #PlayerPoints AS candidate
    INNER JOIN (
        SELECT
            Email,
            MAX(ID) AS MaximumID
        FROM #PlayerPoints
        GROUP BY Email
    ) AS newest
        ON candidate.ID = newest.MaximumID
) AS latest
    ON games.Email = latest.Email
GROUP BY games.Email
ORDER BY Points DESC;

I think this is what you need
-- One row per email: total, game count and average are plain aggregates.
-- Grouping by email only (not by name) keeps a player who used several names
-- in a single row; the displayed name is the one on the player's newest row.
-- NOTE(review): assumes ranks has the ID column shown in the question's table.
select ROW_NUMBER() OVER (ORDER BY sum(r1.points) Desc) as Ranking,
-- only the row whose id equals the email's highest id contributes a name
max(case when r1.id = r2.last_id then r1.name end) as Name,
sum(r1.points) as Points,
count(*) as 'Games Played',
avg(r1.points) as 'Average Points'
from ranks r1
join (select email, max(id) as last_id from ranks group by email) r2
on r1.email = r2.email
group by
r1.email
Here is a SQL Fiddle.

Only the solution from @RegisteredUser seems to handle the constraint on the name. However, it requires SQL Server 2012, so here is a more general solution:
-- The derived table numbers each email's rows from newest to oldest
-- (islastid = 1 is the row with the highest id). The outer query aggregates
-- per email and takes the name from that newest row only.
-- The window function ranks the groups by their aggregated sum of points.
Select dense_rank() over (order by sum(points) desc) as ranking,
max(case when islastid = 1 then Name end) as Name, Email, Sum(Points) as Points,
Count(*) as Games_Played, AVG(Points) as Average_Points
From (select g.*,
row_number() over (partition by email order by id desc) as islastid
from games g
) t
Group by Email
Order by ranking;
You don't have enough information in the question to choose between rank() and dense_rank().
Also, this version is simpler relative to other versions, because you can mix window functions and aggregation functions.

Related

Snowflake : IN operator

so I want something as below in my query
select * from table a
where a.id in(select id, max(date) from table a group by id)
I am getting an error here, since IN behaves like = and the subquery returns two columns.
how to do it?
example :
id
date
1
2022-31-01
1
2022-21-03
2
2022-01-01
2
2022-02-01
I need to get only one record based on date(max). The table has more columns than just id and date
so I need to something like this in snowflake
select * from table a
where id in(select id,max(date) from table a group by id)
-----------------------
All solutions are working , if i select from table .
but i have case statement in view where duplicate records are coming
example :
create or replace view v_test
as
select * from
(
select id,lastdatetime,*,
case when start_date < timestamp and timestamp < end
and move_date = '9999-12-31' then 'Y'
else 'N' end as IND
from table a
) a
so if any one select view where IND= 'Y', more than 1 records are coming
what i want is to select latest records for ID where IND='Y' and max(lastdatetime)
how to incorporate this logic in view?
I think you are trying to get the latest record for each id?
select *
from table a
qualify row_number() over (partition by id order by date desc) = 1
So if we look at your sub-select:
using this "data" for the examples:
with data (id, _date) as (
select column1, to_date(column2, 'yyyy-dd-mm') from values
(1, '2022-31-01'),
(1, '2022-21-03'),
(2, '2022-01-01'),
(2, '2022-02-01')
)
select id, max(_date)
from data
group by 1;
it gives:
ID
MAX(_DATE)
1
2022-03-21
2
2022-01-02
which makes it seem you want the "the last date, per id"
which can classically (ansi sql) be written:
-- Keep every row of data whose _date equals the latest _date of its id.
select d.*
from data as d
inner join (
    -- latest date per id
    select id, max(_date) as max_date
    from data
    group by id
) as latest
    on latest.id = d.id
   and latest.max_date = d._date
;
ID
_DATE
1
2022-03-21
2
2022-01-02
which gives you "all the rows values". BUT if you have many rows with the same last date, you will get those, in the output.
Another method is to use ROW_NUMBER to pick one and only one row, which is the style of answer Mike has given:
with data (id, _date, extra) as (
select column1, to_date(column2, 'yyyy-dd-mm'), column3 from values
(1, '2022-31-01', 'extra_a'),
(1, '2022-21-03', 'extra_b_double_a'),
(1, '2022-21-03', 'extra_b_double_b'),
(2, '2022-01-01', 'extra_c'),
(2, '2022-02-01', 'extra_d')
)
select *
from data
qualify row_number() over (partition by id order by _date desc) =1 ;
gives:
ID
_DATE
EXTRA
1
2022-03-21
extra_b_double_a
2
2022-01-02
extra_d
Now, if you want "all rows of the last day", your method works, albeit QUALIFY/ROW_NUMBER is faster. You can use RANK or DENSE_RANK:
with data (id, _date, extra) as (
select column1, to_date(column2, 'yyyy-dd-mm'), column3 from values
(1, '2022-31-01', 'extra_a'),
(1, '2022-21-03', 'extra_b_double_a'),
(1, '2022-21-03', 'extra_b_double_b'),
(2, '2022-01-01', 'extra_c'),
(2, '2022-02-01', 'extra_d')
)
select *
from data
qualify dense_rank() over (partition by id order by _date desc) =1 ;
ID
_DATE
EXTRA
1
2022-03-21
extra_b_double_a
1
2022-03-21
extra_b_double_b
2
2022-01-02
extra_d
Now the last thing that it almost seems you are asking for is "how do I find the ID with the most recent data (here 1) and get all rows for that ID":
-- Sample rows; the date strings are parsed as year-day-month.
with data (id, _date, extra) as (
select column1, to_date(column2, 'yyyy-dd-mm'), column3 from values
(1, '2022-31-01', 'extra_a'),
(1, '2022-21-03', 'extra_b_double_a'),
(1, '2022-21-03', 'extra_b_double_b'),
(2, '2022-01-01', 'extra_c'),
(2, '2022-02-01', 'extra_d')
)
-- Keep every row whose id equals the id found on the latest _date overall.
-- The frame is spelled out so last_value looks at the whole ordered set
-- instead of depending on the engine's default frame.
select *
from data
qualify id = last_value(id) over (
    order by _date
    rows between unbounded preceding and unbounded following
);
Here is an example of how to use the in operator with a subquery:
select * from table1 t1 where t1.id in (select t2.id from table2 t2);
Usage of IN is possible to match on both columns:
-- Match on the (id, date) pair so only each id's latest-dated rows are kept.
select a.*
from tab as a
where (a.id, a.date) in (
    select id, max(date)
    from tab
    group by id
);
For sample data:
CREATE TABLE tab (id, date)
AS
SELECT column1, to_date(column2, 'yyyy-dd-mm')
FROM VALUES
(1, '2022-31-01'),
(1, '2022-21-03'),
(2, '2022-01-01'),
(2, '2022-02-01');
Output:

TSQL: Group by one column, count all rows and keep value on second column based on row_number

I have a query that returns an Id, a Name and the Row_Number() based on some rules.
The query looks like that
SELECT
tm.id AS Id,
pn.Name AS Name,
ROW_NUMBER() OVER(PARTITION BY tm.id ORDER BY tm.CreatedDate ASC) AS Row
FROM
#tempTable AS tm
LEFT JOIN
names pn WITH (NOLOCK) ON tm.nameId = pn.NameId
WHERE ....
The output of the above query looks like the table below with the dummy data
CREATE TABLE people
(
id int,
name varchar(55),
row int
);
INSERT INTO people
VALUES (1, 'John', 1), (1, 'John', 2), (2, 'Mary', 1),
(3, 'Jeff', 1), (4, 'Bill', 1), (4, 'Bill', 2),
(4, 'Bill', 3), (4, 'Billy', 4), (5, 'Bobby', 1),
(5, 'Bob', 2), (5, 'Bob' , 3), (5, 'Bob' , 4);
What I try to do, is group by the id field, count all rows, but for the name, use the one with row = 1
My attempt is like this, but, obviously, I get different rows since I include the x.name in the group by.
SELECT
x.id,
x.name,
COUNT(*) AS Value
FROM
(SELECT
tm.id AS Id,
pn.Name AS Name,
ROW_NUMBER() OVER(PARTITION BY tm.id ORDER BY tm.CreatedDate ASC) AS Row
FROM
#tempTable AS tm
LEFT JOIN
names pn WITH (NOLOCK) ON tm.nameId = pn.NameId
WHERE ....
) x
GROUP BY
x.id, x.name
ORDER BY
COUNT(*) DESC
The desired results from the dummy data are:
id name count
------------------
1 John 2
2 Mary 1
3 Jeff 1
4 Bill 4
5 Bobby 4
You can use FIRST_VALUE() window function to get the name of the row with row number = 1 and with the keyword DISTINCT there is no need to GROUP BY:
SELECT DISTINCT tm.id AS Id
, FIRST_VALUE(pn.Name) OVER (PARTITION BY tm.id ORDER BY tm.CreatedDate ASC) AS Name
, COUNT(*) OVER (PARTITION BY tm.id) AS counter
FROM #tempTable AS tm
LEFT JOIN names pn WITH (NOLOCK) ON tm.nameId = pn.NameId
WHERE ....
If you can't use FIRST_VALUE() then you can do it with conditional aggregation:
SELECT id,
MAX(CASE WHEN Row = 1 THEN Name END) AS NAME,
COUNT(*) AS Counter
FROM (
SELECT tm.id AS Id
, pn.Name AS Name
, ROW_NUMBER() OVER(PARTITION BY tm.id ORDER BY tm.CreatedDate ASC) AS Row
FROM #tempTable AS tm
LEFT JOIN names pn WITH (NOLOCK) ON tm.nameId = pn.NameId
WHERE ....
) t
GROUP BY id
This could be one solution to your problem: group on both id and the target name (case when p.row = 1 then p.name end) for the counting. Adding with rollup to the grouping will "roll up" the count aggregations. Another aggregation on just id can then be used to merge the row values from the intermediate data set (visible in the fiddle).
-- Step 1: count rows per (id, name-of-row-1). ROLLUP adds a subtotal row per
-- id carrying the full count; the grand-total row (id rolled up) is dropped.
with per_id_counts as
(
    select
        p.id,
        case when p.row = 1 then p.name end as name,
        count(*) as cnt
    from people as p
    group by rollup (p.id, case when p.row = 1 then p.name end)
    having grouping(p.id) = 0
)
-- Step 2: collapse to one row per id. MAX(name) skips the NULL names and
-- MAX(cnt) picks the subtotal, which is the largest count for that id.
select
    c.id,
    max(c.name) as name,
    max(c.cnt)  as [count]
from per_id_counts as c
group by c.id;
Fiddle
This would be another solution: do a regular count query with grouping on id and fetch the required name afterwards with a cross apply.
-- Count the rows per id first, then look up the name stored on row = 1
-- for each counted id.
select
    totals.id,
    first_row.name,
    totals.cnt as [count]
from (
    select
        p.id,
        count(*) as cnt
    from people as p
    group by p.id
) as totals
cross apply (
    select p.name
    from people as p
    where p.id = totals.id
      and p.row = 1
) as first_row;
Fiddle

Only show value of Max rows with partition by?

The title might be a bit off; however, I'm trying to remove the values of a row without removing the actual row.
This is my table:
SELECT ID,CustomerID,Weight FROM Orders
What am i trying to accomplish is this:
The MAX() value of ID Group By CustomerID that would give me null values in Weight where max and group by is not set
Is it possible to do this in one line? with a partiton by?
Something like:
SELECT MAX(ID) over (partition by CustomerID,Weight).... I know this is wrong but if possible to do without a join or CTE and only in one line in the select statement that would be great.
One possible approach is using ROW_NUMBER:
-- Show Weight only on each customer's newest order (highest ID);
-- every other row keeps its ID and CustomerID but gets NULL for Weight.
SELECT
    o.ID,
    o.CustomerID,
    CASE ROW_NUMBER() OVER (PARTITION BY o.CustomerId ORDER BY o.ID DESC)
        WHEN 1 THEN o.[Weight]
    END AS [Weight]
FROM #Orders AS o
ORDER BY o.ID
Input:
CREATE TABLE #Orders (
ID int,
CustomerID int,
[Weight] int
)
INSERT INTO #Orders
(ID, CustomerID, [Weight])
VALUES
(1, 11, 100),
(2, 11, 17),
(3, 11, 35),
(4, 22, 26),
(5, 22, 78),
(6, 22, 10030)
Output:
ID CustomerID Weight
1 11 NULL
2 11 NULL
3 11 35
4 22 NULL
5 22 NULL
6 22 10030
Try this
-- Attach each customer's highest ID to every order row, then blank out
-- Weight on all rows except the one carrying that highest ID.
SELECT
    latest.ID,
    latest.CustomerId,
    CASE WHEN latest.ID = latest.MAX_ID THEN latest.Weight END AS Weight
FROM (
    SELECT
        ID,
        CustomerId,
        Weight,
        MAX(ID) OVER (PARTITION BY CustomerId) AS MAX_ID
    FROM Orders
) AS latest
You can try this.
-- Weight is shown only on the row whose ID is the customer's maximum ID.
SELECT
    ID,
    CustomerId,
    CASE
        WHEN ID = MAX(ID) OVER (PARTITION BY CustomerId) THEN Weight
    END AS Weight
FROM Orders

second highest salary in each department

I am trying to find the second highest salary in each department.
Schema:
CREATE TABLE employees
(
ID int NOT NULL,
NAME char(50) NOT NULL,
departmentid int,
salary int
);
Sample records:
/*departmentid =1 */
INSERT INTO employees VALUES (1, 'Max', 1, 90000);
INSERT INTO employees VALUES (2, 'Joe', 1, 70000);
INSERT INTO employees VALUES (3, 'Randy', 1, 70000);
/*departmentid =2 */
INSERT INTO employees VALUES (4, 'Henry', 2, 80000);
INSERT INTO employees VALUES (5, 'SAM', 2, 60000);
/*departmentid =3 */
INSERT INTO employees VALUES (6, 'Janet', 3, 69000);
My query:
SELECT departmentid,
NAME,
salary
FROM
(
SELECT
departmentid,
NAME,
salary,
Dense_rank()OVER (partition BY departmentid
ORDER BY salary DESC) AS Rank,
Count(1)OVER(partition BY departmentid) AS cnt
FROM
employees
)t
WHERE
t.rank = 2
OR ( t.rank = 1
AND cnt = 1 )
The output I am getting is as below;
departmentid NAME salary
1 Joe 70000
1 Randy 70000
2 SAM 60000
3 Janet 69000
My expected output is
departmentid NAME salary
1 Joe 70000
1 Randy 70000
2 SAM 60000
3 NULL NULL
As there is only one record for departmentid=3, it should return null.
What is wrong with this query? Any other ways to achieve this result?
I've also included a SQL fiddle.
ROW_NUMBER() and select = 2
-- Number the employees of each department from the highest salary down and
-- keep the row numbered 2.
-- ROW_NUMBER gives tied salaries different numbers, so exactly one row per
-- department (with at least two employees) is returned.
;WITH salary AS
(
SELECT [RN] = ROW_NUMBER() OVER (PARTITION BY departmentid ORDER BY salary DESC),*
FROM employees
)
SELECT
*
FROM salary
WHERE [RN] = 2
I've used two CTEs.
The first returns a list of every department. You'll need this to ensure departments with fewer than 2 salaries are included in the final result.
The second ranks each employee within their department.
Finally, I've used a left outer join to maintain the complete list of departments.
WITH AllDepartments AS
(
    -- One row per department, so a department without a second-ranked
    -- salary still shows up after the outer join.
    SELECT DISTINCT
        departmentid
    FROM
        employees
),
SalaryRanks AS
(
    -- Rank salaries inside each department, highest first; equal salaries
    -- share a rank and no rank numbers are skipped.
    SELECT
        departmentid,
        NAME,
        salary,
        DENSE_RANK() OVER (PARTITION BY departmentid ORDER BY salary DESC) AS [Rank]
    FROM
        employees
)
-- The rank filter lives in the ON clause: in a WHERE clause it would remove
-- the departments that have no rank-2 row.
SELECT
    sr.[Rank],
    dept.departmentid,
    sr.NAME,
    sr.salary
FROM
    AllDepartments AS dept
    LEFT JOIN SalaryRanks AS sr
        ON sr.departmentid = dept.departmentid
        AND sr.[Rank] = 2
;
Returns
Rank departmentid NAME salary
2 1 Joe 70000
2 1 Randy 70000
2 2 SAM 60000
(null) 3 (null) (null)
Use a sub query as i wrote here : http://sqlfiddle.com/#!6/bb5e1/26
-- Number the salaries of each department from highest to lowest and keep
-- the row numbered 2.
SELECT
    numbered.departmentid,
    numbered.salary,
    numbered.rank
FROM (
    SELECT
        departmentid,
        salary,
        ROW_NUMBER() OVER (PARTITION BY departmentid ORDER BY salary DESC) AS rank
    FROM employees
) AS numbered
WHERE numbered.rank = 2
If a departmentid has only one row, and you want to include that row as well, then:
Query
-- Rank salaries inside each department (highest first, ties share a rank).
;WITH ranked_employees AS (
    SELECT
        e.ID,
        e.NAME,
        e.departmentid,
        e.salary,
        DENSE_RANK() OVER (PARTITION BY e.departmentid ORDER BY e.salary DESC) AS salary_rank
    FROM employees AS e
)
-- Part 1: every employee on the second-highest salary of a department.
SELECT ID, NAME, departmentid, salary
FROM ranked_employees
WHERE salary_rank = 2
UNION ALL
-- Part 2: departments with a single row contribute that row.
SELECT MAX(ID), MAX(NAME), departmentid, MAX(salary)
FROM ranked_employees
GROUP BY departmentid
HAVING COUNT(salary_rank) = 1;
There is also a simple way:
SELECT TOP 1 * FROM (Select top 2 * FROM employees order by salary desc ) e Order by salary asc
Edit: this returns only the 2nd highest overall
I think you can get correct answer by just removing below code from your code
OR ( t.rank = 1
AND cnt = 1 )
Also, the main table should be left joined to this result to get NULL in the rest of the columns.

Maximum and Minimum Rows Alternatively in SQL Server

This is an Employee table,
Id Name Salary
1 A.J 7000
2 B.S 30000
3 C.K 2000
4 D.O 10000
5 E.L 500
Now I want to display the 1st highest salary, then the lowest salary, then the 2nd highest salary, then the 2nd lowest salary, and so on, up to the nth row.
Expected Output,
Id Name Salary
2 B.S 30000
5 E.L 500
4 D.O 10000
3 C.K 2000
1 A.J 7000
One more variant without explicit COUNT. SQL Fiddle.
Try also to add this row to sample data (6, 'X.Y', 7000) in the fiddle. The query still returns correct results.
-- Table variables are named with @; DECLARE ... TABLE does not accept a # name.
DECLARE @Employee TABLE (ID int, Name nvarchar(50), Salary money);
INSERT INTO @Employee (ID, Name, Salary) VALUES
(1, 'A.J', 7000),
(2, 'B.S', 30000),
(3, 'C.K', 2000),
(4, 'D.O', 10000),
(5, 'E.L', 500);
-- NTILE(2) splits the rows, ordered by salary, into a low group (n = 1)
-- and a high group (n = 2).
WITH
CTE
AS
(
SELECT *, NTILE(2) OVER (ORDER BY Salary, ID) AS n
FROM @Employee AS E
)
-- s is -1 for the low group and +1 for the high group, so ss is the salary
-- with its sign flipped for the low group. Ordering ss DESC inside each group
-- numbers the high group from the top down and the low group from the bottom
-- up; the final ORDER BY interleaves them (high first within each rn).
SELECT
*
,SIGN(n-1.5) AS s
,SIGN(n-1.5)*Salary AS ss
,ROW_NUMBER() OVER(PARTITION BY n ORDER BY SIGN(n-1.5)*Salary DESC) AS rn
FROM CTE
ORDER BY rn, ss DESC;
Result
ID Name Salary n s ss rn
2 B.S 30000.00 2 1.0 30000.00000 1
5 E.L 500.00 1 -1.0 -500.00000 1
4 D.O 10000.00 2 1.0 10000.00000 2
3 C.K 2000.00 1 -1.0 -2000.00000 2
1 A.J 7000.00 1 -1.0 -7000.00000 3
I left intermediary columns in the output to illustrate how it works.
Using Row_Number() and Count()
Fiddle Demo
-- Local variables are named with @; a # prefix is not valid in DECLARE.
-- @count caps the output at the number of rows in Employee.
declare @count int=(select count(1) from Employee);
-- cte1 holds every employee twice: numbered by salary descending (Sort = 0)
-- and numbered by salary ascending (Sort = 1).
with cte1 as
(
select ROW_NUMBER() over(order by salary desc) as rn,0 Sort,Id,Name,Salary, count(Id) over () cnt from Employee
union all
select ROW_NUMBER() over(order by salary) as rn,1 Sort,Id,Name,Salary, count(Id) over () cnt from Employee
)
-- Only the first half (rounded up) of each numbering is kept; ordering by
-- rn, sort alternates highest / lowest, and TOP (@count) cuts the list off
-- at the table's row count.
select top (@count) Id,Name,Salary from cte1 where rn <= (floor(cnt/2) + cnt%2) order by rn,sort
Below is the solution:
--Create dummy employee table
CREATE TABLE tbl_Employee
(
Id INT,
Name VARCHAR(100),
Salary NUMERIC(9, 2)
)
GO
--Insert few dummy rows in the table
--(the target must be the table created above, which the queries below read)
INSERT INTO tbl_Employee
(Id, Name, Salary)
VALUES(100, 'John', 7000),
(101, 'Scott', 30000),
(102, 'Jeff', 2000),
(103, 'Jimy', 10000),
(104, 'Andrew', 500),
(105, 'Alister', 100)
GO
--Get data as required
--Local variables are named with @.
--@Cnt is the number of employees; @SeqLimit is half of that, rounded up.
DECLARE @Cnt INT = 0, @SeqLimit INT = 0
SELECT @Cnt = COUNT(1) FROM tbl_employee
SET @SeqLimit = CEILING(@Cnt / 2.0)
--First half: employees numbered from the highest salary down.
SELECT * FROM
(
SELECT ROW_NUMBER() OVER(ORDER BY Salary DESC) AS SEQ, Id, Name, Salary FROM tbl_employee
)DT1
WHERE SEQ <= @SeqLimit
UNION ALL
--Second half: employees numbered from the lowest salary up. With an odd
--row count the middle row is already in the first half, so one less is taken.
SELECT * FROM
(
SELECT ROW_NUMBER() OVER(ORDER BY Salary ASC) AS SEQ, Id, Name, Salary FROM tbl_employee
)DT2
WHERE SEQ <= @SeqLimit - (@Cnt % 2)
--Within each SEQ the higher salary comes first, giving max, min, 2nd max, ...
ORDER BY SEQ ASC, Salary DESC
The same can be achieved with different approaches and here you can find more on this:
http://www.sqlrelease.com/order-max-and-min-value-rows-alternatively-in-sql-server

Resources