I have the following table with sample data.
In it, a run of 3 consecutive "TP" records occurs 2 times, a run of 1 consecutive "SL" record occurs 2 times, and so on.
id | Result
1 | TP
2 | TP
3 | TP
4 | SL
5 | TP
6 | NONE
7 | NONE
8 | SL
9 | TP
10 | TP
11 | TP
12 | SL
13 | SL
14 | SL
And I am looking for a result like this:
comboNum | num
TP_3 | 2
SL_1 | 2
TP_1 | 1
SL_3 | 1
Any suggestions?
You can do it as shown below:
-- Sample data: one row per record, keyed by a sequential Id.
-- Table variables must be named with @ in T-SQL.
DECLARE @Tbl TABLE (Id INT, Result VARCHAR(10))
INSERT INTO @Tbl (Id, Result)
VALUES
(1,'TP')
,(2,'TP')
,(3,'TP')
,(4,'SL')
,(5,'TP')
,(6,'NONE')
,(7,'NONE')
,(8,'SL')
,(9,'TP')
,(10,'TP')
,(11,'TP')
,(12,'SL')
,(13,'SL')
,(14,'SL')
;WITH CTE1
AS
(
    -- RowId numbers all rows ordered by Result, then Id. Within a run of
    -- consecutive Ids that share one Result, RowId - Id stays constant.
    SELECT Id, Result, ROW_NUMBER() OVER (ORDER BY Result, Id) RowId FROM @Tbl
),CTE2
AS
(
    -- One row per run. Cons is (run length - 1), derived from the Id span,
    -- so it relies on Ids being gap-free.
    SELECT
        Result,
        MAX(C.Id) - MIN(C.Id) Cons,
        MIN(C.Id) StartP,
        MAX(C.Id) EndP
    FROM
        CTE1 C
    WHERE
        C.Result <> 'NONE'
    GROUP BY
        C.Result,
        C.RowId - C.Id
)
-- Count how many runs exist for each (Result, run length) pair.
SELECT
    C.Result + '_' + CAST(C.Cons + 1 AS VARCHAR(50)) AS comboNum,
    COUNT(*) AS Num
FROM
    CTE2 C
GROUP BY
    C.Result,
    C.Cons
ORDER BY Num DESC
Result:
comboNum Num
------------------ -----------
TP_3 2
SL_1 2
TP_1 1
SL_3 1
Two CTEs with tricky ROW_NUMBER() sequence:
;WITH numbered AS (
    -- Number every row, including 'NONE', so that a 'NONE' row sitting
    -- between two equal results still breaks the run. seq is constant
    -- within a run of consecutive rows sharing the same Result.
    SELECT id,
           Result,
           ROW_NUMBER() OVER (PARTITION BY Result ORDER BY id)
             - ROW_NUMBER() OVER (ORDER BY id) AS seq
    FROM YourTable
), cte AS (
    -- 'NONE' rows are dropped only after the run key has been computed.
    SELECT id, Result, seq
    FROM numbered
    WHERE Result != 'NONE'
), final AS (
    -- One row per run; COUNT(*) gives the run length without assuming
    -- that id values are gap-free.
    SELECT MIN(id) AS mid,
           Result + '_' + CAST(COUNT(*) AS nvarchar(max)) AS comboNum
    FROM cte
    GROUP BY Result, seq
)
-- Number of runs per comboNum, listed in order of first appearance.
SELECT comboNum,
       COUNT(mid) AS num
FROM final
GROUP BY comboNum
ORDER BY MIN(mid)
Output:
comboNum num
TP_3 2
SL_1 2
TP_1 1
SL_3 1
-- Sample data held in a table variable (T-SQL requires the @ prefix).
Declare @tblTest AS TABLE(
    ID INT,
    Result VARCHAR(50)
)
INSERT INTO @tblTest (ID, Result) VALUES(1,'TP')
,(2,'TP')
,(3,'TP')
,(4,'SL')
,(5,'TP')
,(6,'NONE')
,(7,'NONE')
,(8,'SL')
,(9,'TP')
,(10,'TP')
,(11,'TP')
,(12,'SL')
,(13,'SL')
,(14,'SL')
;WITH X AS
(
    -- SrNo: position among all non-'NONE' rows; PartNo: position among rows
    -- with the same Result. SrNo - PartNo is constant within one run.
    SELECT
        T.*,
        ROW_NUMBER() OVER (ORDER BY ID) AS SrNo,
        ROW_NUMBER() OVER (PARTITION BY Result ORDER BY id) AS PartNo
    FROM @tblTest T
    WHERE Result<>'NONE'
)
SELECT
    ComboNum,
    COUNT(Occurance) AS Num
FROM
(
    -- One row per run; the run length is taken from the ID span, so it
    -- relies on ID values being gap-free inside a run.
    SELECT
        Result +'_'+ CAST((max(ID)-min(ID))+1 AS VARCHAR(5)) AS ComboNum,
        (MAX(ID)-MIN(ID))+1 AS Occurance,
        MIN(SrNo) AS SrNo
    FROM X
    GROUP BY Result, (SrNo - PartNo)
) Z
GROUP BY ComboNum,Occurance
ORDER BY MIN(SrNo)
Output:
Related
I have a problem constructing an analysis query in SQL Server.
The raw data as below
GameID | UsrRegID | Score_User
281 | 1 | 1
281 | 1 | 2
281 | 1 | 3
282 | 1 | 0
282 | 1 | 0
282 | 1 | 1
283 | 1 | 2
283 | 1 | 3
Below is the expected output:
Distinct_Count_GameID | UsrRegID | Score_User
3 | 1 | 7
The logic for calculating Score_User is as follows:
Sum(Max(Score_User) for each GameID)
So the result needs to be 3+1+3=7.
Is it possible to get the expected output above using pure SQL?
I think we need to aggregate twice here. One option uses ROW_NUMBER:
-- Keep only the highest-scoring row per (GameID, UsrRegID), then aggregate
-- per user: number of distinct games and the total of those best scores.
SELECT
    best.UsrRegID,
    COUNT(DISTINCT best.GameID) AS Distinct_Count_GameID,
    SUM(best.Score_User) AS Score_User
FROM (
    SELECT
        GameID,
        UsrRegID,
        Score_User,
        ROW_NUMBER() OVER (
            PARTITION BY GameID, UsrRegID
            ORDER BY Score_User DESC
        ) AS rn
    FROM yourTable
) AS best
WHERE best.rn = 1
GROUP BY best.UsrRegID;
You can't do an aggregate of an aggregate on the same SELECT, you can chain them together with CTE or subqueries.
-- Step 1: best score per game and user.
-- Step 2: per user, count the distinct games and total the best scores.
;WITH BestPerGame (GameID, UsrRegID, MaxScore) AS
(
    SELECT src.GameID, src.UsrRegID, MAX(src.Score_User)
    FROM YourTable AS src
    GROUP BY src.GameID, src.UsrRegID
)
SELECT
    b.UsrRegID,
    COUNT(DISTINCT b.GameID) AS Distinct_Count_GameID,
    SUM(b.MaxScore) AS Score_User
FROM BestPerGame AS b
GROUP BY b.UsrRegID
You can also try like following.
-- The inner query finds the best score per game and user; the outer query
-- aggregates per user, so UsrRegID is the actual user id (not a count of
-- users) and each user's total only includes that user's own games.
SELECT Count(DISTINCT t.[rgameid]) AS Distinct_Count_GameID,
       t.[usrregid]                AS UsrRegID,
       Sum(t.M)                    AS Score_User
FROM   (SELECT [rgameid],
               [usrregid],
               Max([score_user]) AS M
        FROM   [TableName]
        GROUP  BY [rgameid], [usrregid]) t
GROUP  BY t.[usrregid]
DEMO
First find the maximum score for each GameID and UsrRegID using a GROUP BY clause, then take the SUM() of those maximum scores (and the count of distinct GameID values), grouped by UsrRegID.
Query
-- best_scores: highest Score_User per (GameID, UsrRegID).
-- The final select counts distinct games and totals those scores per user.
;with [best_scores] as (
    select
        [GameID],
        [UsrRegID],
        max([Score_User]) as [Score_User]
    from [your_table_name]
    group by [GameID], [UsrRegID]
)
select
    count(distinct [b].[GameID]) as [GameID],
    [b].[UsrRegID],
    sum([b].[Score_User]) as [Score_User]
from [best_scores] as [b]
group by [b].[UsrRegID];
Or, assign a row number within each GameID and UsrRegID partition, ordered by score descending, and keep only the first row of each partition. Then find the count of distinct GameID values and the sum of those maximum scores per UsrRegID.
Query
-- ranked: every row plus its position within its (GameID, UsrRegID)
-- partition, highest Score_User first. Only position 1 is aggregated.
select
    count(distinct [ranked].[GameID]) as [GameID],
    [ranked].[UsrRegID],
    sum([ranked].[Score_User]) as [Score_User]
from (
    select
        [src].*,
        row_number() over(
            partition by [src].[GameID], [src].[UsrRegID]
            order by [src].[Score_User] desc
        ) as [rn]
    from [your_table_name] as [src]
) as [ranked]
where [ranked].[rn] = 1
group by [ranked].[UsrRegID];
Aggregates and a COUNT(Distinct GameID):
-- Sample data in a table variable (T-SQL requires the @ prefix).
declare @raw as table (GameID int, UsrRegID int, Score_user int)
insert into @raw (GameID, UsrRegID, Score_user) values (281, 1, 1)
,(281, 1, 2)
,(281, 1, 3)
,(282, 1, 0)
,(282, 1, 0)
,(282, 1, 1)
,(283, 1, 2)
,(283, 1, 3)
-- Inner query: best score per game and user.
-- Outer query: per user, distinct game count and the total of best scores.
select count(distinct GameID) as Distinct_Count_GameID, UsrRegID, sum(max_score_user) as Score_User
from
(
    select GameID
    , UsrRegID
    , max(score_user) as max_score_user
    from @raw
    group by GameID, UsrRegID
) a
group by a.UsrRegID
I have a table in MS SQL Server where there are some null values in the column "Value".
Group ID Value
A 1 10
A 2
A 3
A 4 40
B 1
B 2 20
B 3 30
B 4
I want to update each null value with the non-null value from the same group that has the next higher ID or, if there is no higher ID with a value in the same group, the nearest lower one. So the result should look like this.
Group ID Value
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
Thanks!
You can use windowed version of SUM function in order to determine islands of NULL valued records along with the record having the higher ID in the same group:
-- grp is a running count of non-NULL values, walking each [Group] from the
-- highest ID downwards; a NULL row therefore shares its grp with the
-- nearest non-NULL row that has a higher ID.
SELECT
    t.[Group],
    t.ID,
    t.Value,
    SUM(CASE WHEN t.Value IS NOT NULL THEN 1 ELSE 0 END)
        OVER (PARTITION BY t.[Group] ORDER BY t.ID DESC) AS grp
FROM mytable AS t
Output:
Group ID Value grp
-----------------------
A 4 40 1
A 3 30 2
A 2 NULL 2
A 1 NULL 2
B 4 40 1
B 3 NULL 1
B 2 20 2
B 1 10 3
You can now wrap the above query in a CTE and use another CTE to do the update:
-- Islands: tag each row with the running count of non-NULL values (by
-- descending ID), so NULL rows join the next higher non-NULL row.
-- Filled: expose that island's single non-NULL value on every row.
;WITH Islands AS (
    SELECT
        [Group],
        ID,
        Value,
        SUM(CASE WHEN Value IS NOT NULL THEN 1 ELSE 0 END)
            OVER (PARTITION BY [Group] ORDER BY ID DESC) AS island_no
    FROM mytable
), Filled AS (
    SELECT
        [Group],
        ID,
        Value,
        MAX(Value) OVER (PARTITION BY [Group], island_no) AS fill_value
    FROM Islands
)
UPDATE Filled
SET Value = fill_value
WHERE Value IS NULL
Demo here
Edit:
The above query doesn't handle the edge case where the very last record within a Group slice is NULL. To handle this case as well you can use the following query:
-- island_desc groups a NULL row with the nearest non-NULL row at a higher
-- ID; island_asc groups it with the nearest non-NULL row at a lower ID.
-- The higher-ID value is preferred; the lower-ID value is the fallback.
;WITH Islands AS (
    SELECT
        [Group],
        ID,
        Value,
        SUM(CASE WHEN Value IS NOT NULL THEN 1 ELSE 0 END)
            OVER (PARTITION BY [Group] ORDER BY ID DESC) AS island_desc,
        SUM(CASE WHEN Value IS NOT NULL THEN 1 ELSE 0 END)
            OVER (PARTITION BY [Group] ORDER BY ID) AS island_asc
    FROM mytable
), Filled AS (
    SELECT
        [Group],
        ID,
        Value,
        MAX(Value) OVER (PARTITION BY [Group], island_desc) AS next_value,
        MAX(Value) OVER (PARTITION BY [Group], island_asc) AS prev_value
    FROM Islands
)
UPDATE Filled
SET Value = COALESCE(next_value, prev_value)
WHERE Value IS NULL
Demo here
Please try this-
DATA GENERATION
-- Sample data in a table variable (T-SQL requires the @ prefix).
DECLARE @T TABLE
(
    GroupCd CHAR(1),
    Id INT,
    Value INT
)
INSERT INTO @T (GroupCd, Id, Value)
VALUES('A',1,10),
('A',2,NULL),
('A',3,NULL),
('A',4,40),
('B',1,NULL),
('B',2,20),
('B',3,30),
('B',4,NULL)
SOLUTION
-- For every row, take its own Value, else the nearest non-NULL Value at a
-- higher Id in the same group (z), else the nearest one at a lower Id (z1).
UPDATE a
SET a.Value = b.Value
FROM @T a
INNER JOIN
(
    SELECT a.GroupCd,a.Id,Coalesce(a.Value,z.Value,z1.Value) Value
    FROM @T a
    OUTER APPLY
    (
        -- closest following non-NULL value
        SELECT TOP 1 Value
        FROM @T b
        WHERE a.GroupCd = b.GroupCd
        AND b.Value IS NOT NULL AND a.Id < b.Id
        ORDER BY Id
    )z
    OUTER APPLY
    (
        -- closest preceding non-NULL value
        SELECT TOP 1 Value
        FROM @T b
        WHERE a.GroupCd = b.GroupCd
        AND b.Value IS NOT NULL AND a.Id > b.Id
        ORDER BY Id DESC
    )z1
)b ON a.GroupCd = b.GroupCd AND a.Id = b.Id
SELECT * FROM @T
OUTPUT
GroupCd Id Value
------- ----------- -----------
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
(8 rows affected)
You can try this simple method:
-- Sample data in a table variable (T-SQL requires the @ prefix).
DECLARE @T TABLE
(
    GroupCd CHAR(1),
    Id INT,
    Value INT
)
INSERT INTO @T (GroupCd, Id, Value)
VALUES('A',1,NULL),
('A',2,NULL),
('A',3,30),
('A',4,40),
('B',1,10),
('B',2,20),
('B',3,NULL),
('B',4,40)
-- NewVal: the row's own Value, else the nearest non-NULL Value at a higher
-- Id in the same group, else the nearest non-NULL Value at a lower Id.
SELECT
    *,
    NewVal = COALESCE(
        Value,
        (SELECT TOP 1 N.Value FROM @T N WHERE N.GroupCd = T.GroupCd AND N.Id > T.Id AND N.Value IS NOT NULL ORDER BY N.Id ASC),
        (SELECT TOP 1 P.Value FROM @T P WHERE P.GroupCd = T.GroupCd AND P.Id < T.Id AND P.Value IS NOT NULL ORDER BY P.Id DESC))
FROM @T T
My Result
-- Sets every NULL [Value] to the non-NULL value with the highest [Id] in
-- the same [Group]. A derived table in FROM must carry an alias ([latest]).
-- NOTE(review): this uses one value per group for all NULL rows; it does not
-- pick the nearest higher/lower Id per row.
update MY_TABLE set [value] = [latest].[newValue] from (
    select [Group] [newGroup],
        [Value] [newValue]
    from (
        select [Group], [Value],
            row_number() over (partition by [group] order by [Id] desc) [rn]
        from MY_TABLE
        where [Value] is not null
    ) [a] where [rn] = 1
) [latest] where MY_TABLE.[Group] = [latest].[newGroup] and MY_TABLE.[Value] is null
Can you help me with the following? Thanks.
Table A
Id Name IdParent
1 Operation Null
2 Developer 1
3 Android 2
4 IOS 2
Expected result:
ID Name
1 +Operation
2 +------ Developer
3 +------------Android
4 +------------ IOS
By adding a sequence during the recursive build, you can easily create the proper presentation sequence and nesting
-- Sample hierarchy in a table variable (T-SQL requires the @ prefix for
-- table variables and scalar variables alike).
Declare @YourTable table (id int,IdParent int,Name varchar(50))
Insert into @YourTable (id, IdParent, Name) values
( 1, NULL,'Operation')
,( 2, 1 ,'Developer')
,( 3, 2 ,'Android')
,( 4, 2 ,'IOS')
,( 5, 1 ,'Poet')
,( 6, 5 ,'Limerick')
,( 7, 5 ,'Haiku')
Declare @Top int = null --<< Sets top of Hier Try 2
Declare @Nest varchar(25) = '|-----' --<< Optional: Added for readability
;with cteP as (
    -- Anchor: when @Top is NULL start at rows without a parent, otherwise
    -- start at the row whose ID equals @Top.
    Select Seq = cast(10000+Row_Number() over (Order by Name) as varchar(500))
    ,ID
    ,IdParent
    ,Lvl=1
    ,Name
    From @YourTable
    Where IsNull(@Top,-1) = case when @Top is null then isnull(IdParent ,-1) else ID end
    Union All
    -- Recursive step: append each child's sibling number to the parent's
    -- Seq, so sorting by Seq lists every parent directly before its children.
    Select Seq = cast(concat(p.Seq,'.',10000+Row_Number() over (Order by r.Name)) as varchar(500))
    ,r.ID
    ,r.IdParent
    ,p.Lvl+1
    ,r.Name
    From @YourTable r
    Join cteP p on r.IdParent = p.ID)
-- Indent each name by repeating @Nest once per level below the top.
Select A.ID
,A.IdParent
,A.Lvl
,Name = Replicate(@Nest,A.Lvl-1) + A.Name
From ctep A
Order By A.Seq
Returns
ID IdParent Lvl Name
1 NULL 1 Operation
2 1 2 |-----Developer
3 2 3 |-----|-----Android
4 2 3 |-----|-----IOS
5 1 2 |-----Poet
7 5 3 |-----|-----Haiku
6 5 3 |-----|-----Limerick
Here's another version:
-- RawData: inline sample rows.
-- Depth: starts every row at depth 1 and adds 1 for each ancestor reached
-- by following IdParent; the largest depth per Id is the row's level.
WITH RawData (Id, Name, IdParent) AS (
    SELECT v.Id, v.Name, v.IdParent
    FROM (VALUES
        (1, 'Operation', CONVERT(INT, NULL)),
        (2, 'Developer', 1),
        (3, 'Android', 2),
        (4, 'IOS', 2)
    ) AS v (Id, Name, IdParent)
),
Depth (Id, depth, IdParent) AS (
    SELECT Id, 1, IdParent
    FROM RawData
    UNION ALL
    SELECT walk.Id, walk.depth + 1, ancestor.IdParent
    FROM Depth AS walk
    INNER JOIN RawData AS ancestor
        ON ancestor.Id = walk.IdParent
)
SELECT
    r.Id,
    '+' + REPLICATE('----', lvl.depth - 1) + r.Name AS Name
FROM RawData AS r
INNER JOIN (
    SELECT Id, MAX(depth) AS depth
    FROM Depth
    GROUP BY Id
) AS lvl
    ON lvl.Id = r.Id;
Results:
Id Name
1 +Operation
2 +----Developer
3 +--------Android
4 +--------IOS
-- Sample hierarchy in a table variable (T-SQL requires the @ prefix).
DECLARE @mockup TABLE(Id INT, Name VARCHAR(100), IdParent INT);
INSERT INTO @mockup (Id, Name, IdParent) VALUES
(1,'Operation',Null)
,(2,'Developer',1)
,(3,'Android',2)
,(4,'IOS',2);
--The query uses a recursive CTE and finally REPLICATE with the recursive level to add the number of hyphens...
WITH recCTE AS
(
    -- Anchor: root rows (no parent). Seq is a zero-padded, 5-character
    -- sibling number used to sort the tree.
    SELECT Id, Name, 1 AS Lvl, CAST(REPLACE(STR(ROW_NUMBER() OVER (ORDER BY Id),5),' ','0') AS VARCHAR(MAX)) AS Seq
    FROM @mockup
    WHERE IdParent IS NULL
    UNION ALL
    -- Recursive step: children extend the parent's Seq with their own number.
    SELECT m.Id,m.Name,r.Lvl +1,r.Seq + '.' + REPLACE(STR(ROW_NUMBER() OVER (ORDER BY m.Id),5),' ','0')
    FROM @mockup AS m
    INNER JOIN recCTE AS r ON m.IdParent=r.Id
)
SELECT *
,'+' + REPLICATE('-',Lvl*4) + Name
FROM recCTE
ORDER BY Seq
the result
+----+-----------+-----+----------------------+
| Id | Name | Lvl | (Kein Spaltenname) |
+----+-----------+-----+----------------------+
| 1 | Operation | 1 | +----Operation |
+----+-----------+-----+----------------------+
| 2 | Developer | 2 | +--------Developer |
+----+-----------+-----+----------------------+
| 3 | Android | 3 | +------------Android |
+----+-----------+-----+----------------------+
| 4 | IOS | 3 | +------------IOS |
+----+-----------+-----+----------------------+
I have a table of players each having an ID (indexed primary key), a name, and a score. The table is not sorted except by index. e.g.
[dbo].[PlayerScores]
ID | Name | Score
=================
1 | Bob | 17
2 | Carl | 24
3 | Ann | 31
4 | Joan | 11
5 | Lou | 17
6 | Dan | 25
7 | Erin | 33
8 | Fred | 29
I've defined a leaderboard such that all of the players are ordered by their score and assigned a rank, so I'm using the RANK() function:
-- Leaderboard: highest score first; equal scores share a rank.
SELECT
    RANK() OVER (ORDER BY ps.[Score] DESC) AS [Score_Rank],
    ps.[Name],
    ps.[Score]
FROM [dbo].[PlayerScores] AS ps
So far so good. For the above data, I'll get
Rank | Name | Score
=================
1 | Erin | 33
2 | Ann | 31
3 | Fred | 29
4 | Dan | 25
5 | Carl | 24
6 | Bob | 17
6 | Lou | 17
8 | Joan | 11
However, when I present this leaderboard to the players, I don't need or want to show them everything - only the players immediately above and below them (there won't be any paged navigation - players only get to see a snapshot of their overall position).
I'm therefore trying to retrieve (n) rows of data surrounding a specific player, such that:
If there are (n) or fewer rows in the table, all rows will be returned.
Where there are at least (n) rows in the table, (n) rows of data will be returned.
There should be (n/2) rows above and below the specified player.
If there aren't (n/2) rows above the specified player, return all the rows above, and enough rows below to make up (n) rows total.
If there aren't (n/2) rows below the specified player, return all the rows below, and enough rows above to make up (n) rows total.
How can I construct my query such that I can always return the minimum number of rows? E.g. for my above dataset and n=5, Erin would see
Rank | Name | Score
=================
1 | Erin | 33
2 | Ann | 31
3 | Fred | 29
4 | Dan | 25
5 | Carl | 24
While Dan would see
Rank | Name | Score
=================
2 | Ann | 31
3 | Fred | 29
4 | Dan | 25
5 | Carl | 24
6 | Bob | 17
And Lou would see
Rank | Name | Score
=================
4 | Dan | 25
5 | Carl | 24
6 | Bob | 17
6 | Lou | 17
8 | Joan | 11
I found a partial solution for this using a UNION on two queries (one getting n/2 rows above and one getting n/2 rows below the specified player), but it falls down if the player is at (or near) the top or bottom of the table - the resulting dataset is clipped, and I always want to retrieve a full (n) rows where possible.
I think the solution might have something to do with Window functions, making use of LAG and LEAD, but I honestly can't get my head around the syntax and most of the examples I've found don't care about not returning enough rows total. Thanks!
sql rank vs row number
Two versions of the same procedure, one outputs the result set in order, the second does not.
rextester link to try it out: http://rextester.com/JLQU48329
create table dbo.PlayerScores (Id int, Name nvarchar(64), Score int)
insert into dbo.PlayerScores (Id, Name, Score) values
(1,'Bob',17) ,(2,'Carl',24) ,(3,'Ann',31) ,(4,'Joan',11)
,(5,'Lou',17) ,(6,'Dan',25) ,(7,'Erin',33) ,(8,'Fred',29);
go
/* ordered resultset */
-- Returns the @Results leaderboard rows closest to player @PlayerId,
-- ordered by rank.
--   @PlayerId : Id of the player the snapshot is centred on
--   @Results  : number of rows to return (default 5)
create procedure dbo.PlayerScores_getMiddle_byId (@PlayerId int, @Results int = 5) as
begin;
    with cte as (
        select
            -- Id breaks ties so that the row order is repeatable
            Score_Order = row_number() over (order by Score desc, Id)
            , Score_Rank = rank() over (order by Score desc)
            , Id
            , Name
            , Score
        from dbo.PlayerScores
    )
    select c.Score_Rank, c.Name, c.Score
    from (
        -- the @Results rows with the smallest distance to the player's row;
        -- on equal distance the higher-placed row is taken first
        select top (@Results) i.*
        from cte i
        cross apply (select Score_Order from cte where Id = @PlayerId) as x
        order by abs(i.Score_Order-x.Score_Order), i.Score_Order
    ) as c
    order by c.Score_Rank, c.Score_Order;
end
go
exec dbo.PlayerScores_getMiddle_byId 7,5; -- Erin
exec dbo.PlayerScores_getMiddle_byId 6,5; --Dan
exec dbo.PlayerScores_getMiddle_byId 5,5; --Lou
go
/* unordered result set */
/*
create procedure dbo.PlayerScores_getMiddle_byId (#PlayerId int,#Results int = 5) as
begin;
with cte as (
select
Score_Order = row_number() over (order by Score desc)
, Score_Rank = rank() over (order by Score desc)
, Id
, Name
, Score
from dbo.PlayerScores
)
select top (#Results) c.Score_Rank, c.Name, c.Score
from cte as c
cross apply (select
Score_Order
from cte
where Id = #PlayerId) as x
order by abs(c.Score_Order-x.Score_Order)
end
--go
exec dbo.PlayerScores_getMiddle_byId 7,5; -- Erin
exec dbo.PlayerScores_getMiddle_byId 6,5; --Dan
exec dbo.PlayerScores_getMiddle_byId 5,5; --Lou
--*/
This will do what you want.
-- Returns @n consecutive leaderboard rows around player @playerID.
-- The window starts @n/2 rows above the player and is shifted down or up
-- when it would run past the first or last row, so @n rows are returned
-- whenever the table has at least @n rows.
;WITH cte AS (
    SELECT RANK() OVER (ORDER BY [Score] DESC) AS [Score_Rank],
           -- ID breaks ties so that row positions are repeatable
           ROW_NUMBER() OVER (ORDER BY [Score] DESC, [ID]) AS [RowNum],
           COUNT(*) OVER () AS MaxRow,
           [Name],
           [Score],
           [ID]
    FROM @playScores
)
SELECT c.Score_Rank, c.Name, c.Score
FROM
    cte c
    CROSS APPLY (SELECT RowNum AS AnchorRN FROM cte WHERE ID = @playerID) tmp
    CROSS APPLY (
        -- first row of the window, clamped to the top and bottom of the table
        SELECT CASE
                   WHEN tmp.AnchorRN - (@n / 2) < 1 THEN 1
                   WHEN tmp.AnchorRN - (@n / 2) + @n - 1 > c.MaxRow THEN c.MaxRow - @n + 1
                   ELSE tmp.AnchorRN - (@n / 2)
               END AS FirstRN
    ) win
WHERE
    c.RowNum BETWEEN win.FirstRN AND win.FirstRN + @n - 1
ORDER BY
    c.RowNum;
-- Debug listing: all players numbered by ascending score.
SELECT *
    , ROW_NUMBER() OVER (ORDER BY Score) AS RowNum
FROM
    @playScores
ORDER BY
    RowNum;
This is the whole answer and test code.
-- Test data in a table variable (T-SQL requires the @ prefix).
DECLARE @playScores TABLE (
    ID INT
    , Name NVARCHAR(50)
    , Score INT
);
INSERT INTO @playScores (ID, Name, Score)
VALUES
(1 ,' Bob ', 17),
(2 ,' Carl ', 24),
(3 ,' Ann ', 31),
(4 ,' Joan ', 11),
(5 ,' Lou ', 17),
(6 ,' Dan ', 25),
(7 ,' Erin ', 33),
(8 ,' Fred ', 29);
DECLARE @n INT = 5;        -- number of leaderboard rows to return
DECLARE @playerID INT =5;  -- player the snapshot is centred on
SELECT *
FROM
    @playScores
ORDER BY
    Score DESC;
-- Returns @n consecutive leaderboard rows around player @playerID.
-- The window starts @n/2 rows above the player and is shifted down or up
-- when it would run past the first or last row, so @n rows are returned
-- whenever the table has at least @n rows.
WITH cte AS (
    SELECT RANK() OVER (ORDER BY [Score] DESC) AS [Score_Rank],
           -- ID breaks ties so that row positions are repeatable
           ROW_NUMBER() OVER (ORDER BY [Score] DESC, [ID]) AS [RowNum],
           COUNT(*) OVER () AS MaxRow,
           [Name],
           [Score],
           [ID]
    FROM @playScores
)
SELECT c.Score_Rank, c.Name, c.Score
FROM
    cte c
    CROSS APPLY (SELECT RowNum AS AnchorRN FROM cte WHERE ID = @playerID) tmp
    CROSS APPLY (
        -- first row of the window, clamped to the top and bottom of the table
        SELECT CASE
                   WHEN tmp.AnchorRN - (@n / 2) < 1 THEN 1
                   WHEN tmp.AnchorRN - (@n / 2) + @n - 1 > c.MaxRow THEN c.MaxRow - @n + 1
                   ELSE tmp.AnchorRN - (@n / 2)
               END AS FirstRN
    ) win
WHERE
    c.RowNum BETWEEN win.FirstRN AND win.FirstRN + @n - 1
ORDER BY
    c.RowNum;
-- Debug listing: all players numbered by ascending score.
SELECT *
    , ROW_NUMBER() OVER (ORDER BY Score) AS RowNum
FROM
    @playScores
ORDER BY
    RowNum;
or use standard SQL:
-- pRank: each player with rnk = number of players whose score is less than
-- or equal to theirs (so the lowest score gets rnk 1 and tied players share
-- the same rnk). The CTE column list must expose nam, score and rnk because
-- the outer query reads all three.
with pRank(id, nam, score, rnk)
as (Select p.Id, p.Name, p.score,
       (Select count(*) from players q
        where q.score <= p.score)
    from players p)
-- For every player p, list the players n whose rnk lies in p's window:
-- 0..@n when p.rnk < @n/2, otherwise p.rnk - @n/2 .. p.rnk + @n/2.
Select p.id, p.nam, p.score,
       n.id, n.nam, n.score
from pRank p join pRank n
    on n.rnk between
        case when p.rnk < @n/2 then 0
             else p.rnk - @n / 2 end
    and case when p.rnk < @n/2 then @n
             else p.rnk + @n / 2 end
order by p.rnk, p.id, n.rnk
Test:
-- Test data in a table variable (T-SQL requires the @ prefix).
declare @t table
(id integer primary key not null,
 nam varchar(30) not null, score int not null)
insert @t(id, nam, score)
values
(1, 'Bob ',17),
(2, 'Carl',24),
(3, 'Ann ',31),
(4, 'Joan',11),
(5, 'Lou ',17),
(6, 'Dan ',25),
(7, 'Erin',33),
(8, 'Fred',29)
declare @n int = 4;
-- pRank: rnk = number of players whose score is less than or equal to this
-- player's score (lowest score gets rnk 1; tied players share a rnk).
with pRank(id, nam, rnk)
as (Select p.Id, p.Nam,
       (Select count(*) from @t q
        where q.score <= p.score) rnk
    from @t p)
-- For every player p, list the players n whose rnk lies in p's window:
-- 0..@n when p.rnk < @n/2, otherwise p.rnk - @n/2 .. p.rnk + @n/2.
Select p.id, p.Nam, p.rnk,
       n.id, n.nam, n.rnk
from pRank p join pRank n
    on n.rnk between
        case when p.rnk < @n/2 then 0
             else p.rnk - @n / 2 end
    and case when p.rnk < @n/2 then @n
             else p.rnk + @n / 2 end
order by p.rnk, p.id, n.rnk
For each group, grouped using field GRP, I would like to retrieve the most frequently occurring value in column A and the most frequently occurring value in column B, and potentially do this for many other columns.
Sample Data:
GRP | A | B
-----------
Cat | 1 | 1
Cat | 2 | 1
Cat | 3 | 2
Cat | 3 | 3
Dog | 5 | 6
Dog | 5 | 7
Dog | 6 | 7
Expected Output:
GRP | A | B
-----------
Cat | 3 | 1
Dog | 5 | 7
This query achieves that result:
-- top_a / top_b rank the distinct A (resp. B) values inside each GRP by how
-- often they occur; rank 1 is the most frequent value of that column.
SELECT
    top_a.GRP,
    top_a.A,
    top_b.B
FROM (
    SELECT
        GRP,
        A,
        ROW_NUMBER() OVER (PARTITION BY GRP ORDER BY COUNT(*) DESC) AS F_RANK
    FROM MyTable
    GROUP BY GRP, A
) AS top_a
INNER JOIN (
    SELECT
        GRP,
        B,
        ROW_NUMBER() OVER (PARTITION BY GRP ORDER BY COUNT(*) DESC) AS F_RANK
    FROM MyTable
    GROUP BY GRP, B
) AS top_b
    ON top_b.GRP = top_a.GRP
   AND top_b.F_RANK = 1
WHERE top_a.F_RANK = 1
It just doesn't look very efficient, and even less so if I were to add a column C, D, etc...
Is there a better way?
I wouldn't say this approach is "better" because it will generate the exact same execution plan. However, I find this type of approach a lot more maintainable as the number of columns might grow. For me this is a lot easier to read.
-- One CTE per column: each ranks that column's distinct values within a
-- Grp by occurrence count, most frequent first.
with MostFrequentA (Grp, A, RowNum) as
(
    select
        Grp,
        A,
        ROW_NUMBER() over (partition by Grp order by count(*) desc)
    from MyTable
    group by Grp, A
),
MostFrequentB (Grp, B, RowNum) as
(
    select
        Grp,
        B,
        ROW_NUMBER() over (partition by Grp order by count(*) desc)
    from MyTable
    group by Grp, B
)
-- Pair the top-ranked A with the top-ranked B of the same Grp.
select
    fa.Grp,
    fa.A,
    fb.B
from MostFrequentA as fa
inner join MostFrequentB as fb
    on fb.Grp = fa.Grp
where fa.RowNum = 1
  and fb.RowNum = 1;
An alternative using results ranked in a temp table:
-- ARank / BRank hold how often the row's A (resp. B) value occurs inside its
-- own GRP. The partition includes GRP so that the same value appearing in
-- another group does not inflate the count.
SELECT GRP, A, B,
    COUNT(*) OVER (PARTITION BY GRP, A) AS ARank,
    COUNT(*) OVER (PARTITION BY GRP, B) AS BRank
INTO #TMP
FROM MyTable
-- Per GRP, pick the value with the highest count; the value itself breaks
-- ties so the result is repeatable.
SELECT t1.GRP,
    (SELECT TOP 1 A FROM #TMP WHERE GRP = t1.GRP ORDER BY ARank DESC, A) A,
    (SELECT TOP 1 B FROM #TMP WHERE GRP = t1.GRP ORDER BY BRank DESC, B) B
FROM MyTable t1
GROUP BY t1.GRP
DROP TABLE #TMP
Full Solution on SQL Fiddle
Schema Setup:
-- Schema and sample rows for the "most frequent value per group" example.
CREATE TABLE MyTable (
    [GRP] varchar(3),
    [A]   int,
    [B]   int
);

INSERT INTO MyTable ([GRP], [A], [B])
VALUES ('Cat', 1, 1),
       ('Cat', 2, 1),
       ('Cat', 3, 2),
       ('Cat', 3, 3),
       ('Dog', 5, 6),
       ('Dog', 5, 7),
       ('Dog', 6, 7);
Query 1:
-- ARank / BRank hold how often the row's A (resp. B) value occurs inside its
-- own GRP. The partition includes GRP so that the same value appearing in
-- another group does not inflate the count.
SELECT GRP, A, B,
    COUNT(*) OVER (PARTITION BY GRP, A) AS ARank,
    COUNT(*) OVER (PARTITION BY GRP, B) AS BRank
INTO #TMP
FROM MyTable
-- Per GRP, pick the value with the highest count; the value itself breaks
-- ties so the result is repeatable.
SELECT t1.GRP,
    (SELECT TOP 1 A FROM #TMP WHERE GRP = t1.GRP ORDER BY ARank DESC, A) A,
    (SELECT TOP 1 B FROM #TMP WHERE GRP = t1.GRP ORDER BY BRank DESC, B) B
FROM MyTable t1
GROUP BY t1.GRP
DROP TABLE #TMP
Results:
| GRP | A | B |
|-----|---|---|
| Cat | 3 | 1 |
| Dog | 5 | 7 |
I'll start out this answer by saying this is NOT going to be more efficient to run - it should just be easier to add/subtract columns. To do this you just add them into the code in two places.
You can use dynamic SQL to build your result set like this:
-- ##fields lists the columns to analyse; ##Output collects one row per GRP
-- with the most frequent value of each listed column.
CREATE TABLE ##fields (id INT IDENTITY(1,1),fieldname VARCHAR(255))
INSERT INTO ##fields
( fieldname )
VALUES ('A'),('B') --Add field names here
DECLARE @maxid INT
SELECT @maxid = MAX(id) FROM ##fields
CREATE TABLE ##Output (GRP VARCHAR(3), A INT, B INT) --Add field names here
INSERT INTO ##Output
( GRP )
SELECT DISTINCT GRP FROM MyTable
DECLARE @SQL NVARCHAR(MAX)
DECLARE @i INT = 1
-- One dynamic UPDATE per listed column: rank that column's values per GRP
-- by occurrence count and copy the top-ranked value into ##Output.
WHILE @i <= @maxid
BEGIN
    -- QUOTENAME brackets the column name so it is always treated as an
    -- identifier inside the generated statement.
    SELECT @SQL = 'with cte as (SELECT GRP , ' + QUOTENAME(fieldname) + ' ,
ROW_NUMBER() OVER ( PARTITION BY GRP ORDER BY COUNT(*) DESC ) AS F_RANK
FROM MyTable
GROUP BY GRP , ' + QUOTENAME(fieldname) + ')
UPDATE O
SET O.' + QUOTENAME(fieldname) + ' = cte.' + QUOTENAME(fieldname) + '
FROM ##Output O
INNER JOIN cte ON O.GRP = cte.GRP AND cte.F_Rank = 1' FROM ##fields WHERE id = @i
    EXEC sp_executesql @SQL
    SET @i = @i + 1
END
SELECT *
FROM ##Output
DROP TABLE ##fields
DROP TABLE ##Output
Using your simple example above, I received the following performance stats:
Dynamic SQL
CPU = 31
Reads = 504
Duration = 39
Your SQL
CPU = 0
Reads = 6
Duration = 1
Clearly, this way is not a more efficient way of doing this. I did want to throw it out there anyway as an alternative to your current method.
First we create the test data:
-- Test data in a table variable (T-SQL requires the @ prefix).
DECLARE @MyTable TABLE
(
    GRP varchar(10),
    A int,
    B int
)
INSERT INTO @MyTable
( GRP, A, B)
VALUES
('Cat', 1, 1),
('Cat', 2, 1),
('Cat', 3, 2),
('Cat', 3, 3),
('Dog', 5, 6),
('Dog', 5, 7),
('Dog', 6, 7);
Now we use first_value from a subselect (or a cte if you wanted) and grab the top cat and dog columns
-- A_CNT / B_CNT: how often the row's A (resp. B) value occurs inside its own
-- GRP. FIRST_VALUE then picks, per GRP, the value with the highest count;
-- the value itself breaks ties so the result is repeatable.
SELECT DISTINCT
    GRP,
    FIRST_VALUE(A) OVER(PARTITION BY GRP ORDER BY d.A_CNT DESC, d.A) AS A_RANK,
    FIRST_VALUE(B) OVER(PARTITION BY GRP ORDER BY d.B_CNT DESC, d.B) AS B_RANK
FROM
(
    SELECT
        GRP,
        A,
        COUNT(*) OVER (PARTITION BY GRP, A) AS A_CNT,
        B,
        COUNT(*) OVER (PARTITION BY GRP, B) AS B_CNT
    FROM @MyTable
) d
Output:
GRP A_RANK B_RANK
Cat 3 1
Dog 5 7