SQL Server Ranking issue

SQL Server Ranking issue - sql-server

I am trying to apply a ranking to my data set. The logic is as follows:
For each ID, order the rows by ID2 ASC and IsMaster DESC, give the first row rank 1, and increase the rank only when the ID4 value changes.
My dataset and desired output looks like:
Test data
-- Sample data for the ranking question.
-- The trailing comment on each row is the rank the asker expects.
CREATE TABLE Test_Table
(
    ID       INT,
    ID2      INT,
    IsMaster INT,
    ID4      VARCHAR(10)
)
GO
INSERT INTO Test_Table (ID, ID2, IsMaster, ID4)
VALUES
    (1, 101, 1, 'AAA'), -- 1 <-- Desired output for rank
    (1, 102, 0, 'AAA'), -- 1
    (1, 103, 0, 'AAB'), -- 2
    (1, 104, 0, 'AAB'), -- 2
    (1, 105, 0, 'CCC'), -- 3
    (2, 101, 1, 'AAA'), -- 1
    (2, 102, 0, 'AAA'), -- 1
    (2, 103, 0, 'AAA'), -- 1
    (2, 104, 0, 'AAB'), -- 2
    (2, 105, 0, 'CCC')  -- 3
this is what I have tried so far:
-- Asker's attempt: dense rank inside each ID, ordered by ID2 and then IsMaster.
SELECT
    *,
    DENSE_RANK() OVER (
        PARTITION BY ID
        ORDER BY ID2 ASC, IsMaster DESC
    ) rn
FROM Test_Table
please please please help me thank you.

This is a gaps-and-islands problem.
First, use LAG() to check whether the previous row in the same partition has a different ID4.
It is important to also partition by IsMaster.
Then start a new island each time ID4 changes.
Finally, use a cumulative SUM() to get the proper rank.
Sql Demo
-- Step 1: fetch the previous row's ID4 inside each (ID, IsMaster) partition.
WITH id4_change AS
(
    SELECT
        *,
        LAG(ID4) OVER (PARTITION BY ID, IsMaster ORDER BY ID2) AS prev
    FROM Test_Table
),
-- Step 2: flag a row with 1 when it opens a new island (ID4 differs from prev, or prev is NULL).
islands AS
(
    SELECT
        *,
        CASE WHEN ID4 = prev THEN 0 ELSE 1 END AS island
    FROM id4_change
)
-- Step 3: the running total of the flags is the rank.
SELECT
    *,
    SUM(island) OVER (PARTITION BY ID, IsMaster ORDER BY ID2) rank
FROM islands
ORDER BY
    ID,
    ID2,
    IsMaster DESC
;
OUTPUT: You can see that when ID4 = PREV no new "island" is created, so the row keeps the same rank.
EDIT: You can merge the first two queries into one:
-- Flag a row with 1 when its ID4 differs from the previous row's ID4
-- (or there is no previous row) inside the (ID, IsMaster) partition.
WITH id4_change AS
(
    SELECT
        *,
        CASE
            WHEN ID4 = LAG(ID4) OVER (PARTITION BY ID, IsMaster ORDER BY ID2) THEN 0
            ELSE 1
        END AS island
    FROM Test_Table
)
-- The running total of the flags is the rank.
SELECT
    *,
    SUM(island) OVER (PARTITION BY ID, IsMaster ORDER BY ID2) rank
FROM id4_change
ORDER BY
    ID,
    ID2,
    IsMaster DESC
;

Another way probably less efficient but it will work.
-- Recursive alternative: walk each ID's rows in ID2 order, starting at the master row.
WITH X AS
(
    -- Position of every row inside its ID, ordered by ID2.
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY ID2) RowNum
    FROM dbo.Test_Table
),
CTE_VehicleNumber AS
(
    -- Anchor: the master row of each ID gets rank 1.
    SELECT T.ID, T.ID2, T.IsMaster, T.ID4, T.RowNum, 1 AS [Rank]
    FROM X AS T
    WHERE T.IsMaster = 1

    UNION ALL

    -- Step: move to the next row of the same ID; bump the rank only when ID4 changes.
    SELECT T.ID, T.ID2, T.IsMaster, T.ID4, T.RowNum,
           CASE
               WHEN T.ID4 <> C.ID4 THEN 1 + C.[Rank]
               ELSE 0 + C.[Rank]
           END AS [Rank]
    FROM CTE_VehicleNumber AS C
    INNER JOIN X AS T
        ON T.RowNum = C.RowNum + 1
       AND T.ID = C.ID
)
SELECT ID, ID2, IsMaster, ID4, [Rank]
FROM CTE_VehicleNumber
ORDER BY ID, ID2, IsMaster, ID4, [Rank]
OPTION (MAXRECURSION 0);

Are you sure that your orders of ID2 and IsMaster affect the desired result, considering the rest of the data in ID and ID4?
I just tried to use the following code:
-- Rank the distinct ID4 values of each ID, then attach that rank to every row.
-- PARTITION BY ID restarts the ranking for each ID; without it the rank is
-- computed over the whole table and is only right when every ID happens to
-- contain the same set of ID4 values.
; WITH CTE AS (
    SELECT DISTINCT
        ID,
        ID4,
        DENSE_RANK() OVER (PARTITION BY ID ORDER BY ID4) Rnk
    FROM #Test_Table
)
SELECT t.*, c.Rnk
FROM #Test_Table t
INNER JOIN CTE c ON t.ID = c.ID AND t.ID4 = c.ID4;
... and even with changing the order of ID2 and IsMaster I can't get it to "misbehave" - IF there's only one IsMaster = 1 per a group of ID4's and no duplicates in ID2.

Related

TSQL: Group by one column, count all rows and keep value on second column based on row_number

I have a query that returns an Id, a Name and the Row_Number() based on some rules.
The query looks like that
-- Source query: number each id's rows by CreatedDate, oldest first.
SELECT tm.id AS Id
     , pn.Name AS Name
     , ROW_NUMBER() OVER (PARTITION BY tm.id ORDER BY tm.CreatedDate ASC) AS Row
FROM #tempTable AS tm
LEFT JOIN names pn WITH (NOLOCK)
    ON tm.nameId = pn.NameId
WHERE ....
The output of the above query looks like the table below with the dummy data
-- Dummy data shaped like the output of the source query:
-- row is the per-id row number.
CREATE TABLE people
(
    id int,
    name varchar(55),
    row int
);
-- Explicit column list so the insert does not depend on column order.
INSERT INTO people (id, name, row)
VALUES (1, 'John', 1), (1, 'John', 2), (2, 'Mary', 1),
       (3, 'Jeff', 1), (4, 'Bill', 1), (4, 'Bill', 2),
       (4, 'Bill', 3), (4, 'Billy', 4), (5, 'Bobby', 1),
       (5, 'Bob', 2), (5, 'Bob' , 3), (5, 'Bob' , 4);
What I try to do, is group by the id field, count all rows, but for the name, use the one with row = 1
My attempt is like this, but, obviously, I get different rows since I include the x.name in the group by.
-- Asker's attempt: grouping by name as well splits an id into one row per distinct name.
SELECT x.id
     , x.name
     , COUNT(*) AS Value
FROM (
    SELECT tm.id AS Id
         , pn.Name AS Name
         , ROW_NUMBER() OVER (PARTITION BY tm.id ORDER BY tm.CreatedDate ASC) AS Row
    FROM #tempTable AS tm
    LEFT JOIN names pn WITH (NOLOCK)
        ON tm.nameId = pn.NameId
    WHERE ....
) x
GROUP BY x.id, x.name
ORDER BY COUNT(*) DESC
The desired results from the dummy data are:
id name count
------------------
1 John 2
2 Mary 1
3 Jeff 1
4 Bill 4
5 Bobby 4

You can use FIRST_VALUE() window function to get the name of the row with row number = 1 and with the keyword DISTINCT there is no need to GROUP BY:
-- Every row of an id carries the same (first name, row count) pair,
-- so DISTINCT collapses them to one row per id.
SELECT DISTINCT
    tm.id AS Id,
    FIRST_VALUE(pn.Name) OVER (PARTITION BY tm.id ORDER BY tm.CreatedDate ASC) AS Name,
    COUNT(*) OVER (PARTITION BY tm.id) AS counter
FROM #tempTable AS tm
LEFT JOIN names pn WITH (NOLOCK)
    ON tm.nameId = pn.NameId
WHERE ....
If you can't use FIRST_VALUE() then you can do it with conditional aggregation:
-- Only the Row = 1 row contributes a non-NULL name, so MAX() returns exactly that name.
SELECT
    id,
    MAX(CASE WHEN Row = 1 THEN Name END) AS NAME,
    COUNT(*) AS Counter
FROM (
    SELECT
        tm.id AS Id,
        pn.Name AS Name,
        ROW_NUMBER() OVER (PARTITION BY tm.id ORDER BY tm.CreatedDate ASC) AS Row
    FROM #tempTable AS tm
    LEFT JOIN names pn WITH (NOLOCK)
        ON tm.nameId = pn.NameId
    WHERE ....
) t
GROUP BY id

This could be one solution to your problem: group on both id and the target name (case when p.row = 1 then p.name end) for the counting. Adding with rollup to the grouping will "roll up" the count aggregations, producing a per-id subtotal row. Another aggregation on just id can then be used to merge the row values from the intermediate data set (visible in fiddle).
-- Count per (id, row-1 name); WITH ROLLUP adds a per-id subtotal row,
-- and the HAVING clause drops the grand-total row.
WITH cte AS
(
    SELECT
        p.id,
        CASE WHEN p.row = 1 THEN p.name END AS name,
        COUNT(1) AS cnt
    FROM people p
    GROUP BY
        p.id,
        CASE WHEN p.row = 1 THEN p.name END
    WITH ROLLUP
    HAVING GROUPING(p.id) = 0
)
-- Collapse to one row per id: the only non-NULL name and the largest (subtotal) count.
SELECT
    cte.id,
    MAX(cte.name) AS name,
    MAX(cte.cnt) AS [count]
FROM cte
GROUP BY cte.id;
Fiddle
This would be another solution: do a regular count query with grouping on id and fetch the required name afterwards with a cross apply.
-- Plain row count per id.
WITH cte AS
(
    SELECT
        p.id,
        COUNT(1) AS cnt
    FROM people p
    GROUP BY p.id
)
-- Look up the name of the row = 1 record for each counted id.
SELECT
    cte.id,
    n.name,
    cte.cnt AS [count]
FROM cte
CROSS APPLY
(
    SELECT p.name
    FROM people p
    WHERE p.id = cte.id
      AND p.row = 1
) n;
Fiddle

Update null values by value in same column

I have a table in MS SQL Server, where are some null values in column "value"
Group ID Value
A 1 10
A 2
A 3
A 4 40
B 1
B 2 20
B 3 30
B 4
I want to update the null values with the not-null value from the same group that has the first higher ID or, if there is no higher ID with a value in the same group, the first lower one. So the result should look like this.
Group ID Value
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
Thanks!

You can use windowed version of SUM function in order to determine islands of NULL valued records along with the record having the higher ID in the same group:
-- grp = running count of non-NULL values when walking each group from the highest ID down.
-- A NULL row therefore shares its grp with the nearest non-NULL row that has a higher ID.
SELECT
    [Group],
    ID,
    Value,
    SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END)
        OVER (PARTITION BY [Group] ORDER BY ID DESC) AS grp
FROM mytable
Output:
Group ID Value grp
-----------------------
A 4 40 1
A 3 30 2
A 2 NULL 2
A 1 NULL 2
B 4 40 1
B 3 NULL 1
B 2 20 2
B 1 10 3
You can now wrap the above query in a CTE and use another CTE to do the update:
-- CTE: tag every row with the island (grp) of the nearest non-NULL row at a higher ID.
;WITH CTE AS
(
    SELECT
        [Group],
        ID,
        Value,
        SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END)
            OVER (PARTITION BY [Group] ORDER BY ID DESC) AS grp
    FROM mytable
),
-- ToUpdate: each island holds at most one non-NULL value, so MAX() picks it up.
ToUpdate AS
(
    SELECT
        [Group],
        ID,
        Value,
        MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value
    FROM CTE
)
-- Fill only the missing values.
UPDATE ToUpdate
SET Value = group_value
WHERE Value IS NULL
Demo here
Edit:
The above query doesn't handle the edge case where the very last record within a Group slice is NULL. To handle this case as well you can use the following query:
-- CTE: grp groups a NULL row with the nearest non-NULL row at a higher ID,
-- grp2 groups it with the nearest non-NULL row at a lower ID.
;WITH CTE AS
(
    SELECT
        [Group],
        ID,
        Value,
        SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END)
            OVER (PARTITION BY [Group] ORDER BY ID DESC) AS grp,
        SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END)
            OVER (PARTITION BY [Group] ORDER BY ID) AS grp2
    FROM mytable
),
-- ToUpdate: candidate value from the higher-ID side and from the lower-ID side.
ToUpdate AS
(
    SELECT
        [Group],
        ID,
        Value,
        MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value,
        MAX(Value) OVER (PARTITION BY [Group], grp2) AS group_value2
    FROM CTE
)
-- Prefer the higher-ID value; fall back to the lower-ID value when none exists.
UPDATE ToUpdate
SET Value = COALESCE(group_value, group_value2)
WHERE Value IS NULL
Demo here

Please try this-
DATA GENERATION
-- Sample data in a local temp table.
-- (DECLARE ... TABLE only accepts an @-prefixed table variable; a #-prefixed
-- name has to be created with CREATE TABLE.)
CREATE TABLE #T
(
    GroupCd CHAR(1),
    Id INT,
    Value INT
)
INSERT INTO #T (GroupCd, Id, Value)
VALUES ('A', 1, 10),
       ('A', 2, NULL),
       ('A', 3, NULL),
       ('A', 4, 40),
       ('B', 1, NULL),
       ('B', 2, 20),
       ('B', 3, 30),
       ('B', 4, NULL)
SOLUTION
-- Replace each Value with: its own value, else the nearest non-NULL value at a
-- higher Id in the same group, else the nearest non-NULL value at a lower Id.
UPDATE a
SET a.Value = b.Value
FROM #T a
INNER JOIN
(
    SELECT
        src.GroupCd,
        src.Id,
        COALESCE(src.Value, nxt.Value, prv.Value) Value
    FROM #T src
    -- nearest non-NULL row above
    OUTER APPLY
    (
        SELECT TOP 1 hi.Value
        FROM #T hi
        WHERE src.GroupCd = hi.GroupCd
          AND hi.Value IS NOT NULL
          AND src.Id < hi.Id
        ORDER BY hi.Id
    ) nxt
    -- nearest non-NULL row below
    OUTER APPLY
    (
        SELECT TOP 1 lo.Value
        FROM #T lo
        WHERE src.GroupCd = lo.GroupCd
          AND lo.Value IS NOT NULL
          AND src.Id > lo.Id
        ORDER BY lo.Id DESC
    ) prv
) b
    ON a.GroupCd = b.GroupCd
   AND a.Id = b.Id
SELECT * FROM #T
OUTPUT
GroupCd Id Value
------- ----------- -----------
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
(8 rows affected)

You can try this simple method:
-- Sample data in a local temp table.
-- (DECLARE ... TABLE only accepts an @-prefixed table variable; a #-prefixed
-- name has to be created with CREATE TABLE.)
CREATE TABLE #T
(
    GroupCd CHAR(1),
    Id INT,
    Value INT
)
INSERT INTO #T (GroupCd, Id, Value)
VALUES ('A', 1, NULL),
       ('A', 2, NULL),
       ('A', 3, 30),
       ('A', 4, 40),
       ('B', 1, 10),
       ('B', 2, 20),
       ('B', 3, NULL),
       ('B', 4, 40)
-- NewVal = own value, else the first non-NULL value at a higher Id in the group,
-- else (when nothing higher exists) the first non-NULL value at a lower Id.
SELECT
    *,
    NewVal = COALESCE(
        Value,
        (SELECT TOP 1 Value
         FROM #T
         WHERE GroupCd = T.GroupCd AND Id > T.Id AND Value IS NOT NULL
         ORDER BY Id ASC),
        (SELECT TOP 1 Value
         FROM #T
         WHERE GroupCd = T.GroupCd AND Id < T.Id AND Value IS NOT NULL
         ORDER BY Id DESC)
    )
FROM #T T
My Result

-- Fill each NULL [Value] with the nearest non-NULL value at a higher [Id] in the
-- same [Group]; when no higher row has a value, use the nearest lower one.
-- (The previous form had an un-aliased derived table, which SQL Server rejects,
-- and always copied the value of the group's highest [Id].)
update [t]
set [t].[Value] = coalesce([nxt].[Value], [prv].[Value])
from MY_TABLE [t]
outer apply (
    select top (1) [h].[Value]
    from MY_TABLE [h]
    where [h].[Group] = [t].[Group]
      and [h].[Id] > [t].[Id]
      and [h].[Value] is not null
    order by [h].[Id] asc
) [nxt]
outer apply (
    select top (1) [l].[Value]
    from MY_TABLE [l]
    where [l].[Group] = [t].[Group]
      and [l].[Id] < [t].[Id]
      and [l].[Value] is not null
    order by [l].[Id] desc
) [prv]
where [t].[Value] is null

sql query that gets the difference between 2 recent rows for every row item that occurs more than once in a table

Sql query that gets the difference between 2 recent rows for every value that occurs more than once in a table.
for example
book value date
A 4 2017-07-17 09:16:44.480
A 2 2017-08-15 10:05:58.273
B 3 2017-04-15 10:05:58.273
C 2 2017-08-15 10:05:58.273
B 3 2017-04-13 10:05:58.273
B 3 2017-04-12 10:05:58.273
should return
A 2
B 0

Here is a solution:
-- Keep the two most recent rows per book; books with a single row are dropped
-- by the HAVING clause. MAX - MIN is the absolute difference of the two values.
SELECT
    book,
    MAX(value) - MIN(value) AS difference
FROM (
    SELECT
        book,
        value,
        ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS rownum
    FROM t
) AS a
WHERE rownum <= 2
GROUP BY book
HAVING MAX(rownum) >= 2
And here it is in SQLFiddle

-- Template: replace the bracketed placeholders; the stray closing parenthesis was removed.
SELECT id_pk FROM [table] GROUP BY [fields you whant to compare by] HAVING COUNT(*) > 1
This select returns the list of primary keys of the elements that are repeated.
So, in another select you might wrap it like this:
-- Template: replace the bracketed placeholders; the parentheses are now balanced.
Select * from [table] where id_pk in (
SELECT id_pk FROM [table] GROUP BY [fields you whant to compare by] HAVING COUNT(*) > 1) limit 2
This is functional, but probably not optimal, as I have not analysed its complexity.

Add a rownumber before calculating:
-- Sample data from the question.
create table #test ([book] char(1), [value] int, [date] datetime)
insert into #test values ('A', 4, '2017-07-17 09:16:44.480')
insert into #test values ('A', 2, '2017-08-15 10:05:58.273')
insert into #test values ('B', 3, '2017-04-15 10:05:58.273')
insert into #test values ('C', 2, '2017-08-15 10:05:58.273')
insert into #test values ('B', 3, '2017-04-13 10:05:58.273')
insert into #test values ('B', 3, '2017-04-12 10:05:58.273')
-- cte: one global row number, ordered by book and then date, so within a book
-- a larger rownumber means a later date.
;with cte as(
Select ROW_NUMBER () OVER (order by [book], [date] ) as rownumber, *
from #test)
-- [2].val2 is the latest value of the book (first_value by rownumber desc).
-- The join keeps only [1] rows that have a later row in the same book, so the
-- first_value over [1] (rownumber desc) is the second-latest value.
-- Books with a single row produce no join match and are left out.
select distinct [1].book, abs(first_value([1].[Value]) over (partition by [1].book order by [1].rownumber desc) - [2].val2) as [Difference]
from cte [1]
inner join
(select rownumber, book, first_value([Value]) over (partition by book order by rownumber desc) as val2
from cte) [2] on [1].book = [2].book and [1].rownumber < [2].rownumber

I would use analytic functions:
-- For every row: the value of the previous row (by date) of the same book,
-- plus a recency number where 1 marks the latest row of the book.
;WITH CTE AS
(
    SELECT
        book,
        value,
        LAG(value) OVER (PARTITION BY book ORDER BY date) AS prev_value,
        ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS recency
    FROM MyTable
)
-- Latest row per book, skipping books that have no earlier row.
SELECT
    book,
    value - prev_value AS value_change
FROM CTE
WHERE recency = 1
  AND prev_value IS NOT NULL
LAG() was added in SQL Server 2012, but even if you're on a higher version, your database must have the compatibility version set to 110 or higher for them to be available. Here's an alternative that should work on SQL Server 2005 or higher, or a database compatibility 90 or higher.
-- Number each book's rows from the most recent (rn = 1) backwards.
;with CTE as (
SELECT book
,value
,ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) rn
FROM MyTable
)
-- Pair the latest row (rn = 1) with the one before it (rn = 2); books with a
-- single row have no rn = 2 match and drop out of the inner join.
-- (A comma was missing between the two select-list items.)
SELECT c1.book
,c1.value - c2.value as value_change
FROM CTE c1
INNER JOIN CTE c2
ON c1.book = c2.book
WHERE c1.rn = 1
AND c2.rn = 2

Sort by most recent but keep together by another ID column

I am trying to get some sorting and keep together (not really grouping) working.
In my sample data I would like to keep the DealerIDs together, sorted by IsPrimaryDealer DESC, but show the group (ok maybe it is grouping) of dealers by the ones with the most recent entry.
Result set 2 is the closest, but Grant and his brother should be displayed as the first two rows, in that order. (Grant should be row 1, Grants Brother row 2 because Grants Brother was the most recently added)
-- Sample data in a local temp table.
-- (DECLARE ... TABLE only accepts an @-prefixed table variable; a #-prefixed
-- name has to be created with CREATE TABLE.)
CREATE TABLE #temp (
    DealerPK int not null IDENTITY(1,1), DealerID int,
    IsPrimaryDealer bit, DealerName varchar(50), DateAdded datetime
)
-- DealerPK is an identity column and is generated automatically.
INSERT INTO #temp (DealerID, IsPrimaryDealer, DealerName, DateAdded) VALUES
(1, 1, 'Bob', GETDATE() - 7),
(2, 1, 'Robert', GETDATE() - 7),
(3, 1, 'Grant', GETDATE() - 7),
(3, 0, 'Grants Brother', GETDATE() - 1),
(2, 0, 'Roberts Nephew', GETDATE() - 2),
(1, 0, 'Bobs Cousin', GETDATE() - 3)
-- Data As Entered
SELECT *
FROM #temp

-- Data Attempt at Row Numbering
SELECT
    *,
    ROW_NUMBER() OVER (
        PARTITION BY IsPrimaryDealer
        ORDER BY DealerID, IsPrimaryDealer DESC
    ) AS intPosition
FROM #temp
ORDER BY DateAdded DESC

-- Data Attempt By DateAdded
SELECT
    *,
    ROW_NUMBER() OVER (
        PARTITION BY DealerID
        ORDER BY DateAdded DESC
    ) AS intPosition
FROM #temp
ORDER BY intPosition, DateAdded
Expected Result
PK DID IsPr Name DateAdded
3 3 1 Grant 2015-10-08 17:14:26.497
4 3 0 Grants Brother 2015-10-14 17:14:26.497
2 2 1 Robert 2015-10-08 17:14:26.497
5 2 0 Roberts Nephew 2015-10-13 17:14:26.497
1 1 1 Bob 2015-10-08 17:14:26.497
6 1 0 Bobs Cousin 2015-10-12 17:14:26.497

As requested by OP:
-- mx = most recent DateAdded among all rows of the same DealerID.
;WITH Cte AS
(
    SELECT
        *,
        MAX(DateAdded) OVER (PARTITION BY DealerID) AS mx
    FROM #temp
)
-- Dealers with the newest entry first; inside a dealer the primary row leads.
SELECT *
FROM Cte
ORDER BY
    mx DESC,
    DealerID,
    IsPrimaryDealer DESC

I hope I understood your question.
This query returns the expected output:
-- RN numbers each dealer's rows in insertion (DealerPK) order.
SELECT Row_number()
         OVER (
           PARTITION BY DealerID
           ORDER BY DealerPK) RN,
       DealerPK,
       DealerID,
       IsPrimaryDealer,
       DealerName,
       DateAdded
FROM   #temp
-- Dealers whose newest row is most recent come first; DealerID keeps a dealer's
-- rows together, and the primary dealer row leads inside each dealer.
-- (Ordering by DealerID DESC alone matched the sample only by coincidence and
-- left the order inside a dealer undefined.)
ORDER  BY MAX(DateAdded) OVER (PARTITION BY DealerID) DESC,
          DealerID,
          IsPrimaryDealer DESC

How to generate sequence numbers for hierarchical data in sql server

I am trying to create a function in SQL that returns a hierarchical serial number. I have a table called Goals. The structure of the table is shown below.
GoalId ParentId Goalstatement
---------- ---------- ----------
1 0 abc
2 0 def
3 1 acc
4 2 efc
5 3 dec
6 0 efc
7 3 jhg
I want to write a function to get the result as
Serial no GoalId ParentId GoalStatement
---------- ---------- ---------- --------------------
1 1 0
2 2 0
3 6 0
1.1 3 1
1.1.1 5 3
1.1.2 7 3
2.1 4 2
----------
I have tried with common table expression
-- Asker's attempt: builds a dotted path of ancestor ids for every goal.
WITH Hierarchy (GoalID, ParentId, Parents) AS
(
    -- Anchor: top-level goals start with an empty path.
    SELECT
        GoalID,
        GoalParentID,
        CAST('' AS VARCHAR(MAX))
    FROM Goals AS FirtGeneration
    WHERE GoalParentID = 0

    UNION ALL

    -- Step: append the parent's id to the path inherited from the parent.
    SELECT
        NextGeneration.GoalID,
        NextGeneration.GoalParentID,
        CAST(
            CASE
                WHEN Parent.Parents = ''
                    THEN (CAST(NextGeneration.GoalParentID AS VARCHAR(MAX)))
                ELSE (Parent.Parents + '.' + CAST(NextGeneration.GoalParentID AS VARCHAR(MAX)))
            END
        AS VARCHAR(MAX))
    FROM Goals AS NextGeneration
    INNER JOIN Hierarchy AS Parent
        ON NextGeneration.GoalParentID = Parent.GoalID
)
SELECT *
FROM Hierarchy
OPTION (MAXRECURSION 32767)
Can any one help me to write a function to create serial number in a hierarchical way

Your recursive CTE is quite close but you need to add in a ROW_NUMBER() in order to generate the sequential numbers at each level of the hierarchy. Try this;
-- Sample data in a table variable (table variables take an @ prefix).
DECLARE @Goals TABLE (GoalId INT, GoalParentID INT, Goalstatement VARCHAR(100))
INSERT @Goals (GoalId, GoalParentID, Goalstatement) VALUES
(1, 0, 'abc'),
(2, 0, 'def'),
(3, 1, 'acc'),
(4, 2, 'efc'),
(5, 3, 'dec'),
(6, 0, 'efc'),
(7, 3, 'jhg')
-- NumberedGoals: position of every goal among the children of its parent.
;WITH NumberedGoals(GoalId, GoalParentID, Goalstatement, GoalSequence) AS (
    SELECT
        GoalId,
        GoalParentID,
        Goalstatement,
        ROW_NUMBER() OVER (PARTITION BY GoalParentID ORDER BY GoalId) AS GoalSequence
    FROM
        @Goals
), Hierarchy(GoalID, GoalParentID, GoalSequence, Parents)
AS
(
    -- Anchor: a top-level goal's serial number is just its own sequence.
    SELECT GoalID, GoalParentID, GoalSequence, CAST(GoalSequence AS VARCHAR(MAX))
    FROM NumberedGoals AS FirstGeneration
    WHERE GoalParentID = 0
    UNION ALL
    -- Step: parent's serial number + '.' + the child's own sequence.
    -- The parent's path is never empty (the anchor seeds it with a number),
    -- so no special case for '' is needed.
    SELECT NextGeneration.GoalID, NextGeneration.GoalParentID, NextGeneration.GoalSequence,
        CAST(Parent.Parents + '.' + CAST(NextGeneration.GoalSequence AS VARCHAR(MAX)) AS VARCHAR(MAX))
    FROM NumberedGoals AS NextGeneration
    INNER JOIN Hierarchy AS Parent ON NextGeneration.GoalParentID = Parent.GoalID
)
SELECT h.Parents as [Serial no], h.GoalId, h.GoalParentId, g.GoalStatement
FROM Hierarchy h
INNER JOIN @Goals g ON g.GoalID = h.GoalID
OPTION (MAXRECURSION 32767)

-- Numbered: position of every goal among the children of its parent.
-- The numbering is done here, outside the recursion, because SQL Server applies
-- window functions in the recursive member of a CTE to the current recursion
-- step only, not to the whole sibling set, so sibling numbers computed there
-- are not dependable.
;with Numbered
as
(
    select GoalID,
           ParentId,
           Row_Number() over(partition by ParentId order by GoalID) as number
    from Goals
),
Hierarchy
as
(
    -- Anchor: top-level goals; the serial number is the goal's own position.
    select n.GoalID,
           n.ParentId,
           n.number,
           cast(n.number as nvarchar(200)) newnumber
    from Numbered n
    where n.ParentId = 0
    Union All
    -- Step: parent's serial number + '.' + the child's position among its siblings.
    Select p.GoalID,
           p.ParentId,
           p.number,
           cast(cte.newnumber + '.' + cast(p.number as nvarchar(200)) as nvarchar(200)) newnumber
    From Numbered p
    Join Hierarchy cte On cte.GoalID = p.ParentId
)
select GoalID, ParentId, number, newnumber from Hierarchy