Show all RowNumber records for duplicates? - sql-server

I have a table of string resources :
-- Sample data standing in for the real table: seven rows of (id, name, age).
;WITH cte AS
(
SELECT 1 AS id , 'john' AS name, 10 AS age
UNION
-- id 2 repeats the name and age of id 1
SELECT 2 AS id , 'john' AS name, 10 AS age
UNION
-- same name as ids 1 and 2, but a different age
SELECT 3 AS id , 'john' AS name, 12 AS age
UNION
SELECT 4 AS id , 'paul' AS name, 6 AS age
UNION
-- the name literal here carries a trailing space; the desired result in the
-- question lists ids 4 and 5 together as duplicates
SELECT 5 AS id , 'paul ' AS name, 6 AS age
UNION
SELECT 6 AS id , 'paul different' AS name, 7 AS age
UNION
SELECT 7 AS id , 'ringo' AS name, 2 AS age
)
So the name "john" has age of 10.
Later on , someone else (not me) also added "john" with age 10.
So I want to clean all duplicates.
But that's not the problem. Before I delete I want to see all duplicates.
So I did this :
-- Number the rows inside every (name, age) group by id and keep only the
-- second and later occurrences, i.e. the extra copies.
SELECT numbered.ID,
       numbered.name,
       numbered.age,
       numbered.rn
FROM (
    SELECT src.ID,
           src.name,
           src.age,
           ROW_NUMBER() OVER (PARTITION BY src.name, src.age ORDER BY src.id) AS rn
    FROM cte AS src
) AS numbered
WHERE numbered.rn >= 2
ORDER BY numbered.name,
         numbered.age,
         numbered.rn
Result :
This basically shows me the duplicates (the rows with rn > 1). But I also want to see the row where rn = 1, though only if there is more than one version of the current value.
Question
In other words : How can I enhance my query so :
Show all versions for a record ( all row numbers , rn) only if there are versions for this record
Desired result :
ID name age rn
1 john 10 1
2 john 10 2
4 paul 6 1
5 paul 6 2
Sql online - demo
NB: I know I can do it by rescanning the table for the same name and age, but I wondered whether there is a more elegant way of doing it.

Use the EXISTS operator to find the (name, age) values that are duplicated. Try this:
-- Sample rows (id, name, age) used to demonstrate the query.
;WITH cte AS
(
    SELECT 1 AS id, 'john' AS name, 10 AS age
    UNION
    SELECT 2 AS id, 'john' AS name, 10 AS age
    UNION
    SELECT 3 AS id, 'john' AS name, 12 AS age
    UNION
    SELECT 4 AS id, 'paul' AS name, 6 AS age
    UNION
    SELECT 5 AS id, 'paul ' AS name, 6 AS age
    UNION
    SELECT 6 AS id, 'paul different' AS name, 7 AS age
    UNION
    SELECT 7 AS id, 'ringo' AS name, 2 AS age
),
-- Every sample row plus its position (by id) inside its (name, age) group.
cte1 AS
(
    SELECT ID,
           name,
           age,
           ROW_NUMBER() OVER (PARTITION BY name, age ORDER BY id) AS rn
    FROM cte
)
-- Keep a row when its (name, age) group also holds a row numbered above 1,
-- i.e. when the group has more than one member.
SELECT cur.ID,
       cur.name,
       cur.age,
       cur.rn
FROM cte1 AS cur
WHERE EXISTS (
          SELECT 1
          FROM cte1 AS other
          WHERE other.name = cur.name
            AND other.age = cur.age
            AND other.rn >= 2
      )
ORDER BY cur.name,
         cur.age,
         cur.rn
or use Inner Join
-- Join every numbered row to the second row (rn = 2) of its own (name, age) group.
-- Groups with a single row have no rn = 2 row, so they drop out of the result.
-- Each group has exactly one rn = 2 row, so no output row is repeated; matching
-- on b.rn > 1 instead would repeat every row once per extra copy in groups of
-- three or more.
SELECT a.id,
       a.name,
       a.age,
       a.rn
FROM cte1 AS a
INNER JOIN cte1 AS b
    ON b.name = a.name
   AND b.age = a.age
   AND b.rn = 2
ORDER BY a.name,
         a.age,
         a.rn
Or To do it in single table scan use Dense_Rank plus window function
-- Sample rows (id, name, age) used to demonstrate the query.
;WITH cte AS
(
    SELECT 1 AS id, 'john' AS name, 10 AS age
    UNION
    SELECT 2 AS id, 'john' AS name, 10 AS age
    UNION
    SELECT 3 AS id, 'john' AS name, 12 AS age
    UNION
    SELECT 4 AS id, 'paul' AS name, 6 AS age
    UNION
    SELECT 5 AS id, 'paul ' AS name, 6 AS age
    UNION
    SELECT 6 AS id, 'paul different' AS name, 7 AS age
    UNION
    SELECT 7 AS id, 'ringo' AS name, 2 AS age
),
-- Both window functions use the same (name, age) partition:
--   cnt = size of the group, rn = position of the row inside the group by id.
-- ROW_NUMBER replaces a DENSE_RANK partitioned by name only, which could only
-- find duplicates at the lowest age of each name and never produced the
-- 1, 2, ... numbering asked for.
cte1 AS
(
    SELECT ID,
           name,
           age,
           COUNT(*) OVER (PARTITION BY name, age) AS cnt,
           ROW_NUMBER() OVER (PARTITION BY name, age ORDER BY id) AS rn
    FROM cte
)
-- Every row (including rn = 1) of each group that has more than one member.
SELECT ID,
       name,
       age,
       rn
FROM cte1
WHERE cnt > 1
ORDER BY name,
         age,
         rn

Related

second highest salary in each department

I am trying to find the second highest salary in each department.
Schema:
-- Employee table used by the question: ID and NAME are mandatory,
-- departmentid and salary are declared without NOT NULL.
CREATE TABLE employees
(
ID int NOT NULL,
NAME char(50) NOT NULL,
departmentid int,
salary int
);
Sample records:
-- Sample rows. Explicit column lists keep the inserts valid even if the
-- column order of employees changes later.
/*departmentid =1 : three employees, two of them tied on 70000 */
INSERT INTO employees (ID, NAME, departmentid, salary) VALUES (1, 'Max', 1, 90000);
INSERT INTO employees (ID, NAME, departmentid, salary) VALUES (2, 'Joe', 1, 70000);
INSERT INTO employees (ID, NAME, departmentid, salary) VALUES (3, 'Randy', 1, 70000);
/*departmentid =2 : two employees */
INSERT INTO employees (ID, NAME, departmentid, salary) VALUES (4, 'Henry', 2, 80000);
INSERT INTO employees (ID, NAME, departmentid, salary) VALUES (5, 'SAM', 2, 60000);
/*departmentid =3 : a single employee */
INSERT INTO employees (ID, NAME, departmentid, salary) VALUES (6, 'Janet', 3, 69000);
My query:
-- Asker's attempt: rank salaries per department (highest first), count the
-- rows per department, then filter on those two values.
SELECT departmentid,
NAME,
salary
FROM
(
SELECT
departmentid,
NAME,
salary,
-- equal salaries inside a department share the same rank value
Dense_rank()OVER (partition BY departmentid
ORDER BY salary DESC) AS Rank,
-- number of rows in the department
Count(1)OVER(partition BY departmentid) AS cnt
FROM
employees
)t
WHERE
t.rank = 2
-- this second branch also lets through the top-ranked row of a department
-- that has exactly one row
OR ( t.rank = 1
AND cnt = 1 )
The output I am getting is as below;
departmentid NAME salary
1 Joe 70000
1 Randy 70000
2 SAM 60000
3 Janet 69000
My expected output is
departmentid NAME salary
1 Joe 70000
1 Randy 70000
2 SAM 60000
3 NULL NULL
As there is only one record for departmentid=3, it should return null.
What is wrong with this query? Any other ways to achieve this result?
I've also included a SQL fiddle.
ROW_NUMBER() and select = 2
-- Number the employees of each department from the highest salary downwards
-- and keep the row numbered 2.
-- The alias assignment belongs inside the SELECT list ([RN] = ...), and the
-- ordering must be DESC: ascending order would number the lowest salary first.
-- ROW_NUMBER gives tied salaries different numbers, so exactly one row per
-- department is returned; use DENSE_RANK instead if ties should share a rank.
;WITH salary AS
(
    SELECT [RN] = ROW_NUMBER() OVER (PARTITION BY departmentid ORDER BY salary DESC),
           *
    FROM employees
)
SELECT
    *
FROM salary
WHERE [RN] = 2
I've used two CTEs.
The first returns a list of every department. You'll need this to ensure that departments with fewer than two salaries are still included in the final result.
The second ranks each employee within their department.
Finally, I've used a left outer join to maintain the complete list of departments.
-- The distinct department ids, so that every department reaches the output.
WITH Department AS
(
    SELECT DISTINCT
        departmentid
    FROM employees
),
-- Each employee with the dense rank of their salary inside the department,
-- highest salary first.
EmployeeRanked AS
(
    SELECT
        departmentid,
        NAME,
        salary,
        DENSE_RANK() OVER (PARTITION BY departmentid ORDER BY salary DESC) AS [Rank]
    FROM employees
)
-- The rank filter sits in the ON clause, so a department without a rank-2
-- employee still yields one row with NULLs in the employee columns.
SELECT
    ranked.[Rank],
    dept.departmentid,
    ranked.NAME,
    ranked.salary
FROM Department AS dept
LEFT JOIN EmployeeRanked AS ranked
    ON ranked.departmentid = dept.departmentid
   AND ranked.[Rank] = 2
;
Returns
Rank departmentid NAME salary
2 1 Joe 70000
2 1 Randy 70000
2 2 SAM 60000
(null) 3 (null) (null)
Use a sub query as i wrote here : http://sqlfiddle.com/#!6/bb5e1/26
-- Rank the salaries inside each department, highest first.
-- DENSE_RANK gives equal salaries the same rank, so every employee tied on the
-- second-highest salary is returned; ROW_NUMBER would keep only one of them.
-- NAME is carried through so the result shows who earns that salary.
-- A department with fewer than two distinct salaries produces no row here.
WITH ranks AS (
    SELECT departmentid,
           NAME,
           salary,
           DENSE_RANK() OVER (PARTITION BY departmentid ORDER BY salary DESC) AS [rank]
    FROM employees
)
SELECT departmentid,
       NAME,
       salary,
       [rank]
FROM ranks
WHERE [rank] = 2
If a departmentid has only one row and you want to include that row as well, then:
Query
-- Every employee row plus the dense rank of its salary inside the department,
-- highest salary first.
;WITH cte AS
(
    SELECT [rank] = DENSE_RANK() OVER (PARTITION BY departmentid ORDER BY salary DESC),
           *
    FROM employees
)
-- Part 1: the employees holding the second-highest salary of their department.
SELECT ID,
       NAME,
       departmentid,
       salary
FROM cte
WHERE [rank] = 2
UNION ALL
-- Part 2: for departments consisting of a single row, that single row
-- (MAX over one row simply returns the row's own values).
SELECT MAX(ID),
       MAX(NAME),
       departmentid,
       MAX(salary)
FROM cte
GROUP BY departmentid
HAVING COUNT(*) = 1;
There is also a simple way:
-- Take the two highest-paid rows of the whole table, then keep the lower-paid
-- of those two.
SELECT TOP 1 top_two.*
FROM (
    SELECT TOP 2 emp.*
    FROM employees AS emp
    ORDER BY emp.salary DESC
) AS top_two
ORDER BY top_two.salary ASC
Edit: this returns only the 2nd highest overall
I think you can get correct answer by just removing below code from your code
OR ( t.rank = 1
AND cnt = 1 )
Also, the main table should be left-joined to this result so that the remaining columns come back as NULL.

how to select rows where column value has changed

I have a table in which I have few columns like below:
Cusnbr Name LoadNumber
1 Z 10
1 Z 9
1 Z 8
1 C 7
1 C 6
1 C 5
1 B 4
1 B 3
1 A 2
1 A 1
This is just for one cusnbr; there are millions of cusnbr values like this.
I want output like below
Cusnbr Name LoadNumber
1 C 7
1 B 4
1 A 2
For that I write below query in sql server 2008:
-- Asker's attempt. Step 1: number all rows by cusnbr, then loadnumber (ascending).
;With x as
(
Select * ,rn=Row_number() over (order by cusnbr,loadnumber) from table
)
-- Step 2: pair each row x with the row y numbered immediately before it
-- (x.rn = y.rn + 1) when the two names differ; the IS NOT NULL test then
-- discards every x that found no such y.
select x.* from x left outer join x as y on x.rn=y.rn+1
and x.name<>y.name where y.name is not null
But I am not getting the desired output with the above code: I also get the last Z row, which I don't want, and the data comes back irregular rather than in the form I want.
Any help will be appreciated !!
like this I want but not able to get the desired output
I use this example
Though the question is not entirely clear to me, judging from the expected output I have tried DENSE_RANK. I guessed that you want, for each name, the record with the highest LoadNumber.
-- Rows whose LoadNumber equals the highest LoadNumber of some name group.
-- The dense rank over Name gives all rows with the same name one shared
-- number, which is then used as the grouping key.
SELECT *
FROM cteTrial
WHERE LoadNumber IN (
          SELECT MAX(by_name.LoadNumber) AS LoadNumber
          FROM (
              SELECT src.LoadNumber,
                     DENSE_RANK() OVER (ORDER BY src.Name DESC) AS name_group
              FROM cteTrial AS src
          ) AS by_name
          GROUP BY by_name.name_group
      )
If you can use CTE it will produce better performance .
I have written the code to produce the expected output:
-- Sample rows from the question.
;WITH cte (Cusnbr, Name, LoadNumber)
AS
(
    SELECT 1, 'Z', 10 UNION ALL
    SELECT 1, 'Z', 9 UNION ALL
    SELECT 1, 'Z', 8 UNION ALL
    SELECT 1, 'C', 7 UNION ALL
    SELECT 1, 'C', 6 UNION ALL
    SELECT 1, 'C', 5 UNION ALL
    SELECT 1, 'B', 4 UNION ALL
    SELECT 1, 'B', 3 UNION ALL
    SELECT 1, 'A', 2 UNION ALL
    SELECT 1, 'A', 1
)
-- For each customer, return the highest LoadNumber row of every name except
-- the name that owns the customer's newest load.
--   rnk         = 1 marks the highest LoadNumber row of a (Cusnbr, Name) group.
--   latest_load = newest LoadNumber of the customer; the row carrying it is the
--                 one to leave out.
-- Comparing against latest_load makes the exclusion deterministic; a
-- ROW_NUMBER() OVER (ORDER BY (SELECT 1)) has no defined order, so it cannot
-- be relied on to number the newest row first.
-- Partitioning by Cusnbr keeps customers independent of each other.
SELECT cusnbr,
       name,
       loadnumber
FROM (
    SELECT cusnbr,
           name,
           loadnumber,
           ROW_NUMBER() OVER (PARTITION BY cusnbr, name ORDER BY loadnumber DESC) AS rnk,
           MAX(loadnumber) OVER (PARTITION BY cusnbr) AS latest_load
    FROM cte
) AS ranked
WHERE ranked.rnk = 1
  AND ranked.loadnumber < ranked.latest_load
ORDER BY name DESC
Result
cusnbr NAME loadnumber
-------------------------
1 C 7
1 B 4
1 A 2

Removing Subqueries

I have 2 tables tab1 and tab2.
tab1:
id name monthid salary inflow
-----------------------------------------
1 mohan 1 2000 1000
1 mohan 3 3000 1000
1 mohan 4 4500 1600
1 mohan 2 2500 1200
in tab2 I want this output:
id name salary inflow
--------------------------
1 mohan 12000 1600
In tab2, the salary column is the sum of the salary values in tab1, and inflow is the inflow of the highest month (the row with the largest monthid).
I tried this query:
-- Asker's attempt, step 1: meant to insert one total-salary row per person.
-- NOTE(review): id and name are selected next to sum(salary) with no GROUP BY.
Insert into tab2(id, name, salary)
select id, name, sum(salary)
from tab1
-- Step 2: meant to copy the inflow of the highest monthid into tab2.
-- NOTE(review): max(monthid) is used directly inside a WHERE clause, and the
-- UPDATE itself has no WHERE, so it targets every row of tab2.
update tab2
set inflow = (select inflow
from tab1
where monthid = max(monthid))
But I know this is not the correct method.
Can anyone help me to correct this query? And I also want to remove the subqueries.
You can use row_number as below
-- Per id: the salary total over all of its rows, plus a row number that puts
-- the row with the highest monthid first.
;WITH per_month AS
(
    SELECT id,
           [name],
           SUM(salary) OVER (PARTITION BY id) AS Salary,
           inflow,
           ROW_NUMBER() OVER (PARTITION BY id ORDER BY monthid DESC) AS RowN
    FROM tab1
)
-- Only the highest-month row of each id is inserted, so inflow is that month's value.
INSERT INTO tab2 (id, [name], [salary], inflow)
SELECT id,
       [name],
       Salary,
       inflow
FROM per_month
WHERE RowN = 1
Without subquery you can use top(1) with ties as below
-- The ORDER BY expression numbers each id's rows from the highest monthid down,
-- so the highest-month row of every id gets the value 1. TOP (1) WITH TIES then
-- returns all rows that tie on that first value, i.e. one row per id.
Insert into tab2(id, [name], [salary], inflow)
Select top (1) with ties id, [name], sum(salary) over(partition by id) as salary, inflow
from tab1
order by Row_number() over (partition by id order by monthid desc)
-- Sample data in a table variable. Table variables are declared with an @
-- name; a # name denotes a temporary table and cannot be used with DECLARE.
DECLARE @tab1 TABLE (id int, name varchar(100), monthid int, salary int, inflow int)
INSERT INTO @tab1 (id, name, monthid, salary, inflow)
SELECT 1,'Mohan',1,2000,1000
UNION ALL
SELECT 1,'Mohan',3,3000,1000
UNION ALL
SELECT 1,'Mohan',4,4500,1600
UNION ALL
SELECT 1,'Mohan',2,2500,1200
-- Total salary plus the inflow of the highest month. Ordering by monthid DESC
-- makes TOP 1 pick the highest-month row, whose own inflow is the wanted
-- value. MAX(inflow) would return the largest inflow of any month, which only
-- happens to match in this sample. Suitable when the table holds a single id.
SELECT TOP 1
       id,
       name,
       SUM(salary) OVER (PARTITION BY id) AS salary,
       inflow
FROM @tab1
ORDER BY monthid DESC
OR
-- One row per id. FIRST_VALUE (SQL Server 2012 and later) reads the inflow of
-- the row with the highest monthid inside each id.
SELECT DISTINCT
       id,
       name,
       SUM(salary) OVER (PARTITION BY id) AS salary,
       FIRST_VALUE(inflow) OVER (PARTITION BY id ORDER BY monthid DESC) AS inflow
FROM @tab1

How can I find duplicate on one column

I have a SQL server database,and there are many duplicate in one(RanjePhoneNumber) column.
I am trying to select rows from a table that have duplicates in RanjePhoneNumber column and they have a same CityId.
My Table:
RanjePhoneNumber ContactId CityId
776323 280739 7
342261 186372 80
468284 75980 7
776323 101969 9
362875 170242 13
224519 164914 7
342261 203606 55
776323 280733 7
342261 203602 80
My expected results:
RanjePhoneNumber ContactId CityId
776323 280739 7
342261 186372 80
776323 280733 7
342261 203602 80
Group by those two columns:
-- One output row per (RanjePhoneNumber, CityID) combination that occurs in
-- more than one row of the table.
SELECT RanjePhoneNumber, CityID
FROM dbo.TableName
GROUP BY RanjePhoneNumber, CityID
HAVING COUNT(*) > 1
If you want to select all columns you could use a ranking function:
-- Every row plus the number of rows sharing its (RanjePhoneNumber, CityID).
-- The table needs the alias t, because the select list refers to t.*.
WITH CTE AS
(
    SELECT t.*,
           Cnt = COUNT(*) OVER (PARTITION BY t.RanjePhoneNumber, t.CityID)
    FROM dbo.TableName AS t
)
-- All rows that belong to a group with more than one member.
SELECT RanjePhoneNumber,
       ContactId,
       CityId
FROM CTE
WHERE Cnt > 1
If you don't want to find all rows which belong to this "duplicate-group" but only all but the first, use the ROW_NUMBER approach the other answer has shown.
-- Number the rows inside each (Ranjephonenumber, cityid) group.
-- Ordering by contactid makes the numbering repeatable: the row with the
-- lowest contactid is always number 1. Ordering by cityid, which is constant
-- inside the partition, would leave the choice of "first" row arbitrary.
-- dbo.TableName stands for the real table name (TABLE itself is a reserved word).
;WITH cte AS
(
    SELECT Ranjephonenumber,
           contactid,
           cityid,
           ROW_NUMBER() OVER (PARTITION BY Ranjephonenumber, cityid ORDER BY contactid) AS rn
    FROM dbo.TableName
)
-- Every row of a duplicate group except its first one. The CTE exposes the
-- column as cityid, so that is the name to select.
SELECT Ranjephonenumber,
       contactid,
       cityid
FROM cte
WHERE rn > 1

Make Total in Pivot Query

Below is my query
-- Reads the three columns of the temp table as they are, without pivoting.
SELECT UserName ,
TotCount ,
EntryDate
FROM #CandidateCount AS cc
and output of above query is
UserName TotCount EntryDate
--------------------------------
neelam 2 17/12/2013
neelam 1 18/12/2013
neelam 6 19/12/2013
snehal 7 17/12/2013
snehal 0 18/12/2013
snehal 2 19/12/2013
I have updated above query
-- Pivot: one row per EntryDate, with the summed TotCount of each listed user
-- (neelam, snehal) as its own column.
SELECT *
FROM ( SELECT EntryDate ,
UserName ,
TotCount
FROM #CandidateCount AS tbl
) AS SourceTable PIVOT
( SUM(TotCount) FOR UserName IN ( [neelam], [snehal] ) )AS PivotTable
ORDER BY EntryDate DESC
and output of above query is
EntryDate neelam snehal
19/12/2013 6 2
18/12/2013 1 0
17/12/2013 2 7
Now i want to make total datewise like this
EntryDate neelam snehal Total
19/12/2013 6 2 8
18/12/2013 1 0 1
17/12/2013 2 7 9
How can i do this? Thanks.
I've put your data inside a CTE named Source to give you an example. With another CTE you can easily put your totals aside. You can later join these totals on both EntryDates:
-- Sample data: one row per user and entry date. The rows are all different,
-- so UNION ALL is enough and skips the duplicate-removal step of UNION.
WITH Source AS (
    SELECT 'neelam' AS UserName, 2 AS TotCount, '17/12/2013' AS EntryDate UNION ALL
    SELECT 'neelam' AS UserName, 1 AS TotCount, '18/12/2013' AS EntryDate UNION ALL
    SELECT 'neelam' AS UserName, 6 AS TotCount, '19/12/2013' AS EntryDate UNION ALL
    SELECT 'snehal' AS UserName, 7 AS TotCount, '17/12/2013' AS EntryDate UNION ALL
    SELECT 'snehal' AS UserName, 0 AS TotCount, '18/12/2013' AS EntryDate UNION ALL
    SELECT 'snehal' AS UserName, 2 AS TotCount, '19/12/2013' AS EntryDate
)
-- Total of all users per entry date.
, Totals AS (
    SELECT EntryDate
         , SUM(TotCount) AS Total
    FROM Source
    GROUP BY EntryDate
)
-- Pivot the users into columns and attach the per-date total.
SELECT PivotTable.*
     , Totals.Total
FROM ( SELECT EntryDate
            , UserName
            , TotCount
       FROM Source AS tbl
     ) AS SourceTable
PIVOT ( SUM(TotCount) FOR UserName IN ( [neelam], [snehal] ) ) AS PivotTable
INNER JOIN Totals
    ON PivotTable.EntryDate = Totals.EntryDate
-- EntryDate is a dd/mm/yyyy string in this sample; converting it with style 103
-- sorts by calendar date instead of by text, which matters as soon as the
-- data spans more than one month.
ORDER BY CONVERT(datetime, PivotTable.EntryDate, 103) DESC
In your exact case:
-- Total of all users per entry date, computed from the temp table.
WITH Totals AS (
SELECT EntryDate
, SUM(TotCount) AS Total
FROM #CandidateCount
GROUP BY EntryDate
)
-- Pivot the listed users into columns and attach the matching per-date total.
SELECT PivotTable.*
, Totals.Total
FROM ( SELECT EntryDate ,
UserName ,
TotCount
FROM #CandidateCount AS tbl
) AS SourceTable PIVOT
( SUM(TotCount) FOR UserName IN ( [neelam], [snehal] ) )AS PivotTable
JOIN Totals ON PivotTable.EntryDate = Totals.EntryDate
-- NOTE(review): if EntryDate is stored as dd/mm/yyyy text rather than a date
-- type, this sorts as text -- confirm the column type.
ORDER BY PivotTable.EntryDate DESC

Resources