How can I find duplicate on one column - sql-server

I have a SQL server database,and there are many duplicate in one(RanjePhoneNumber) column.
I am trying to select rows from a table that have duplicates in RanjePhoneNumber column and they have a same CityId.
My Table:
RanjePhoneNumber ContactId CityId
776323 280739 7
342261 186372 80
468284 75980 7
776323 101969 9
362875 170242 13
224519 164914 7
342261 203606 55
776323 280733 7
342261 203602 80
My expected results:
RanjePhoneNumber ContactId CityId
776323 280739 7
342261 186372 80
776323 280733 7
342261 203602 80

Group by those two columns:
SELECT RanjePhoneNumber, CityID
FROM dbo.TableName
GROUP BY RanjePhoneNumber, CityID
HAVING COUNT(*) > 1
If you want to select all columns you could use a ranking function:
WITH CTE AS
(
SELECT t.*, Cnt = COUNT(*) OVER (PARTITION BY RanjePhoneNumber, CityID)
FROM dbo.TableName
)
SELECT RanjePhoneNumber, ContactId, CityId
FROM CTE
WHERE Cnt > 1
If you don't want to find all rows which belong to this "duplicate-group" but only all but the first, use the ROW_NUMBER approach the other answer has shown.

;with cte
as
(select
Ranjephonenumber,
contactid,
cityid,
row_number() over (partition by Ranjephonenumber,cityid order by cityid) as rn
from table
)
select
Ranjephonenumber,contactid,city from cte where rn>1

Related

Select a random row from each group SQL Server

I have a table like below
ID Code Age
----------------
1 4758 21
1 7842 14
1 9821 23
1 6842 9
2 8472 24
2 7558 31
2 7841 28
3 7881 38
3 8794 42
3 4871 43
For each ID, I want to select one of the rows at random like so
ID Code Age
----------------
1 7842 14
2 7841 28
3 4871 43
Is this possible in SQL Server?
select top 1 with ties id,code,age
from
table
order by row_number() over (partition by id order by rand())
Update: as per this Return rows in random order, you have to use NEWId,since RAND() is fixed for the duration of the SELECT on MS SQL Server.
select top 1 with ties id,code,age
from
table
order by row_number() over (partition by id order by NEWID())
Use Newid() in order by clause of Row_number()
SELECT [ID], [Code], [Age]
FROM (SELECT *,
Row_number()
OVER(
PARTITION BY ID
ORDER BY Newid()) RNO
FROM #Table1)A
WHERE RNO = 1
with cte as
(
select *,rank() over ( partition by id order by Newid()) as rn from #c
)
select id,code,age from cte where rn=1
To select different sets each time, use checksum(newid()) in the order by clause.
Query
;with cte as(
select *, rn = row_number() over(
partition by ID
order by abs(checksum(newid())) % 15
)
from [your_table_name]
)
select * from cte
where rn = 1;

Select top 2 distinct for each id and date

I have a table like this :
Table1:
[Id] [TDate] [Score]
1 1.1.00 50
1 1.1.00 60
2 1.1.01 50
2 1.1.01 70
2 1.3.01 40
3 1.1.00 80
3 1.1.00 30
3 1.2.00 40
My desired output should be like this:
[ID] [TDate] [Score]
1 1.1.00 60
2 1.1.01 70
2 1.3.01 40
3 1.1.00 80
3 1.2.00 40
So fare, I have written this:
SELECT DISTINCT TOP 2 Id, TDate, Score
FROM
( SELECT Id, TDate, Score, ROW_NUMBER() over(partition by TDate order by Score) Od
FROM Table1
) A
WHERE A.Od = 1
ORDER BY Score
But it gives me :
[ID] [TDate] [Score]
2 1.1.01 70
3 1.1.00 80
of course I can do this:
"select top 2 ...where ID = 1"
and then:
union
`"Select top 2 ... where ID = 2"`
etc..
but I have a 100,000 of this..
Any way to generalize it to any Id?
Thank you.
WITH TOPTWO AS (
SELECT Id, TDate, Score, ROW_NUMBER()
over (
PARTITION BY TDate
order by SCORE
) AS RowNo
FROM [table_name]
)
SELECT * FROM TOPTWO WHERE RowNo <= 2
Your output doesn't make sense. Let me assume you want two rows per id. Then the query would look like:
SELECT TOP 2 Id, TDate, Score
FROM (SELECT Id, TDate, Score,
ROW_NUMBER() over (partition by id order by Score DESC) as seqnum
FROM Table1
) t
WHERE seqnum <= 2
ORDER BY Score;
Notes:
This assumes that you want two rows per id. Hence, id is in the PARTITION BY.
The WHERE now selects two rows per group in the PARTITION BY.
There is no need for SELECT DISTINCT in the outer query -- at least for this question.
Try this : Make partition by ID and TDate and sort by score in descending order
ROW_NUMBER() over(partition by ID,TDate order by Score DESC) Od
Complete script
WITH CTE AS(
SELECT *,
ROW_NUMBER() over(partition by ID,TDate order by Score DESC) RN
FROM TableName
)
SELECT *
FROM CTE
WHERE RN = 1
Unless I am missing something this can be done with a simple group by
First I prepare a temp table for testing :
declare #table table (ID int, TDate varchar(10), Score int)
insert into #Table values(1, '1.1.00', 50)
insert into #Table values(1, '1.1.00', 60)
insert into #Table values(2, '1.1.01', 50)
insert into #Table values(2, '1.1.01', 70)
insert into #Table values(2, '1.3.01', 40)
insert into #Table values(3, '1.1.00', 80)
insert into #Table values(3, '1.1.00', 30)
insert into #Table values(3, '1.2.00', 40)
Now lets do a select on this table
select ID, TDate, max(Score) as Score
from #table
group by ID, TDate
order by ID, TDate
The result is this :
ID TDate Score
1 1.1.00 60
2 1.1.01 70
2 1.3.01 40
3 1.1.00 80
3 1.2.00 40
So all you need to do is change #table to your table name and you are done

SQL Server query should return max value records

I have table like this:
id_Seq_No emp_name Current_Property_value
-----------------------------------------------
1 John 100
2 Peter 200
3 Pollard 50
4 John 500
I want the max record value of particular employee.
For example, John has 2 records seq_no 1, 4. I want 4th seq_no Current_Property_Value in single query.
Select
max(id_Seq_No)
from
t1
where
emp_name = 'John'
To get the Current_Property_value, just order the results by id_Seq_No and get the first one:
SELECT
TOP 1 Current_Property_value
FROM
table
WHERE
emp_name = 'John'
ORDER BY
id_Seq_No DESC
this will give highest for all tied employees
select top 1 with ties
id_Seq_No,emp_name,Current_Property_value
from
table
order by
row_number() over (partition by emp_name order by Current_Property_value desc)
You can use ROW_NUMBER with CTE.
Query
;WITH CTE AS(
SELECT rn = ROW_NUMBER() OVER(
PARTITION BY emp_name
ORDER BY id_Seq_No DESC
), *
FROM your_table_name
WHERE emp_name = 'John'
)
SELECT * FROM CTE
WHERE rn = 1;

Remove all but last entry by column

I would like to select distinct NAME and delete rows where the ID <> Max(ID) for that NAME.
The table is as follows ...
NAME DATE ID
BudgetX.dbo.Persons 2015-02-13 13:53:38.780 1
BudgetX.dbo.Persons 2015-02-13 13:53:38.780 2
BudgetX.dbo.ILFS 2015-02-13 14:29:55.347 3
BudgetX.dbo.ILFS 2015-02-13 14:29:55.347 4
BudgetX.dbo.ILFS 2015-02-13 14:30:10.813 5
I would like to remove all but rows 2 and 5.
Any ideas appreciated.
Thanks, RR
You could use a CTE + ROW_NUMBER:
WITH CTE AS
(
SELECT Name, Date, ID,
RN = ROW_NUMBER() OVER (PARTITION BY Name ORDER BY ID DESC)
FROM dbo.TableName
)
DELETE FROM CTE WHERE RN > 1

how to to get first top 6 records indifferent columns T-sql?

I got a situation to display first top 6 records. first 3 records in FirstCol and next 3 in SecondCol. My query is like this:
select top 6 [EmpName]
from [Emp ]
order by [Salary] Desc
Result:
[EmpName]
----------------------
Sam
Pam
Oliver
Jam
Kim
Nixon
But I want the result to look like this:
FirstCol SecondCol
Sam Jam
Pam Kim
Oliver Nixon
; WITH TOP_3 AS
(
select TOP 3 [EmpName]
,ROW_NUMBER() OVER (ORDER BY [Salary] Desc) rn
from [Emp ]
order by [Salary] Desc
),
Other3 AS
(
SELECT [EmpName]
,ROW_NUMBER() OVER (ORDER BY [Salary] Desc) rn
FROM Employees
ORDER BY [Salary] DESC OFFSET 3 ROWS FETCH NEXT 3 ROWS ONLY
)
SELECT T3.[EmpName] , O3.[EmpName]
FROM TOP_3 T3 INNER JOIN Other3 O3
ON T3.RN = O3.RN
ORDER BY T3.RN ASC
You can do this using several windowing functions, this is kind of ugly but it will get you the result that you want:
;with data as
(
-- get your Top 6
select top 6 empname, salary
from emp
order by salary desc
),
buckets as
(
-- use NTILE to split the six rows into 2 buckets
select empname,
nt = ntile(2) over(order by salary desc),
salary
from data
)
select
FirstCol = max(case when nt = 1 then empname end),
SecondCol = max(case when nt = 2 then empname end)
from
(
-- create a row number for each item in the buckets to return multiple rows
select empname,
nt,
rn = row_number() over(partition by nt order by salary desc)
from buckets
) d
group by rn;
See SQL Fiddle with Demo. This uses the function NTILE, this takes your dataset of six rows and splits it into two buckets - 3 rows in bucket 1 and 3 rows in bucket 2. The (2) inside the NTILE is used to determine the number of buckets.
Next I used row_number() to create a unique value for each row within each bucket, this allows you to return multiple rows for each column.

Resources