Find duplicates with different values in T-sql? - sql-server

I need to find duplicates that come from different source port systems.
For example, I have a table like the one below:
-- Sample data for the duplicate-port question.
-- A table variable has to be named with @ (DECLARE @t TABLE ...); since the
-- queries in this thread all read from #table, the fixture is created as a
-- local temporary table with exactly that name.
if object_id('tempdb..#table') is not null drop table #table;

create table #table
(
    id                 int,
    portnumber         int,
    [sourceport]       varchar(50),
    sourcereportedDate datetime
);

-- Explicit column list so the rows stay valid if the table gains columns.
insert into #table (id, portnumber, [sourceport], sourcereportedDate)
values
    (1,  1111, 'north', '2016-08-20 09:44:30.847'),
    (2,  1111, 'north', '2016-08-21 09:44:30.847'),
    (3,  1111, 'north', '2016-08-22 09:44:30.847'),
    (4,  2222, 'north', '2016-08-20 09:44:30.847'),
    (5,  2222, 'north', '2016-08-26 09:44:30.847'),
    (6,  2222, 'south', '2016-08-22 09:44:30.847'),
    (7,  3333, 'south', '2016-08-10 09:44:30.847'),
    (8,  3333, 'north', '2016-08-12 09:44:30.847'),
    (9,  4444, 'north', '2016-08-20 09:44:30.847'),
    (10, 5555, 'south', '2016-08-21 09:44:30.847'),
    (11, 5555, 'south', '2016-08-27 09:44:30.847'),
    (12, 6666, 'south', '2016-08-10 09:44:30.847'),
    (13, 6666, 'north', '2016-08-21 09:44:30.847'),
    (14, 6666, 'south', '2016-08-09 09:44:30.847');
Now I want to find duplicates, where rows count as duplicates only if 'portnumber' is the same and 'sourceport' is different. If both 'portnumber' and 'sourceport' are the same, the rows are not duplicates.
I also need an additional column that holds the id of the row with the greatest 'sourcereportedDate'.
I want to get output like this:
(4, 2222, 'north' , '2016-08-20 09:44:30.847',5)
(5, 2222, 'north' , '2016-08-26 09:44:30.847','latest')
(6, 2222, 'south' , '2016-08-22 09:44:30.847',5)
(7, 3333, 'south' , '2016-08-10 09:44:30.847',8)
(8, 3333, 'north' , '2016-08-12 09:44:30.847','latest')
(12, 6666, 'south' , '2016-08-10 09:44:30.847',13)
(13, 6666, 'north' , '2016-08-21 09:44:30.847','latest')
(14, 6666, 'south' , '2016-08-09 09:44:30.847',13)
Thanks in advance.

Please try this (I still think it can be optimized further):
-- Lists every row of a port that is reported by more than one sourceport.
-- new_column holds the id of that port's most recently reported row, or
-- 'Latest' on the most recent row itself.
;with DupWithMaxDate as (
    -- Ports that have at least two different sourceport values, with their
    -- newest sourcereportedDate. EXISTS replaces the LEFT JOIN + IS NOT NULL
    -- filter (an inner join in disguise) and does not multiply rows before
    -- the aggregate.
    select
        a.portnumber,
        sourcereportedDate = max(a.sourcereportedDate)
    from #table a
    where exists (
        select 1
        from #table b
        where b.portnumber = a.portnumber
          and b.sourceport <> a.sourceport
    )
    group by a.portnumber
),
DupWithMaxID as (
    -- Exactly one id per port: if several rows share the newest date the
    -- highest id wins, so the final join cannot duplicate output rows.
    select
        a.portnumber,
        max_id = max(a.id)
    from DupWithMaxDate x
    inner join #table a
        on a.portnumber = x.portnumber
       and a.sourcereportedDate = x.sourcereportedDate
    group by a.portnumber
)
select
    a.id,
    a.portnumber,
    a.sourceport,
    a.sourcereportedDate,
    new_column = case when a.id = x.max_id then 'Latest' else convert(varchar(10), x.max_id) end
from DupWithMaxID x
inner join #table a on a.portnumber = x.portnumber
order by a.id;
Updated above query -
-- Variant based on ROW_NUMBER: rows of each port that has a differing
-- sourceport are numbered newest-first, and the row numbered 1 supplies the
-- reference id for that port.
;WITH DuplicateWithMaxID AS (
    SELECT
        cur.portnumber AS portnumber,
        cur.id         AS max_id,
        ROW_NUMBER() OVER (
            PARTITION BY cur.portnumber
            ORDER BY cur.sourcereportedDate DESC
        )              AS rank_id
    FROM #table AS cur
    -- the self-join keeps only rows whose port also has another sourceport
    INNER JOIN #table AS other
        ON other.portnumber = cur.portnumber
       AND other.sourceport <> cur.sourceport
)
SELECT
    t.id,
    t.portnumber,
    t.sourceport,
    t.sourcereportedDate,
    CASE
        WHEN t.id = newest.max_id THEN 'Latest'
        ELSE CONVERT(varchar(10), newest.max_id)
    END AS new_column
FROM DuplicateWithMaxID AS newest
INNER JOIN #table AS t
    ON t.portnumber = newest.portnumber
WHERE newest.rank_id = 1

Try this:
-- Rows of every port reported by more than one sourceport, each tagged with
-- the id of the port's newest row ('latest' on that row itself).
;
with
dis as
(
    -- one row per (portnumber, sourceport) pair
    select distinct portnumber, sourceport
    from #table
),
dup as
(
    -- ports that appear with more than one sourceport
    select portnumber
    from dis
    group by portnumber
    having count(*) > 1
),
mx as
(
    -- newest sourcereportedDate per duplicated port
    select
        dup.portnumber,
        max(t.sourcereportedDate) as sourcereportedDate
    from
        dup
    join
        #table as t
    on
        t.portnumber = dup.portnumber
    group by
        dup.portnumber
),
mxi as
(
    -- id of the newest row per duplicated port. The date in mx is taken from
    -- #table, so the join always matches (an inner join is enough); max(t.id)
    -- keeps a single id when several rows share the newest date.
    select
        mx.portnumber,
        max(t.id) as id
    from
        mx
    join
        #table as t
    on
        t.portnumber = mx.portnumber
        and t.sourcereportedDate = mx.sourcereportedDate
    group by
        mx.portnumber
)
-- mxi already contains only duplicated ports, so the extra joins to dup and
-- mx that the original final select carried were redundant and are dropped.
select
    t.id,
    t.portnumber,
    t.sourceport,
    t.sourcereportedDate,
    case when t.id = mxi.id
        then 'latest'
        else cast(mxi.id as varchar(10))
    end as latest
from
    mxi
join
    #table as t
on
    t.portnumber = mxi.portnumber
order by
    t.id;

Related

Getting duplicate rows in SQL Server

The first screenshot shows three tables, all connected to each other.
The second screenshot shows the SQL query and the result obtained:
Query:
-- Query from the question: topics are linked to subscriptions only through
-- the shared subject/grade id.
SELECT DISTINCT
    topic.topic_id,
    topic.topic_name,
    topic.topic_cover,
    topic.topic_viewers,
    sub.subscribe_id
FROM tbltopic AS topic
INNER JOIN tblsubject_grade AS sg
    ON sg.subject_garde_id = topic.subject_garde_id
INNER JOIN tblsubcription AS sub
    ON sub.subject_garde_id = sg.subject_garde_id
The real issue is that there are multiple JOIN conditions across several tables.
Check this image:
You'll see that table tblsubject_grade joins to BOTH of the other tables.
Your query should be:
-- Recreate the three temp tables that reproduce the question's schema.
IF OBJECT_ID('tempdb..#topic') IS NOT NULL
    DROP TABLE #topic;
IF OBJECT_ID('tempdb..#subjGrade') IS NOT NULL
    DROP TABLE #subjGrade;
IF OBJECT_ID('tempdb..#subscription') IS NOT NULL
    DROP TABLE #subscription;

CREATE TABLE #topic
(
    topic_id         int,
    topic_name       varchar(20),
    topic_cover      varchar(20),
    topic_viewers    int,
    teacher_id       int,
    subject_garde_id int
);

CREATE TABLE #subjGrade
(
    subject_garde_id int,
    grade_id         int,
    subject_id       int
);

CREATE TABLE #subscription
(
    subscribe_id     int,
    sub_status       int,
    sub_date         date,
    student_id       int,
    archive_status   int,
    teacher_id       int,
    subject_garde_id int
);

-- Topics: two for subject/grade 1 (teachers 2 and 9), one for subject/grade 5.
INSERT INTO #topic (topic_id, topic_name, topic_cover, topic_viewers, teacher_id, subject_garde_id)
VALUES
    (4, 'numbers',    'somestring', 0, 2, 1),
    (6, 'shapes',     'somestring', 0, 9, 1),
    (7, 'story time', 'somestring', 0, 2, 5);

INSERT INTO #subjGrade (subject_garde_id, grade_id, subject_id)
VALUES
    (1, 1, 1),
    (2, 1, 2),
    (3, 1, 3),
    (4, 2, 1),
    (5, 2, 2),
    (6, 2, 3),
    (7, 2, 4),
    (8, 3, 1);

-- NOTE(review): '9-7-2021' is parsed according to the session's date format;
-- confirm whether 7 September or 9 July is intended.
INSERT INTO #subscription (subscribe_id, sub_status, sub_date, student_id, archive_status, teacher_id, subject_garde_id)
VALUES
    (2, 1, '9-7-2021', 1, 0, 9, 1),
    (3, 1, '9-7-2021', 1, 0, 2, 1);
-- A subscription is matched to a topic on both the subject/grade id and the
-- teacher id.
SELECT
    topic.topic_id,
    topic.topic_name,
    topic.topic_cover,
    topic.topic_viewers,
    sub.subscribe_id
FROM #topic AS topic
INNER JOIN #subjGrade AS sg
    ON sg.subject_garde_id = topic.subject_garde_id
INNER JOIN #subscription AS sub
    ON sub.subject_garde_id = sg.subject_garde_id
   AND sub.subject_garde_id = topic.subject_garde_id
   AND sub.teacher_id = topic.teacher_id

Recursive CTE with hierarchy data

Sample data:
-- Sample data: documents and folders in #docs, child -> parent links in
-- #hierarchy. Table variables must be named with @, so the fixtures are
-- created as local temporary tables to match the #docs name used by the
-- query that follows.
if object_id('tempdb..#docs') is not null drop table #docs;
if object_id('tempdb..#hierarchy') is not null drop table #hierarchy;

create table #docs (docid int, name varchar(10), isfolder int);
-- parent holds a docid, so it is typed int rather than varchar(10)
create table #hierarchy (childid int, parent int);

insert into #docs (docid, name, isfolder)
values
    (1, 'Doc1', 0),
    (2, 'Doc2', 0),
    (3, 'Folder1', 1),
    (4, 'Folder2', 1),
    (5, 'SubFolderA', 1),
    (6, 'SubFolderB', 1);

insert into #hierarchy (childid, parent)
values
    (1, 5),
    (1, 6),
    (2, 6),
    (5, 3),
    (6, 4);
I want to list the data above so I end up with the documents (isfolder = 0) and the folder paths that they are in.
Expected output:
DocId Name Path
--------------------------------------
1 Doc1 Folder1\SubFolderA
1 Doc1 Folder2\SubFolderB
2 Doc2 Folder2\SubFolderB
CTE
I started to write the following CTE, as I was told a CTE can be used to create my output, but I have gone wrong somewhere:
-- Attempt from the question. The recursive member joins #docs back on the
-- same docid and never reads #hierarchy, so each step re-selects the same
-- document instead of moving to a parent folder.
;WITH folderCTE (docid, name) AS
(
    -- Anchor member: the documents (non-folders)
    SELECT doc.docid,
           CAST(doc.name AS varchar(max)) AS name
    FROM #docs AS doc
    WHERE doc.isfolder = 0

    UNION ALL

    -- Recursive member: appends '\' + name to the path built so far
    SELECT nxt.docid,
           CAST(acc.Name + '\' + nxt.name AS varchar(max)) AS name
    FROM folderCTE AS acc
    INNER JOIN #docs AS nxt
        ON nxt.docid = acc.docid
)
-- Read everything the CTE produced
SELECT *
FROM folderCTE
Can anyone show me how I should be doing a CTE for this?
If you use a CTE which just expands the hierarchy for folders, and a second one to build the hierarchy, you can simply join to this to get the folder for your docs:
-- Fixtures as local temporary tables (a table variable would have to be
-- named with @, and the query below reads #docs / #hierarchy).
if object_id('tempdb..#docs') is not null drop table #docs;
if object_id('tempdb..#hierarchy') is not null drop table #hierarchy;

create table #docs (docid int, name varchar(10), isfolder int);
-- parentid is joined to docid below, so it is typed int to avoid an implicit
-- varchar -> int conversion in the join predicate
create table #hierarchy (childid int, parentid int);

insert into #docs (docid, name, isfolder)
values
    (1, 'Doc1', 0),
    (2, 'Doc2', 0),
    (3, 'Folder1', 1),
    (4, 'Folder2', 1),
    (5, 'SubFolderA', 1),
    (6, 'SubFolderB', 1),
    (7, 'SuperFold1', 1);

insert into #hierarchy (childid, parentid)
values
    (1, 5),
    (1, 6),
    (2, 6),
    (5, 3),
    (6, 4),
    (3, 7);

;with folderCTE
as
(
    -- every folder with its parent folder id (NULL for top-level folders)
    select d.docid, cast(d.name as nvarchar(max)) as name, h.parentid
    from #docs d
    left join #hierarchy h on d.docid = h.childid
    where d.isfolder = 1
),
folderHierarchyCTE
as
(
    -- anchor: top-level folders start their own path
    select docid, cast(name as nvarchar(max)) as name
    from folderCTE
    where parentid is null
    union all
    -- recursive step: append each child folder to its parent's path;
    -- '\' is the separator shown in the question's expected output
    select d.docid, cast(p.name + '\' + d.name as nvarchar(max))
    from folderCTE d
    inner join folderHierarchyCTE p on d.parentid = p.docid
)
-- one row per (document, containing folder) with that folder's full path
select d.docid, d.name, f.name as [path]
from #docs d
inner join #hierarchy h on h.childid = d.docid
inner join folderHierarchyCTE f on h.parentid = f.docid
where d.isfolder = 0
order by d.docid, f.name;

Repeat the column value until value change in same column value

My table structure:
-- Sample data: somevalue is populated only on some rows (ids 1, 5 and 10).
-- A table variable must be named with @; the answers read #TestTable, so the
-- fixture is created as a local temporary table with that name.
if object_id('tempdb..#TestTable') is not null drop table #TestTable;

create table #TestTable
(
    id        int,
    somedate  date,
    somevalue int
);

insert into #TestTable (id, somedate, somevalue)
values
    (1,  '2019-01-01', 1000),
    (2,  '2019-01-02', null),
    (3,  '2019-01-03', null),
    (4,  '2019-01-04', null),
    (5,  '2019-01-05', 800),
    (6,  '2019-01-06', null),
    (7,  '2019-01-07', null),
    (8,  '2019-01-08', null),
    (9,  '2019-01-09', null),
    (10, '2019-01-10', 700);
I want to repeat the last non-null value of the column (somevalue) on the following rows until the value changes. Is this possible with window functions?
Required output:
You can achieve that by using window function as well.
try the following:
-- Forward-fill via window functions: COUNT(somevalue) skips NULLs, so the
-- running count c only increases on rows that carry a value; all rows with
-- the same c therefore share that one value, which MAX picks up.
SELECT
    grouped.id,
    grouped.somevalue,
    MAX(grouped.somevalue) OVER (PARTITION BY grouped.c) AS newvalue
FROM
(
    SELECT
        src.id,
        src.somevalue,
        COUNT(src.somevalue) OVER (ORDER BY src.id) AS c
    FROM #testtable AS src
) AS grouped
ORDER BY grouped.id;
Please find the demo here.
Try this below logic-
DEMO HERE
-- Forward-fill via correlated subqueries: for each row, find the highest id
-- at or before it that has a value, then read the value stored on that row.
SELECT
    A.*,
    (
        SELECT src.somevalue
        FROM #TestTable AS src
        WHERE src.id =
        (
            SELECT MAX(prev.id)
            FROM #TestTable AS prev
            WHERE prev.somevalue IS NOT NULL
              AND prev.id <= A.id
        )
    ) AS new_column
FROM #TestTable AS A
ORDER BY A.id
You can also achieve this using COALESCE (Transact-SQL) with a correlated subquery.
-- Forward-fill via COALESCE: keep somevalue when it is present, otherwise
-- take the value from the closest earlier row (highest id) that has one.
SELECT
      p.[id]
    , p.somedate
    , COALESCE(p.somevalue,
        (SELECT TOP (1) p2.somevalue
         FROM #TestTable AS p2
         WHERE
             p2.somevalue IS NOT NULL
             AND p2.[id] <= p.[id]
         ORDER BY p2.[id] DESC)) AS new_value  -- the computed column was unnamed
FROM #TestTable AS p
ORDER BY p.[id];  -- deterministic row order, as in the other answers
Here is the live db<>fiddle demo.

I want to convert some data to pivot in SQL Server with join and dynamic

The table, at last, is my target.
This is my demo database
-- Demo database: customers plus a transaction table, followed by the three
-- SELECTs the question uses to show the data.
CREATE DATABASE pvtestDb;
GO
USE pvtestDb;
GO
CREATE TABLE custTransaction
(
    id      INT,
    custNum INT,
    value   NVARCHAR(50)
)
GO
CREATE TABLE customers
(
    id       INT,
    custName NVARCHAR(50)
)

INSERT INTO Customers (id, custName)
VALUES
    (1, 'aaa'),
    (2, 'bbb'),
    (3, 'ccc'),
    (4, 'ddd'),
    (5, 'eee'),
    (6, 'fff'),
    (7, 'ggg'),
    (8, 'hhh'),
    (9, 'iii')

INSERT INTO custTransaction (id, custNum, value)
VALUES
    (1, 3, 'a'),
    (1, 4, 'b'),
    (1, 5, 'c'),
    (2, 3, 'd'),
    (2, 4, 'e'),
    (2, 6, 'f'),
    (3, 3, 'g'),
    (3, 8, 'h'),
    (3, 9, 'i')

SELECT * FROM customers
SELECT * FROM custTransaction

-- Joined view of both tables, matched on the id columns
SELECT
    custName,
    custNum,
    value
FROM customers
INNER JOIN custTransaction
    ON custTransaction.id = customers.id
I tried code like this, but it did not work at all:
-- Attempt from the question (reported as not working).
-- The IN list names [a], [b], [c], [d]; in the sample data those are values
-- of custTransaction.value, whereas the FOR column is custName, whose sample
-- values are 'aaa' .. 'iii'.
-- custName is also used both as the aggregated column and as the pivot column.
SELECT
custNum, [a], [b], [c], [d]
FROM
customers
JOIN
custTransaction ON custTransaction.id = customers.id
PIVOT
(COUNT([custName])
FOR [custName] IN ([a], [b], [c], [d])) AS p
I need to join the two tables first and then pivot the result.
Any hints would be appreciated, as I am stuck on this.
Here's an approach with dynamic SQL:
-- Dynamic pivot: one output column per customer name, one row per
-- transaction id, each cell holding that customer's value for the id.
-- Local variables must be named with @ (the # prefix is for temp tables).
-- nvarchar(max) matches custName/QUOTENAME (both nvarchar) and removes the
-- 8000-character ceiling on the generated statement.
declare @customers nvarchar(max);
declare @sql nvarchar(max);

-- Comma-separated, bracket-quoted column list: [aaa],[bbb],...
-- QUOTENAME guards against names that contain ] or other special characters.
-- TYPE + .value() returns the text un-encoded, so names containing &, < or >
-- are not turned into XML entities.
select @customers = stuff((
    select ',' + quotename(custName)
    from customers
    order by id              -- fixed column order in the output
    for xml path(''), type
).value('.', 'nvarchar(max)'), 1, 1, '');

-- With no customers the list is NULL and so would be the whole statement;
-- skip execution in that case.
if @customers is not null
begin
    set @sql = N'select
    id, ' + @customers + N'
from (
    select
        ct.id, c.custName, ct.value
    from
        customers c
        join custTransaction ct on ct.custNum = c.id
) t
pivot (
    max(value) for custName in (' + @customers + N')
) p';

    exec sys.sp_executesql @sql;
end
Output
id aaa bbb ccc ddd eee fff ggg hhh iii
----------------------------------------------------------------
1 NULL NULL a b c NULL NULL NULL NULL
2 NULL NULL d e NULL f NULL NULL NULL
3 NULL NULL g NULL NULL NULL NULL h i

CTE Recursive Query getting Child Ancestors based on Child as parameter (bottom-up)

I have a hierarchy table. What I want to do is retrieve the hierarchy for a given AgentID passed as a parameter, so that the result runs bottom-up, from the child to its ancestors. Most samples I found online only show how to retrieve the hierarchy with the parent as the parameter.
Below is the closest sample I could find that retrieves the Child/Parent/Parent... hierarchy.
But how can I use AgentID as a parameter to get the bottom-up hierarchy?
-- Query from the question: builds a child-to-ancestor path for every agent
-- that is not the RootID of any other row.
;WITH rCTE AS
(
    -- Anchor: agents that no other row names as its RootID
    SELECT
        AgentID,
        RootID,
        CAST(AgentID AS NVARCHAR(MAX))      AS PathIDs,
        CAST(AgentName AS NVARCHAR(MAX))    AS PathText,
        CAST(IntroducerID AS NVARCHAR(MAX)) AS PathCost
    FROM TblHierarchy r
    WHERE NOT EXISTS
    (
        SELECT *
        FROM TblHierarchy n
        WHERE r.AgentID = n.RootID
    )

    UNION ALL

    -- Recursive step: move to the row whose AgentID is the current RootID
    -- and append it to the accumulated paths
    SELECT
        up.AgentID,
        up.RootID,
        walked.PathIDs + '>' + CAST(up.AgentID AS NVARCHAR(10))    AS PathIDs,
        walked.PathText + '>' + up.AgentName                       AS PathText,
        walked.PathCost + CAST(up.IntroducerID AS NVARCHAR(MAX))   AS PathCost
    FROM rCTE walked
    INNER JOIN dbo.TblHierarchy up
        ON walked.RootID = up.AgentID
)
-- Keep only paths that reached a row with RootID = 0
SELECT
    r.PathIDs,
    r.PathText,
    r.PathCost
FROM rCTE r
WHERE r.RootID =0--IS NULL
ORDER BY r.PathCost
OPTION (MAXRECURSION 0)
Table sample ;
-- Hierarchy table: each row is an agent (AgentID) that points to its parent
-- through RootID.
CREATE TABLE dbo.TblHierarchy
(
    ID           int IDENTITY(1, 1) NOT NULL,
    AgentID      int                NULL,
    AgentName    varchar(50)        NULL,
    RootID       int                NULL,
    IntroducerID int                NULL,
    Description  varchar(50)        NULL
) ON [PRIMARY]
Data sample :
-- Sample rows. ID is an IDENTITY column, so explicit ID values are accepted
-- only while IDENTITY_INSERT is ON for the table.
-- The [HierarchyTree] column (NULL in every row) is left out of the column
-- list because the table definition shown does not contain it.
SET IDENTITY_INSERT [dbo].[TblHierarchy] ON;

INSERT [dbo].[TblHierarchy] ([ID], [AgentID], [AgentName], [RootID], [IntroducerID], [Description])
VALUES
    (1,    1, N'Toh',       0, 0, N''),
    (2,    2, N'Man',       1, 1, N'Child of Toh'),
    (3,    3, N'Rul',       1, 1, N'Child of Toh'),
    (4,    4, N'Rafiq',     2, 2, N'Child of Man'),
    (5,    5, N'Paan',      2, 2, N'Child of Man'),
    (7857, 6, N'TohChild0', 3, 1, NULL),
    (7858, 7, N'TohChild1', 3, 1, NULL),
    (7859, 8, N'TohChild2', 4, 1, NULL),
    (7860, 9, N'TohChild3', 4, 1, NULL);

SET IDENTITY_INSERT [dbo].[TblHierarchy] OFF;
This is the current result of this CTE query
255>127>63>31>15>7>3>1
254>127>63>31>15>7>3>1
253>126>63>31>15>7>3>1
The result is like what I want, but it is too generic. How can I get a result like this using AgentID as a parameter, so that only a single row is returned?
EDIT
RootID is the parent and AgentID is the child. I want a bottom-up hierarchy, meaning all the parents of a particular AgentID.
Let's say the AgentID = 327.
Then my expected result is 327>163>81>40>20>10>5>2>1
Thanks in advance!
A recursive CTE has two parts:
1. The recursive seed. This is the first query, above the UNION ALL, where you set up the starting point for the iterations. Your current starting point selects every AgentID that is not a RootID.
2. The recursive term. This is the query that is run repeatedly until its join returns no more rows, which signals the end of the recursion for that particular branch.
Since you want the results for one particular AgentID only, rather than for every AgentID that is not a RootID as you currently have, change the WHERE clause of the recursive seed:
-- Bottom-up path for a single agent: start at @AgentID and follow RootID
-- upward until the row whose RootID is 0 is reached.
DECLARE @AgentID int = 327;  -- the agent whose ancestor chain is wanted

;WITH rCTE
AS (
    -- Recursive seed: only the requested agent
    SELECT AgentID,
        RootID,
        CAST(AgentID AS NVARCHAR(MAX)) AS PathIDs,
        CAST(AgentName AS NVARCHAR(MAX)) AS PathText,
        CAST(IntroducerID AS NVARCHAR(MAX)) AS PathCost
    FROM dbo.TblHierarchy r
    WHERE r.AgentID = @AgentID

    UNION ALL

    -- Recursive term: step to the parent (the row whose AgentID equals the
    -- current RootID) and append it to the accumulated paths
    SELECT n.AgentID,
        n.RootID,
        r.PathIDs + '>' + CAST(n.AgentID AS NVARCHAR(10)) AS PathIDs,
        r.PathText + '>' + n.AgentName AS PathText,
        r.PathCost + CAST(n.IntroducerID AS NVARCHAR(MAX)) AS PathCost
    FROM rCTE r
    INNER JOIN dbo.TblHierarchy n ON r.RootID = n.AgentID
    )
-- Only the finished path, i.e. the one that ends on a row with RootID = 0
SELECT PathIDs,
    PathText,
    PathCost
FROM rCTE r
WHERE r.RootID = 0 --IS NULL
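As a second option, you could keep the original seed and instead filter the final SELECT from the recursive CTE with WHERE RootID = 0 AND PathIDs LIKE '327>%'. Include the '>' separator in the pattern, because a plain '327%' would also match paths starting at 3270, 3271 and so on. Note that this only finds the agent if it is one of the seed rows, and that it still builds every path before filtering, so changing the seed is the more efficient choice.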

Resources