Getting duplicate rows in SQL Server

Getting duplicate rows in SQL Server - sql-server

The screenshot contains 3 tables, all connected with each other.
The second screenshot shows the SQL query and the result obtained:
Query:
-- Pairs every topic with every subscription that shares its subject/grade id.
-- DISTINCT only collapses rows that are identical in all five selected columns.
SELECT DISTINCT
    tp.topic_id,
    tp.topic_name,
    tp.topic_cover,
    tp.topic_viewers,
    sb.subscribe_id
FROM tbltopic AS tp
INNER JOIN tblsubject_grade AS sg
    ON sg.subject_garde_id = tp.subject_garde_id
INNER JOIN tblsubcription AS sb
    ON sb.subject_garde_id = sg.subject_garde_id

The real issue is you have multiple JOIN conditions across a couple of tables
Check this image:
You'll see that table tblsubject_grade JOINs to BOTH of the other tables.
Your query should be:
-- Repro script: rebuilds three temp tables with sample rows, then returns each
-- topic together with the subscription that matches it on BOTH the
-- subject/grade id and the teacher id.

-- Drop leftovers from an earlier run so the script can be re-executed.
IF OBJECT_ID('tempdb..#topic') IS NOT NULL
    DROP TABLE #topic;
IF OBJECT_ID('tempdb..#subjGrade') IS NOT NULL
    DROP TABLE #subjGrade;
IF OBJECT_ID('tempdb..#subscription') IS NOT NULL
    DROP TABLE #subscription;

CREATE TABLE #topic (
    topic_id         int,
    topic_name       varchar(20),
    topic_cover      varchar(20),
    topic_viewers    int,
    teacher_id       int,
    subject_garde_id int
);

CREATE TABLE #subjGrade (
    subject_garde_id INT,
    grade_id         INT,
    subject_id       INT
);

CREATE TABLE #subscription (
    subscribe_id     INT,
    sub_status       INT,
    sub_date         date,
    student_id       INT,
    archive_status   INT,
    teacher_id       int,
    subject_garde_id int
);

INSERT INTO #topic
    (topic_id, topic_name, topic_cover, topic_viewers, teacher_id, subject_garde_id)
VALUES
    (4, 'numbers',    'somestring', 0, 2, 1),
    (6, 'shapes',     'somestring', 0, 9, 1),
    (7, 'story time', 'somestring', 0, 2, 5);

INSERT INTO #subjGrade
    (subject_garde_id, grade_id, subject_id)
VALUES
    (1, 1, 1),
    (2, 1, 2),
    (3, 1, 3),
    (4, 2, 1),
    (5, 2, 2),
    (6, 2, 3),
    (7, 2, 4),
    (8, 3, 1);

INSERT INTO #subscription
    (subscribe_id, sub_status, sub_date, student_id, archive_status, teacher_id, subject_garde_id)
VALUES
    (2, 1, '9-7-2021', 1, 0, 9, 1),
    (3, 1, '9-7-2021', 1, 0, 2, 1);

-- The teacher_id predicate is what stops each topic from pairing with every
-- subscription of the same subject/grade.
SELECT
    tp.topic_id,
    tp.topic_name,
    tp.topic_cover,
    tp.topic_viewers,
    sb.subscribe_id
FROM #topic AS tp
INNER JOIN #subjGrade AS sg
    ON sg.subject_garde_id = tp.subject_garde_id
INNER JOIN #subscription AS sb
    ON sb.subject_garde_id = sg.subject_garde_id
    AND sb.subject_garde_id = tp.subject_garde_id
    AND sb.teacher_id = tp.teacher_id;

Related

Join two columns and pick just one from the child results

I want to perform a JOIN on two tables to get the LEADTIME per ITEMNUM.
Both tables have common value ITEMNUM that I use for the JOIN operation.
The problem is that in the second table the ITEMNUM is not unique and can have multiple, different LEADTIME values.
For example see ITEMNUM 2 in Table 2.
In case there are multiple LEADTIME values, I just want to get one of the LEADTIME values.
I don't care which one.
This is what I have so far, but it keeps returning multiple lines for ITEMNUM 2
-- One output row per (TABLE1 row, matching TABLE2 row), so an ITEMNUM with
-- several TABLE2 rows is returned several times.
-- ITEMNUM is qualified because the column exists in both tables.
SELECT
    TABLE1.ITEMNUM,
    TABLE2.LEADTIME
FROM TABLE1
LEFT JOIN TABLE2
    ON TABLE2.ITEMNUM = TABLE1.ITEMNUM
So what can I do to get just one LEADTIME for ITEMNUM 2? ( as mentioned, I don't care which value )

This approach assigns a row number to each row in #table2 resetting it for each ItemNum value. You need to have an order by clause (if you don't SQL Server raises an error) so I am ordering by NEWID() which should result in a randomized order. You will likely want to tweak what columns you are returning. Here is the dbfiddle to see it in action.
-- Demo: keep at most one #table2 row per ItemNum by numbering the rows within
-- each ItemNum and joining only to the row numbered 1.

IF OBJECT_ID('tempdb.dbo.#table1', 'U') IS NOT NULL
    DROP TABLE #table1;
IF OBJECT_ID('tempdb.dbo.#table2', 'U') IS NOT NULL
    DROP TABLE #table2;

CREATE TABLE #table1 (
    ID      INT,
    ItemNum INT
);

CREATE TABLE #table2 (
    ID       INT,
    ItemNum  INT,
    LeadTime INT
);

INSERT INTO #table1 (ID, ItemNum)
VALUES
    (1, 1),
    (2, 2),
    (3, 3),
    (4, 4),
    (5, 5);

INSERT INTO #table2 (ID, ItemNum, LeadTime)
VALUES
    (1, 1, 6),
    (2, 2, 7),
    (3, 2, 2),
    (4, 3, 6),
    (5, 4, 3);

-- ROW_NUMBER requires an ORDER BY; NEWID() is used so the numbering is not
-- tied to any column value. The rn = 1 test stays in the ON clause so that
-- #table1 rows without a match are still returned.
SELECT
    items.ID,
    items.ItemNum,
    numbered.ID,
    numbered.ItemNum,
    numbered.LeadTime,
    numbered.rn
FROM #table1 AS items
LEFT JOIN (
    SELECT
        src.ID,
        src.ItemNum,
        src.LeadTime,
        ROW_NUMBER() OVER (PARTITION BY src.ItemNum ORDER BY NEWID()) AS rn
    FROM #table2 AS src
) AS numbered
    ON numbered.ItemNum = items.ItemNum
    AND numbered.rn = 1;

There are several ways to get this done. I would use OUTER APPLY with TOP.
-- Demo: for every #Table1 row fetch a single LeadTime from #Table2 using
-- OUTER APPLY, which keeps #Table1 rows that have no match (LeadTime is NULL).

DROP TABLE IF EXISTS #Table1;
CREATE TABLE #Table1 (
    Id      INT,
    ItemNum INT
);

DROP TABLE IF EXISTS #Table2;
CREATE TABLE #Table2 (
    Id       INT,
    ItemNum  INT,
    LeadTime INT
);

INSERT INTO #Table1 (Id, ItemNum) VALUES (1, 1);
INSERT INTO #Table1 (Id, ItemNum) VALUES (2, 2);
INSERT INTO #Table1 (Id, ItemNum) VALUES (3, 3);
INSERT INTO #Table1 (Id, ItemNum) VALUES (4, 4);
INSERT INTO #Table1 (Id, ItemNum) VALUES (5, 5);

INSERT INTO #Table2 (Id, ItemNum, LeadTime) VALUES (1, 1, 6);
INSERT INTO #Table2 (Id, ItemNum, LeadTime) VALUES (2, 2, 7);
INSERT INTO #Table2 (Id, ItemNum, LeadTime) VALUES (3, 2, 2);
INSERT INTO #Table2 (Id, ItemNum, LeadTime) VALUES (4, 3, 6);
INSERT INTO #Table2 (Id, ItemNum, LeadTime) VALUES (5, 4, 3);

-- TOP 1 has no ORDER BY, so which LeadTime is chosen for an ItemNum with
-- several candidates is left to the engine.
SELECT
    item.Id,
    item.ItemNum,
    picked.LeadTime
FROM #Table1 AS item
OUTER APPLY (
    SELECT TOP 1 cand.LeadTime
    FROM #Table2 AS cand
    WHERE cand.ItemNum = item.ItemNum
) AS picked;

I want to convert some data to pivot in SQL Server with join and dynamic

The table, at last, is my target.
This is my demo database
-- Demo database for the pivot question: two tables, their sample rows, and
-- three queries that display the data.
CREATE DATABASE pvtestDb;
GO
USE pvtestDb;
GO

CREATE TABLE custTransaction (
    id      int,
    custNum int,
    value   nvarchar(50)
);
GO

CREATE TABLE customers (
    id       int,
    custName nvarchar(50)
);

INSERT INTO Customers (id, custName)
VALUES
    (1, 'aaa'),
    (2, 'bbb'),
    (3, 'ccc'),
    (4, 'ddd'),
    (5, 'eee'),
    (6, 'fff'),
    (7, 'ggg'),
    (8, 'hhh'),
    (9, 'iii');

INSERT INTO custTransaction (id, custNum, value)
VALUES
    (1, 3, 'a'),
    (1, 4, 'b'),
    (1, 5, 'c'),
    (2, 3, 'd'),
    (2, 4, 'e'),
    (2, 6, 'f'),
    (3, 3, 'g'),
    (3, 8, 'h'),
    (3, 9, 'i');

-- Raw contents of both tables.
SELECT c.id, c.custName
FROM customers AS c;

SELECT ct.id, ct.custNum, ct.value
FROM custTransaction AS ct;

-- Joined view of the two tables, matched on their id columns.
SELECT
    c.custName,
    ct.custNum,
    ct.value
FROM customers AS c
INNER JOIN custTransaction AS ct
    ON ct.id = c.id;
I tried code like this, but it did not work at all:
-- Asker's pivot attempt, reported in the surrounding text as not working.
-- NOTE(review): the IN list [a]..[d] matches values inserted into
-- custTransaction.value, whereas the FOR column custName holds 'aaa'..'iii'
-- in the sample data, and custName is used both as the aggregated column and
-- as the pivot column.
SELECT
custNum, [a], [b], [c], [d]
FROM
customers
JOIN
custTransaction ON custTransaction.id = customers.id
PIVOT
(COUNT([custName])
FOR [custName] IN ([a], [b], [c], [d])) AS p
I need to join the two tables first.
Any hints would be appreciated as I am stuck with this situation

Here's approach with dynamic SQL
-- Builds and executes a PIVOT whose output columns are the customer names
-- found in the customers table, one row per custTransaction.id.
-- Local variables carry the @ prefix that T-SQL requires.
DECLARE @customers varchar(8000);
DECLARE @sql varchar(8000);

-- Comma-separated, bracket-quoted list of customer names.
-- FOR XML PATH('') concatenates the rows into one string and STUFF removes
-- the leading comma.
SELECT @customers = STUFF((
        SELECT ',' + QUOTENAME(custName)
        FROM customers
        FOR XML PATH('')
    ), 1, 1, '');

-- The list is spliced in twice: once as the select list, once as the PIVOT
-- IN list. Transactions are matched to customers through custNum.
SET @sql = 'select
id, ' + @customers + '
from (
select
ct.id, c.custName, ct.value
from
customers c
join custTransaction ct on ct.custNum = c.id
) t
pivot (
max(value) for custName in (' + @customers + ')
) p';

EXEC (@sql);
Output
id aaa bbb ccc ddd eee fff ggg hhh iii
----------------------------------------------------------------
1 NULL NULL a b c NULL NULL NULL NULL
2 NULL NULL d e NULL f NULL NULL NULL
3 NULL NULL g NULL NULL NULL NULL h i

Alternative for this Cursor SQL

How can I do this without the cursor:
-- Walks MY_TABLE one row at a time and, for each row, updates Y (and in one
-- branch Z) from up to two MY_TABLE_2 rows that share the same A and have X = 0.
-- Variables use the @ prefix and the fetch status is read from @@FETCH_STATUS.
SET NOCOUNT ON;

DECLARE @VAR_A BIGINT, @VAR_B TINYINT;

DECLARE _CURSOR CURSOR FOR
    SELECT A, B FROM MY_TABLE;

OPEN _CURSOR;

FETCH NEXT FROM _CURSOR
INTO @VAR_A, @VAR_B;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Stage at most two candidate rows for the current A, numbered 1 and 2.
    SELECT TOP 2 A, B, C, ROW_NUMBER() OVER (ORDER BY A DESC) AS ROW_NUM
    INTO #TMP
    FROM MY_TABLE_2
    WHERE A = @VAR_A AND X = 0
    ORDER BY A DESC;

    IF ((SELECT COUNT(*) FROM #TMP) = 1)
    BEGIN
        -- Only one candidate: take B from it.
        UPDATE MY_TABLE
        SET Y = (SELECT B FROM #TMP WHERE ROW_NUM = 1)
        WHERE A = @VAR_A;
    END
    ELSE IF (@VAR_B = 7)
    BEGIN
        -- B = 7 on the MY_TABLE row: take both B and C from the second candidate.
        UPDATE MY_TABLE
        SET Y = (SELECT B FROM #TMP WHERE ROW_NUM = 2),
            Z = (SELECT C FROM #TMP WHERE ROW_NUM = 2)
        WHERE A = @VAR_A;
    END
    ELSE
    BEGIN
        -- Otherwise take only B from the second candidate; this branch also
        -- runs when no candidate exists, in which case the subquery yields NULL.
        UPDATE MY_TABLE
        SET Y = (SELECT B FROM #TMP WHERE ROW_NUM = 2)
        WHERE A = @VAR_A;
    END

    -- #TMP is recreated by SELECT ... INTO on every iteration.
    DROP TABLE #TMP;

    FETCH NEXT FROM _CURSOR
    INTO @VAR_A, @VAR_B;
END

CLOSE _CURSOR;
DEALLOCATE _CURSOR;
I have a PRODUCT table, this table has a status column, the last status of the product. I have another table, the PRODUCT_HISTORY, that have all information changed on product, including the older status. For each product, I need to get the last status and insert it on a new field on product table. Like this:

This should do the trick...
-- Demo: fill #Product.OlderStatus with the status recorded in the
-- second-most-recent #ProductHistory row of each product, or 0 when a product
-- has fewer than two history rows.

IF OBJECT_ID('tempdb..#Product', 'U') IS NOT NULL
    DROP TABLE #Product;

CREATE TABLE #Product (
    Id            INT         NOT NULL PRIMARY KEY CLUSTERED,
    [Description] VARCHAR(20) NOT NULL,
    [Status]      INT         NOT NULL,
    OlderStatus   INT         NULL
);

INSERT #Product (Id, [Description], [Status])
VALUES
    (1, 'Product A', 5),
    (2, 'Product B', 7),
    (3, 'Product C', 4),
    (4, 'Product D', 3),
    (5, 'Product E', 0);

IF OBJECT_ID('tempdb..#ProductHistory', 'U') IS NOT NULL
    DROP TABLE #ProductHistory;

CREATE TABLE #ProductHistory (
    Id        INT  NOT NULL PRIMARY KEY CLUSTERED,
    [Date]    DATE NOT NULL,
    Status    INT  NOT NULL,
    ProductId INT  NOT NULL
);

INSERT #ProductHistory (Id, [Date], [Status], ProductId)
VALUES
    (1,  '2017-01-01', 0, 1),
    (2,  '2017-01-02', 1, 1),
    (3,  '2017-01-03', 5, 1),
    (4,  '2017-01-04', 0, 2),
    (5,  '2017-01-05', 1, 2),
    (6,  '2017-01-06', 5, 2),
    (7,  '2017-01-07', 7, 2),
    (8,  '2017-01-08', 0, 3),
    (9,  '2017-01-09', 4, 3),
    (10, '2017-01-10', 0, 4),
    (11, '2017-01-11', 3, 4),
    (12, '2017-01-12', 0, 5);

-- History rows numbered per product, newest first; number 2 is therefore the
-- entry just before the latest one.
WITH ranked_history AS (
    SELECT
        h.Id,
        h.[Date],
        h.Status,
        h.ProductId,
        ROW_NUMBER() OVER (PARTITION BY h.ProductId ORDER BY h.[Date] DESC) AS RN
    FROM #ProductHistory AS h
)
UPDATE prod
SET prod.OlderStatus = COALESCE(prev.Status, 0)
FROM #Product AS prod
LEFT JOIN ranked_history AS prev
    ON prev.ProductId = prod.Id
    AND prev.RN = 2;

-- Show the outcome.
SELECT
    prod.Id,
    prod.[Description],
    prod.[Status],
    prod.OlderStatus
FROM #Product AS prod;
Results...
Id Description Status OlderStatus
----------- -------------------- ----------- -----------
1 Product A 5 1
2 Product B 7 5
3 Product C 4 0
4 Product D 3 0
5 Product E 0 0

Find duplicates with different values in T-sql?

I got situation to find duplicates from different sourceport system.
for Ex: I got table like below:
-- Sample data for the duplicate-port question, held in a temp table named
-- #table. A name starting with # cannot be DECLAREd as a table variable, so
-- the table is created with CREATE TABLE instead.
IF OBJECT_ID('tempdb..#table') IS NOT NULL
    DROP TABLE #table;

CREATE TABLE #table (
    id                 int,
    portnumber         int,
    [sourceport]       varchar(50),
    sourcereportedDate datetime
);

INSERT INTO #table (id, portnumber, [sourceport], sourcereportedDate)
VALUES
    (1,  1111, 'north', '2016-08-20 09:44:30.847'),
    (2,  1111, 'north', '2016-08-21 09:44:30.847'),
    (3,  1111, 'north', '2016-08-22 09:44:30.847'),
    (4,  2222, 'north', '2016-08-20 09:44:30.847'),
    (5,  2222, 'north', '2016-08-26 09:44:30.847'),
    (6,  2222, 'south', '2016-08-22 09:44:30.847'),
    (7,  3333, 'south', '2016-08-10 09:44:30.847'),
    (8,  3333, 'north', '2016-08-12 09:44:30.847'),
    (9,  4444, 'north', '2016-08-20 09:44:30.847'),
    (10, 5555, 'south', '2016-08-21 09:44:30.847'),
    (11, 5555, 'south', '2016-08-27 09:44:30.847'),
    (12, 6666, 'south', '2016-08-10 09:44:30.847'),
    (13, 6666, 'north', '2016-08-21 09:44:30.847'),
    (14, 6666, 'south', '2016-08-09 09:44:30.847');
Now I want to find duplicates where 'portnumber' is the same and 'sourceport' is different. If both 'portnumber' and 'sourceport' are the same, the rows should not count as duplicates.
and also I need additional column which holds the Id of greatest 'sourcereportedDate' date
I want get output like below:
(4, 2222, 'north' , '2016-08-20 09:44:30.847',5)
(5, 2222, 'north' , '2016-08-26 09:44:30.847','latest')
(6, 2222, 'south' , '2016-08-22 09:44:30.847',5)
(7, 3333, 'south' , '2016-08-10 09:44:30.847',8)
(8, 3333, 'north' , '2016-08-12 09:44:30.847','latest')
(12, 6666, 'south' , '2016-08-10 09:44:30.847',13)
(13, 6666, 'north' , '2016-08-21 09:44:30.847','latest')
(14, 6666, 'south' , '2016-08-09 09:44:30.847',13)
Thanks in advance.

Please try this (i still think it can be further optimized)-
-- Lists every row of each port number that is reported by more than one
-- sourceport, plus a column holding either 'Latest' (for the row with the
-- newest sourcereportedDate) or the id of that newest row.
;WITH latest_date_per_port AS (
    -- Newest report date per port number, restricted to rows that have a
    -- partner row with the same port number but a different sourceport.
    SELECT
        cur.portnumber,
        MAX(cur.sourcereportedDate) AS sourcereportedDate
    FROM #table AS cur
    INNER JOIN #table AS other
        ON other.portnumber = cur.portnumber
        AND other.sourceport <> cur.sourceport
    GROUP BY cur.portnumber
),
latest_id_per_port AS (
    -- Id of the row carrying that newest date.
    SELECT
        src.portnumber,
        src.id AS max_id
    FROM latest_date_per_port AS newest
    INNER JOIN #table AS src
        ON src.portnumber = newest.portnumber
        AND src.sourcereportedDate = newest.sourcereportedDate
)
SELECT
    src.id,
    src.portnumber,
    src.sourceport,
    src.sourcereportedDate,
    CASE
        WHEN src.id = latest.max_id THEN 'Latest'
        ELSE CONVERT(varchar(10), latest.max_id)
    END AS new_column
FROM latest_id_per_port AS latest
INNER JOIN #table AS src
    ON src.portnumber = latest.portnumber
Updated above query -
-- Same result shape as the previous query, using ROW_NUMBER instead of a
-- separate MAX step: rows of port numbers seen under more than one sourceport
-- are ranked newest first and the top-ranked id is reported for every row.
;WITH ranked_duplicates AS (
    SELECT
        cur.portnumber AS portnumber,
        cur.id AS max_id,
        ROW_NUMBER() OVER (
            PARTITION BY cur.portnumber
            ORDER BY cur.sourcereportedDate DESC
        ) AS rank_id
    FROM #table AS cur
    INNER JOIN #table AS other
        ON other.portnumber = cur.portnumber
        AND other.sourceport <> cur.sourceport
)
SELECT
    src.id,
    src.portnumber,
    src.sourceport,
    src.sourcereportedDate,
    CASE
        WHEN src.id = newest.max_id THEN 'Latest'
        ELSE CONVERT(varchar(10), newest.max_id)
    END AS new_column
FROM ranked_duplicates AS newest
INNER JOIN #table AS src
    ON src.portnumber = newest.portnumber
WHERE newest.rank_id = 1

Try this:
-- Returns every row of each port number that appears under more than one
-- sourceport, with a "latest" column: the text 'latest' on the row that has
-- the newest sourcereportedDate, otherwise the id of that newest row.
;WITH
-- Distinct (portnumber, sourceport) pairs.
dis AS (
    SELECT DISTINCT portnumber, sourceport
    FROM #table
),
-- Port numbers that occur with more than one distinct sourceport.
dup AS (
    SELECT portnumber
    FROM dis
    GROUP BY portnumber
    HAVING COUNT(1) > 1
),
-- Newest report date per duplicated port number.
mx AS (
    SELECT
        dup.portnumber,
        MAX(t.sourcereportedDate) AS sourcereportedDate
    FROM dup
    INNER JOIN #table AS t
        ON t.portnumber = dup.portnumber
    GROUP BY dup.portnumber
),
-- Id of the row that carries that newest date.
mxi AS (
    SELECT
        mx.portnumber,
        t.id
    FROM mx
    LEFT JOIN #table AS t
        ON t.portnumber = mx.portnumber
        AND t.sourcereportedDate = mx.sourcereportedDate
)
-- mx is not joined here: no column of it is selected, and it has at most one
-- row per port number, so joining it would not change the result.
SELECT
    t.id,
    t.portnumber,
    t.sourceport,
    t.sourcereportedDate,
    CASE
        WHEN t.id = mxi.id THEN 'latest'
        ELSE CAST(mxi.id AS varchar(10))
    END AS latest
FROM dup
INNER JOIN #table AS t
    ON t.portnumber = dup.portnumber
INNER JOIN mxi
    ON mxi.portnumber = t.portnumber

T-SQL SELECT query to return combined result of multiple tables

I am wondering if it is possible and more efficient to do something that I am presently doing in code, to do in T-SQL instead.
I have a database with courses. Each course can have different offerings which are variations of the course at different locations and at different awards.
Here's my (simplified) database structure and some sample data:
-- Simplified course catalogue: courses, awards, locations, and the offerings
-- that tie one course to one location and one award.
CREATE TABLE tblCourse (
    CourseId   int,
    CourseName varchar(50)
);

CREATE TABLE tblOffering (
    OfferingId int,
    CourseId   int,
    LocationId int,
    AwardId    int
);

CREATE TABLE tblLocation (
    LocationId   int,
    LocationName varchar(50)
);

CREATE TABLE tblAward (
    AwardId   int,
    AwardName varchar(50)
);

INSERT INTO tblCourse (CourseId, CourseName)
VALUES
    (1, 'Course A'),
    (2, 'Course B');

INSERT INTO tblOffering (OfferingId, CourseId, LocationId, AwardId)
VALUES
    (1, 1, 1, 1),
    (2, 1, 2, 1),
    (3, 1, 3, 1),
    (4, 1, 1, 2),
    (5, 2, 3, 1);

INSERT INTO tblLocation (LocationId, LocationName)
VALUES
    (1, 'Location A'),
    (2, 'Location B'),
    (3, 'Location C');

INSERT INTO tblAward (AwardId, AwardName)
VALUES
    (1, 'Award A'),
    (2, 'Award B');
What I want to retrieve from SQL is a single row for each course/award combination. Each row would have columns for each location and whether a course of that CourseId/AwardId combination was available. There would be no rows for course/award combinations that have no offerings.
The required result, from the sample data, would be a recordset like this:
CourseId | CourseName | AwardId | AwardName | LocationA | LocationB | LocationC
---------+------------+---------+-----------+-----------+-----------+----------
1 | Course A | 1 | Award A | True | True | True
1 | Course A | 2 | Award B | True | NULL | NULL
2 | Course B | 1 | Award A | NULL | NULL | True
(NULL could also be False)
At present I am doing a simple SELECT statement with various JOINS which gives me multiple rows for each course/award combination, then I loop through all rows in my code and build the required result. However, I don't think this is so efficient as I also need to page results.
I think I could do this fairly easily in a stored procedure by creating a temporary table and a bunch of separate queries, but I don't think that would be too efficient. Wondering if there is a better way of doing it in T-SQL???
So to clarify, what I am looking for is a T-SQL query or stored procedure that will produce the above sample recordset, and which I could adapt paging to.
NB. I am using SQL Server 2008

For Dynamic columns:
-- Builds and runs a PIVOT with one output column per row of tblLocation.
-- Each cell holds the number of offerings for that course/award at that
-- location.
-- @query is varchar(max) so a long location list cannot truncate the statement.
DECLARE @COLUMNS varchar(max),
        @query   varchar(max);

-- Comma-separated list of bracket-quoted location names. QUOTENAME also
-- escapes a closing bracket inside a name.
SELECT @COLUMNS = COALESCE(@COLUMNS + ',', '') + QUOTENAME(L.LocationName)
FROM tblLocation L;

-- The inner query S exposes one row per offering; PIVOT groups the remaining
-- columns (AID, CID) and counts offerings per location name.
SELECT @query = 'SELECT C.CourseName
,A.AwardName
, pvt.*
FROM (SELECT O.OfferingID AS OID
,O.AwardID AS AID
,O.CourseID AS CID
,L.LocationName AS LID
FROM tblOffering O Inner Join tblLocation L on L.LocationID = O.LocationID) AS S
PIVOT
(
count(oID) For LID IN (' + @COLUMNS + ')
) As pvt
inner join tblCourse C on C.CourseID = CID
inner join tblAward A on A.AwardID = pvt.AID';

EXEC (@query);
GO

This will produce a paginated version of your example results:
-- Self-contained demo: loads the sample data into table variables and returns
-- one page of course/award rows with a 'True'/NULL flag per location.
-- Table variables and paging parameters use the @ prefix required by T-SQL.
DECLARE @tblCourse AS TABLE (CourseId int, CourseName varchar(50));
DECLARE @tblOffering AS TABLE (OfferingId int, CourseId int, LocationId int, AwardId int);
DECLARE @tblLocation AS TABLE (LocationId int, LocationName varchar(50));
DECLARE @tblAward AS TABLE (AwardId int, AwardName varchar(50));

INSERT INTO @tblCourse VALUES (1, 'Course A');
INSERT INTO @tblCourse VALUES (2, 'Course B');

INSERT INTO @tblOffering VALUES (1, 1, 1, 1);
INSERT INTO @tblOffering VALUES (2, 1, 2, 1);
INSERT INTO @tblOffering VALUES (3, 1, 3, 1);
INSERT INTO @tblOffering VALUES (4, 1, 1, 2);
INSERT INTO @tblOffering VALUES (5, 2, 3, 1);

INSERT INTO @tblLocation VALUES (1, 'Location A');
INSERT INTO @tblLocation VALUES (2, 'Location B');
INSERT INTO @tblLocation VALUES (3, 'Location C');

INSERT INTO @tblAward VALUES (1, 'Award A');
INSERT INTO @tblAward VALUES (2, 'Award B');

-- Set the following parameters to control paging:
DECLARE @PageSize AS Int = 5;
DECLARE @PageNumber AS Int = 1;

WITH CourseAwardSummary AS (
    -- One row per course/award pair that has at least one offering; each
    -- location flag is 'True' when an offering exists there, otherwise NULL.
    -- The location ids 1, 2 and 3 are hard-coded to the sample data.
    SELECT DISTINCT
        C.CourseId,
        C.CourseName,
        A.AwardId,
        A.AwardName,
        CASE WHEN EXISTS (
            SELECT 42 FROM @tblOffering
            WHERE CourseId = C.CourseId AND AwardId = A.AwardId AND LocationId = 1
        ) THEN 'True' END AS LocationA,
        CASE WHEN EXISTS (
            SELECT 42 FROM @tblOffering
            WHERE CourseId = C.CourseId AND AwardId = A.AwardId AND LocationId = 2
        ) THEN 'True' END AS LocationB,
        CASE WHEN EXISTS (
            SELECT 42 FROM @tblOffering
            WHERE CourseId = C.CourseId AND AwardId = A.AwardId AND LocationId = 3
        ) THEN 'True' END AS LocationC
    FROM @tblCourse AS C
    INNER JOIN @tblOffering AS O
        ON O.CourseId = C.CourseId
    INNER JOIN @tblAward AS A
        ON A.AwardId = O.AwardId
),
CourseAwardSummaryRows AS (
    -- Number the summary rows in display order so a page can be cut out.
    SELECT *, ROW_NUMBER() OVER (ORDER BY CourseName, AwardName) AS RowNumber
    FROM CourseAwardSummary
)
SELECT CourseId, CourseName, AwardId, AwardName, LocationA, LocationB, LocationC
FROM CourseAwardSummaryRows
WHERE (@PageNumber - 1) * @PageSize + 1 <= RowNumber
    AND RowNumber <= @PageNumber * @PageSize
ORDER BY CourseName, AwardName;

The following query does this by joining and aggregating the offering table, and then joining the result to the course and award tables:
-- One row per course and award, with a 'true'/NULL flag for each of the three
-- sample locations. The derived table oa aggregates offerings per
-- course/award; the outer LEFT JOIN keeps courses that have no offering at
-- all (their award and location columns are then NULL).
select c.CourseId, c.CourseName, oa.AwardId, oa.AwardName,
       oa.LocationA, oa.LocationB, oa.LocationC
from tblCourse c
left outer join (
    -- MAX over a CASE yields 'true' when at least one offering of the group
    -- is at the named location.
    select o.CourseId, o.AwardId, a.awardName,
           max(case when LocationName = 'Location A' then 'true' end) as LocationA,
           max(case when LocationName = 'Location B' then 'true' end) as LocationB,
           max(case when LocationName = 'Location C' then 'true' end) as LocationC
    from tblOffering o
    join tblLocation l
        on o.LocationId = l.LocationId
    join tblAward a
        on a.awardID = o.AwardId
    group by o.CourseId, o.AwardId, a.awardName
) oa
    on oa.CourseId = c.CourseId

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Getting duplicate rows in SQL Server - sql-server

Related

Join two columns and pick just one from the child results

I want to convert some data to pivot in SQL Server with join and dynamic

Alternative for this Cursor SQL

Find duplicates with different values in T-sql?

T-SQL SELECT query to return combined result of multiple tables

Categories

Resources