Exclude rows based on multiple column values - sql-server

I need to exclude rows based on conditions across multiple columns: 1) first find the rows with the value "rev" in column ID1, 2) take the corresponding value in column ID2 for those rows (here Ref1), and 3) exclude every row that contains that value in column ID2.
ID1 ID2 VALUE
post Ref1 12
post Ref1 3
rev Ref1 12
rev Ref1 3
post Ref2 10
post Ref2 5
output
ID1 ID2 VALUE
post Ref2 10
post Ref2 5

This assumes that "rev" stands for reversal or something similar, and that if the ID2 and VALUE are the same for a "post" and a "rev" they negate each other.
I created your table with this:
-- Rebuild the sample table from the question.
-- The temp table name is spelled identically in DROP and CREATE so the script
-- can also be re-run on a server whose tempdb uses a case-sensitive collation.
DROP TABLE IF EXISTS #Table1;

CREATE TABLE #Table1 (
    ID1   VARCHAR(4),
    ID2   VARCHAR(4),
    VALUE INT
);

INSERT INTO #Table1 (ID1, ID2, VALUE)
VALUES
    ('post', 'Ref1', 12),
    ('post', 'Ref1', 3),
    ('rev',  'Ref1', 12),
    ('rev',  'Ref1', 3),
    ('post', 'Ref2', 10),
    ('post', 'Ref2', 5);
I then wrote this query, which first fetches all the 'post' rows, self-joins them to rows with the same ID2 and VALUE whose ID1 is 'rev', and finally uses the WHERE clause to return only the rows for which no such match exists (T2.ID1 IS NULL):
-- 'post' rows that have no offsetting 'rev' row with the same ID2 and VALUE.
-- T2 only ever contains 'rev' rows, so T2.ID1 is NULL exactly when the
-- LEFT JOIN found no counterpart.
SELECT
    T1.ID1,
    T1.ID2,
    T1.VALUE
FROM #Table1 AS T1
LEFT JOIN #Table1 AS T2
    ON  T2.ID1   = 'rev'
    AND T2.ID2   = T1.ID2
    AND T2.VALUE = T1.VALUE
WHERE T1.ID1 = 'post'
  AND T2.ID1 IS NULL;

You can first select all rows having ID1 = 'post', then remove every one of them that matches a row having ID1 = 'rev' once that row's 'rev' is replaced with 'post'.
-- Keep each 'post' row unless an identical row is produced by the second query.
-- EXCEPT compares whole rows and returns distinct rows, so repeated 'post' rows collapse to one.
SELECT * FROM tab WHERE [ID1] = 'post'
EXCEPT
-- Relabel every 'rev' row as 'post' so it lines up with the 'post' row that shares its ID2 and VALUE.
-- NOTE(review): SELECT * above presumably yields exactly ID1, ID2, VALUE in this order - confirm against tab.
SELECT 'post' AS [ID1], [ID2], [VALUE] FROM tab WHERE [ID1] = 'rev'
Check the demo here.

Related

MSSQL: How to remove duplicate column values from different tables?

How to remove duplicate column values from different tables?
Given the following tables without foreign key reference:
Table A
Id IdentifierString
-------------------
1 String A
2 String B
3 String C
Table B
Id IdentifierString
-------------------
1 String A
2 String C
3 String D
I want to remove all duplicate column values:
Table A
Id IdentifierString
-------------------
1 NULL
2 String B
3 NULL
Table B
Id IdentifierString
-------------------
1 NULL
2 NULL
3 String D
How is this doable?
This can be done using UNION ALL, GROUP BY with HAVING COUNT(...) > 1, and UPDATE statements, as below:
-- Sample data from the question.
CREATE TABLE Table1 (Id INT, IdentifierString VARCHAR(20));
INSERT INTO Table1 (Id, IdentifierString)
VALUES
    (1, 'String A'),
    (2, 'String B'),
    (3, 'String C');

CREATE TABLE Table2 (Id INT, IdentifierString VARCHAR(20));
INSERT INTO Table2 (Id, IdentifierString)
VALUES
    (1, 'String A'),
    (2, 'String C'),
    (3, 'String D');

-- Table variable holding every IdentifierString that occurs more than once
-- across both tables. A table variable name must start with '@'.
DECLARE @DuplicateEntries TABLE (IdentifierString VARCHAR(20));

-- UNION ALL (not UNION) keeps repeated values so that COUNT can see them.
-- NOTE(review): a value repeated inside a single table is also counted as a
-- duplicate here - confirm that this is acceptable for the real data.
INSERT INTO @DuplicateEntries (IdentifierString)
SELECT A.IdentifierString
FROM (
    SELECT IdentifierString FROM Table1
    UNION ALL
    SELECT IdentifierString FROM Table2
) AS A
GROUP BY A.IdentifierString
HAVING COUNT(A.IdentifierString) > 1;

-- Blank out the duplicated values in each table.
UPDATE T1
SET T1.IdentifierString = NULL
FROM Table1 AS T1
INNER JOIN @DuplicateEntries AS D
    ON D.IdentifierString = T1.IdentifierString;

UPDATE T2
SET T2.IdentifierString = NULL
FROM Table2 AS T2
INNER JOIN @DuplicateEntries AS D
    ON D.IdentifierString = T2.IdentifierString;

-- Show the result.
SELECT Id, IdentifierString FROM Table1;
SELECT Id, IdentifierString FROM Table2;
Please find the working demo on db<>fiddle
Here's your dml script to remove duplicates.
-- Delete every row whose IdentifierString occurs in both table1 and table2.
-- The shared values are captured before anything is deleted: once table1 has
-- been cleaned, comparing table2 against it would no longer find the matches.
IF OBJECT_ID('tempdb..#shared_identifier') IS NOT NULL
    DROP TABLE #shared_identifier;

SELECT t1.IdentifierString
INTO #shared_identifier
FROM table1 AS t1
INTERSECT
SELECT t2.IdentifierString
FROM table2 AS t2;

-- The equality test never matches NULL, so rows whose IdentifierString is
-- NULL are left in place in both tables.
DELETE t1
FROM table1 AS t1
WHERE EXISTS (
    SELECT 1
    FROM #shared_identifier AS s
    WHERE s.IdentifierString = t1.IdentifierString
);

DELETE t2
FROM table2 AS t2
WHERE EXISTS (
    SELECT 1
    FROM #shared_identifier AS s
    WHERE s.IdentifierString = t2.IdentifierString
);

DROP TABLE #shared_identifier;

Delete duplicated row and update the row using the duplicated row id

This is the scenario: I have duplicate rows in my table with the same Id, Name and so on.
1) I have to find the duplicate rows matching all the criteria (this is done).
2) Delete them only if all the criteria match.
3) Use the Id of the deleted record to update the remaining row in the table.
For this I have created two temp tables. Temp1 is the table with all the records. Temp2 consists of the duplicated rows.
-- Reset the scratch tables so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL
    DROP TABLE #Temp1
IF OBJECT_ID('tempdb..#Temp2') IS NOT NULL
    DROP TABLE #Temp2
IF OBJECT_ID('tempdb..#Temp3') IS NOT NULL
    DROP TABLE #Temp3
IF OBJECT_ID('tempdb..#inserted') IS NOT NULL
    DROP TABLE #inserted

-- #Temp1: every record, duplicates included.
CREATE TABLE #Temp1 (
    Id        INT,
    Name      NVARCHAR(64),
    StudentNo INT NULL,
    ClassCode NVARCHAR(8) NULL,
    Section   NVARCHAR(8) NULL
)

INSERT INTO #Temp1 (Id, Name, StudentNo, ClassCode, Section)
VALUES
    (1, 'Joe',   123, 'A1', 'I'),
    (1, 'Joe',   123, 'A1', 'I'),
    (2, 'Harry', 113, 'X2', 'H'),
    (2, 'Harry', 113, 'X2', 'H'),
    (3, 'Elle',  121, 'J1', 'E1'),  -- same Id/StudentNo as the next row, different Section
    (3, 'Elle',  121, 'J1', 'E'),
    (8, 'Jane',  191, 'A1', 'E'),
    (5, 'Silva', 811, 'S1', 'SE'),
    (6, 'Juan',  411, 'S2', 'SE'),
    (7, 'Carla', 431, 'S2', 'SE')

-- #Temp2: the rows considered duplicates.
-- Rows are numbered within each (Id, StudentNo) group; a number above 1 marks a repeat.
;WITH CTE AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY Id, StudentNo
                           ORDER BY Id, StudentNo) AS Duplicate_RowNumber,
        *
    FROM #Temp1
)
SELECT t1.Id, t1.Name, t1.StudentNo, t1.Section, t1.ClassCode
INTO #Temp2
FROM CTE AS c
INNER JOIN #Temp1 AS t1
    ON  t1.Id = c.Id
    -- NOTE(review): this compares t1.StudentNo with itself, so the join is
    -- effectively on Id only and every #Temp1 row of a duplicated Id is returned.
    AND t1.StudentNo = t1.StudentNo
    AND c.Duplicate_RowNumber > 1
-- this will have 6 rows all the duplicates are included
--select * from #Temp2

-- Capture table for the OUTPUT clause below. It is a real temp table, so the
-- later statements that read #inserted can see it.
CREATE TABLE #inserted (
    Id        INT,
    Name      NVARCHAR(64),
    StudentNo INT NULL,
    ClassCode NVARCHAR(8) NULL,
    Section   NVARCHAR(8) NULL
)

-- Delete every #Temp1 row that matches a #Temp2 row on all five columns and
-- record the deleted rows in #inserted.
DELETE FROM #Temp1
OUTPUT deleted.Id, deleted.Name, deleted.StudentNo, deleted.ClassCode, deleted.Section
INTO #inserted
WHERE EXISTS (
    SELECT *
    FROM #Temp2 AS t2
    WHERE #Temp1.Id        = t2.Id
      AND #Temp1.Name      = t2.Name
      AND #Temp1.StudentNo = t2.StudentNo
      AND #Temp1.ClassCode = t2.ClassCode
      AND #Temp1.Section   = t2.Section
)

-- this is to check what is delete so that i can join it and update the table temp1
SELECT * FROM #inserted
As you can see below, the query should not delete the last two highlighted rows, because their Section does not match. It should only delete rows that match on every criterion between Temp1 and Temp2.
Scenario 2: delete the duplicate record in Temp1 and use its key to set Section and ClassCode to NULL in the remaining row. This is what I expect, with the highlighted values being NULL.
You can run this query yourself - just copy and paste.
Yes, for scenario #1 it is going to delete the rows because the problem is in this section.
I added this table for references.
Added this #temp2 table to clarify for later use.
-- Reset the scratch tables used by this part of the answer.
IF OBJECT_ID('tempdb..#Temp2') IS NOT NULL
    DROP TABLE #Temp2
IF OBJECT_ID('tempdb..#tmp4') IS NOT NULL
    DROP TABLE #tmp4
IF OBJECT_ID('tempdb..#Duplicatedata') IS NOT NULL
    DROP TABLE #Duplicatedata

-- Explicit #Temp2 definition, used later as the target for deleted rows.
-- Column types mirror #Temp1 so no value is converted on the way in.
CREATE TABLE #Temp2 (
    Id        INT,
    Name      NVARCHAR(64),
    StudentNo INT NULL,
    ClassCode NVARCHAR(8) NULL,
    Section   NVARCHAR(8) NULL
)

-- Same numbering as in the question's script. The CTE is repeated here
-- because a CTE is only visible to the single statement that follows it.
;WITH CTE AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY Id, StudentNo
                           ORDER BY Id, StudentNo) AS Duplicate_RowNumber,
        *
    FROM #Temp1
)
SELECT t1.Id, t1.Name, t1.StudentNo, t1.Section, t1.ClassCode,
       c.Duplicate_RowNumber
INTO #Duplicatedata
FROM CTE AS c
INNER JOIN #Temp1 AS t1
    ON  t1.Id = c.Id
    -- Kept exactly as in the question: this predicate compares the column with
    -- itself, so rows are matched on Id alone and both Elle rows come back.
    AND t1.StudentNo = t1.StudentNo
    AND c.Duplicate_RowNumber > 1

SELECT * FROM #Duplicatedata
This satisfies both conditions: the result has both rows for Elle, because your join condition effectively matches #Temp1 on Id and StudentNo only.
I added row number column for clarity.
Id Name StudentNo Section ClassCode Duplicate_RowNumber
1 Joe 123 I A1 2
1 Joe 123 I A1 2
2 Harry 113 H X2 2
2 Harry 113 H X2 2
3 Elle 121 E1 J1 2
3 Elle 121 E J1 2
As your partition is based on StudentNo and Id, every duplicate row gets a row number of 2 or more.
You can use this approach to delete.
-- Copy #Temp1 into #tmp4, numbering the rows inside each (Id, StudentNo) group.
SELECT
    ROW_NUMBER() OVER (
        PARTITION BY Id, StudentNo
        ORDER BY Id, StudentNo, section
    ) AS Duplicate_RowNumber,
    *
INTO #tmp4
FROM #Temp1
--You can add section in your order as well for consistency purpose.

-- Drop every row numbered above 1 from #tmp4 and keep a copy of it in #Temp2.
DELETE FROM #tmp4
OUTPUT
    deleted.id,
    deleted.Name,
    deleted.StudentNo,
    deleted.ClassCode,
    deleted.Section
INTO #Temp2
WHERE Duplicate_RowNumber > 1
After that, it seems you want to keep one row in your final table and put the other one in your deleted table. For Elle, it will delete one of the rows from the final table and keep only one, since your partition is not based on Section.
To make sure that you delete 1 row from your final table you can use this.
-- Delete from #temp1 through the derived table t, copying each deleted row into #inserted.
DELETE t
OUTPUT deleted.Id , deleted.Name ,deleted.StudentNo ,deleted.ClassCode
,deleted.Section into #inserted FROM
-- t: every #temp1 row plus rownum, its position within its (name, studentNo) group.
(select *, row_number() over (Partition by tm.name, tm.studentNo Order by ID,
StudentNo, section ) rownum from #temp1 tm) t
-- Only rows that match a #Temp2 row on all five columns are candidates.
join #Temp2 t2 on t.Id = t2.Id
and t.Name = t2.Name
and t.StudentNo = t2.StudentNo
and t.ClassCode = t2.ClassCode
and t.Section = t2.Section
-- The row numbered 1 in each group is never deleted.
where t.rownum > 1
Notice that I added this row number so that it does not delete both rows from the final table: Joe and Harry each have two rows with all attributes matching, so without it both of their rows would be deleted.
-- List the rows captured by the OUTPUT clause of the preceding DELETE.
select * from #inserted
Output you get:
Id Name StudentNo ClassCode Section
3 Elle 121 J1 E1
2 Harry 113 X2 H
1 Joe 123 A1 I
Finally you can update final table in this way. #Scenario 2
-- Scenario 2: for every surviving #Temp1 row whose Id and StudentNo also
-- appear in #INSERTED, clear ClassCode and Section.
UPDATE survivor
SET
    survivor.ClassCode = NULL,
    survivor.SECTION   = NULL
FROM #Temp1 AS survivor
INNER JOIN #INSERTED AS removed
    ON  removed.Id        = survivor.Id
    AND removed.StudentNo = survivor.StudentNo

-- Show the final table.
SELECT * FROM #Temp1
Final Output:
Id Name StudentNo ClassCode Section
1 Joe 123 NULL NULL
2 Harry 113 NULL NULL
3 Elle 121 NULL NULL
8 Jane 191 A1 E
5 Silva 811 S1 SE
6 Juan 411 S2 SE
7 Carla 431 S2 SE
Please note that I have added scripts and output only for the parts where it required change, and rest part is same script provided by you.

SQL Server: How to select missing rows in table from another table?

I have two tables like below:
table1:
StoreId SKU
------------
1 abc
2 abc
3 abc
1 xyz
4 xyz
table2:
StoreId
--------
1
2
3
4
5
I want to select the store IDs from table2 that are missing from table1, per SKU. In the example above, for SKU abc store IDs 4 and 5 are missing, and for SKU xyz store IDs 2, 3 and 5 are missing. So I want the table below as output:
SKU,ID
------
abc 4
abc 5
xyz 2
xyz 3
xyz 5
I am able to pull only the overall missing store which is 5 using below query.
-- Asker's attempt: lists only the stores that no SKU references at all,
-- because the join matches on StoreId without taking SKU into account.
SELECT
    t1.SKU,
    t2.StoreId
FROM #table1 AS t1
FULL OUTER JOIN #table2 AS t2
    ON t2.StoreId = t1.StoreId
WHERE t1.StoreId IS NULL
Below is test create and insert query.
-- Test data for the question, created as temp tables so that the queries in
-- this thread, which read #table1 and #table2, can run against them.
IF OBJECT_ID('tempdb..#table1') IS NOT NULL
    DROP TABLE #table1;
IF OBJECT_ID('tempdb..#table2') IS NOT NULL
    DROP TABLE #table2;

-- StoreId is INT in both tables so that joins compare like with like and no
-- implicit conversion is needed.
CREATE TABLE #table1 (
    StoreId INT,
    SKU     VARCHAR(5)
);

CREATE TABLE #table2 (
    StoreId INT
);

-- Stores that currently carry each SKU.
INSERT INTO #table1 (SKU, StoreId)
VALUES
    ('abc', 1),
    ('abc', 2),
    ('abc', 3),
    ('xyz', 1),
    ('xyz', 4);

-- Full list of stores.
INSERT INTO #table2 (StoreId)
VALUES (1), (2), (3), (4), (5);
Thank you
You need to build the list of every SKU/store combination, and then show only the combinations which do not appear in table1:
-- Pair every store with every distinct SKU, then keep only the pairs that
-- have no row in #table1.
SELECT
    sku_list.SKU,
    store.StoreID
FROM #table2 AS store
CROSS JOIN (
    SELECT DISTINCT sku
    FROM #table1
) AS sku_list
WHERE NOT EXISTS (
    SELECT 1
    FROM #table1 AS stocked
    WHERE stocked.SKU     = sku_list.SKU
      AND stocked.StoreId = store.StoreId
)
Here is an alternative solution with the same result.
The syntax is very similar to the answer from @BeanFrog:
-- Pair every store with every distinct SKU, look each pair up in #table1,
-- and keep the pairs for which the lookup found nothing.
SELECT
    t3.SKU,
    t2.StoreID
FROM #table2 AS t2
CROSS JOIN (SELECT DISTINCT SKU FROM #table1) AS t3
LEFT JOIN #table1 AS t1
    ON  t1.StoreId = t2.StoreId
    AND t1.SKU     = t3.SKU
WHERE t1.sku IS NULL

SQL Server: How to select top rows of a group based on value of the column of that group?

I have two tables like below.
table 1
id rem
1 2
2 1
table 2
id value
1 abc
1 xyz
1 mno
2 mnk
2 mjd
EDIT:
#output
id value
1 abc
1 xyz
2 mnk
What I want to do is select the top 2 rows of table2 with id 1, because the rem value for id 1 is 2, and the top 1 row with id 2, because its rem value is 1, and so on. I am using MS SQL Server 2012. My whole scenario is more complex than this. Please help.
Thank you.
EDIT : I know that i should have given what i have done and how i am doing it but for this particular part i don't have idea for starting. I could do this by using while loop for each unique id but i want to do it in one go if possible.
First, SQL tables represent unordered sets. There is no specification of which values you get, unless you include an order by.
For this purpose, I would go with row_number():
-- Number the table2 rows inside each id, then keep the first `rem` of them,
-- where rem comes from the table1 row with the same id.
WITH numbered AS (
    SELECT
        t2.*,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) AS seqnum
    FROM table2 AS t2
)
SELECT n.*
FROM table1 AS t1
INNER JOIN numbered AS n
    ON  n.id = t1.id
    AND n.seqnum <= t1.rem;
Note: The order by id in the window clause should be replaced by whatever ordering defines which rows you want. If you don't care which rows you get, then order by id or order by (select null) is fine.
Try This:
-- Sample data held in table variables (a table variable name starts with '@').
DECLARE @tbl1 TABLE (id INT, rem INT);
INSERT INTO @tbl1 (id, rem)
VALUES (1, 2), (2, 1);

DECLARE @tbl2 TABLE (id INT, value VARCHAR(10));
INSERT INTO @tbl2 (id, value)
VALUES (1, 'abc'), (1, 'xyz'), (1, 'mno'), (2, 'mnk'), (2, 'mjd');

SELECT id, rem FROM @tbl1;   -- your table 1
SELECT id, value FROM @tbl2; -- your table 2

-- Number the rows inside each id and keep the first `rem` of them, so id 1
-- (rem = 2) returns two rows and id 2 (rem = 1) returns one.
-- ORDER BY T.id does not define which rows come first; replace it with the
-- column that defines "top" for the real data.
SELECT
    A.id,
    A.value,
    A.rem
FROM (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY T.id ORDER BY T.id) AS rowid,
        T.id,
        T.value,
        F.rem
    FROM @tbl2 AS T
    INNER JOIN @tbl1 AS F
        ON F.id = T.id
) AS A
WHERE A.rowid <= A.rem;
-- your required output
-- your required output
Hope it helps.

SQL UPDATE Self-join with compound matching value

I have a MSSQL table that contains rows, some of which I need to update, depending upon the absence/presence of other rows in the same table. the table and sample data look like this:
SQL Table:
TABLE tblConnector(
ID1 [int] NOT NULL,
ID2 [int] NOT NULL,
ID3 [int] NOT NULL,
...other cols...)
having PRIMARY KEY CLUSTERED comprised of ID1, ID2 and ID3 - meaning that the compound key must be unique.
Data Structure:
ID1 ID2 ID3
111 222 1
111 222 9999
333 444 1 <--- update only this row
555 666 1
555 666 9999
777 888 2
777 888 9999
123 456 3 <--- don't update this row
I need to update any row in this table, setting ID3 = 9999, where ID3 currently is 1 and there are no other rows in the table with the same values for ID1 and ID2, with ID3 = 9999.
In my sample data, I only want to update the 3rd row, setting ID3 = 9999 - because it has ID3 = 1 and there are no other rows in the table with the same ID1 and ID2 values (where ID3 <> 1). I don't want to update the last row - so using count() alone to isolate rows isn't a valid approach.
I've tried numerous attempts to join the table to itself, but I can't seem to get an UPDATE statement that will only affect the rows I want.
Any advice on how to code this SQL?
UPDATE: I should have been more clear - I want to update the ID3 column to 9999 ONLY if the current value in ID3 is 1 ... AND there are no other rows that have the same ID1 and ID2 values and ID3 = 9999. ID1, ID2 and ID3 comprise a unique key and setting 9999 in ID3 can't duplicate a key value already in existence.
Maybe I'm reading that differently.
-- (ID1, ID2) pairs that occur on exactly one row.
;WITH single_row_pair AS (
    SELECT ID1, ID2
    FROM tblConnector
    GROUP BY ID1, ID2
    HAVING COUNT(*) = 1
)
-- Set ID3 to 9999 on the only row of each such pair.
UPDATE a
SET a.ID3 = 9999
FROM tblConnector AS a
INNER JOIN single_row_pair AS b
    ON  b.ID1 = a.ID1
    AND b.ID2 = a.ID2
sqlfiddle
EDIT (after comment)
Added your new sample records and WHERE ID3 = 1 to the UPDATE. In this case 1 is a placeholder for whatever single value you want to plugin.
-- (ID1, ID2) pairs that occur on exactly one row.
;WITH single_row_pair AS (
    SELECT ID1, ID2
    FROM tblConnector
    GROUP BY ID1, ID2
    HAVING COUNT(*) = 1
)
-- Set ID3 to 9999 on the only row of each such pair, but only when that row
-- currently has ID3 = 1.
UPDATE a
SET a.ID3 = 9999
FROM tblConnector AS a
INNER JOIN single_row_pair AS b
    ON  b.ID1 = a.ID1
    AND b.ID2 = a.ID2
WHERE a.ID3 = 1
New fiddle
Following your exact wording, the following should work:
-- Set ID3 to 9999 on rows that currently have ID3 = 1, unless a row with the
-- same ID1 and ID2 already has ID3 = 9999 (the update would then duplicate
-- the primary key).
-- T-SQL does not accept an alias directly after UPDATE <table>; the alias is
-- declared in the FROM clause and named as the update target instead.
UPDATE t1
SET ID3 = 9999
FROM tblConnector AS t1
WHERE t1.ID3 = 1
  AND NOT EXISTS
      (
          SELECT 1
          FROM tblConnector AS t2
          WHERE t2.ID1 = t1.ID1
            AND t2.ID2 = t1.ID2
            AND t2.ID3 = 9999
      )
Without EXISTS, without CTE approach. I don't know if this will be most efficient.
-- Set ID3 to 9999 on rows that currently have ID3 = 1 and are the only row
-- for their (id1, id2) pair.
update t1
set t1.id3 = 9999
from
tblConnector as t1
inner join
(
    -- Number of rows per (id1, id2) pair.
    select id1, id2, COUNT(id1) as Rowz
    from tblConnector as t2
    group by id1, id2
) as src
    -- Both key columns are needed: matching on id1 alone would pair a row
    -- with the counts of unrelated id2 values.
    on  t1.id1 = src.id1
    and t1.id2 = src.id2
where src.Rowz = 1
  and t1.id3 = 1

Resources