Related
I have a table, table1:
-- Sample table: each row holds one or more '~'-delimited words.
-- CREATE TABLE requires a column list; the column is named Value.
CREATE TABLE table1 (
    Value varchar(100)
);
INSERT INTO table1 (Value) VALUES ('XYZ');
INSERT INTO table1 (Value) VALUES ('ABC');
INSERT INTO table1 (Value) VALUES ('XYZ~ABC~AAA');
-- '123~ABC' rather than '123', so that ABC occurs three times as the expected output (ABC|3) requires.
INSERT INTO table1 (Value) VALUES ('123~ABC');
Then, I have string 'ABC~XYZ~123'. I need to split this string into each word by using SQL:
-- Break the '~'-delimited literal into one row per word.
SELECT parts.[value]
FROM STRING_SPLIT('ABC~XYZ~123', '~') AS parts
The return is table2
ABC
XYZ
123
I want to count how many times each word in table2 appears in table1.
The expected output is
ABC|3
XYZ|2
123|1
Any ideas on this?
If I understand your case correctly, the next statement may help:
Text and table:
-- '~'-delimited list of the words whose occurrences are counted.
-- T-SQL local variable names must start with '@'; DECLARE rejects a '#' prefix.
DECLARE @text varchar(100) = 'ABC~XYZ~123';

-- Sample data: each [Text] cell holds one or more '~'-delimited words.
CREATE TABLE Data (
    Id int,
    [Text] varchar(100)
);

INSERT INTO Data
    (Id, [Text])
VALUES
    (1, 'XYZ'),
    (2, 'ABC'),
    (3, 'XYZ~ABC~AAA'),
    (4, '123~ABC');
Statement:
-- For every word in @text, report how many times it occurs across all Data.[Text] cells.
-- Expects @text (a '~'-delimited word list) to be declared earlier in the same batch.
SELECT
    t.[value] AS [Word],
    -- The LEFT JOIN keeps words that never occur; report them as 0 rather than NULL.
    COALESCE(j.[Count], 0) AS [Count]
FROM STRING_SPLIT(@text, '~') t
LEFT JOIN (
    -- One row per distinct word found in Data, with its number of occurrences.
    SELECT s.[value], COUNT(*) AS [Count]
    FROM Data d
    CROSS APPLY STRING_SPLIT(d.[Text], '~') s
    GROUP BY s.[value]
) j ON t.[value] = j.[value]
Result:
-----------
Word Count
-----------
ABC 3
XYZ 2
123 1
Apart from the suggestions in the comments, you can use the COUNT() function as shown below. Note, however, that storing delimited values in a single column makes them hard to extract and hard to join against other tables.
-- Count the table1 rows whose whole Value equals one of the words in the literal list.
-- Only exact cell matches are counted; cells such as 'XYZ~ABC~AAA' are not split here.
SELECT a.T1Value, COUNT(*) AS [NoCount]
FROM (
    -- Both sources expose a column called "value", so every reference is qualified
    -- to avoid an ambiguous-column error.
    SELECT t.Value AS T1Value
    FROM STRING_SPLIT('ABC~XYZ~123', '~') AS w
    INNER JOIN table1 AS t ON w.[value] = t.Value
) AS a
GROUP BY a.T1Value
Edit
-- Sample table: every TableValue cell carries one or more '~'-delimited words.
CREATE TABLE table1 (
    TableValue varchar(max)
);

-- Load the four sample rows in a single statement.
INSERT INTO table1 (TableValue)
VALUES
    ('XYZ'),
    ('ABC'),
    ('XYZ~ABC~AAA'),
    ('123~ABC');
-- Count, for each searched word, how many times it occurs among the words stored in table1.
SELECT stored_words.[value]
    ,COUNT(*)
FROM (
    -- Every individual word stored in table1, one row per occurrence.
    SELECT parts.[value]
    FROM table1
    CROSS APPLY STRING_SPLIT(table1.TableValue, '~') AS parts
) AS stored_words
INNER JOIN (
    -- The words being searched for.
    SELECT wanted.[value]
    FROM STRING_SPLIT('ABC~XYZ~123', '~') AS wanted
) AS search_words ON search_words.[value] = stored_words.[value]
GROUP BY stored_words.[value]
Here is a live demo on db<>fiddle.
Setup
-- Sample table: each STRINGS cell holds one or more '~'-delimited words.
create table STRINGS (ID int, STRINGS varchar(max));
insert into STRINGS (ID, STRINGS) values (1, 'XYZ');
insert into STRINGS (ID, STRINGS) values (1, 'ABC');
insert into STRINGS (ID, STRINGS) values (1, 'XYZ~ABC~AAA');
insert into STRINGS (ID, STRINGS) values (1, '123~ABC');
-- '~'-delimited list of words to count.
-- T-SQL variables must be prefixed with '@'; DECLARE does not accept a '#' name.
declare @VALUES varchar(max) = 'XYZ~ABC~123';
Calculation :
-- For each word in @VALUES, count its occurrences across all STRINGS rows.
-- Expects @VALUES (a '~'-delimited word list) to be declared earlier in the same batch.
select V1.VALUE, count(STRINGS.ID)
from string_split(@VALUES,'~') V1
cross join STRINGS
-- cross apply instead of outer apply: the equality filter below discards the NULL rows
-- an outer apply would add, so the result is the same and the intent is clearer.
cross apply string_split(STRINGS.STRINGS,'~') V2
where V2.VALUE = V1.VALUE
group by V1.VALUE
Result
-----------
Value Num
-----------
ABC 3
XYZ 2
123 1
Live example:
https://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=15b95efcf69ea98fafbb7dda1c624551
How to remove duplicate column values from different tables?
Given the following tables without foreign key reference:
Table A
Id IdentifierString
-------------------
1 String A
2 String B
3 String C
Table B
Id IdentifierString
-------------------
1 String A
2 String C
3 String D
I want to remove all duplicate column values:
Table A
Id IdentifierString
-------------------
1 NULL
2 String B
3 NULL
Table B
Id IdentifierString
-------------------
1 NULL
2 NULL
3 String D
How is this doable?
This can be done with UNION ALL, a HAVING COUNT(...) filter, and UPDATE statements, as shown below:
-- Sample data for the two unrelated tables.
CREATE TABLE Table1 (Id INT, IdentifierString VARCHAR (20));
INSERT INTO Table1 (Id, IdentifierString) VALUES
(1, 'String A'),
(2, 'String B'),
(3, 'String C');
CREATE TABLE Table2 (Id INT, IdentifierString VARCHAR (20));
INSERT INTO Table2 (Id, IdentifierString) VALUES
(1, 'String A'),
(2, 'String C'),
(3, 'String D');

-- Table variable holding every IdentifierString that occurs more than once
-- across both tables. Table variables must be named with '@', not '#'.
DECLARE @DuplicateEntries TABLE (IdentifierString VARCHAR(20));
INSERT INTO @DuplicateEntries (IdentifierString)
SELECT IdentifierString FROM (
    -- UNION ALL keeps repeated values so they can be counted.
    SELECT IdentifierString FROM Table1
    UNION ALL
    SELECT IdentifierString FROM Table2
) A
GROUP BY IdentifierString
HAVING COUNT(IdentifierString) > 1;

-- Blank out the duplicated values in each table; the rows themselves are kept.
UPDATE T1
SET T1.IdentifierString = NULL
FROM Table1 T1
JOIN @DuplicateEntries D ON D.IdentifierString = T1.IdentifierString;
UPDATE T2
SET T2.IdentifierString = NULL
FROM Table2 T2
JOIN @DuplicateEntries D ON D.IdentifierString = T2.IdentifierString;

SELECT * FROM Table1;
SELECT * FROM Table2;
Please find the working demo on db<>fiddle
Here's a DML script to remove duplicates.
-- Syntax repaired only: added the missing space before FROM, qualified the ambiguous id,
-- gave the two IdentifierString columns distinct aliases (a derived table cannot expose
-- duplicate column names), and made the outer WHERE use those aliases instead of the
-- out-of-scope t1/t2.
-- NOTE(review): as written, the predicate selects rows whose IdentifierString is NULL/empty
-- or has NO match in the other table, i.e. the non-duplicates. The second DELETE also runs
-- against table1 after the first DELETE has changed it. Confirm the intended logic before use.
delete from table1 where id in (select t3.id from (
    select t1.id, t1.IdentifierString as OwnString, t2.IdentifierString as OtherString
    from table1 t1
    left join table2 t2 on t2.IdentifierString = t1.IdentifierString) as t3
where isnull(t3.OwnString, '') = '' or isnull(t3.OtherString, '') = '')
delete from table2 where id in (select t3.id from (
    select t1.id, t1.IdentifierString as OwnString, t2.IdentifierString as OtherString
    from table2 t1
    left join table1 t2 on t2.IdentifierString = t1.IdentifierString) as t3
where isnull(t3.OwnString, '') = '' or isnull(t3.OtherString, '') = '')
I have two tables (Temp1 & Temp2 in this example) that join together. Some IDs in Temp1 have multiple ParentIDs that link to Temp2, but I only want 1 ParentID to actually return. It doesn't matter which one, so let's just say the top value ID in Temp2.
The following code provides a good example of where I'm starting
-- Start from a clean slate so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL
    DROP TABLE #Temp1;
IF OBJECT_ID('tempdb..#Temp2') IS NOT NULL
    DROP TABLE #Temp2;

-- Children: the same child ID may appear on several rows, once per parent.
CREATE TABLE #Temp1
(
    ID       INT,
    ParentID INT,
    Name     VARCHAR(10)
);

-- Parents.
CREATE TABLE #Temp2
(
    ID   INT,
    Name VARCHAR(15)
);

INSERT INTO #Temp1 (ID, ParentID, Name)
VALUES
    (1, 1, 'Apples'),
    (2, 1, 'Bananas'),
    (3, 2, 'Milk'),
    (3, 3, 'Milk'),
    (4, 3, 'Wine'),
    (4, 4, 'Wine');

INSERT INTO #Temp2 (ID, Name)
VALUES
    (1, 'Fruit'),
    (2, 'Dairy'),
    (3, 'Beverages'),
    (4, 'Beer & Wine');

-- Plain join: returns one row per (child, parent) pair, so children with
-- several parents appear more than once.
SELECT
    child.ID    AS [ChildID],
    child.Name  AS [ChildName],
    parent.ID   AS [ParentID],
    parent.Name AS [ParentName]
FROM #Temp1 AS child
INNER JOIN #Temp2 AS parent
    ON parent.ID = child.ParentID
The desired result I'm looking for would be:
ChildID ChildName ParentID ParentName
1 Apples 1 Fruit
2 Bananas 1 Fruit
3 Milk 3 Beverages
4 Wine 4 Beer & Wine
I've seen other examples like this where people use CROSS APPLY, but I just can't seem to get it to work. Any assistance is appreciated!
Change your Final Select as below
-- Return exactly one parent per child: number each child's candidate parents and keep the first.
;WITH CTE
AS
(
SELECT
    -- Ordering by T1.Name is a no-op inside a partition by T1.ID (every row of a child
    -- shares the same name), which leaves the chosen parent arbitrary. Ordering by
    -- ParentID descending makes the pick deterministic and yields the highest parent ID,
    -- as in the desired result (Milk -> 3, Wine -> 4).
    RN = ROW_NUMBER() OVER(PARTITION BY T1.ID ORDER BY T1.ParentID DESC),
    T1.ID [ChildID]
    ,T1.Name [ChildName]
    ,T2.ID [ParentID]
    ,T2.Name [ParentName]
FROM #Temp1 T1
INNER JOIN #Temp2 T2 ON T2.ID = T1.ParentID
)
SELECT
    [ChildID],
    [ChildName],
    [ParentID],
    [ParentName]
FROM CTE
WHERE RN = 1
One way is to join the #Temp2 table to a cte that is a result of a group by query on #Temp1, that contains a single row for each combination of id and name, with the min (or max) parent id:
-- Collapse #Temp1 to one row per child (ID, Name), keeping the smallest ParentID,
-- then look that parent up in #Temp2.
;WITH CTETemp1 AS
(
    SELECT
        ID,
        MIN(ParentID) AS PID,
        Name
    FROM #Temp1
    GROUP BY
        ID,
        Name
)
SELECT
    child.ID    AS [ChildID],
    child.Name  AS [ChildName],
    parent.ID   AS [ParentID],
    parent.Name AS [ParentName]
FROM CTETemp1 AS child
INNER JOIN #Temp2 AS parent
    ON parent.ID = child.PID
BTW, this kind of relationship is called a many to many relationship and it should be implemented by adding a new table to link the ids of #temp1 and #temp2 together - so a better schema would look like this:
-- Children: one row per child.
CREATE TABLE #Temp1
(
ID INT
,Name VARCHAR(10)
)
-- Parents: one row per parent.
CREATE TABLE #Temp2
(
ID INT
,Name VARCHAR(15)
)
-- Junction table: one row per (child, parent) link.
-- Columns need explicit data types; INT matches the ID columns they refer to.
-- Primary-key columns must also be NOT NULL.
CREATE TABLE #Temp1ToTemp2
(
ID1 INT NOT NULL,
ID2 INT NOT NULL,
PRIMARY KEY(ID1, ID2)
)
-- No trailing comma after the last row: a dangling comma before the next
-- statement is a syntax error.
INSERT #Temp1 VALUES
(1, 'Apples'),
(2, 'Bananas'),
(3, 'Milk'),
(4, 'Wine')
INSERT #Temp2 VALUES
(1, 'Fruit'),
(2, 'Dairy'),
(3, 'Beverages'),
(4, 'Beer & Wine')
INSERT INTO #Temp1ToTemp2 (ID1, ID2) VALUES
(1,1), (2, 1), (3, 2), (3, 3), (4, 3), (4, 4)
The key benefit is that now you do not have duplicate data stored in your database.
Your structure breaks database normal form. You clearly have many to many relationship. I suggest that you redesign your table structure. As you have it right now, table #Temp1 does not store information about a product in each row, but instead each row presents a relation to #Temp2.
There should be only one row for each id in #Temp1. For simplicity I'm omitting primary and foreign key constraints:
-- Children: one row per child.
CREATE TABLE #Temp1
(
    ID   INT,
    Name VARCHAR(10)
);

-- Parents: one row per parent.
CREATE TABLE #Temp2
(
    ID   INT,
    Name VARCHAR(15)
);

-- Links: one row per (child, parent) relation.
CREATE TABLE #TempRelation
(
    ID_Temp1 INT,
    ID_Temp2 INT
);
Key concept is that you store each relation between temp1 and temp2 in a separate table #TempRelation.
To populate these tables you need to change your INSERT statements into:
-- One row per child.
INSERT INTO #Temp1 (ID, Name)
VALUES
    (1, 'Apples'),
    (2, 'Bananas'),
    (3, 'Milk'),
    (4, 'Wine');

-- One row per (child, parent) link.
INSERT INTO #TempRelation (ID_Temp1, ID_Temp2)
VALUES
    (1, 1),
    (2, 1),
    (3, 2),
    (3, 3),
    (4, 3),
    (4, 4);
Then your query is as simple as this:
-- One row per child, paired with its highest-numbered parent.
SELECT
    tr.id_temp1 as childid,
    t1.name as childname,
    tr.id_temp2 as parentid,
    -- No comma after the last select item: a comma directly before FROM is a syntax error.
    t2.name as parentname
FROM (
    -- Pick a single parent per child: the largest linked parent id.
    SELECT id_temp1, max(id_temp2) as id_temp2
    FROM #TempRelation tr
    GROUP BY id_temp1
) tr
JOIN #Temp1 t1 ON tr.id_temp1 = t1.id
JOIN #Temp2 t2 ON tr.id_temp2 = t2.id
I have two tables named Table1 and Table2.
Table1:
ID NAME AGE
-----------------
1 ABC 30
2 XYZ 40
3 XXX 50
4 aaa 60
Table2:
ID NAME AGE
-----------------
1 ABC 30
2 XYZ 40
3 XXX 50
I have to insert the records from Table1 into Table2, but I don't want to insert rows that already exist. Only the unmatched rows should be inserted into Table2. How can I do this?
E.g:
Table2 already has IDs 1, 2 and 3 but not 4, so only the row with ID 4 should be inserted.
Maybe
-- Copy into Table2 only the Table1 rows whose ID is not already present there.
-- Column lists are explicit so the statement does not depend on column order.
INSERT INTO Table2 (ID, NAME, AGE)
SELECT ID, NAME, AGE
FROM Table1
-- Exclude NULLs from the subquery: a single NULL ID in Table2 would otherwise make
-- NOT IN evaluate to UNKNOWN for every row, and nothing would be inserted.
WHERE ID NOT IN (SELECT ID FROM Table2 WHERE ID IS NOT NULL)
Try this
-- Copy over every Table1 row for which Table2 has no row with the same ID.
INSERT INTO Table2
SELECT src.*
FROM Table1 AS src
WHERE NOT EXISTS (
    SELECT 1
    FROM Table2 AS existing
    WHERE existing.ID = src.ID
)
You can also achieve this with a LEFT JOIN. Performance-wise, this is better than using the IN operator.
-- Anti-join: keep only the Table1 rows that find no partner in Table2.
INSERT INTO Table2 (ID, NAME, AGE)
SELECT
    src.ID,
    src.NAME,
    src.AGE
FROM Table1 AS src
LEFT JOIN Table2 AS existing
    ON existing.ID = src.ID
WHERE existing.ID IS NULL   -- no match means the ID is not in Table2 yet
Working execution with the given sample data:
-- Self-contained demo using table variables.
-- Table variables must be named with '@'; DECLARE does not accept '#' names.
DECLARE @Table1 TABLE (ID INT, NAME VARCHAR(50), AGE INT)
INSERT INTO @Table1 (ID, NAME, AGE)
VALUES
(1, 'ABC', 30),
(2, 'XYZ', 40),
(3, 'XXX', 50),
(4, 'aaa', 60)
DECLARE @Table2 TABLE (ID INT, NAME VARCHAR(50), AGE INT)
INSERT INTO @Table2 (ID, NAME, AGE)
VALUES
(1, 'ABC', 30),
(2, 'XYZ', 40),
(3, 'XXX', 50)
-- Uncomment to inspect @Table2 before the insert:
-- SELECT * FROM @Table2
-- Anti-join: insert only the @Table1 rows whose ID has no match in @Table2.
INSERT INTO @Table2 (ID, NAME, AGE)
SELECT T1.ID, T1.NAME, T1.AGE
FROM @Table1 T1
LEFT JOIN @Table2 T2 ON T2.ID = T1.ID
WHERE T2.ID IS NULL
-- Uncomment to inspect @Table2 after the insert:
-- SELECT * FROM @Table2
My question is: I have two tables: table A has two columns (KeyA and Match) and table B has two columns (KeyB and Match). I want to compare with the "Match" column.
If table A has 3 rows with a particular "Match", and table B has 2 rows, a JOIN will return me all the combinations (6 in this case). What I want it to do is match up as many as it can, and then NULL out the others.
So, it would match the first "KeyA" with the first "KeyB", the second "KeyA" with the second "KeyB", and then match up the third "KeyA" with NULL, since table B only has two rows for this "Match". The order is actually irrelevant, just as long as 2 rows match up, and then one value from table A returns with a NULL for the table B value. This is not like an INNER or an OUTER JOIN.
I hope this makes sense, it was difficult to express clearly, and was hard to find keywords to search on.
EDIT:
An INNER/OUTER join would match all the table A values with all of the table B values it could. Once a B value is "used up" I do not want it to match it with any other A values.
Example:
Table A (KeyA, Match)
(1, "a")
(2, "a")
(3, "a")
Table B (KeyB, Match)
(11, "a")
(12, "a")
Desired output (KeyA, Match, KeyB):
(1, "a", 11)
(2, "a", 12)
(3, "a", NULL)
You can use partition by to number the rows for each value of match. Then you can use full outer join to fill up rows per Match. For example:
-- Sample data held in table variables (these must be named with '@', not '#').
declare @A table (KeyA int, match int)
insert @A values (1,1), (2,1), (3,1), (4,2), (5,2), (6,2)
declare @B table (KeyB int, match int)
insert @B values (1,1), (2,1), (3,2)
-- Number the rows within each match value on both sides, then pair row n of A with
-- row n of B. Rows without a partner on the other side come back with NULLs.
select *
from (
    select row_number() over (partition by match order by KeyA) as rn
    , *
    from @A
) as A
full outer join
(
    select row_number() over (partition by match order by KeyB) as rn
    , *
    from @B
) as B
on A.match = B.match
and A.rn = B.rn
Working code at SE Data.
-- Sample data held in table variables (these must be named with '@', not '#').
declare @TableA table(ID int, Name varchar(10))
declare @TableB table(ID int, Name varchar(10))
insert into @TableA values(1, 'a'), (1, 'b'), (1, 'c')
insert into @TableB values (1, 'A'), (1, 'B')
insert into @TableA values(2, 'a'), (2, 'b')
insert into @TableB values (2, 'A'), (2, 'B'), (2, 'C')
-- Number the rows per ID on each side, then pair them by (ID, row number).
-- Unpaired rows from either side are returned with NULLs for the other side.
;with A as
(
    select *,
    row_number() over(partition by ID order by Name) as rn
    from @TableA
),
B as
(
    select *,
    row_number() over(partition by ID order by Name) as rn
    from @TableB
)
select A.ID as AID,
A.Name as AName,
B.ID as BID,
B.Name as BName
from A
full outer join B
on A.ID = B.ID and
A.rn = B.rn
Result:
AID AName BID BName
----------- ---------- ----------- ----------
1 a 1 A
1 b 1 B
1 c NULL NULL
2 a 2 A
2 b 2 B
NULL NULL 2 C
-- Pair the n-th TableA row of each Match value with the n-th TableB row of the same value.
-- NOTE(review): the table names TableA / TableB are taken from the question's
-- "Table A" / "Table B"; adjust them to the real object names.
SELECT
    ar.Match,
    COALESCE(ar.RowN, br.RowN) AS RowNumber,
    ar.KeyA,
    br.KeyB
FROM
    ( SELECT KeyA
           , Match
             -- ROW_NUMBER requires an ORDER BY in its OVER clause; ordering by the key
             -- gives a stable numbering within each Match value.
           , ROW_NUMBER() OVER(PARTITION BY Match ORDER BY KeyA) AS RowN
      FROM TableA
    ) AS ar
  LEFT JOIN --- or FULL JOIN
    ( SELECT KeyB
           , Match
           , ROW_NUMBER() OVER(PARTITION BY Match ORDER BY KeyB) AS RowN
      FROM TableB
    ) AS br
    ON  br.Match = ar.Match
    AND br.RowN = ar.RowN
I think what you are looking for is called a Cross Join, or Cartesian Product.
http://www.sqlguides.com/sql_cross_join.php
edit - Hm now actually I'm not so sure.
As far as I can understand, what you are looking for is a FULL JOIN, or also called CROSS JOIN.
Check out this link. It has good explanation of all types of joins:
http://www.w3schools.com/sql/sql_join.asp