Count how many times a word exist in column

Count how many times a word exist in column - sql-server

I have a table 1:
CREATE TABLE table1
INSERT INTO table1 values('XYZ')
INSERT INTO table1 values('ABC')
INSERT INTO table1 values('XYZ~ABC~AAA')
INSERT INTO table1 values('123')
Then, I have string 'ABC~XYZ~123'. I need to split this string into each word by using SQL:
Select VALUE FROM STRING_SPLIT('ABC~XYZ~123','~')
The return is table2
ABC
XYZ
123
I want to count how many times each word in table2 existed in table 1
The expected output is
ABC|3
XYZ|2
123|1
Any ideas on this?

If I understand your case correctly, the next statement may help:
Text and table:
DECLARE #text varchar(100) = 'ABC~XYZ~123'
CREATE TABLE Data (
Id int,
[Text] varchar(100)
)
INSERT INTO Data
(Id, [Text])
VALUES
(1, 'XYZ'),
(2, 'ABC'),
(3, 'XYZ~ABC~AAA'),
(4, '123~ABC')
Statement:
SELECT t.[value] AS [Word], j.[Count]
FROM STRING_SPLIT(#text, '~') t
LEFT JOIN (
SELECT s.[value], COUNT(*) AS [Count]
FROM Data d
CROSS APPLY STRING_SPLIT(d.[Text], '~') s
GROUP BY s.[value]
) j ON t.[value] = j.[value]
Result:
-----------
Word Count
-----------
ABC 3
XYZ 2
123 1

Apart from the suggestions as in comment you can use Count() function as below. But storing in this format will give you difficulty for the extraction as well as in join with the other tables.
Select T1Value, Count(*) as [NoCount] from(
Select table1.Value as T1Value, Value FROM STRING_SPLIT('ABC~XYZ~123','~')
inner join table1 on Value = table1.Value
)a group by T1Value
Edit
CREATE TABLE table1(
TableValue varchar(max)
);
INSERT INTO table1 (TableValue) values ( 'XYZ');
INSERT INTO table1 ( TableValue) values ( 'ABC');
INSERT INTO table1 ( TableValue) values ( 'XYZ~ABC~AAA');
INSERT INTO table1 ( TableValue) values ( '123~ABC');
SELECT b.value
,Count(*)
FROM (
SELECT VALUE
FROM STRING_SPLIT('ABC~XYZ~123', '~')
) a
INNER JOIN (
SELECT *
FROM table1
CROSS APPLY STRING_SPLIT(TableValue, '~')
) b ON a.Value = b.Value
GROUP BY b.Value
Here is the given Live Demo on db <> fiddle

Setup
create table STRINGS (ID int, STRINGS varchar(max));
insert into STRINGS (ID, STRINGS) values (1, 'XYZ');
insert into STRINGS (ID, STRINGS) values (1, 'ABC');
insert into STRINGS (ID, STRINGS) values (1, 'XYZ~ABC~AAA');
insert into STRINGS (ID, STRINGS) values (1, '123~ABC');
declare #VALUES varchar(max) = 'XYZ~ABC~123';
Calculation :
select V1.VALUE, count(STRINGS.ID)
from string_split(#VALUES,'~') V1
cross join STRINGS
outer apply string_split(STRINGS.STRINGS,'~') V2
where V2.VALUE = V1.VALUE
group by V1.VALUE
Result
-----------
Value Num
-----------
ABC 3
XYZ 2
123 1
Live exemple :
https://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=15b95efcf69ea98fafbb7dda1c624551

Related

MSSQL: How to remove duplicate column values from different tables?

How to remove duplicate column values from different tables?
Given the following tables without foreign key reference:
Table A
Id IdentifierString
-------------------
1 String A
2 String B
3 String C
Table B
Id IdentifierString
-------------------
1 String A
2 String C
3 String D
I want to remove all duplicate column values:
Table A
Id IdentifierString
-------------------
1 NULL
2 String B
3 NULL
Table B
Id IdentifierString
-------------------
1 NULL
2 NULL
3 String D
How is this doable?

This could be done using UNION ALL, HAVING(COUNT ) and UPDATE statement as below:
CREATE TABLE Table1 (Id INT, IdentifierString VARCHAR (20));
INSERT INTO Table1 (Id, IdentifierString) VALUES
(1, 'String A'),
(2, 'String B'),
(3, 'String C');
CREATE TABLE Table2 (Id INT, IdentifierString VARCHAR (20));
INSERT INTO Table2 (Id, IdentifierString) VALUES
(1, 'String A'),
(2, 'String C'),
(3, 'String D');
DECLARE #DuplicateEntries TABLE (IdentifierString VARCHAR(20));
INSERT INTO #DuplicateEntries (IdentifierString)
SELECT IdentifierString FROM (
SELECT IdentifierString FROM Table1
UNION ALL
SELECT IdentifierString FROM Table2
) A
GROUP BY IdentifierString
HAVING COUNT(IdentifierString) > 1;
UPDATE T1
SET T1.IdentifierString = NULL
FROM Table1 T1
JOIN #DuplicateEntries D ON D.IdentifierString = T1.IdentifierString;
UPDATE T2
SET T2.IdentifierString = NULL
FROM Table2 T2
JOIN #DuplicateEntries D ON D.IdentifierString = T2.IdentifierString;
SELECT * FROM Table1;
SELECT * FROM Table2;
Please find the working demo on db<>fiddle

Here's your dml script to remove duplicates.
delete from table1 where id in (select id from (
select id, t1.IdentifierString, t2.IdentifierStringfrom table1 t1
left join table2 t2 on t2.IdentifierString= t1.IdentifierString) as t3
where isnull(t1.IdentifierString, '') = '' or isnull(t2.IdentifierString, '') = '')
delete from table2 where id in (select id from (
select id, t1.IdentifierString, t2.IdentifierStringfrom table2 t1
left join table1 t2 on t2.IdentifierString= t1.IdentifierString) as t3
where isnull(t1.IdentifierString, '') = '' or isnull(t2.IdentifierString, '') = '')

SQL Server: How to select missing rows in table from another table?

I have two tables like below:
table1:
StoreId SKU
------------
1 abc
2 abc
3 abc
1 xyz
4 xyz
table2:
StoreId
--------
1
2
3
4
5
I want to select missing storeid from the table1 which are in table 2. But condition is that in above example for SKU abc storeid 4 and 5 are missing and for sku xyz 2,3,5 are missing. So I want below table as output
SKU,ID
------
abc 4
abc 5
xyz 2
xyz 3
xyz 5
I am able to pull only the overall missing store which is 5 using below query.
SELECT
SKU, t2.StoreId
FROM
#table1 t1
FULL OUTER JOIN
#table2 t2 ON t1.StoreId = t2.StoreId
WHERE
t1.StoreId IS NULL
Below is test create and insert query.
Declare #table1 As table
(
StoreId varchar(4),
SKU varchar(5)
)
Declare #table2 As table
(
StoreId int
)
BEGIN
Insert Into #table1(SKU,StoreId) values('abc',1)
Insert Into #table1(SKU,StoreId) values('abc',2)
Insert Into #table1(SKU,StoreId) values('abc',3)
Insert Into #table1(SKU,StoreId) values('xyz',1)
Insert Into #table1(SKU,StoreId) values('xyz',4)
Insert Into #table2(StoreId) values(1)
Insert Into #table2(StoreId) values(2)
Insert Into #table2(StoreId) values(3)
Insert Into #table2(StoreId) values(4)
Insert Into #table2(StoreId) values(5)
END
Thank you

You need to get a list of all skus and tables, and then show only rows which do not appear in table1:
select SKU, StoreID
from #table2 t2
cross join (select distinct sku from #table1) t1
where not exists (select 1 from #table1 table1
where table1.SKU = t1.SKU
and table1.StoreId = t2.StoreId)

Here is an alternative solution with the same result.
Syntax is very similar to the answer from #BeanFrog:
SELECT
t3.SKU, t2.StoreID
FROM
#table2 t2
CROSS JOIN
(SELECT distinct SKU
FROM #table1) t3
LEFT JOIN
#table1 t1
ON
t1.SKU = t3.SKU
and t1.StoreId = t2.StoreId
WHERE
t1.sku is null

SQL Server: How to select top rows of a group based on value of the column of that group?

I have two tables like below.
table 1
id rem
1 2
2 1
table 2
id value
1 abc
1 xyz
1 mno
2 mnk
2 mjd
EDIT:
#output
id value
1 abc
1 xyz
2 mnk
What i want to do is select top 2 rows of table2 with id one as rem value is 2 for id 1 and top 1 row with id 2 as its rem value is 1 and so on. I am using MS sqlserver 2012 My whole scenario is more complex than this. Please help.
Thank you.
EDIT : I know that i should have given what i have done and how i am doing it but for this particular part i don't have idea for starting. I could do this by using while loop for each unique id but i want to do it in one go if possible.

First, SQL tables represent unordered sets. There is no specification of which values you get, unless you include an order by.
For this purpose, I would go with row_number():
select t2.*
from table1 t1 join
(select t2.*,
row_number() over (partition by id order by id) as seqnum
from table2 t2
) t2
on t1.id = t2.id and t2.seqnum <= t1.rem;
Note: The order by id in the windows clause should be based on which rows you want. If you don't care which rows, then order by id or order by (select null) is fine.

Try This:
DECLARE #tbl1 TABLE (id INT, rem INT)
INSERT INTO #tbl1 VALUES (1, 2), (2, 1)
DECLARE #tbl2 TABLE (id INT, value VARCHAR(10))
INSERT INTO #tbl2 VALUES (1, 'abc'), (1, 'xyz'),
(1, 'mno'), (2, 'mnk'), (2, 'mjd')
SELECT * FROM #tbl1 -- your table 1
SELECT * FROM #tbl2 -- your table 2
SELECT id,value,rem FROM ( SELECT ROW_NUMBER() OVER (PARTITION BY T.ID ORDER BY T.ID) rowid,
T.id,T.value,F.rem FROM #tbl2 T LEFT JOIN #tbl1 F ON T.id = F.id ) A WHERE rowid = 1
-- your required output
Hope it helps.

Column based intersect on two tables

I'm trying to do something similar to a column based intersect on two tables.
The tables are:
LogTag: a log can have zero or more tags
MatchingRule: a matching rule consists of one or more tags that define the rule
A log can have zero or more rules matched to it. I will be passing in a MatchingRuleID and expecting to return all logs that match that rule.
Expected Result: A result set of matching LogIDs. Eg. passing in MatchingRuleID = 30 should return LogID 101. MatchingRuleID = 31 should return LogID 101 & 100.
Also, the LogTag table could have millions of rows so an efficient query is preferred.
The question: How to find all LogIDs that match with a specified rule definition?
Schema:
CREATE TABLE dbo.Tag
(
TagID INT,
TagName NVARCHAR(50)
)
INSERT INTO dbo.Tag (TagID, TagName)
VALUES (1, 'tag1'), (2, 'tag2'), (3, 'tag3')
CREATE TABLE dbo.LogTag
(
LogID INT,
TagID INT
)
INSERT INTO dbo.LogTag (LogID, TagID)
VALUES (100, 1), (101, 1), (101, 2), (101, 3), (101, 4), (102, 2), (102, 3)
CREATE TABLE dbo.MatchingRule
(
MatchingRuleID INT,
TagID INT
)
INSERT INTO dbo.MatchingRule (MatchingRuleID, TagID)
VALUES (30, 1), (30, 2), (30, 3), (31, 1)

Important to have the proper clustered index on the tables. I've put an alternative index in comments for #log_tag which might improve performance for large sets. Since I do not have the proper sample to test on, you will have to verify which is best.
CREATE TABLE #tag(tag_id INT PRIMARY KEY,tag_name NVARCHAR(50));
INSERT INTO #tag (tag_id,tag_name)VALUES
(1,'tag1'),(2,'tag2'),(3,'tag3');
-- Try this key for large sets: PRIMARY KEY(tag_id,log_id));
CREATE TABLE #log_tag(log_id INT,tag_id INT,PRIMARY KEY(log_id,tag_id))
INSERT INTO #log_tag (log_id,tag_id)VALUES
(100,1),(101,1),(101,2),(101,3),(101,4),(102,2),(102,3);
CREATE TABLE #matching_rule(matching_rule_id INT,tag_id INT,PRIMARY KEY(matching_rule_id,tag_id));
INSERT INTO #matching_rule(matching_rule_id,tag_id)VALUES
(30,1),(30,2),(30,3),(31,1);
DECLARE #matching_rule_id INT=31;
;WITH required_tags AS (
SELECT tag_id
FROM #matching_rule
WHERE matching_rule_id=#matching_rule_id
)
SELECT lt.log_id
FROM required_tags AS rt
INNER JOIN #log_tag AS lt ON
lt.tag_id=rt.tag_id
GROUP BY lt.log_id
HAVING COUNT(*)=(SELECT COUNT(*) FROM required_tags);
DROP TABLE #log_tag;
DROP TABLE #matching_rule;
DROP TABLE #tag;
The results are the ones in your Expected Result for both 30 & 31.
Execution plan for the index used in the script:

Try this query
Fiddle Here
DECLARE #InputMatchingRuleId INT = 30
;WITH CTE1
AS
(
SELECT DENSE_RANK() OVER(ORDER BY LT.TAGID) AS RN,LT.TagID,LT.LOGID
FROM MatchingRule MR INNER JOIN LogTag LT ON LT.TagID = MR.TagID
WHERE MatchingRuleID=#InputMatchingRuleId
),
CTE2
AS
(
SELECT 1 AS RN2,LOGID FROM CTE1 C1 WHERE C1.RN=1
UNION ALL
SELECT RN2+1 as RN2,C2.LOGID
FROM CTE1 C1 INNER JOIN CTE2 C2 ON C1.RN = C2.RN2+1 AND C1.LOGID = C2.LOGID
)
SELECT DISTINCT LOGID FROM CTE2
WHERE RN2>(CASE WHEN (SELECT MAX(RN2) FROM CTE2)=1 THEN 0 ELSE 1 END)

NOTE: This will only work with SQL Server 2008+
Here's the query I came up with:
DECLARE #RuleID INT
SELECT #RuleID = 30
SELECT LogID
FROM LogTag lt
INNER JOIN (
SELECT TagID, MatchingRuleID, COUNT(*) OVER (PARTITION BY MatchingRuleID) TagCount
FROM MatchingRule
) mr
ON lt.TagID = mr.TagID
AND mr.MatchingRuleID = #RuleID
GROUP BY LogID, TagCount
HAVING COUNT(*) = TagCount
So basically I match all TagID's within the specified matching rule and then once I know that all tags match I check to see if the count of tags from the MatchingRule table matches the (now filtered and grouped) count of tags from the LogTag table.

should be
; with rules as
(
select TagID, cnt = sum(count(*)) over()
from dbo.MatchingRule
where MatchingRuleID = #MatchingRuleID
group by TagID
)
select LogID
from rules r
inner join LogTag lt on r.TagID = lt.TagID
group by LogID, cnt
having count(*) = r.cnt

select l.LogID
from dbo.MatchingRule r
inner join dbo.LogTag l on l.TagID = r.TagID
where r.MatchingRuleID = 31
another approach is to identify all tags and then:
select l.LogID
from dbo.LogTag l
where exists(select 1 from #Tags t where t.TagID = l.TagID)

Can this be done with something like a JOIN?

My question is: I have two tables: table A has two columns (KeyA and Match) and table B has two columns (KeyB and Match). I want to compare with the "Match" column.
If table A has 3 rows with a particular "Match", and table B has 2 rows, a JOIN will return me all the combinations (6 in this case). What I want it to do is match up as many as it can, and then NULL out the others.
So, it would match the first "KeyA" with the first "KeyB", the second "KeyA" with the second "KeyB", and then match up the third "KeyA" with NULL, since table B only has two rows for this "Match". The order is actually irrelevant, just as long as 2 rows match up, and then one value from table A returns with a NULL for the table B value. This is not like an INNER or an OUTER JOIN.
I hope this makes sense, it was difficult to express clearly, and was hard to find keywords to search on.
EDIT:
An INNER/OUTER join would match all the table A values with all of the table B values it could. Once a B value is "used up" I do not want it to match it with any other A values.
Example:
Table A (KeyA, Match)
(1, "a")
(2, "a")
(3, "a")
Table B (KeyB, Match)
(11, "a")
(12, "a")
Desired output (KeyA, Match, KeyB):
(1, "a", 11)
(2, "a", 12)
(3, "a", NULL)

You can use partition by to number the rows for each value of match. Then you can use full outer join to fill up rows per Match. For example:
declare #A table (KeyA int, match int)
insert #A values (1,1), (2,1), (3,1), (4,2), (5,2), (6,2)
declare #B table (KeyB int, match int)
insert #B values (1,1), (2,1), (3,2)
select *
from (
select row_number() over (partition by match order by KeyA) as rn
, *
from #A
) as A
full outer join
(
select row_number() over (partition by match order by KeyB) as rn
, *
from #B
) as B
on A.match = B.match
and A.rn = B.rn
Working code at SE Data.

declare #TableA table(ID int, Name varchar(10))
declare #TableB table(ID int, Name varchar(10))
insert into #TableA values(1, 'a'), (1, 'b'), (1, 'c')
insert into #TableB values (1, 'A'), (1, 'B')
insert into #TableA values(2, 'a'), (2, 'b')
insert into #TableB values (2, 'A'), (2, 'B'), (2, 'C')
;with A as
(
select *,
row_number() over(partition by ID order by Name) as rn
from #TableA
),
B as
(
select *,
row_number() over(partition by ID order by Name) as rn
from #TableB
)
select A.ID as AID,
A.Name as AName,
B.ID as BID,
B.Name as BName
from A
full outer join B
on A.ID = B.ID and
A.rn = B.rn
Result:
AID AName BID BName
----------- ---------- ----------- ----------
1 a 1 A
1 b 1 B
1 c NULL NULL
2 a 2 A
2 b 2 B
NULL NULL 2 C

SELECT
ar.Match
COALESCE(ar.RowN, br.RowN) AS RowNumber
ar.KeyA
br.KeyB
FROM
( SELECT KeyA
, Match
, ROW_NUMBER() OVER(PARTITION BY Match) AS RowN
) AS ar
LEFT JOIN --- or FULL JOIN
( SELECT KeyB
, Match
, ROW_NUMBER() OVER(PARTITION BY Match) AS RowN
) AS br
ON br.Match = ar.Match
AND br.RowN = ar.RowN

I think what you are looking for is called a Cross Join, or Cartesian Product.
http://www.sqlguides.com/sql_cross_join.php
edit - Hm now actually I'm not so sure.

As far as I can understand, what you are looking for is a FULL JOIN, or also called CROSS JOIN.
Check out this link. It has good explanation of all types of joins:
http://www.w3schools.com/sql/sql_join.asp