I'm looking for a join expression that matches strings from two different tables when both contain the same substring of 4 consecutive characters.
For example, the following should match:
String1 String2
-------- -----------
xxjohnyy abcjohnabc [common substring: "john"]
xxjohnyy johnny [common substring: "john"]
birdsings ravenbird [common substring: "bird"]
singbird a singer [common substring: "sing"]
This problem is very similar to the Longest Common Substring problem: you find the longest common substring of each pair and then keep the pairs whose common substring is at least 4 characters long. You will definitely find this link and this link helpful.
This is a very good exercise. Here is my attempt using Tally Table.
SQL Fiddle
-- For every row of TestTable, report one 4-character substring that occurs
-- in both String1 and String2. Rows with no shared substring are not returned.
;WITH E1(N) AS (
    -- 10 seed rows
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b), -- 10^2 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b), -- 10^4 rows
E8(N) AS (SELECT 1 FROM E4 a CROSS JOIN E4 b), -- 10^8 rows
-- Numbers 1..(longest string length found in TestTable).
Tally(N) AS (
    SELECT TOP (
        -- ISNULL keeps the TOP value non-NULL when TestTable is empty or String2 is entirely NULL.
        SELECT ISNULL(
            CASE
                WHEN MAX(LEN(String1)) > MAX(LEN(String2)) THEN MAX(LEN(String1))
                ELSE MAX(LEN(String2))
            END, 0)
        FROM TestTable
    )
    ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM E8
),
CteTable AS ( -- Added an ID to uniquely identify each row
    SELECT *, Id = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM TestTable
),
-- One row per starting position in String1 whose 4-character slice also
-- appears as a 4-character slice of String2 on the SAME source row.
-- Both strings are read from a single reference to CteTable: the Id comes from
-- ROW_NUMBER() with no real ordering, so matching two separate references of
-- CteTable on Id is not guaranteed to pair a row with itself.
CteCommon AS (
    SELECT
        ct.*,
        substr = SUBSTRING(ct.String1, t1.N, 4)
    FROM CteTable ct
    CROSS APPLY (
        SELECT N FROM Tally
        WHERE N <= LEN(ct.String1) - 3 -- last position that still yields 4 characters
    ) t1
    WHERE EXISTS (
        SELECT 1
        FROM Tally t2
        WHERE t2.N <= LEN(ct.String2) - 3
          AND SUBSTRING(ct.String2, t2.N, 4) = SUBSTRING(ct.String1, t1.N, 4)
    )
)
-- Keep a single common substring per source row. Candidates are all
-- 4-character slices, so when several qualify the one kept is arbitrary.
SELECT
    String1, String2, substr
FROM (
    SELECT *, RN = ROW_NUMBER() OVER (PARTITION BY Id ORDER BY LEN(substr) DESC)
    FROM CteCommon
) t
WHERE RN = 1
Result
| String1 | String2 | substr |
|-----------|------------|--------|
| xxjohnyy | abcjohnabc | john |
| xxjohnyy | johnny | john |
| birdsings | ravenbird | bird |
| singbird | a singer | sing |
This part looks for the longest common substring.
-- Fragment: expects the CteCommon CTE to be defined in the same statement.
-- Ranks each Id's candidates by substring length (longest first) and keeps the top one.
SELECT
    ranked.String1,
    ranked.String2,
    ranked.substr
FROM (
    SELECT
        *,
        RN = ROW_NUMBER() OVER (PARTITION BY Id ORDER BY LEN(substr) DESC)
    FROM CteCommon
) AS ranked
WHERE ranked.RN = 1
To get all the common substrings, use this instead:
-- Fragment: expects the CteCommon CTE to be defined in the same statement.
-- Returns every CteCommon row instead of one row per Id.
SELECT * FROM CteCommon
-- Lists every 4-character slice of s2 that also occurs somewhere in s1,
-- one output row per (source row, starting position) that matches.
;with pos as (
    -- Positions 1..100. A bound above 101 exceeds the default recursion
    -- limit and needs OPTION (MAXRECURSION n) on the outer statement.
    select 1 as p
    union all
    select p + 1 from pos where p < 100
)
select src.s1, src.s2, ca.s
from t as src
-- The slice is taken from the current row directly. Joining the table to
-- itself on a ROW_NUMBER() id that has no real ordering is unnecessary and
-- is not guaranteed to pair a row with itself.
cross apply (
    select substring(src.s2, pos.p, 4) as s
    from pos
    where len(substring(src.s2, pos.p, 4)) = 4 -- skip short or space-terminated tail slices
      -- CHARINDEX does a literal search; LIKE would treat %, _ and [ found
      -- in the data as wildcards.
      and charindex(substring(src.s2, pos.p, 4), src.s1) > 0
) ca
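Fiddle: http://sqlfiddle.com/#!3/bd4dd/16
Just change 100 to the actual maximum length of your columns. Note that a recursive CTE is limited to 100 recursion levels by default, so for a bound above 101 you also need to add OPTION (MAXRECURSION n) to the query.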
Related
My expected result is quite difficult to explain, so I have shown sample data here.
SourceTable: (I have alphabets in HeadNo column)
HeadNo | Start | End
---------+-----------+----------
AA | AA0000 | AA9999
AB | AB0000 | AB9999
AC | AC0000 | AC9999
AD | AD0000 | AD9999
--------------------
--------------------
------- so on ------
ZZ | ZZ0000 | ZZ9999
From this source table, I want to create a loop-like result where each HeadNo returns 10000 rows, numbered from 0000 to 9999.
Result should look like:
HeadNo | Actual Code
---------+---------------
AA | AA0000
AA | AB0001
AA | AC0002
AA | AD0003
--------------------
--------------------
------- so on ------
AA | AA9998
AA | AA9999
like wise for each HeadNo
ZZ | ZZ0000
ZZ | ZZ0001
ZZ | ZZ0002
ZZ | ZZ0003
--------------------
--------------------
------- so on ------
ZZ | ZZ9999
I want to merge and insert into one separate single table.
IF every row requires the values 0-9999 then you simply need to CROSS JOIN to a tally table:
-- Ten placeholder rows used as the building block for the number sequence.
WITH Ten AS (
    SELECT V.N
    FROM (VALUES (NULL), (NULL), (NULL), (NULL), (NULL),
                 (NULL), (NULL), (NULL), (NULL), (NULL)) AS V(N)
),
-- 10 x 10 x 10 x 10 rows, numbered from zero: I = 0 .. 9999.
Tally AS (
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS I
    FROM Ten AS d1
    CROSS JOIN Ten AS d2
    CROSS JOIN Ten AS d3
    CROSS JOIN Ten AS d4
)
-- Every HeadNo paired with every number, zero-padded to four digits.
SELECT
    src.HeadNo,
    src.HeadNo + RIGHT('0000' + CAST(seq.I AS varchar(4)), 4) AS ActualCode
FROM YourTable AS src
CROSS JOIN Tally AS seq;
If, however, you have actual start and end ranges per HeadNo (like the example below), you'll need to use a little more logic in the JOIN:
-- Inline sample data: one row per HeadNo with its first and last code.
WITH VTE AS (
    SELECT V.HeadNo, V.HeadStart, V.HeadEnd
    FROM (VALUES ('AA', 'AA0000', 'AA9999'),
                 ('AB', 'AB0000', 'AB5000'), --Guessing this is more realistic
                 ('AC', 'AC1000', 'AC8000'),
                 ('AD', 'AD0000', 'AD0100'),
                 ('ZZ', 'ZZ0000', 'ZZ9999')) AS V(HeadNo, HeadStart, HeadEnd)
),
-- Ten placeholder rows used as the building block for the number sequence.
Ten AS (
    SELECT D.N
    FROM (VALUES (NULL), (NULL), (NULL), (NULL), (NULL),
                 (NULL), (NULL), (NULL), (NULL), (NULL)) AS D(N)
),
-- 10 x 10 x 10 x 10 rows, numbered from zero: I = 0 .. 9999.
Tally AS (
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS I
    FROM Ten AS d1
    CROSS JOIN Ten AS d2
    CROSS JOIN Ten AS d3
    CROSS JOIN Ten AS d4
)
SELECT
    V.HeadNo,
    V.HeadNo + RIGHT('0000' + CAST(T.I AS varchar(4)), 4) AS ActualCode
FROM VTE AS V
INNER JOIN Tally AS T
    -- STUFF(..., 1, 2, '') drops the two leading characters, leaving the numeric part.
    ON T.I BETWEEN STUFF(V.HeadStart, 1, 2, '') AND STUFF(V.HeadEnd, 1, 2, '')
ORDER BY
    V.HeadNo,
    ActualCode;
The second example assumes that the Start and End values will always have the format AA0000 (a two-character HeadNo followed by four digits); if they don't, then we're missing important information that should be included in your question.
Try the code below. I used a recursive CTE to obtain the numbers from 0 to 9999 and then cross joined it to your HeadNo column:
-- Recursive CTE yielding the integers 0 through 9999.
;with seq (n) as (
    select 0
    union all
    select seq.n + 1
    from seq
    where seq.n < 9999
)
-- Pair every HeadNo with every n, zero-padded to four digits.
select
    mt.HeadNo,
    mt.HeadNo + right('0000' + convert(varchar(4), seq.n), 4)
from MyTable as mt
cross join seq
option (maxrecursion 0) -- 0 removes the recursion-depth cap; 9999 levels are needed here
I need to generate combinations from the string of numbers
3,4,5,6,7 digit combinations
for example from this string
01;05;06;03;02;10;11;
There are 7 numbers here. For 3-number combinations there will be 35 results, and each combination should keep the numbers in the order in which they appear in the string,
like
01;05;06;|
01;05;03;|
01;05;02;|
01;05;10;|
01;05;11;|
01;06;03;|
01;06;02;|
01;06;10;|
01;06;11;|
01;03;02;|
01;03;10;|
01;03;11;|
01;02;10;|
01;02;11;|
01;10;11;|
05;06;03;|
05;06;02;|
05;06;10;|
05;06;11;|
05;03;02;|
05;03;10;|
05;03;11;|
05;02;10;|
05;02;11;|
05;10;11;|
06;03;02;|
06;03;10;|
06;03;11;|
06;02;10;|
06;02;11;|
06;10;11;|
03;02;10;|
03;02;11;|
03;10;11;|
02;10;11;|
You can do this with two inner joins after splitting the string.
rextester: http://rextester.com/JJGKI77804
String Splitter for the test:
/* Jeff Moden's http://www.sqlservercentral.com/articles/Tally+Table/72993/ */
-- Inline table-valued splitter for nvarchar input of up to 4000 characters.
--   @pString    : the delimited string to split
--   @pDelimiter : single-character delimiter
-- Returns one row per element:
--   ItemNumber : 1-based position of the element within @pString
--   Item       : the element text (empty string for adjacent or trailing delimiters)
create function dbo.DelimitedSplitN4K (@pString nvarchar(4000), @pDelimiter nchar(1))
returns table with schemabinding as
return
  with e1(n) as (
    select 1 union all select 1 union all select 1 union all
    select 1 union all select 1 union all select 1 union all
    select 1 union all select 1 union all select 1 union all select 1
  )
  , e2(n) as (select 1 from e1 a cross join e1 b) -- 100 rows
  , e4(n) as (select 1 from e2 a cross join e2 b) -- 10,000 rows
  -- One number per character; datalength is in bytes, nvarchar uses 2 bytes per character.
  , cteTally(n) as (select top (isnull(datalength(@pString)/2,0))
      row_number() over (order by (select null)) from e4)
  -- Start position of each element: position 1, plus the position after every delimiter.
  , cteStart(n1) as (select 1 union all
      select t.n+1 from cteTally t where substring(@pString,t.n,1) = @pDelimiter)
  -- Element length: distance to the next delimiter, or "rest of string" (4000) for the last element.
  , ctelen(n1,l1) as(select s.n1
      , isnull(nullif(charindex(@pDelimiter,@pString,s.n1),0)-s.n1,4000)
    from cteStart s
  )
  select Itemnumber = row_number() over(order by l.n1)
    , Item = substring(@pString, l.n1, l.l1)
  from ctelen l;
go
the query
-- Emits every 3-element combination of the ';'-separated values in @str,
-- keeping the elements in the order in which they appear in the string.
declare @str nvarchar(4000) = '01;05;06;03;02;10;11;';

with cte as (
  -- Split the list; the trailing delimiter yields an empty item, which is dropped.
  select ItemNumber, Item
  from dbo.DelimitedSplitN4K(@str, ';')
  where Item != ''
)
select combo = a.Item + ';' + b.Item + ';' + c.Item
from cte as a
-- Strictly increasing positions (a < b < c) produce each combination exactly once.
inner join cte as b on a.ItemNumber < b.ItemNumber
inner join cte as c on b.ItemNumber < c.ItemNumber
order by a.ItemNumber, b.ItemNumber, c.ItemNumber;
ordered by ItemNumber results:
01;05;06
01;05;03
01;05;02
01;05;10
01;05;11
01;06;03
01;06;02
01;06;10
01;06;11
01;03;02
01;03;10
01;03;11
01;02;10
01;02;11
01;10;11
05;06;03
05;06;02
05;06;10
05;06;11
05;03;02
05;03;10
05;03;11
05;02;10
05;02;11
05;10;11
06;03;02
06;03;10
06;03;11
06;02;10
06;02;11
06;10;11
03;02;10
03;02;11
03;10;11
02;10;11
If you want to return a single string, pipe delimited then:
-- Same combinations as one pipe-delimited string.
-- Expects @str to be declared earlier in the same batch.
with cte as (
  -- Split the list and drop empty items.
  select ItemNumber, Item
  from dbo.DelimitedSplitN4K(@str, ';')
  where Item != ''
)
select combo = stuff(
    (select '|' + a.Item + ';' + b.Item + ';' + c.Item
     from cte as a
     inner join cte as b on a.ItemNumber < b.ItemNumber
     inner join cte as c on b.ItemNumber < c.ItemNumber
     order by a.ItemNumber, b.ItemNumber, c.ItemNumber
     -- FOR XML PATH('') concatenates the rows; TYPE + .value() returns plain text.
     for xml path (''), type).value('.', 'nvarchar(max)')
  , 1, 1, ''); -- strip the leading '|'
results:
01;05;06|01;05;03|01;05;02|01;05;10|01;05;11|01;06;03|01;06;02|01;06;10|01;06;11|01;03;02|01;03;10|01;03;11|01;02;10|01;02;11|01;10;11|05;06;03|05;06;02|05;06;10|05;06;11|05;03;02|05;03;10|05;03;11|05;02;10|05;02;11|05;10;11|06;03;02|06;03;10|06;03;11|06;02;10|06;02;11|06;10;11|03;02;10|03;02;11|03;10;11|02;10;11
splitting strings reference:
Tally OH! An Improved SQL 8K “CSV Splitter” Function
Splitting Strings : A Follow-Up - Aaron Bertrand
Split strings the right way – or the next best way
I had nearly the same query but resulted somehow different
Please check
/*
create table Combination (id char(2))
insert into Combination values ('01'),('05'),('06'),('03'),('02'),('10'),('11')
*/
-- Every triple of ids with first.id < middle.id < last.id.
-- The comparison is on the id values themselves, not on insertion order.
select
    first.id,
    middle.id,
    last.id,
    first.id + ';' + middle.id + ';' + last.id Combination
from Combination as first
inner join Combination as middle
    on middle.id > first.id
inner join Combination as last
    on last.id > middle.id
order by
    first.id,
    middle.id,
    last.id
The output is
I have a database table like this
A || B || C
------------------------------------------
1 ABC 10
1 XYZ 5
2 EFG 100
2 LMN 150
2 WER 50
3 ABC 50
3 XYZ 75
Now i want to have a result set like this,where i want to have the max value of column C for each value in column A
A || B || C
-----------------------------------------
1 ABC 10
2 LMN 150
3 XYZ 75
I have tried using distinct and max() but it did not work. like this
-- Groups by both A and B, so MAX(C) is evaluated per (A, B) pair rather than per A.
select distinct #table.A,#table.B,MAX(#table.C) from #table group by #table.A,#table.B
Is there a simple way to achieve this?
Using MAX() as a window function:
-- Keeps every row whose C equals the largest C within its A group;
-- rows tied on that maximum are all returned.
SELECT
    ranked.A,
    ranked.B,
    ranked.C
FROM (
    SELECT
        src.A,
        src.B,
        src.C,
        MAX(src.C) OVER (PARTITION BY src.A) AS max_C
    FROM yourTable AS src
) AS ranked
WHERE ranked.max_C = ranked.C
If you want to retrieve only a single max record for each group of A values, then you should use the method suggested by @GurV, which is the row number:
-- Returns exactly one row per A: the row with the largest C,
-- with ties on C broken by the largest B.
SELECT t.A, t.B, t.C
FROM
(
    SELECT A, B, C,
           -- C must sort DESC: a bare "ORDER BY C" is ascending and would
           -- rank the smallest C first.
           ROW_NUMBER() OVER (PARTITION BY A ORDER BY C DESC, B DESC) row_num
    FROM yourTable
) t
WHERE t.row_num = 1
Note carefully the ORDER BY inside the call to ROW_NUMBER(): C must be sorted descending (ORDER BY C DESC, B DESC) so that the max C records are placed at the top of each partition; rows tied on C are then ordered descending by B. Only one row per partition will be retained though.
If you order by both C and B the combination of both may or may not give you the highest value of Column C. So I feel the below query should work for your specific requirement.
-- Returns one row per A: the row with the largest C.
-- When several rows share that maximum, which one is kept is not defined.
-- The derived table is aliased "ranked" because TABLE is a reserved keyword
-- and cannot be used as an unquoted alias.
SELECT ranked.A, ranked.B, ranked.C
FROM
(
    SELECT A, B, C, ROW_NUMBER() OVER (PARTITION BY A ORDER BY C DESC) row_num
    FROM yourTable
) ranked
WHERE ranked.row_num = 1
You can use window function to do this:
-- One row per A, the one with the largest C. The rn column is part of the output.
select *
from (
    select
        src.*,
        row_number() over (partition by A order by C desc) rn
    from your_table src
) ranked
where rn = 1;
If those aren't supported, use JOIN:
-- Computes the largest C per A, then joins back to fetch the full rows
-- carrying that value. Rows tied on the maximum are all returned.
select src.*
from your_table as src
inner join (
    select A, max(C) as C
    from your_table
    group by A
) as top_c
    on top_c.A = src.A
   and top_c.C = src.C;
Just another way, with a simple JOIN and GROUP BY
Schema:
-- Builds the sample data as temp table #TAB1 with columns A, B, C.
SELECT *
INTO #TAB1
FROM (
    SELECT 1 AS A, 'ABC' AS B, 10 AS C
    UNION ALL SELECT 1, 'XYZ', 5
    UNION ALL SELECT 2, 'EFG', 100
    UNION ALL SELECT 2, 'LMN', 150
    UNION ALL SELECT 2, 'WER', 50
    UNION ALL SELECT 3, 'ABC', 50
    UNION ALL SELECT 3, 'XYZ', 75
) AS sample_rows
Do join to sub query
-- Largest C per A (as MC), joined back to #TAB1 to pick up the B of the
-- row or rows that hold that value.
SELECT
    peak.A,
    detail.B,
    peak.MC
FROM #TAB1 AS detail
INNER JOIN (
    SELECT A, MAX(C) AS MC
    FROM #TAB1
    GROUP BY A
) AS peak
    ON peak.A = detail.A
   AND peak.MC = detail.C
And the result will be
+---+-----+-----+
| A | B | MC |
+---+-----+-----+
| 1 | ABC | 10 |
| 2 | LMN | 150 |
| 3 | XYZ | 75 |
+---+-----+-----+
I'd like to know how can I replace multiple text values from a string in SQL?
I have a formula that I get from a table but inside that formula there are some text values with apostrophes that I need to replace for numeric values from another table, example:
Table_Values
ID| DESC |VALUE
01 | ABC | 5
02 | DEF | 10
03 | GHI | 15
TABLE_FORMULA
ID | FORMULA
01 | X='ABC'+'DEF'+'GHI'
The basic idea is to get the same formula with a result like this:
X='5'+'10'+'15'
Any idea or example would be great. Thanks.
I don't know why your data is stored like that but here is my attempt to solve your problem.
First, you need a Pattern Splitter to parse your FORMULA. Here is one taken from Dwain Camp's article.
-- PatternSplitCM will split a string based on a pattern of the form
-- supported by LIKE and PATINDEX
--
-- Created by: Chris Morris 12-Oct-2012
--
-- Parameters:
--   @List    : the string to split (NULL yields no rows)
--   @Pattern : a LIKE pattern tested against each single character
-- Returns one row per run of consecutive characters that share the same
-- match result:
--   ItemNumber : 1-based position of the run within @List
--   Item       : the text of the run
--   Matched    : 1 if the run's characters match @Pattern, otherwise 0
CREATE FUNCTION [dbo].[PatternSplitCM]
(
     @List VARCHAR(8000) = NULL
    ,@Pattern VARCHAR(50)
) RETURNS TABLE WITH SCHEMABINDING
AS
RETURN
    -- One number per character of @List (10^4 candidate rows, capped by TOP).
    WITH numbers AS (
        SELECT TOP(ISNULL(DATALENGTH(@List), 0))
            n = ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
        FROM
            (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d (n)
            CROSS JOIN (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) e (n)
            CROSS JOIN (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) f (n)
            CROSS JOIN (VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) g (n)
    )
    SELECT
        ItemNumber = ROW_NUMBER() OVER(ORDER BY MIN(n)),
        Item = SUBSTRING(@List,MIN(n),1+MAX(n)-MIN(n)),
        [Matched]
    FROM (
        -- Grouper is constant across a run of adjacent positions with the same
        -- Matched value, so (Matched, Grouper) identifies each run.
        SELECT n, y.[Matched], Grouper = n - ROW_NUMBER() OVER(ORDER BY y.[Matched],n)
        FROM numbers
        CROSS APPLY (
            SELECT [Matched] = CASE WHEN SUBSTRING(@List,n,1) LIKE @Pattern THEN 1 ELSE 0 END
        ) y
    ) d
    GROUP BY [Matched], Grouper
Here is your final query. This uses a combination of string functions like CHARINDEX, LEFT, RIGHT and string concatenation using FOR XML PATH(''):
-- Rewrites each FORMULA by replacing every quoted name on the right-hand side
-- with the quoted VALUE of the Table_Values row whose DESC equals that name.
-- Tokens with no matching row (operators, unknown names) are kept as they are.
WITH Cte AS (
    SELECT
        f.*,
        LHS = LEFT(f.FORMULA, CHARINDEX('=', f.FORMULA) - 1),               -- text before '='
        RHS = RIGHT(f.FORMULA, LEN(f.FORMULA) - CHARINDEX('=', f.FORMULA)), -- text after '='
        s.*,
        v.VALUE
    FROM Table_Formula f
    -- Split the right-hand side on the operators - + * /.
    -- The hyphen is listed first so LIKE reads it literally, not as a range.
    CROSS APPLY dbo.PatternSplitCM(RIGHT(f.FORMULA, LEN(f.FORMULA) - CHARINDEX('=', f.FORMULA)), '[-+*/]') s
    LEFT JOIN Table_Values v
        ON v.[DESC] = REPLACE(s.Item, '''', '') -- compare without the surrounding quotes
)
--SELECT * FROM Cte
SELECT
    c.ID,
    c.FORMULA,
    c.LHS + '=' + (
        -- COALESCE rather than ISNULL: ISNULL returns the type of its first
        -- argument and would truncate a longer unmatched Item to that length.
        -- VARCHAR(20) is wide enough for any integer VALUE.
        -- NOTE(review): VALUE's declared type is not visible here; confirm the width.
        SELECT COALESCE('''' + CONVERT(VARCHAR(20), VALUE) + '''', ITEM)
        FROM Cte
        WHERE ID = c.ID
        ORDER BY ItemNumber
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)')
FROM Cte c
GROUP BY c.ID, c.FORMULA, c.LHS -- collapse the per-token rows to one row per formula
SQL Fiddle
RESULT
| ID | FORMULA | |
|----|---------------------|-----------------|
| 1 | X='ABC'+'DEF'+'GHI' | X='5'+'10'+'15' |
How would I select c_user_id who have made back to back entries in a SQL Server 2008 database ?
Preferably people who have made more than 3 back to back entries like pras.chla#gmail.com below (sorting by c_id desc and c_id is an identity column)
c_id c_user_id c_entry
1427 xermadr.asdf#me.com 155575
1426 pras.chla#gmail.com 155829
1425 pras.chla#gmail.com 155826
1424 pras.chla#gmail.com 155828
1423 pras.chla#gmail.com 155830
1422 sdfe.qqol#gmail.com 155559
thanks again ?
One way
-- Lists each c_user_id that has a row whose two immediately preceding rows
-- (by c_id) belong to the same user, i.e. three rows in a row.
SELECT DISTINCT c_user_id
FROM tab t1
-- The aggregate subquery has HAVING but no GROUP BY, so it yields one row when
-- the condition holds and none otherwise; CROSS APPLY then keeps or drops t1.
CROSS APPLY (SELECT 1 AS C
-- T = the two rows with the highest c_id below t1's c_id.
FROM (SELECT TOP 2 *
FROM tab t2
WHERE t2.c_id < t1.c_id
ORDER BY t2.c_id DESC) T
-- Requires two non-NULL user ids, both identical, and equal to t1's user.
HAVING COUNT(c_user_id) = 2 AND COUNT(DISTINCT c_user_id) = 1 AND MIN(c_user_id) = t1.c_user_id) CA
Or another
-- Grp is the difference between a row's overall rank by c_id and its rank
-- within its own user; it is constant across a run of consecutive rows
-- belonging to the same user.
WITH Islands AS
(
    SELECT
        src.*,
        ROW_NUMBER() OVER (ORDER BY src.c_id)
          - ROW_NUMBER() OVER (PARTITION BY src.c_user_id ORDER BY src.c_id) AS Grp
    FROM tab AS src
)
-- Users having at least one run of three or more consecutive rows.
SELECT DISTINCT run.c_user_id
FROM Islands AS run
GROUP BY
    run.c_user_id,
    run.Grp
HAVING COUNT(*) >= 3
-- Numbers the rows consecutively by c_id so that neighbouring rows differ by exactly 1.
;WITH Numbered AS
(
    SELECT
        ROW_NUMBER() OVER (ORDER BY c_id) AS OrderNumber,
        c_id,
        c_user_id,
        c_entry
    FROM someUserTable
)
-- Users owning four consecutive rows: r1 and the three rows directly before it.
SELECT DISTINCT r1.c_user_id
FROM Numbered AS r1
INNER JOIN Numbered AS r2
    ON r2.OrderNumber + 1 = r1.OrderNumber
   AND r2.c_user_id = r1.c_user_id
INNER JOIN Numbered AS r3
    ON r3.OrderNumber + 1 = r2.OrderNumber
   AND r3.c_user_id = r2.c_user_id
INNER JOIN Numbered AS r4
    ON r4.OrderNumber + 1 = r3.OrderNumber
   AND r4.c_user_id = r3.c_user_id