Return Regex Matches from a sql query - sql-server

I have a table that stores html templates which contain markup with placeholders in key locations, something like this ...
<div>
<div>{FirstName}</div>
<div>{LastName}</div>
</div>
I want to write a query that returns from the table all of the placeholders used from all rows.
SELECT Template
FROM MyTable
WHERE ????
So for the above example the result I want is ...
{FirstName}
{LastName}
I have seen people using regex in SQL but can't figure out how to only return the matches and not the whole column value.
It's also worth noting that I want a result per match ideally but if I got a comma separated list per row that matched or something that would do.

I would approach this using a numbers table, which are very useful anyway, so if you don't have one, I would consider creating one, but for the sake of a complete answer I will assume you don't have one and can't create one. In such scenarios you can generate a list of numbers on the fly quite easily using:
-- On-the-fly numbers table: 10 seed rows, squared twice, then numbered 1..10,000.
WITH N1 AS (
    -- 10 rows from a table value constructor
    SELECT seed.N
    FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS seed (N)
),
N2 (N) AS (
    -- 10 x 10 = 100 rows
    SELECT 1
    FROM N1 AS a
    CROSS JOIN N1 AS b
),
N3 (N) AS (
    -- 100 x 100 = 10,000 rows
    SELECT 1
    FROM N2 AS a
    CROSS JOIN N2 AS b
),
-- To go further, add: N4 (N) AS (SELECT 1 FROM N3 AS a CROSS JOIN N3 AS b),
Numbers (Number) AS (
    -- Turn the constant rows into a sequential number per row
    SELECT ROW_NUMBER() OVER (ORDER BY N)
    FROM N3
)
SELECT Number
FROM Numbers;
This starts with a table of 10 rows created with a table value constructor (N1), it then joins this table with itself to get a table of 100 rows (N2), then joins N2 to itself to get 10,000 rows (N3), this can be repeated as required, before finally using ROW_NUMBER() to get a sequential number in each row. Aaron Bertrand has done a pretty comprehensive series on generating a set or sequence without loops, and this method comes out on top (as a method of creating the table on the fly).
Once you have this numbers table you can join it to your template to find the position of each "{" using SUBSTRING:
-- One row per "{" found in each template; StartPosition is its 1-based offset.
-- Expects a Numbers (Number) row source, such as the stacked-CTE numbers table.
SELECT t.Template,
       StartPosition = n.Number
FROM dbo.T AS t  -- explicit alias: the t.* references no longer depend on a case-insensitive collation
INNER JOIN Numbers AS n
    ON SUBSTRING(t.Template, n.Number, 1) = '{';
With your example this will return 16, and 43. Then you can use CHARINDEX to find the "}" that follows each "{":
-- One row per "{" in each template, plus the position just past the next "}".
-- Expects a Numbers (Number) row source, such as the stacked-CTE numbers table.
SELECT t.Template,
       StartPosition = n.Number,
       -- CHARINDEX searches from the "{" onwards; +1 makes EndPosition exclusive
       EndPosition = CHARINDEX('}', t.Template, n.Number) + 1
FROM dbo.T AS t  -- explicit alias: the t.* references no longer depend on a case-insensitive collation
INNER JOIN Numbers AS n
    ON SUBSTRING(t.Template, n.Number, 1) = '{';
Then you can use SUBSTRING again to extract the term between each start and end position. So a full working example would be:
-- Full working example: list every {placeholder} found in each template.
-- @T is a table variable holding the sample template.
DECLARE @T TABLE (Template NVARCHAR(MAX));
INSERT @T (Template)
VALUES ('<div>
<div>{FirstName}</div>
<div>{LastName}</div>
</div>');
-- Numbers 1..10,000; uncomment/extend with an N4 level for longer templates.
WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n (N)),
N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2),
--N4 (N) AS (SELECT 1 FROM N3 AS N1 CROSS JOIN N3 AS N2)
Numbers (Number) AS (SELECT ROW_NUMBER() OVER(ORDER BY N) FROM N3)
SELECT t.Template,
       StartPosition = n.Number,
       EndPosition = c.ClosePosition + 1,
       -- Term spans from "{" through "}" inclusive
       Term = SUBSTRING(t.Template, n.Number, c.ClosePosition + 1 - n.Number)
FROM @T AS t
INNER JOIN Numbers AS n
    ON SUBSTRING(t.Template, n.Number, 1) = '{'
-- Position of the "}" that follows this "{". NULLIF turns "not found" (0) into NULL
-- so SUBSTRING never receives a negative length for an unclosed "{".
CROSS APPLY (SELECT ClosePosition = NULLIF(CHARINDEX('}', t.Template, n.Number), 0)) AS c
WHERE c.ClosePosition IS NOT NULL;

See this:
-- Demo: peel {placeholders} off each template one at a time with a recursive CTE.
CREATE TABLE #temp(id int identity(1,1), template nvarchar(max))
-- NOTE(review): the literal is not nvarchar(max), so REPLICATE presumably truncates the
-- result at 8,000 bytes; cast the literal to nvarchar(max) if all 1000 copies are needed
-- (and raise MAXRECURSION below accordingly) -- confirm against the REPLICATE documentation.
INSERT INTO #temp(template)
SELECT REPLICATE(N'<div>
<div>{FirstName}</div>
<div>{LastName}</div>
</div>',1000)
;WITH cte AS(
    -- Anchor: first placeholder of every template that contains a "{" followed by a "}".
    -- The "}" is searched starting at the "{" so a stray "}" earlier in the text is ignored.
    SELECT id,
           SUBSTRING(template,
                     CHARINDEX(N'{',template),
                     CHARINDEX(N'}',template,CHARINDEX(N'{',template))-CHARINDEX(N'{',template)+1) as match,
           SUBSTRING(template,
                     CHARINDEX(N'}',template,CHARINDEX(N'{',template))+1,
                     LEN(template)) as templateRest
    FROM #temp
    WHERE template LIKE N'%{%}%'
    UNION ALL
    -- Recursive step: same extraction applied to the text remaining after the previous match.
    SELECT id,
           SUBSTRING(templateRest,
                     CHARINDEX(N'{',templateRest),
                     CHARINDEX(N'}',templateRest,CHARINDEX(N'{',templateRest))-CHARINDEX(N'{',templateRest)+1) as match,
           SUBSTRING(templateRest,
                     CHARINDEX(N'}',templateRest,CHARINDEX(N'{',templateRest))+1,
                     LEN(templateRest)) as templateRest
    FROM cte
    -- Stop as soon as no complete {...} pair is left (a lone "}" is not enough)
    WHERE templateRest LIKE N'%{%}%'
)
SELECT t.id, t.template, c.match
-- Only distinctive:
-- SELECT DISTINCT t.id, t.template, c.match
FROM cte AS c
INNER JOIN #temp AS t
    ON c.id = t.id
OPTION(MAXRECURSION 1000) -- if needed, this value could still be raised
DROP TABLE #temp
GO
You can filter it for the template and retrieve all matches.

Related

Join tables but allow use of records once only

-- Sample data: #A holds the upper limits, #B the candidate amounts.
CREATE TABLE #A (UpperLimit NUMERIC(4));
CREATE TABLE #B (Id NUMERIC(4), Amount NUMERIC(4));

INSERT INTO #A (UpperLimit)
VALUES (1000),
       (2000),
       (3000);

INSERT INTO #B (Id, Amount)
VALUES (1, 3100),
       (2, 1900),
       (3, 1800),
       (4, 1700),
       (5, 900),
       (6, 800);
Given these 2 tables, I want to join Table A to B ON B.Amount < A.UpperLimit but each record from Table B can only be used once, so the desired output would be:
I could easily do this by plopping Table B's records into a temp table, cursor over table A taking top record < UpperLimit and Deleting that record from the temp table or some other programmatic solution, but I'd like to avoid that and I'm pretty sure this could be done with a "normal" (recursive CTE? Partition?) query.
You could achieve your desired output using below recursive CTE
-- Walks the upper limits from highest to lowest, picking for each limit the largest
-- amount that has not just been used by the row it recursed from.
WITH
-- DATA: every (#A, #B) pair where the amount fits under the limit.
-- NOTE(review): uses >= although the question asks for B.Amount < A.UpperLimit -- confirm intent.
DATA AS
(
SELECT * FROM #A A1 INNER JOIN #B B1 ON A1.UpperLimit >= B1.Amount
),
-- MA: smallest and largest upper limit, used as recursion bounds.
MA AS
(
SELECT MIN(UpperLimit) AS MinLimit, MAX(UpperLimit) AS MaxLimit FROM #A
),
RESULT AS
(
-- Anchor: for the maximum upper limit, the pair with the largest amount
SELECT *
FROM DATA D1
WHERE NOT EXISTS
(SELECT 1
FROM DATA D2
WHERE D2.UpperLimit = D1.UpperLimit AND D2.Amount > D1.Amount)
AND D1.UpperLimit = (SELECT MaxLimit FROM MA)
-- Recursive step: for lower limits, the largest amount whose Id differs from the parent row's Id
UNION ALL
SELECT D1.*
FROM RESULT R1 INNER JOIN DATA D1
ON (R1.UpperLimit > D1.UpperLimit AND R1.Id != D1.Id)
WHERE D1.UpperLimit >= (SELECT MinLimit FROM MA)
AND NOT EXISTS
(SELECT 1
FROM DATA D2
WHERE D2.UpperLimit = D1.UpperLimit AND D2.Amount > D1.Amount AND D2.Id != R1.Id)
)
-- DISTINCT removes rows reached through more than one recursion path
SELECT DISTINCT * FROM RESULT ORDER BY UpperLimit DESC;
Demo: https://dbfiddle.uk/Y-m0K6Mk
Might be a bit lengthy but hopefully clear enough.
-- Enumerates chains of (limit, amount) pairs via recursion, keeps the chain that uses
-- the most #B rows (ties broken by the largest amount sum), then maps the chain's
-- row numbers back to the original rows.
with a as
(select -- order and number rows in table A in some way
row_number() over (order by UpperLimit) as RnA,
*
from #a),
b as
(select -- order and number rows in table B in the same way
row_number() over (order by Amount) as RnB,
*
from #b),
m as
(select -- get and number all possible pairs of values from both tables considering the restriction
row_number() over (order by a.UpperLimit desc, b.Amount desc) as RnM,
*
from a
join b on
b.Amount < a.UpperLimit),
r as
(select -- use recursion to get all possible combinations of the value pairs with metrics of interest for comparison
convert(varchar(max), RnA) as ListA,
convert(varchar(max), RnB) as ListB,
RnA,
RnB,
1 as CountB,
convert(int, Amount) as SumB
from m
where RnM = 1 -- seed: the pair with the highest limit and, within it, the highest amount
union all
select
r.ListA + ' ' + convert(varchar(max), m.RnA),
r.ListB + ' ' + convert(varchar(max), m.RnB),
m.RnA,
m.RnB,
r.CountB + 1,
r.SumB + convert(int, m.Amount)
from m
join r on
m.RnA < r.RnA and -- strictly lower limit than the previous pair
m.RnB < r.RnB), -- strictly lower amount, so no #B row is reused within a chain
e as
(select top(1) -- select combinations of interest using metrics
ListA,
ListB
from r
order by CountB desc, SumB desc),
-- NOTE(review): ea/eb pair list items by the order string_split returns them;
-- presumably that order is not guaranteed without an ordinal -- confirm for the target version.
ea as
(select -- turn id list into table for table A
ea.Rn,
ea.Value
from e
cross apply(select row_number() over (order by (select null)) as Rn, Value from string_split(e.ListA, ' ')) as ea),
eb as
(select -- turn id list into table for table B
eb.Rn,
eb.Value
from e
cross apply(select row_number() over (order by (select null)) as Rn, Value from string_split(e.ListB, ' ')) as eb)
select -- get output table with actual values from the original tables
a.UpperLimit,
b.Amount,
b.Id
from ea
join eb on
ea.Rn = eb.Rn
join a on
ea.Value = a.RnA
join b on
eb.Value = b.RnB;
You can use an APPLY with a TOP 1 for this. Each row in the outer table gets only one row from the APPLY.
-- For each limit in #A, fetch a single #B row below that limit.
-- Each APPLY is evaluated independently per #A row, so on its own this does not
-- stop the same #B row from being returned for several limits.
SELECT
    a.UpperLimit,
    b.Id,
    b.Amount
FROM #A AS a
OUTER APPLY (
    SELECT TOP (1)
        b.Id,
        b.Amount
    FROM #B AS b
    WHERE b.Amount < a.UpperLimit
    -- TOP without ORDER BY returns an arbitrary row; pick the largest fitting amount
    ORDER BY b.Amount DESC
) AS b;
To simulate an inner-join (rather than a left-join) use CROSS APPLY.
This query returns very close to desired outcome.
-- For each upper limit, return the #B row with the largest amount below it.
-- Column names follow the #A (UpperLimit) / #B (Id, Amount) definitions.
WITH CTE AS (
    -- Number rows within each distinct amount so duplicates of an amount can be dropped
    SELECT B.*,
           ROW_NUMBER() OVER (PARTITION BY B.Amount ORDER BY B.Amount DESC) AS RowNum
    FROM #B B),
cc AS (
    -- Every limit with all amounts below it (limits without a match are kept with NULLs)
    SELECT A.UpperLimit, CTE.*
    FROM #A A
    LEFT JOIN CTE ON CTE.Amount < A.UpperLimit AND CTE.RowNum = 1),
cc2 AS (
    -- l1 = largest matching amount per limit
    SELECT *, MAX(Amount) OVER (PARTITION BY cc.UpperLimit) AS l1 FROM cc)
SELECT UpperLimit, Id, Amount
FROM cc2
WHERE Amount = l1
This query uses three Common Table Expressions. The first numbers the rows of Table B with the ROW_NUMBER() function and a PARTITION BY clause, the second joins Table A to Table B on the given condition, and the third computes the highest matching value per limit so that the final filter returns only one row for each limit in Table A.

merge two array column and keep the distinct in BigQuery

I have a query output like that. I want to have N3 column is the distinct merge of N1 and N2.
Query output example
My current query is this:
-- Asker's current query: one row per (cId, dId) with two separately aggregated
-- name arrays, N1 from ei and N2 from ai. Join conditions are elided in the post.
SELECT rd.cId
, rd.dId
,ARRAY_AGG(ei.IncentiveName IGNORE NULLS) AS N1
,ARRAY_AGG(ai.IncentiveName IGNORE NULLS) AS N2
FROM rd
join ei on...
join ai on ...
GROUP BY rd.cId, rd.dId
We found the problem, in many of the records, there was null value in 2nd column which was returning overall outcome as null.
Fixed that by checking with help of IFNull(N1, []) which solved the problem.
-- N3 = N1 followed by N2 for each (cId, dId). Join conditions are elided in the post.
-- Each side is de-duplicated on its own; a name present in both ei and ai
-- still appears twice because ARRAY_CONCAT only appends.
SELECT rd.cId
, rd.dId
, ARRAY_CONCAT(
    -- ARRAY_AGG yields NULL when nothing was aggregated; IFNULL(..., []) keeps
    -- ARRAY_CONCAT from returning NULL for the whole result
    IFNULL(ARRAY_AGG(DISTINCT ei.IncentiveName IGNORE NULLS),[]),
    IFNULL(ARRAY_AGG(DISTINCT ai.IncentiveName IGNORE NULLS),[])
  ) AS N3
FROM rd
join ei on...
join ai on ...
GROUP BY rd.cId, rd.dId
You can merge both arrays with array_concat, then unnest the merged array and re-aggregate it into an array, keeping only the distinct elements.
-- Per row: show the plain concatenation of N1 and N2, then the de-duplicated merge.
WITH tbl AS (
  SELECT 1 AS id, [1,2,2,3] AS N1, [1,5,7,8] AS N2
  UNION ALL
  SELECT 2, [], [5,5,5,6]
)
SELECT
  *,
  ARRAY_CONCAT(N1, N2),
  -- unnest the merged array and rebuild it from its distinct elements
  (SELECT ARRAY_AGG(DISTINCT x) FROM UNNEST(ARRAY_CONCAT(N1, N2)) AS x)
FROM tbl
In case you want to group by id column:
-- Merge N1 and N2 across all rows sharing an id, keeping distinct elements only.
with tbl as
(select 1 id, [1,2,2,3] N1, [1,5,7,8] N2
union all select 2 ,[],[5,5,5,6]
union all select 2 ,[7,8],[9]),
helper as (
  -- Concatenate the arrays of every row in the group. Cross-joining unnest(N1) with
  -- unnest(N2) would drop a row entirely when either array is empty (e.g. [] / [5,5,5,6])
  -- and would repeat each element once per element of the other array.
  select id,
         ifnull(array_concat_agg(N1), []) N1,
         ifnull(array_concat_agg(N2), []) N2
  from tbl
  group by id
)
select *,
(select array_agg(distinct x) from unnest(array_concat(N1,N2))x)
from helper
Consider below
-- Wrap the existing aggregation, then derive N3 as the distinct union of N1 and N2.
-- Join conditions are elided in the post.
WITH your_current_query AS (
  SELECT
    rd.cId,
    rd.dId,
    ARRAY_AGG(ei.IncentiveName IGNORE NULLS) AS N1,
    ARRAY_AGG(ai.IncentiveName IGNORE NULLS) AS N2
  FROM rd
  join ei on...
  join ai on ...
  GROUP BY rd.cId, rd.dId
)
SELECT
  *,
  ARRAY(
    -- UNION DISTINCT removes values that occur in either array more than once
    SELECT * FROM t.N1
    UNION DISTINCT
    SELECT * FROM t.N2
  ) AS N3
FROM your_current_query AS t

How to achieve increment in SELECT

I need to show an extra column alongside the existing table rows. The column value needs to be in the format below:
PC/01-10-2019/0000-000001
Here PC is static text, 01-10-2019 is the date value we receive as an input parameter for the procedure, and 0000-000001 needs to be an auto-generated incrementing value.
Example :
PC/01-10-2019/0000-000001
PC/01-10-2019/0000-000002
PC/01-10-2019/0000-000003
PC/01-10-2019/0000-000004
How to achieve this in SQL Server?
Try this:
-- Generate 'PC/01-10-2019/0000-000001', '...-000002', ... for up to 65,536 rows.
WITH N1 AS (SELECT N = NULL UNION ALL SELECT N = NULL), -- 2
N2 AS (SELECT N = NULL FROM N1 CROSS JOIN N1 AS N), -- 4
N3 AS (SELECT N = NULL FROM N2 CROSS JOIN N2 AS N), -- 16
N4 AS (SELECT N = NULL FROM N3 CROSS JOIN N3 AS N), -- 256
N5 AS (SELECT N = NULL FROM N4 CROSS JOIN N4 AS N), -- 65536
Numbers AS (SELECT Number = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM N5)
-- '000000' is a custom numeric format: zero-pad to six digits.
-- ('d:000000' would be read as a custom format too and emit a literal "d:" prefix.)
SELECT 'PC/01-10-2019/0000-' + FORMAT(Number, '000000', 'en-US')
FROM Numbers

TSQL USE ROW_NUMBER IN CTE, SELECT ORDER BY NOT WORKING

I prepare a string from rows of numbers. When I use the ROW_NUMBER function inside the CTE, the ORDER BY clause does not work:
-- Repro, variant 1: the numbers come from ROW_NUMBER() computed inside the CTE.
-- @text is a local variable; #numbers is a temp table filled for the second variant.
DECLARE @text VARCHAR(MAX)
IF OBJECT_ID('tempdb..#numbers') IS NOT NULL DROP TABLE #numbers
SELECT CAST(ROW_NUMBER() OVER (ORDER BY name) AS INT) AS number INTO #numbers FROM master..spt_values
SET @text = ''
;WITH
numbers (number)
AS
(
SELECT CAST(ROW_NUMBER() OVER (ORDER BY name) AS INT) AS number FROM master..spt_values
),
a
AS
(
SELECT number FROM numbers WHERE number < 10
),
b
AS
(
SELECT number FROM numbers WHERE number < 10
)
-- Variable concatenation across rows, combined with ORDER BY
SELECT @text = @text + LTRIM(STR(a.number*b.number))
FROM a
CROSS JOIN b
ORDER BY a.number, b.number DESC
SELECT @text
result "9"
-- Repro, variant 2: identical query, but the numbers are read from the #numbers temp table.
-- @text is the local variable declared earlier in the script.
SET @text = ''
;WITH
numbers (number)
AS
(
SELECT number FROM #numbers
),
a
AS
(
SELECT number FROM numbers WHERE number < 10
),
b
AS
(
SELECT number FROM numbers WHERE number < 10
)
-- Variable concatenation across rows, combined with ORDER BY
SELECT @text = @text + LTRIM(STR(a.number*b.number))
FROM a
CROSS JOIN b
ORDER BY a.number, b.number DESC
SELECT @text
result "9876543211816141210864227242118151296336322824201612844540353025201510554484236302418126635649423528211477264564840322416881726354453627189"
Where is the difference?
I expect this is related to this issue, in summary when you use variable concatenation, e.g.
-- Pattern under discussion: appending to a local variable once per row, with ORDER BY
SELECT @Variable = @Variable + someField
FROM Table
ORDER BY AnotherField;
The results are dependent on the physical implementation and internal access paths. I am currently struggling to find benchmark tests on the internet, but I think the fastest reliable approach in SQL Server is to use the XML extensions to concatenate rows into a single value:
-- Concatenate the 9 x 9 products into one string, in a defined order, via FOR XML PATH.
WITH Numbers AS (
    -- 1..9 from a table value constructor
    SELECT d.Number
    FROM (VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d (Number)
)
SELECT [Text] = (
    SELECT LTRIM(STR(x.Number * y.Number))
    FROM Numbers AS x
    CROSS JOIN Numbers AS y
    ORDER BY x.Number, y.Number DESC
    -- empty element name: the values are emitted back to back; .value() reads them out as text
    FOR XML PATH(''), TYPE
).value('.', 'VARCHAR(MAX)');
N.B. I have also removed the reference to master..spt_values and replaced with a table value constructor - this just adds unnecessary reads to generate a sequence from 1 to 9.
If you need more numbers for your sequence I would still not use system tables; use Itzik Ben-Gan's stacked CTE approach, as described in this article:
-- Stacked-CTE sequence generator: returns 1..@Numbers.
-- @Numbers is a local variable capping the row count.
DECLARE @Numbers INT = 100000;
WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t (N)), -- 10 rows
N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2), -- 100 rows
N3 (N) AS (SELECT 1 FROM N2 AS N1 CROSS JOIN N2 AS N2), -- 10,000 rows
N4 (N) AS (SELECT 1 FROM N3 AS N1 CROSS JOIN N3 AS N2), -- 100,000,000 rows
-- TOP limits the output to the requested count
Numbers (Number) AS (SELECT TOP (@Numbers) ROW_NUMBER() OVER(ORDER BY N) FROM N4)
SELECT Number
FROM Numbers;
Do not use cast on ROW_NUMBER(). This will return same as your second query:
-- Same concatenation as the question, without the CAST around ROW_NUMBER().
-- @text is a local variable accumulating the products.
DECLARE @text VARCHAR(MAX) = ''
;WITH
numbers (number)
AS
(
SELECT ROW_NUMBER() OVER (ORDER BY name) AS number FROM master..spt_values
),
a
AS
(
SELECT number FROM numbers WHERE number < 10
),
b
AS
(
SELECT number FROM numbers WHERE number < 10
)
SELECT @text = @text + LTRIM(STR(a.number*b.number))
FROM a
CROSS JOIN b
ORDER BY a.number, b.number DESC
Also, don't define the same query twice in the CTE list; reference a single CTE under two aliases instead:
-- Same concatenation with one CTE (a) referenced twice under different aliases.
-- @text is a local variable accumulating the products.
DECLARE @text VARCHAR(MAX) = ''
;WITH
numbers (number)
AS
(
SELECT ROW_NUMBER() OVER (ORDER BY name) AS number FROM master..spt_values
),
a
AS
(
SELECT number FROM numbers WHERE number < 10
)
SELECT @text = @text + LTRIM(STR(a.number*b.number))
FROM a AS a
CROSS JOIN a AS b
ORDER BY a.number, b.number DESC
SELECT @text

Insert row for each integer between 0 and <value> without cursor

I have a source table with id and count.
id count
a 5
b 2
c 31
I need to populate a destination table with each integer up to the count for each id.
id value
a 1
a 2
a 3
a 4
a 5
b 1
b 2
c 1
c 2
etc...
My current solution is like so:
-- Asker's current approach: one destination row per integer 1..count for each id,
-- limited to 9 by the hard-coded sequence.
INSERT INTO destination (id, value)
SELECT
    source.id,
    sequence.number
FROM
    (VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9)) AS sequence(number)
INNER JOIN
    source ON sequence.number <= source.count
This solution has an upper limit and is plain lame. Is there any way to replace the sequence with a set of all integers? Or is there another solution that does not use looping?
this should work:
-- Expand each source row into rows n = 1..count using a recursive counter.
WITH r AS (
    -- anchor: every id starts at 1
    SELECT src.id, src.[count], 1 AS n
    FROM SourceTable AS src
    UNION ALL
    -- step: keep incrementing until n reaches the row's count
    SELECT prev.id, prev.[count], prev.n + 1
    FROM r AS prev
    WHERE prev.n < prev.[count]
)
SELECT r.id, r.n
FROM r
ORDER BY r.id, r.n
OPTION (MAXRECURSION 0)
Unfortunately, there is no built-in set of all integers in SQL Server. However, using a little trickery, you can easily generate such a set:
-- Number the rows of sys.all_objects crossed with itself, then keep 1..1,000,000.
SELECT numbers.N
FROM (
    SELECT ROW_NUMBER() OVER (ORDER BY t1.object_id) AS N
    FROM sys.all_objects AS t1
    CROSS JOIN sys.all_objects AS t2
) AS numbers
WHERE numbers.N >= 1
  AND numbers.N <= 1000000
will generate a set of all numbers from 1 through 1000000. If you need more than a few million numbers, add sys.all_objects to the cross join a third time.
You can find many examples in this page:
-- Expand each (ID, counter) row of the @table table variable into rows 1..counter.
DECLARE @table TABLE (ID VARCHAR(1), counter INT)
INSERT INTO @table SELECT 'a', 5
INSERT INTO @table SELECT 'b', 3
INSERT INTO @table SELECT 'c', 31
;WITH cte (ID, counter) AS (
-- anchor: every ID starts at 1
SELECT id, 1
FROM @table
UNION ALL
-- step: next integer for the same ID while it does not exceed that ID's counter
SELECT c.id, c.counter +1
FROM cte AS c
INNER JOIN @table AS t
ON t.id = c.id
WHERE c.counter + 1 <= t.counter
)
SELECT *
FROM cte
ORDER BY ID, Counter

Resources