Comma-delimited string - sql-server

Using T-SQL, I have a comma delimited string like this
'Value1,Value2,"Value3,Value4,Value5",Value6'
and I want to put it into an array like this:
Array[0] = 'Value1'
Array[1] = 'Value2'
Array[2] = 'Value3,Value4,Value5'
Array[3] = 'Value6'
Any help is appreciated! Thanks!

This is a somewhat convoluted method.
But it can transform the string in the format of a JSON array.
Then it'll be easier to use JSON functions on it.
The example uses a UDF fnPattern_Split that accepts a pattern to unnest the string.
The source code can be found here
-- Turn a comma-delimited string with double-quoted fields into a JSON array,
-- then shred it with OPENJSON.
-- Depends on dbo.fnPattern_Split (defined elsewhere); only its ordinal, value
-- and match columns are used here.
-- NOTE(review): presumably match = 1 marks the pieces that matched the pattern
-- and match = 0 the text between them -- confirm against the UDF source.
declare @str varchar(max), @js varchar(max);
set @str = 'Value1,Value2,"Value3,Value4,Value5",Value6';

;with cte1 as (
    -- first pass: split on the double-quoted pattern
    select ps.ordinal, ps.value, ps.match
    from dbo.fnPattern_Split(@str, '"%"') ps
)
, cte2 as (
    -- match = 1 pieces are kept verbatim (they already carry their quotes)
    select ordinal as ord1, 0 as ord2, value
    from cte1
    where match = 1
    union all
    -- match = 0 pieces are split again on commas and each element is quoted
    select c.ordinal, ps.ordinal, quotename(ps.value, '"') as value
    from cte1 c
    cross apply dbo.fnPattern_Split(c.value, ',') ps
    where c.match = 0 and ps.match = 0
)
-- concatenate in original order: outer ordinal first, inner ordinal second
select @js = '[' + string_agg(value, ',')
             within group (order by ord1, ord2) + ']'
from cte2;

print(@js);

select js.[key], js.value, js.type
from OPENJSON(@js) js;
key | value | type
--- | ----- | ----
0 | Value1 | 1
1 | Value2 | 1
2 | Value3,Value4,Value5 | 1
3 | Value6 | 1
Test on db<>fiddle here

Related

Check if element in list string BigQuery

I have a comma separated list in BigQuery
select '1,2,3' as number_list
I want to return true if 1 is in the list without splitting into an array then unnesting
I want to be able to say
select if(1 in split('1,2,3'),1,0)
I also want to avoid saying
select if('1,2,3' like '%,1,%' or '1,2,3' like '1,%' or '1,2,3' like '%,1',1,0)
Below example for BigQuery Standard SQL
#standardSQL
CREATE TEMP FUNCTION InList(list STRING, num INT64) AS ((
SELECT COUNTIF(num = CAST(number AS INT64)) FROM UNNEST(SPLIT(list)) number
));
WITH `project.dataset.table` AS (
SELECT '1,2,3' AS number_list UNION ALL
SELECT '2,3,4'
)
SELECT number_list, InList(number_list, 1) in_list
FROM `project.dataset.table`
with result
Row number_list in_list
1 1,2,3 1
2 2,3,4 0
I also want to avoid saying
SELECT IF('1,2,3' LIKE '%,1,%' OR '1,2,3' LIKE '1,%' OR '1,2,3' LIKE '%,1',1,0)
to avoid such redundancy you can use below version
-- Wrap the list in delimiters so a single pattern covers first, middle and last position.
SELECT IF(CONCAT(',', number_list, ',') LIKE '%,1,%', 1, 0)
... And, finally - and most likely the winner :o)
I want to be able to say select if(1 in split('1,2,3'),1,0)
The closest is
-- The search value is written as the string literal '1' because it is compared
-- against the elements produced by SPLIT on a string list.
SELECT IF('1' IN UNNEST(SPLIT(number_list)), 1, 0)
You can use subquery with MAX function matching your searched value
-- Checks whether 1 occurs in an integer array.
-- The second column is TRUE on a hit and NULL (not FALSE) otherwise, because
-- MAX over no matching values is NULL.
WITH src AS (
  SELECT
    1 AS id,
    [1, 2, 3] AS number_list
)
SELECT
  id,
  (SELECT MAX(CASE WHEN n = 1 THEN n END) = 1 FROM UNNEST(number_list) AS n)
FROM src
or with
-- Same check against a string list: SPLIT produces string elements, so the
-- comparison uses the literal '1'.
-- The second column is TRUE on a hit and NULL otherwise.
WITH src AS (
  SELECT
    1 AS id,
    SPLIT('1,2,3', ',') AS number_list
)
SELECT
  id,
  (SELECT MAX(CASE WHEN n = '1' THEN n END) = '1' FROM UNNEST(number_list) AS n)
FROM src

Split a string in SQL by hyphen in 2012 version

I have multiple strings in a column, and from each one I need to get the segment just before the last hyphen.
Below are four examples. The number of hyphens in a string can vary, but the desired result is always the segment immediately before the last hyphen.
1. abc-def-Opto
2. abc-def-ijk-5C-hello-Opto
3. abc-def-ijk-4C-hi-Build
4. abc-def-ijk-4C-123-suppymanagement
Desired result set is
def
hello
hi
123
How to do this in SQL query to get this result set. I have MSSQL 2012 version
Require a generic sql which can get the result set
There are many ways to split/parse a string. ParseName() would fail because you may have more than 4 positions.
One option (just for fun), is to use a little XML.
We reverse the string
Convert into XML
Grab the second node
Reverse the desired value for the final presentation
Example
-- Extract the second-to-last hyphen-delimited segment of each value via XML.
-- @YourTable is a table variable (declared with DECLARE ... TABLE), so it needs
-- the @ sigil.
Declare @YourTable Table ([SomeCol] varchar(50));
Insert Into @YourTable ([SomeCol]) Values
 ('abc-def-Opto')
,('abc-def-ijk-5C-hello-Opto')
,('abc-def-ijk-4C-hi-Build')
,('abc-def-ijk-4C-123-suppymanagement');

-- Reverse the string, turn each '-' into an element boundary, take the 2nd
-- element (i.e. the second-to-last segment), then reverse it back.
-- NOTE(review): the cast to xml will fail on values containing XML-special
-- characters such as '&' or '<'.
Select SomeCol
      ,Value = reverse(convert(xml,'<x>'+replace(reverse(SomeCol),'-','</x><x>')+'</x>').value('x[2]','varchar(150)'))
 from @YourTable;
Returns
SomeCol Value
abc-def-Opto def
abc-def-ijk-5C-hello-Opto hello
abc-def-ijk-4C-hi-Build hi
abc-def-ijk-4C-123-suppymanagement 123
Without getting into XML stuff, simply using string functions of sql server.
-- Extract the second-to-last hyphen-delimited segment using only string functions.
-- @YourTable is a table variable, so it needs the @ sigil.
Declare @YourTable Table ([SomeCol] varchar(50));
Insert Into @YourTable ([SomeCol]) Values
 ('abc-def-Opto')
,('abc-def-ijk-5C-hello-Opto')
,('abc-def-ijk-4C-hi-Build')
,('abc-def-ijk-4C-123-suppymanagement');

SELECT t.[SomeCol]
      -- Take everything up to the next hyphen of the reversed tail and flip it back.
      -- NULLIF turns "no further hyphen" (CHARINDEX = 0) into a NULL length, so the
      -- result is NULL instead of an invalid-length error for values with fewer
      -- than two hyphens.
      ,RTRIM(LTRIM(REVERSE(
           SUBSTRING(r.Tail, 1, NULLIF(CHARINDEX('-', r.Tail), 0) - 1)
       ))) AS Value
FROM @YourTable AS t
CROSS APPLY (
    -- Reversed string with its first (i.e. originally last) segment and hyphen removed.
    SELECT SUBSTRING(REVERSE(t.[SomeCol]), CHARINDEX('-', REVERSE(t.[SomeCol])) + 1, LEN(t.[SomeCol])) AS Tail
) AS r;
I am not sure this script will be exactly what you need, but it should give you an idea of how to split the data.
-- Splits each sample string on '-' via XML, keeps one piece per row, and
-- concatenates the kept pieces into a single 'Set_<Id>:<piece>' list.
-- Drop the temp table left over from a previous run, if any.
IF OBJECT_ID('tempdb..#Temp')IS NOT NULL
DROP TABLE #Temp
-- CTE: four hard-coded sample rows (Id, data).
;WITH CTE(Id,data)
AS
(
SELECT 1,'abc-def-Opto' UNION ALL
SELECT 2,'abc-def-ijk-5C-hello-Opto' UNION ALL
SELECT 3,'abc-def-ijk-4C-hi-Build' UNION ALL
SELECT 4,'abc-def-ijk-4C-123-suppymanagement'
)
-- Cte2: for each Id keep only the piece whose row number equals the Id
-- (piece 1 for Id 1, piece 2 for Id 2, ...); every other piece becomes NULL.
,Cte2
AS
(
SELECT Id, CASE WHEN Id=1 AND Setdata=1 THEN data
WHEN Id=2 AND Setdata=2 THEN data
WHEN Id=3 AND Setdata=3 THEN data
WHEN Id=4 AND Setdata=4 THEN data
ELSE NULL
END AS Data
FROM
(
SELECT Id,
Split.a.value('.','nvarchar(1000)') AS Data,
-- NOTE(review): ORDER BY id inside PARTITION BY id does not define an order
-- among the pieces of one string, so the piece numbering is not guaranteed.
ROW_NUMBER()OVER(PARTITION BY id ORDER BY id) AS Setdata
FROM(
SELECT Id,
-- Each '-' becomes an element boundary: 'a-b' -> <S>a</S><S>b</S>.
CAST('<S>'+REPLACE(data ,'-','</S><S>')+'</S>' AS XML) AS data
FROM CTE
) AS A
CROSS APPLY data.nodes('S') AS Split(a)
)dt
)
SELECT * INTO #Temp FROM Cte2
-- Concatenate the non-empty pieces; STUFF(...,1,1,'') removes only the first
-- character (the leading comma), so the result still starts with a space.
SELECT STUFF((SELECT DISTINCT ', '+ 'Set_'+CAST(Id AS VARCHAR(10))+':'+Data
FROM #Temp WHERE ISNULL(Data,'')<>'' FOR XML PATH ('')),1,1,'')
Result
Set_1:abc, Set_2:def, Set_3:ijk, Set_4:4C
You can do like
-- Return the second-to-last hyphen-delimited segment of each string.
WITH CTE AS
(
    -- Sample rows are distinct, so UNION ALL avoids a needless de-duplication.
    SELECT 1 AS ID, 'abc-def-Opto' AS Str
    UNION ALL
    SELECT 2, 'abc-def-ijk-5C-hello-Opto'
    UNION ALL
    SELECT 3, 'abc-def-ijk-4C-hi-Build'
    UNION ALL
    SELECT 4, 'abc-def-ijk-4C-123-suppymanagement'
)
SELECT T.ID,
       -- First segment of the remainder, flipped back to its original direction.
       REVERSE(LEFT(T.Rest, CHARINDEX('-', T.Rest) - 1)) AS Result
FROM (
    SELECT ID,
           -- Reversed string with its leading segment and hyphen cut off by position.
           -- REPLACE(P2, P1, '') would instead delete every occurrence of that text
           -- anywhere in the string (e.g. 'x-Optoy-Opto'), corrupting the result.
           STUFF(REVERSE(Str), 1, CHARINDEX('-', REVERSE(Str)), '') AS Rest
    FROM CTE
) AS T
ORDER BY T.ID;
Returns:
+----+--------+
| ID | Result |
+----+--------+
| 1 | def |
| 2 | hello |
| 3 | hi |
| 4 | 123 |
+----+--------+
Demo

Get the highest conformity between two strings

Is the Levenshtein function the correct/best function to find the highest conformity between two strings?
eg:
string1 = CCC14E0APJ
string2 = CCC14E0APJ123
My end result should say that CCC14E0APJ is the master product of CCC14E0APJ123.
I can not do a exact match because some products will look like this.
CCC14E0AP
CCC14E0APJ
CCC14E0APK
which are all totally different products.
The master is always a 100% matching string for the longest found string.
For product abcde123, if there is an abcde in my master table, that's the master. If there is only abc, that's the master.
You do not need fancy How-close-is-the-string-functions, but rather compare the beginning of a string with all other strings, if they start with the same string. If so, the shorter is the parent of the longer...
With the following query you would get the ParentID, even in a hierarchical system:
-- For every ID, store as ParentID the longest other ID that is a prefix of it.
-- @dummy is a table variable, so it needs the @ sigil.
DECLARE @dummy TABLE(YourID VARCHAR(100),ParentID VARCHAR(100));
INSERT INTO @dummy(YourID) VALUES
 ('CCC14E0AP')
,('CCC14E0APJ')
,('CCC14E0APK')
,('CCC14E0APK_1')
,('CCC14E');

WITH DependingIDs AS
(
    SELECT d.ParentID
          ,d.YourID
          ,d2.YourID AS dependingID
          -- rank 1 = longest matching prefix
          ,RANK() OVER(PARTITION BY d.YourID ORDER BY LEN(d2.YourID) DESC) AS NextLength
    FROM @dummy AS d
    INNER JOIN @dummy AS d2
        -- d2.YourID is used as a LIKE pattern, so '[', '%' and '_' inside an ID
        -- (e.g. 'CCC14E0APK_1') must be escaped to be matched literally.
        ON d.YourID LIKE REPLACE(REPLACE(REPLACE(d2.YourID,'[','[[]'),'%','[%]'),'_','[_]') + '%'
       AND d.YourID<>d2.YourID
)
-- Updating through the CTE writes ParentID on the underlying @dummy rows.
UPDATE DependingIDs SET ParentID=dependingID
WHERE NextLength=1;

SELECT YourID, ParentID FROM @dummy;
This is the result
YourID ParentID
CCC14E0AP CCC14E
CCC14E0APJ CCC14E0AP
CCC14E0APK CCC14E0AP
CCC14E0APK_1 CCC14E0APK
CCC14E NULL
For each row you just detect the max substring using APPLY operator:
-- For each row, find the longest other value that is a prefix of it (NULL if none).
-- @t is a table variable, so it needs the @ sigil.
DECLARE @t TABLE ( p VARCHAR(MAX) );
INSERT INTO @t (p)
VALUES  ( 'A' ),
        ( 'AAAA' ),
        ( 'AA' ),
        ( 'BBB' ),
        ( 'BBBB' ),
        ( 'BBBBB' ),
        ( 'BBBBB' ),
        ( 'C' );

SELECT  t.p,
        ca.p AS master_p   -- aliased so the two output columns are distinguishable
FROM    @t AS t
        OUTER APPLY ( SELECT TOP 1 m.p
                      FROM      @t AS m
                      WHERE     t.p <> m.p
                                -- m.p is used as a LIKE pattern: escape '[', '%' and '_'
                                -- so they are matched literally.
                                AND t.p LIKE REPLACE(REPLACE(REPLACE(m.p,'[','[[]'),'%','[%]'),'_','[_]') + '%'
                      ORDER BY  LEN(m.p) DESC
                    ) ca;
Output:
A NULL
AAAA AA
AA A
BBB NULL
BBBB BBB
BBBBB BBBB
BBBBB BBBB
C NULL

T-SQL select value where value contains less than 3 of the declared characters

I'm trying to write a select statement which returns the value if it doesn't have at least 3 of the declared characters, but I can't think of how to get it working. Can someone point me in the right direction?
One thing to consider, I am not allowed to create a temporary table for this exercise.
I haven't really got any SQL so far, as I can't think of a way to do it without a temp table.
the declared characters are any alpha characters between a and z, so if the value in the db is '1873' then it would return the value because it doesnt have at least 3 of the declared characters, but if the value was 'abcdefg' then it would not be returned as it has at least 3 of the declared characters.
Is anyone able to point me in a starting direction for this?
This will find all sys.objects with an x or a z:
Some explanations, as this is an exercise and you want to learn something:
You can split a delimited string by transforming it into XML. x,z comes out as <x>x</x><x>z</x>. You can use this to create a derived table.
I use a CTE to avoid a created or declared table...
You can use CROSS APPLY for row-wise actions. Here I use CHARINDEX to find the position(s) of the chars you are looking for.
If none of them is found, their SUM is zero. I use GROUP BY and HAVING to check this.
Hope this is clear :-)
-- List the names in sys.objects that contain at least one of the characters
-- in the comma-delimited @chars list.
DECLARE @chars VARCHAR(100)='x,z';
WITH Splitted AS
(
    -- 'x,z' -> <x>x</x><x>z</x> -> one row per character
    SELECT A.B.value('.','char(1)') AS TheChar
    FROM
    (
        SELECT CAST('<x>' + REPLACE(@chars,',','</x><x>')+ '</x>' AS XML) AS AsXml
    ) AS tbl
    CROSS APPLY AsXml.nodes('/x') AS A(B)
)
SELECT o.name
FROM sys.objects AS o
-- One row per (object, character); Found is the position of the character, 0 if absent.
CROSS APPLY (SELECT CHARINDEX(s.TheChar, o.name) AS Found FROM Splitted AS s) AS hit
-- Group by name only: also grouping by Found would emit a name once per
-- distinct hit position instead of once per object.
GROUP BY o.name
HAVING SUM(hit.Found) > 0;
With
SrcTab As (
Select *
From (values ('Contains x y z')
, ('Contains x and y')
, ('Contains y only')) v (SrcField)),
CharList As ( --< CTE instead of temporary table
Select *
From (values ('x')
, ('y')
, ('z')) v (c))
Select SrcField
From SrcTab, CharList
Group By SrcField
Having SUM(SIGN(CharIndex(C, SrcField))) < 3 --< Count hits
;
If Distinct is not desirable and we need to only check count for each row:
-- Per-row variant: duplicate source rows are kept, and each row is returned
-- when fewer than 3 of the listed characters occur in it.
With
SrcTab As ( --< Sample Data CTE
    Select v.SrcField
    From (values ('Contains x y z')
               , ('Contains x and y')
               , ('Contains y only')
               , ('Contains y only')) As v (SrcField))
Select s.SrcField
From SrcTab As s
Cross Apply (
    Select Count(*) As hits --< Count hits
    From (Values ('x'), ('y'), ('z')) As v (c)
    Where CharIndex(v.c, s.SrcField) > 0
) As h
Where h.hits < 3
;
Using a numbers table and joins. Only three declared characters are used, for demo purposes.
Input:
12345
abcdef
ab
Declared characters table (only 3 used for the demo):
a
b
c
Output:
12345
ab
Demo:
---Table population Scripts
-- #t holds the sample input values.
create table #t
(
    val varchar(20)
);

insert into #t (val)
values ('12345'),
       ('abcdef'),
       ('ab');

-- #declarecharacters holds the declared characters, one per row.
create table #declarecharacters
(
    dc char(1)
);

insert into #declarecharacters (dc)
values ('a'),
       ('b'),
       ('c');
Query used
-- Return each val in which fewer than 3 characters match a declared character.
-- Relies on an existing table "numbers" with an integer column n.
;with val_chars as
(
    -- one row per character position of each val
    select t.val, ch.outputt
    from #t as t
    cross apply
    (
        select substring(t.val, nums.n, 1)
        from numbers as nums
        where nums.n <= len(t.val)
    ) as ch (outputt)
)
select vc.val
from val_chars as vc
left join #declarecharacters as dc1
    on dc1.dc = vc.outputt
group by vc.val
-- count(dc1.dc) counts only the characters that found a match in the left join
having count(dc1.dc) < 3

complex SQL string parsing

I have the following text field in SQL Server table:
1!1,3!0,23!0,288!0,340!0,521!0,24!0,38!0,26!0,27!0,281!0,19!0,470!0,568!0,601!0,2!1,251!0,7!2,140!0,285!0,11!2,33!0
Would like to retrieve only the part before the exclamation mark (!). So for 1!1 I only want 1, for 3!0 I only want 3, for 23!0 I only want 23.
Would also like to retrieve only the part after the exclamation mark (!). So for 1!1 I only want 1, for 3!0 I only want 0, for 23!0 I only want 0.
Both point 1 and point 2 should be inserted into separate columns of a SQL Server table.
I LOVE SQL Server's XML capabilities. It is a great way to parse data. Try this one out:
--Load the original string
-- Parse 'a!b,c!d,...' into (FirstNumber, SecondNumber) rows via XML.
-- @string and @xml are local variables, so they need the @ sigil.
--Load the original string
DECLARE @string nvarchar(max) = '1!2,3!4,5!6,7!8,9!10';
--Turn it into XML: each ',' closes one pair and opens the next...
SET @string = REPLACE(@string,',','</SecondNumber></Pair><Pair><FirstNumber>') + '</SecondNumber></Pair>';
--...and each '!' separates the first number from the second
SET @string = '<Pair><FirstNumber>' + REPLACE(@string,'!','</FirstNumber><SecondNumber>');
--Show the new version of the string
SELECT @string AS XmlIfiedString;
--Load it into an XML variable
DECLARE @xml XML = @string;
--Now, First and Second Number from each pair...
--(every Pair element is top-level in the generated fragment, so '/Pair' reaches all of them)
SELECT
    Pairs.Pair.value('FirstNumber[1]','nvarchar(1024)') AS FirstNumber,
    Pairs.Pair.value('SecondNumber[1]','nvarchar(1024)') AS SecondNumber
FROM @xml.nodes('/Pair') Pairs(Pair);
The above query turned the string into XML like this:
<Pair><FirstNumber>1</FirstNumber><SecondNumber>2</SecondNumber></Pair> ...
Then parsed it to return a result like:
FirstNumber | SecondNumber
----------- | ------------
1 | 2
3 | 4
5 | 6
7 | 8
9 | 10
I completely agree with the guys complaining about this sort of data.
The fact however, is that we often don't have any control of the format of our sources.
Here's my approach...
First you need a tokeniser. This one is very efficient (probably the fastest non-CLR). Found at http://www.sqlservercentral.com/articles/Tally+Table/72993/
-- DelimitedSplit8K: inline table-valued function that splits @pString on the
-- single-character @pDelimiter.
-- Returns one row per element: ItemNumber (1-based position) and Item (the text).
-- cteStart always contributes start position 1, so at least one row is returned.
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
        (@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
 RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
     -- enough to cover VARCHAR(8000)
  WITH E1(N) AS (
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
                ),                          --10E+1 or 10 rows
       E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
       E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
 cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
                     -- for both a performance gain and prevention of accidental "overruns"
                 SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
                ),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
                 SELECT 1 UNION ALL
                 SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
                ),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
                 SELECT s.N1,
                        ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
                   FROM cteStart s
                )
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
 SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
        Item       = SUBSTRING(@pString, l.N1, l.L1)
   FROM cteLen l
;
GO
Then you consume it like so...
-- Split the list on ',' and then split each 'a!b' item around the '!'.
-- @Wtf is a local variable, so it needs the @ sigil.
DECLARE @Wtf VARCHAR(1000) = '1!1,3!0,23!0,288!0,340!0,521!0,24!0,38!0,26!0,27!0,281!0,19!0,470!0,568!0,601!0,2!1,251!0,7!2,140!0,285!0,11!2,33!0';

SELECT LEFT(s.Item, CHARINDEX('!', s.Item) - 1)           AS FirstNumber   -- text before the '!'
      ,RIGHT(s.Item, CHARINDEX('!', REVERSE(s.Item)) - 1) AS SecondNumber  -- text after the '!'
FROM [dbo].[DelimitedSplit8K](@Wtf, ',') AS s
ORDER BY s.ItemNumber;   -- keep the pairs in their original order
The function posted and logic for parsing can be integrated in to a single function of course.
I agree that normalizing the data is the best way. However, here is an XML solution to parse the data:
-- Parse 'a!b,c!d,...' into two columns via XML: ',' separates rows, '!' separates columns.
-- @str and @xml are local variables, so they need the @ sigil.
DECLARE @str VARCHAR(1000) = '1!1,3!0,23!0,288!0,340!0,521!0,24!0,38!0,26!0,27!0,281!0,19!0,470!0,568!0,601!0,2!1,251!0,7!2,140!0,285!0,11!2,33!0'
       ,@xml XML;

-- '1!1,3!0' -> <row><col>1</col><col>1</col></row><row><col>3</col><col>0</col></row>
SET @xml = CAST('<row><col>' + REPLACE(REPLACE(@str,'!','</col><col>'),',','</col></row><row><col>') + '</col></row>' AS XML);

SELECT
     line.col.value('col[1]', 'varchar(1000)') AS col1
    ,line.col.value('col[2]', 'varchar(1000)') AS col2
FROM @xml.nodes('/row') AS line(col);

Resources