Split String using XML in SQL Server - sql-server

Question: how to split below string using XML?
Input:
'7-VPN Connectivity 7.8 - Ready to Elixir Connector install 9-Unified Installation'
Expected output:
7-VPN Connectivity
7.8 - Ready to Elixir Connector install
9-Unified Installation
My code:
-- Asker's attempt: wrap every blank-separated token in <X>...</X> and shred the XML.
-- Note: this splits at EVERY blank, so it yields single words rather than the
-- three expected items.
DECLARE @xml AS XML,
        @str AS VARCHAR(100);
SET @str = '7-VPN Connectivity 7.8 - Ready to Elixir Connector install 9-Unified Installation';
SET @xml = CAST(('<X>' + REPLACE(@str, ' ', '</X><X>') + '</X>') AS XML);
SELECT
    -- same width as @str, so no token (e.g. "Connectivity") gets truncated
    N.value('.', 'VARCHAR(100)') AS value
FROM
    @xml.nodes('X') AS T(N);

-- Put a delimiter (here: a comma) at every position where the data should be split.
-- For example:
BEGIN TRAN
DECLARE @S     varchar(max),
        @Split char(1),
        @X     xml;
SELECT @S = '7-VPN Connectivity ,7.8- Ready to Elixir Connector install, 9-Unified Installation',
       @Split = ',';
-- Turn "a,b,c" into <root><s>a</s><s>b</s><s>c</s></root>
SELECT @X = CONVERT(xml, ' <root> <s>' + REPLACE(@S, @Split, '</s> <s>') + '</s> </root> ');
-- One row per <s> element; trim the blanks that surrounded the delimiters in the input
SELECT [Value] = LTRIM(RTRIM(T.c.value('.', 'varchar(255)')))
FROM @X.nodes('/root/s') T(c);
ROLLBACK TRAN

This is a horrible design! If there is the slightest chance to fix this you should change this the sooner the better...
You might try something like this, but use it only to clean up that mess!
-- Splits a string whose items have the form "<number>-<text>" but are not separated
-- by a real delimiter. Strategy: cut at every hyphen, then move the trailing number
-- of each fragment to the front of the following fragment.
DECLARE @YourString VARCHAR(100)='7-VPN Connectivity 7.8 - Ready to Elixir Connector install 9-Unified Installation';
WITH CutAtHyphen(Nr,part) AS
(
    -- One trimmed row per hyphen-separated fragment, numbered 1..n.
    -- NOTE(review): the numbering relies on nodes() returning rows in document order.
    SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
          ,LTRIM(RTRIM(A.part.value('text()[1]','nvarchar(max)')))
    FROM
    (
        -- The inner FOR XML PATH('') escapes characters such as & and < before the cast
        SELECT CAST('<x>' + REPLACE((SELECT @YourString AS [*] FOR XML PATH('')),'-','</x><x>') + '</x>' AS XML) AS Casted
    ) AS t
    CROSS APPLY t.Casted.nodes('/x') AS A(part)
)
,CutOffFinal AS
(
    -- Remainder = fragment up to (not including) its last blank,
    -- Tail      = text from the last blank on (the number of the NEXT item);
    --             for the first fragment the whole part is the Tail.
    SELECT Nr
          ,part
          ,LEFT(part,LEN(part)-PositionOf.LastBlank) AS Remainder
          ,CASE WHEN Nr>1 THEN RIGHT(part,PositionOf.LastBlank) ELSE part END AS Tail
    FROM CutAtHyphen
    -- position of the last blank, counted from the end of the fragment (0 = no blank)
    OUTER APPLY (SELECT CHARINDEX(' ',REVERSE(part))) AS PositionOf(LastBlank)
)
,recCTE AS
(
    -- anchor: the first fragment only contributes its Tail (the first number)
    SELECT Nr, CAST(N'' AS NVARCHAR(MAX)) AS String,Tail FROM CutOffFinal WHERE Nr=1
    UNION ALL
    -- step: previous Tail + '-' + current Remainder re-assembles one item
    SELECT cof.Nr
          ,r.Tail + '-' + cof.Remainder
          ,cof.Tail
    FROM recCTE AS r
    INNER JOIN CutOffFinal AS cof ON cof.Nr=r.Nr+1
)
-- The last row has no successor that would pick up its Tail, so append it here
SELECT String + CASE WHEN Nr=(SELECT MAX(Nr) FROM CutOffFinal) THEN Tail ELSE '' END AS FinalString
FROM recCTE
WHERE Nr>1;
This code will first of all cut the string at the hyphens and trim the parts. Then it will search for the last blank in each part and cut off the number, which belongs to the next row.
The recursive CTE will travel down the line and concatenate the tail of the previous row, with the remainder of the current.
The first and the last line need special treatment.

Related

wants string output which is in between 5th and 6th ‘/’

Input looks like
/nfs/site/disks/mtl_workdisk_003/mtlmedia/filter_bundle2846/regress/
/nfs/site/disks/mtl_workdisk_003/mtl3d/filter_bundle2846/regress/
/nfs/site/disks/mtl_workdisk_003/etl66/filter_bundle2846/regress/
Output should be
mtlmedia
mtl3d
etl66
I am able to successfully get the output, but is there any way to simplify it using some other functions or methods?
-- Sample paths held in a table variable
declare @T table
(
    InputString varchar(max)
);
insert into @T (InputString) values
('/nfs/site/ssd/mtl_workdisk_1/mediatek/filter_candle46/regression/'),
('/nfs/location/disks/mtl_workdisk_003/mtl3d/filter_candle2846/regress/'),
('/nfs/place/disks/mtl_workdisk_003/etl1266/bundle2846/regress/') ;
-- Locate the first six slashes one after another (each search starts right behind
-- the previous hit); the wanted folder name sits between the 5th and the 6th slash.
select
    substring(InputString, P5.Pos + 1, P6.Pos - P5.Pos - 1) as OutputString
from @T
cross apply (select (charindex('/', InputString))) as P1(Pos)
cross apply (select (charindex('/', InputString, P1.Pos+1))) as P2(Pos)
cross apply (select (charindex('/', InputString, P2.Pos+1))) as P3(Pos)
cross apply (select (charindex('/', InputString, P3.Pos+1))) as P4(Pos)
cross apply (select (charindex('/', InputString, P4.Pos+1))) as P5(Pos)
cross apply (select (charindex('/', InputString, P5.Pos+1))) as P6(Pos) ;
Approaches, based on built-in XML or JSON support, are possible options:
Statement, based on XML:
-- The path starts with '/', so element 1 is empty and element 6 is the wanted folder.
-- Expects a table variable @T with a column InputString.
SELECT CAST('<x>' + REPLACE(InputString, '/', '</x><x>') + '</x>' AS XML).value('/x[6]','varchar(max)')
FROM @T;
Statement, based on JSON:
-- Turns '/a/b/c/' into the JSON array ["","a","b","c",""]; JSON indexes are
-- zero-based, so '$[5]' is the same element the XML variant reads with x[6].
-- Expects a table variable @T with a column InputString.
SELECT JSON_VALUE(CONCAT('["', REPLACE(InputString, '/', '","'), '"]'), '$[5]')
FROM @T;
Result:
mediatek
mtl3d
etl1266
Notes:
JSON support is available from SQL Server 2016.
If you always know the root of the path and just want the first subfolder name after that, you could get a substring based on the length of the root combined with a charindex looking for the first slash after the root.
-- Sample paths that all share the same, known root
declare @paths as table(fullpath varchar(max));
insert into @paths (fullpath) values ('/nfs/site/disks/mtl_workdisk_003/mtlmedia/filter_bundle2846/regress/');
insert into @paths (fullpath) values ('/nfs/site/disks/mtl_workdisk_003/mtl3d/filter_bundle2846/regress/');
insert into @paths (fullpath) values ('/nfs/site/disks/mtl_workdisk_003/etl66/filter_bundle2846/regress/');
declare @root as varchar(max) = '/nfs/site/disks/mtl_workdisk_003/';
-- first character behind the root
declare @startPos as int = len(@root) + 1;
-- take everything from @startPos up to (not including) the next slash
select substring(fullpath, @startPos, CHARINDEX('/',fullpath,@startPos) - @startPos) as filename
from @paths
-- skip rows without a slash behind the root; SUBSTRING would get a negative length there
where CHARINDEX('/',fullpath,@startPos) > 0;
I did the where clause at the end to prevent any errors if for some reason, there was no subfolder (slash) found after the root.
If you always know the ending of the path as well, you could just use a REPLACE to remove the ending and not even have to search for the "/" with the CHARINDEX.

Combine string_split column results in table SQL

I'm trying to create a stored procedure for updating a table in a batch. I want to take parameters in as a nvarchar and call string_split on them.
#ParamList1 NVARCHAR(max) = '1,2,3,4,5'
#ParamList2 NVARCHAR(max) = 'a,b,c,d,e'
I want to get a temporary table like
Param1 Param2
1 a
2 b
3 c
...
How would I do this?
Unfortunately, string_split() does not guarantee ordering or provide a position argument (Microsoft are you listening?).
So, the safest method is a recursive CTE (or perhaps another approach using XML):
-- Walks both comma-separated lists in lock-step: every recursion level cuts the
-- first element off each list and keeps the remainder for the next level.
-- Expects @paramlist1 / @paramlist2 (nvarchar(max)) to be in scope.
with cte as (
    -- anchor: nothing extracted yet, both lists still complete
    select convert(nvarchar(max), NULL) as x1,
           convert(nvarchar(max), NULL) as x2,
           @paramlist1 as rest1,
           @paramlist2 as rest2,
           1 as lev
    union all
    -- appending ',' guarantees CHARINDEX finds a delimiter for the last element too
    select convert(nvarchar(max), left(rest1, charindex(',', rest1 + ',') - 1)),
           convert(nvarchar(max), left(rest2, charindex(',', rest2 + ',') - 1)),
           stuff(rest1, 1, charindex(',', rest1 + ','), ''),
           stuff(rest2, 1, charindex(',', rest2 + ','), ''),
           lev + 1
    from cte
    -- stop as soon as either list is used up
    where rest1 <> '' and rest2 <> ''
)
select x1, x2, rest1, rest2, lev
from cte
-- the anchor row carries no extracted values
where x1 is not null
order by lev
-- the default limit of 100 recursion levels would fail for lists with more elements
option (maxrecursion 0);
Here is a db<>fiddle.
You've got an answer already, which is working fine, but this should be faster and easier:
You did not specify your SQL-Server's version, but - talking about STRING_SPLIT() - I assume it's at least v2016. If this is correct, you can use OPENJSON. Your list of numbers needs nothing more than brackets to be a JSON-array ([1,2,3]), while an array of words/letters can be transformed with some easy string operations (["a","b","c"]).
Following the docs, OPENJSON returns the elements position in [key], while the element itself is returned in [value]. You can simply JOIN these sets:
DECLARE @ParamList1 NVARCHAR(max) = '1,2,3,4,5';
DECLARE @ParamList2 NVARCHAR(max) = 'a,b,c,d,e';
-- OPENJSON returns each array element's zero-based position in [key] and the element
-- itself in [value]; joining on [key] pairs the two lists up by position.
SELECT p1.[key] AS FragmentNr
      ,p1.[value] AS P1
      ,p2.[value] AS P2
-- a list of numbers only needs brackets to become a JSON array
FROM OPENJSON(CONCAT('[', @ParamList1, ']')) p1
-- strings need quotes; STRING_ESCAPE keeps quotes/backslashes inside a value from breaking the JSON
INNER JOIN OPENJSON(CONCAT('["', REPLACE(STRING_ESCAPE(@ParamList2, 'json'), ',', '","'), '"]')) p2 ON p1.[key]=p2.[key]
-- [key] is returned as a string, so sort numerically to get the original order
ORDER BY CAST(p1.[key] AS INT);
In this answer you will find some details (UPDATE section 1 and 2).

How to remove a string that left of character `;` and the contained string `U` and then display it?

I have a table and the values like this
000001U;000002;000003U;000004;000005U;000006U
and I want to display the field like this:
000002;000004;
Try This
DECLARE @Table AS TABLE (Data nvarchar(1000));
INSERT INTO @Table (Data)
SELECT '000001U;000002;000003U;000004;000005U;000006U';

-- Split each row at ';', keep the items that contain no 'U' and glue them back together.
SELECT STUFF((SELECT '; ' + Data
              FROM
              (
                  -- one row per <S> element
                  SELECT Split.a.value('.','nvarchar(1000)') AS Data
                  FROM
                  (
                      SELECT
                          CAST('<S>'+REPLACE(Data,';','</S><S>') +'</S>' AS XML ) AS Data
                      FROM @Table
                  ) AS A
                  CROSS APPLY Data.nodes('S') AS Split(a)
              ) dt
              WHERE CHARINDEX('U',Data)=0
              FOR XML PATH('')),
             -- the separator '; ' is two characters long, so strip two leading characters
             1, 2, '') AS Data;
Result
Data
---------
000002; 000004
As mentioned in the comments, SQL Server does not have any native regex replacement support. But, if you can get a dump of your entire table/column, then you can easily do a regex replacement in another tool, such as Notepad++.
Do a find on this pattern:
[0-9]+U;?
And then just replace with empty string. This should leave each row with the data you want to see. Here is a demo showing that this works in Java.
Demo
for SQL Server 2016 and later.
-- Keep only the items that do not end in 'U' and join them again with ';',
-- the delimiter used by the input list.
-- NOTE(review): STRING_SPLIT does not guarantee the order of the returned rows.
select stuff (
    (select ';' + value
     from STRING_SPLIT ('000001U;000002;000003U;000004;000005U;000006U', ';')
     where right(value, 1) <> 'U'
     for xml path('')),
    -- remove the leading separator
    1, 1, '');
For earlier versions, you may use any CSV splitter, like this one from Jeff Moden: http://www.sqlservercentral.com/articles/Tally+Table/72993/
Simple way is to determine the value by IsNumeric function.
DECLARE @GIVEN VARCHAR(MAX)='000001U;000002;000003U;000004;000005U;000006U';
DECLARE @FINAL VARCHAR(MAX)='';
-- Append every item that ISNUMERIC accepts, each followed by ';'.
-- NOTE(review): ISNUMERIC also accepts values such as '1e5' or '$'; use a stricter
-- check if the items can be anything other than "digits" / "digits + U".
SELECT @FINAL = @FINAL + CASE WHEN ISNUMERIC(VAL)=1 THEN VAL+';' ELSE '' END
FROM (
    -- one row per ';'-separated item
    SELECT split.x.value('.','varchar(max)') AS VAL
    FROM (
        SELECT CAST('<M>'+REPLACE(@GIVEN,';','</M><M>')+'</M>' AS XML) AS VAL
    ) A
    CROSS APPLY A.VAL.nodes('/M') AS split(x)
) AA;
PRINT @FINAL;
Result: 000002;000004;

select a particular string from a semi-colon delimited list [duplicate]

This question already has answers here:
Using T-SQL, return nth delimited element from a string
(14 answers)
Closed 6 years ago.
I want to extract a string which has semi-colon as a delimiter. I tried using Substring, Charindex and Left function. But I'm not able to get the desired result. Below is my select statement. Output result must be "Unsure how to perform task. Meter read 10 in office before testing". Thanks
-- Asker's attempt (does not yet return the desired text): start right behind the
-- caption 'uncalibratedReason:' (19 characters) and try to derive a length from
-- the position of the same caption.
Declare @string Varchar(max)='Sampling:45;Traveling:30;CalibratedNo;uncalibratedReason:: ' +
    'Unsure how to perform task. Meter read 10 in office before ' +
    'testing.;pH1:6.5;pH2:6.5;Dis.Oxygen1:7.4';
Select SubString(@string, (CHARINDEX('uncalibratedReason:', @string, 0) + 19),
       (CharIndex('uncalibratedReason:', LEFT(@string, (LEN(@string) -
       (CharIndex(';', @string, 0)))), 0) - 0)) As New;
Try it like this:
Declare @string Varchar(max) = 'Sampling:45;Traveling:30;CalibratedNo;uncalibratedReason:Unsure how to perform task. Meter read 10 in office before testing.;pH1:6.5;pH2:6.5;Dis.Oxygen1:7.4';
-- Every ';' becomes an element boundary; x[4] then addresses the 4th list element.
SELECT CAST('<x>' + REPLACE(@string,';','</x><x>') + '</x>' AS XML).value('x[4]','nvarchar(max)');
The result is:
uncalibratedReason:Unsure how to perform task. Meter read 10 in office before testing.
You can take away the leading uncalibratedReason: simply with SUBSTRING and CHARINDEX looking for : if you need this.
UPDATE
Here is the full code:
-- Expects the variable @string (the ';'-separated list) to be declared in the same batch.
DECLARE @result NVARCHAR(MAX)=
(SELECT CAST('<x>' + REPLACE(@string,';','</x><x>') + '</x>' AS XML).value('x[4]','nvarchar(max)'));
-- Cut off everything up to the first ':'; 10000 simply means "the rest of the string"
SELECT SUBSTRING(@result,CHARINDEX(':',@result)+1,10000);
UPDATE 2: Find position by starting string
-- Expects the variable @string (the ';'-separated list) to be declared in the same batch.
-- The XQuery predicate picks the first element whose text starts with the caption,
-- so the element's position in the list no longer matters.
DECLARE @result NVARCHAR(MAX)=
(SELECT CAST('<x>' + REPLACE(@string,';','</x><x>') + '</x>' AS XML).value('(x[substring(.,1,string-length("uncalibratedReason:")) eq "uncalibratedReason:"])[1]','nvarchar(max)'));
-- Cut off everything up to the first ':'; 10000 simply means "the rest of the string"
SELECT SUBSTRING(@result,CHARINDEX(':',@result)+1,10000);
UPDATE 3: The ultimate solution :-)
-- Splits the list twice: first at ';' into "Caption:Data" elements,
-- then each element at ':' into its caption and its data.
Declare @string Varchar(max) = 'Sampling:45;Traveling:30;CalibratedNo;uncalibratedReason:Unsure how to perform task. Meter read 10 in office before testing.;pH1:6.5;pH2:6.5;Dis.Oxygen1:7.4';
WITH Casted(ThePart) AS
(
    -- one row per ';'-separated element
    SELECT Node.value('.','nvarchar(max)')
    FROM
    (
        SELECT CAST('<x>' + REPLACE(@string,';','</x><x>') + '</x>' AS XML)
    ) AS tbl(AsXML)
    CROSS APPLY AsXML.nodes('/x') AS The(Node)
)
,Splitted(SpecificPart) AS
(
    -- each element becomes its own small XML: x[1] = caption, x[2] = data
    SELECT CAST('<x>' + REPLACE(ThePart,':','</x><x>') + '</x>' AS XML)
    FROM Casted
)
-- an element without ':' has no x[2], so its Data is NULL
SELECT SpecificPart.value('x[1]','nvarchar(max)') AS Caption
      ,SpecificPart.value('x[2]','nvarchar(max)') AS Data
FROM Splitted;
The result
Caption Data
CalibratedNo NULL
Dis.Oxygen1 7.4
pH1 6.5
pH2 6.5
Sampling 45
Traveling 30
uncalibratedReason Unsure how to perform task. Meter read 10 in office before testing.
Shnugo's answer is very cool. (UpVote)
However, this UDF Parser returns the sequence and value
Declare @string Varchar(max)='Sampling:45;Traveling:30;CalibratedNo;uncalibratedReason:: ' +
    'Unsure how to perform task. Meter read 10 in office before ' +
    'testing.;pH1:6.5;pH2:6.5;Dis.Oxygen1:7.4';
-- Key_PS is the position of the element, Key_Value its trimmed text
Select Key_PS, Key_Value from [dbo].[udf-Str-Parse](@string,';')
-- optional filters:
--Where Key_PS = 5
--Where Key_Value Like '%:%'
--Where Key_Value Like 'pH1%'
Returns
Key_PS Key_Value
1 Sampling:45
2 Traveling:30
3 CalibratedNo
4 uncalibratedReason:: Unsure how to perform task. Meter read 10 in office before testing.
5 pH1:6.5
6 pH2:6.5
7 Dis.Oxygen1:7.4
The UDF
CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@Delimeter varchar(10))
-- Splits @String at every occurrence of @Delimeter and returns one row per part.
--   Key_PS    : running number of the part (IDENTITY, starting at 1)
--   Key_Value : the part with leading/trailing blanks removed
-- NOTE(review): Key_PS follows the insert order, which is assumed to match the
-- order of the parts in @String -- confirm if strict ordering matters.
--Usage: Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--       Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
Returns @ReturnTable Table (Key_PS int IDENTITY(1,1), Key_Value varchar(max))
As
Begin
    Declare @XML xml;
    -- Each part is wrapped in a CDATA section so that characters such as & or <
    -- inside @String do not break the cast to XML.
    Set @XML = Cast('<x><![CDATA[' + Replace(@String,@Delimeter,']]></x><x><![CDATA[') + ']]></x>' as XML);
    Insert Into @ReturnTable (Key_Value)
    Select ltrim(rtrim(String.value('.', 'varchar(max)')))
    From @XML.nodes('x') as T(String);
    Return
End

SQL Server : select all after specific character

How I can select
"ALT1" if value is "W61N03D20V0-WHIH-ALT1"
"ALT2" if for "W61N03D20V0-WHIH-ALT2"
"SW" for "W61N03D20V0-WHIH-SW"
"Default" for "W61N26D1YA1-VICU" (without prefix)
"Default" for "W61N27D21V2-AZTD"
In other words, I'm looking for a way to extract the last part after the second suffix, but if I haven't got a second suffix, then the default.
Thanks for advice
Try it like this:
First you "split" the string on its minus signs with the XML trick.
Then you read the third node from your XML - voila!
-- Demo data in a temp table
CREATE TABLE #tbl (content VARCHAR(100));

INSERT INTO #tbl
VALUES ('W61N03D20V0-WHIH-ALT1'),
       ('W61N03D20V0-WHIH-SW'),
       ('W61N26D1YA1-VICU');

-- Each hyphen becomes an element boundary; the third element is the wanted suffix,
-- and a missing third element falls back to 'default'.
SELECT ISNULL(parts.AsXml.value('/x[3]', 'varchar(max)'), 'default') AS TheThirdPart
FROM #tbl AS src
CROSS APPLY (
    SELECT CAST('<x>' + REPLACE(src.content, '-', '</x><x>') + '</x>' AS XML)
) AS parts (AsXml);

DROP TABLE #tbl;
The result
ALT1
SW
default
Going this way would also give you the chance to get the other parts in one go, just by querying /x[1] and /x[2] too.
I did it using the built-in substring() function:
declare @str VARCHAR(40) = 'W61N03D20V0-WHIH-ALT1'; -- also works for the other examples
declare @sep VARCHAR(1) = '-';
-- everything behind the first separator
declare @middleToEnd VARCHAR(40) = substring(@str, charindex(@sep, @str) + 1, len(@str));
-- position of the second separator, relative to @middleToEnd (0 = there is none)
declare @pos INT = charindex(@sep, @middleToEnd);
declare @lastPart VARCHAR(40) =
    CASE WHEN @pos = 0
        THEN 'Default'
        ELSE substring(@middleToEnd, @pos + 1, len(@middleToEnd))
    END;
select @lastPart;
For best performance, you can solve it with this one-liner(calculation is one line)
-- Find the second hyphen and delete everything up to and including it.
-- Without a second hyphen the position is 0 -> NULLIF makes it NULL -> STUFF
-- yields NULL -> COALESCE substitutes 'Default'.
SELECT
    COALESCE(STUFF(src.col, 1, NULLIF(second_dash.pos, 0), ''), 'Default')
FROM (VALUES
        ('W61N03D20V0-WHIH-ALT1'),
        ('W61N03D20V0-WHIH-ALT2'),
        ('W61N03D20V0-WHIH-SW'),
        ('W61N26D1YA1-VICU'),
        ('W61N27D21V2-AZTD')
     ) AS src (col)
-- search for the second hyphen starts right behind the first one
CROSS APPLY (SELECT CHARINDEX('-', src.col) + 1) AS after_first (start_at)
CROSS APPLY (SELECT CHARINDEX('-', src.col, after_first.start_at)) AS second_dash (pos)
Result:
ALT1
ALT2
SW
Default
Default
If I understand what you are asking for, the following does what you need:
-- Stand-in for a real table: three sample codes
WITH SomeTable (Field1) AS (
    SELECT sample.Field1
    FROM (VALUES
            ('W61N03D20V0-WHIH-ALT1'),
            ('W61N03D20V0-WHIH-SW'),
            ('W61N26D1YA1-VICU')
         ) AS sample (Field1)
)
-- Return the text behind the marker '-WHIH-' (6 characters), or 'Default'
-- when the marker does not occur.
SELECT
    CASE
        WHEN marker.pos = 0 THEN 'Default'
        ELSE SUBSTRING(st.Field1, marker.pos + 6, LEN(st.Field1) - (marker.pos + 5))
    END
FROM SomeTable AS st
CROSS APPLY (SELECT CHARINDEX('-WHIH-', st.Field1)) AS marker (pos)
You can use a CASE expression to check whether the string starts with W61N03D20V0-WHIH.
If it starts with it use a combination of RIGHT, REVERSE and CHARINDEX functions to get last part from the string, else Default.
Query
-- For values starting with 'W61N03D20V0-WHIH' return the text behind the last hyphen,
-- for everything else return 'Default'.
select
    case
        when src.[your_column_name] like 'W61N03D20V0-WHIH%'
            -- pos_from_end counts from the end of the string, so pos_from_end - 1
            -- characters follow the last hyphen
            then right(src.[your_column_name], last_dash.pos_from_end - 1)
        else 'Default'
    end as new_column_name
from your_table_name as src
cross apply (
    select charindex('-', reverse(src.[your_column_name]), 1)
) as last_dash (pos_from_end);
SQl Fiddle demo

Resources