SQL: iterating through a list that contains some ranges - sql-server

I am trying to get information for products that have an ID that is contained in a list. The problem is that the list contains some single values and some range values:
PX03 - PX069, PX20, PX202, PX25 - PX270, PX250 - PX2509, PX251, PX2511 -
PX2513
Basically what I am looking for is some way to take a list or string containing both values and ranges and the end output is a table or list that has all of the values within the ranges individually so that I can loop through them.
I have a stored procedure that loops through all the ID's in the main products table that use the 'PX' prefix, but the table has all ids (i.e. PX 1 - 9999, LX 00001 - 99999) and I only want to search through those contained in the above list. I could write it out all the id's individually but some of the ranges contain many values, which would be time consuming to go through.
My idea was to create a separate table containing this list, in which there would be three columns: an identity column, and then one column each for the beginning and end of the range. Any items that do not have a range would just have the same value for beginning and end range, i.e.:
----------------------------------
rownum | range_start | range_end|
----------------------------------
1 PX03 PX069
2 PX20 PX20
3 PX202 PX202
4 PX25 PX25
5 PX250 PX2509
and then populating a table using something like:
SELECT id from product_table
WHERE id BETWEEN listtable.range_start AND listtable.range_end
where product_table is my original table with the product id's and their information and listtable is the new table I just created. This would give me:
id|
---
PX03
PX030
PX031
PX032
PX033
.
.
.
PX067
PX068
PX069
PX20
PX202
PX25
PX250
PX251
etc.
but I am thinking I would need to iterate through the list and I am not sure how to do that. Any ideas, hints or suggestions?
UPDATE
After creating the table using the solution given by @asantaballa, it was as simple as using an inner join:
SELECT d.id
FROM product_table d
INNER JOIN #RangeTable r
ON d.id BETWEEN r.RangeFrom AND r.RangeTo

See if this works for you for the part about converting the string to a table.
-- Parse a comma-separated list of single IDs and "from - to" ranges into a table of
-- (RangeFrom, RangeTo) rows. A single ID becomes a range whose two ends are equal.
-- Note: T-SQL variables must start with '@'; '#' names are temp tables and cannot be DECLAREd.
DECLARE @StrList VARCHAR(1000) = 'PX03 - PX069, PX20, PX202, PX25 - PX270, PX250 - PX2509, PX251, PX2511 - PX2513';
DECLARE @RangeTable TABLE (RangeFrom VARCHAR(32), RangeTo VARCHAR(32));
DECLARE @StrListItem VARCHAR(32);
DECLARE @RangeFrom VARCHAR(32),
        @RangeTo VARCHAR(32);
DECLARE @CommaPos INT,
        @DashPos INT;

-- Remove blanks and append a sentinel comma so the last item is terminated like the others.
SET @StrList = REPLACE(@StrList, ' ', '') + ',';
SET @CommaPos = CHARINDEX(',', @StrList);

WHILE @CommaPos > 0
BEGIN
    -- Current item = everything before the first remaining comma.
    SET @StrListItem = LEFT(@StrList, @CommaPos - 1);
    SET @DashPos = CHARINDEX('-', @StrListItem);

    IF @DashPos = 0
        SELECT @RangeFrom = @StrListItem,
               @RangeTo = @StrListItem;
    ELSE
        SELECT @RangeFrom = LEFT(@StrListItem, @DashPos - 1),
               @RangeTo = SUBSTRING(@StrListItem, @DashPos + 1, LEN(@StrListItem) - @DashPos);

    -- Ignore empty items (e.g. produced by a trailing or doubled comma).
    IF @StrListItem <> ''
        INSERT INTO @RangeTable (RangeFrom, RangeTo)
        VALUES (@RangeFrom, @RangeTo);

    -- Drop the consumed item and its comma; SUBSTRING past the end yields ''.
    SET @StrList = SUBSTRING(@StrList, @CommaPos + 1, 1000);
    SET @CommaPos = CHARINDEX(',', @StrList);
END;

SELECT RangeFrom, RangeTo
FROM @RangeTable;

Here is your string and product_table
DECLARE #STR VARCHAR(100) = 'PX03 - PX069, PX20, PX202, PX25 - PX270, PX250 - PX2509, PX251, PX2511 - PX2513'
SELECT * INTO #product_table
FROM
(
SELECT 'PX4' PRODID
UNION ALL
SELECT 'PX26'
UNION ALL
SELECT 'PX75'
UNION ALL
SELECT 'PX77'
)TAB
Now create a table to hold the value
CREATE TABLE #listtable(ROWNUM int IDENTITY(1,1),range_start VARCHAR(100),range_end VARCHAR(100))
Now insert the split values into the table.
INSERT INTO #listtable
SELECT
ISNULL(PARSENAME(REPLACE(Split.a.value('.', 'VARCHAR(100)'),'-','.'),2),Split.a.value('.', 'VARCHAR(100)')) 'range_start' ,
PARSENAME(REPLACE(Split.a.value('.', 'VARCHAR(100)'),'-','.'),1) 'range_end'
FROM
(
SELECT CAST ('<M>' + REPLACE(#STR, ',', '</M><M>') + '</M>' AS XML) AS Data
) AS A
CROSS APPLY Data.nodes ('/M') AS Split(a)
Since the Id is a string, you need a function to extract the numeric part of the Id (this function was created by Pinal Dave, the "God of SQL Server"):
-- Returns the input with every non-digit character removed (e.g. 'PX069' -> '069').
-- Parameters:
--   @strAlphaNumeric  text to strip; at most 256 characters.
-- Returns:
--   VARCHAR(256) containing only the digits, in their original order. Leading zeros are
--   kept because the result is a string. Returns '0' when the input is NULL and an empty
--   string when the input contains no digits.
CREATE FUNCTION dbo.udf_GetNumeric
(@strAlphaNumeric VARCHAR(256))
RETURNS VARCHAR(256)
AS
BEGIN
    DECLARE @intAlpha INT
    -- Position of the first character that is not 0-9 (0 when there is none).
    SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)
    WHILE @intAlpha > 0
    BEGIN
        -- Delete that single character, then look for the next offender.
        SET @strAlphaNumeric = STUFF(@strAlphaNumeric, @intAlpha, 1, '')
        SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)
    END
    -- String literal '0' (not the integer 0) keeps the expression purely VARCHAR.
    RETURN ISNULL(@strAlphaNumeric, '0')
END
First of all, keep in mind that we will not get PX1, PX2, PX3, PX4 if you use id BETWEEN listtable.range_start AND listtable.range_end, because those values are of type varchar and are therefore compared as strings, not as numbers. So we need to extract the number from each PX id, generate the numbers between the two ends, and prepend PX to each of them.
Here is the query which filters the IDs in product_table which are in the range between listtable
;WITH CTE AS
(
SELECT ROWNUM,CAST(dbo.udf_GetNumeric(range_start)AS INT) NUMBERS,
CAST(dbo.udf_GetNumeric(range_end)AS INT) RTO1
FROM #listtable
UNION ALL
SELECT T.ROWNUM,NUMBERS+1,RTO1
FROM #listtable T
JOIN CTE ON CTE.ROWNUM = T.ROWNUM
WHERE NUMBERS < RTO1
)
SELECT PRODID IDS--,ROWNUM,NUMBERS NUMS,'PX'+CAST(NUMBERS AS VARCHAR(10)) IDS2
FROM CTE
JOIN #product_table ON PRODID='PX'+CAST(NUMBERS AS VARCHAR(10))
ORDER BY NUMBERS
option (MaxRecursion 0)
SQL FIDDLE

Related

PIVOT the data from single column in sql server [duplicate]

I have a table like this
Value String
-------------------
1 Cleo, Smith
I want to separate the comma delimited string into two columns
Value Name Surname
-------------------
1 Cleo Smith
I need only two fixed extra columns
Your purpose can be solved using following query -
Select Value , Substring(FullName, 1,Charindex(',', FullName)-1) as Name,
Substring(FullName, Charindex(',', FullName)+1, LEN(FullName)) as Surname
from Table1
There is no readymade Split function in sql server, so we need to create user defined function.
-- Splits @InputString on @Delimiter and returns one row per item, in order of appearance.
-- Parameters:
--   @InputString  text to split (VARCHAR(8000)).
--   @Delimiter    separator, up to 50 characters. A blank or empty delimiter means
--                 "split on spaces" (spaces are rewritten to commas first, so existing
--                 commas also split). NULL defaults to ','.
-- Returns:
--   @Items(Item): every piece between delimiters, including empty pieces. When no
--   delimiter occurs, the single row is the whole input (a NULL row for NULL input).
-- Fixes over the previous version: multi-character delimiters are consumed in full
-- (previously only one character was skipped), and the remainder is taken with a
-- fixed-length SUBSTRING so trailing blanks are no longer lost to LEN().
CREATE FUNCTION Split (
    @InputString VARCHAR(8000),
    @Delimiter VARCHAR(50)
)
RETURNS @Items TABLE (
    Item VARCHAR(8000)
)
AS
BEGIN
    -- '' = ' ' is true in T-SQL (trailing blanks are ignored by '='), so this branch
    -- handles both a blank and an empty delimiter.
    IF @Delimiter = ' '
    BEGIN
        SET @Delimiter = ','
        SET @InputString = REPLACE(@InputString, ' ', @Delimiter)
    END
    IF (@Delimiter IS NULL OR @Delimiter = '')
        SET @Delimiter = ','

    DECLARE @ItemList VARCHAR(8000)
    DECLARE @DelimIndex INT
    DECLARE @DelimLen INT

    -- LEN() ignores trailing blanks; appending a sentinel gives the true length.
    SET @DelimLen = LEN(@Delimiter + 'x') - 1
    SET @ItemList = @InputString
    SET @DelimIndex = CHARINDEX(@Delimiter, @ItemList)

    WHILE (@DelimIndex > 0)
    BEGIN
        INSERT INTO @Items (Item) VALUES (LEFT(@ItemList, @DelimIndex - 1))
        -- Drop the item just emitted together with the whole delimiter.
        SET @ItemList = SUBSTRING(@ItemList, @DelimIndex + @DelimLen, 8000)
        SET @DelimIndex = CHARINDEX(@Delimiter, @ItemList)
    END -- End WHILE

    -- Whatever remains is the last item (or the whole input when no delimiter was found).
    INSERT INTO @Items (Item) VALUES (@ItemList)
    RETURN
END -- End Function
GO
---- Set Permissions
--GRANT SELECT ON Split TO UserRole1
--GRANT SELECT ON Split TO UserRole2
--GO
;WITH Split_Names (Value,Name, xmlname)
AS
(
SELECT Value,
Name,
CONVERT(XML,'<Names><name>'
+ REPLACE(Name,',', '</name><name>') + '</name></Names>') AS xmlname
FROM tblnames
)
SELECT Value,
xmlname.value('/Names[1]/name[1]','varchar(100)') AS Name,
xmlname.value('/Names[1]/name[2]','varchar(100)') AS Surname
FROM Split_Names
and also check the link below for reference
http://jahaines.blogspot.in/2009/06/converting-delimited-string-of-values.html
xml based answer is simple and clean
refer this
DECLARE #S varchar(max),
#Split char(1),
#X xml
SELECT #S = 'ab,cd,ef,gh,ij',
#Split = ','
SELECT #X = CONVERT(xml,' <root> <myvalue>' +
REPLACE(#S,#Split,'</myvalue> <myvalue>') + '</myvalue> </root> ')
SELECT T.c.value('.','varchar(20)'), --retrieve ALL values at once
T.c.value('(/root/myvalue)[1]','VARCHAR(20)') , --retrieve index 1 only, which is the 'ab'
T.c.value('(/root/myvalue)[2]','VARCHAR(20)')
FROM #X.nodes('/root/myvalue') T(c)
I think this is cool
SELECT value,
PARSENAME(REPLACE(String,',','.'),2) 'Name' ,
PARSENAME(REPLACE(String,',','.'),1) 'Surname'
FROM table WITH (NOLOCK)
With CROSS APPLY
-- Split "Name,Surname" into two columns using chained CROSS APPLYs.
-- Two commas are appended so both CHARINDEX calls always find a delimiter,
-- even when the source value contains no comma (Surname is then empty).
select ParsedData.*
from MyTable mt
cross apply ( select str = mt.String + ',,' ) f1
cross apply ( select p1 = charindex( ',', str ) ) ap1            -- position of first comma
cross apply ( select p2 = charindex( ',', str, p1 + 1 ) ) ap2    -- position of second comma
cross apply ( select Name = substring( str, 1, p1-1 )            -- alias was misspelled "Nmame"
, Surname = substring( str, p1+1, p2-p1-1 )
) ParsedData
There are multiple ways to solve this and many different ways have been proposed already. Simplest would be to use LEFT / SUBSTRING and other string functions to achieve the desired result.
Sample Data
DECLARE #tbl1 TABLE (Value INT,String VARCHAR(MAX))
INSERT INTO #tbl1 VALUES(1,'Cleo, Smith');
INSERT INTO #tbl1 VALUES(2,'John, Mathew');
Using String Functions like LEFT
SELECT
Value,
LEFT(String,CHARINDEX(',',String)-1) as Fname,
LTRIM(RIGHT(String,LEN(String) - CHARINDEX(',',String) )) AS Lname
FROM #tbl1
This approach fails if there are more than 2 items in a String.
In such a scenario, we can use a splitter and then use PIVOT or convert the string into an XML and use .nodes to get string items. XML based solution have been detailed out by aads and bvr in their solution.
The answers for this question which use splitter, all use WHILE which is inefficient for splitting. Check this performance comparison. One of the best splitters around is DelimitedSplit8K, created by Jeff Moden. You can read more about it here
Splitter with PIVOT
DECLARE #tbl1 TABLE (Value INT,String VARCHAR(MAX))
INSERT INTO #tbl1 VALUES(1,'Cleo, Smith');
INSERT INTO #tbl1 VALUES(2,'John, Mathew');
SELECT t3.Value,[1] as Fname,[2] as Lname
FROM #tbl1 as t1
CROSS APPLY [dbo].[DelimitedSplit8K](String,',') as t2
PIVOT(MAX(Item) FOR ItemNumber IN ([1],[2])) as t3
Output
Value Fname Lname
1 Cleo Smith
2 John Mathew
DelimitedSplit8K by Jeff Moden
CREATE FUNCTION [dbo].[DelimitedSplit8K]
/**********************************************************************************************************************
Purpose:
Split a given string at a given delimiter and return a list of the split elements (items).
Notes:
1. Leading and trailing delimiters are treated as if an empty string element were present.
2. Consecutive delimiters are treated as if an empty string element were present between them.
3. Except when spaces are used as a delimiter, all spaces present in each element are preserved.
Returns:
iTVF containing the following:
ItemNumber = Element position of Item as a BIGINT (not converted to INT to eliminate a CAST)
Item = Element value as a VARCHAR(8000)
Statistics on this function may be found at the following URL:
http://www.sqlservercentral.com/Forums/Topic1101315-203-4.aspx
CROSS APPLY Usage Examples and Tests:
--=====================================================================================================================
-- TEST 1:
-- This tests for various possible conditions in a string using a comma as the delimiter. The expected results are
-- laid out in the comments
--=====================================================================================================================
--===== Conditionally drop the test tables to make reruns easier for testing.
-- (this is NOT a part of the solution)
IF OBJECT_ID('tempdb..#JBMTest') IS NOT NULL DROP TABLE #JBMTest
;
--===== Create and populate a test table on the fly (this is NOT a part of the solution).
-- In the following comments, "b" is a blank and "E" is an element in the left to right order.
-- Double Quotes are used to encapsulate the output of "Item" so that you can see that all blanks
-- are preserved no matter where they may appear.
SELECT *
INTO #JBMTest
FROM ( --# & type of Return Row(s)
SELECT 0, NULL UNION ALL --1 NULL
SELECT 1, SPACE(0) UNION ALL --1 b (Empty String)
SELECT 2, SPACE(1) UNION ALL --1 b (1 space)
SELECT 3, SPACE(5) UNION ALL --1 b (5 spaces)
SELECT 4, ',' UNION ALL --2 b b (both are empty strings)
SELECT 5, '55555' UNION ALL --1 E
SELECT 6, ',55555' UNION ALL --2 b E
SELECT 7, ',55555,' UNION ALL --3 b E b
SELECT 8, '55555,' UNION ALL --2 E b
SELECT 9, '55555,1' UNION ALL --2 E E
SELECT 10, '1,55555' UNION ALL --2 E E
SELECT 11, '55555,4444,333,22,1' UNION ALL --5 E E E E E
SELECT 12, '55555,4444,,333,22,1' UNION ALL --6 E E b E E E
SELECT 13, ',55555,4444,,333,22,1,' UNION ALL --8 b E E b E E E b
SELECT 14, ',55555,4444,,,333,22,1,' UNION ALL --9 b E E b b E E E b
SELECT 15, ' 4444,55555 ' UNION ALL --2 E (w/Leading Space) E (w/Trailing Space)
SELECT 16, 'This,is,a,test.' --4 E E E E
) d (SomeID, SomeValue)
;
--===== Split the CSV column for the whole table using CROSS APPLY (this is the solution)
SELECT test.SomeID, test.SomeValue, split.ItemNumber, Item = QUOTENAME(split.Item,'"')
FROM #JBMTest test
CROSS APPLY dbo.DelimitedSplit8K(test.SomeValue,',') split
;
--=====================================================================================================================
-- TEST 2:
-- This tests for various "alpha" splits and COLLATION using all ASCII characters from 0 to 255 as a delimiter against
-- a given string. Note that not all of the delimiters will be visible and some will show up as tiny squares because
-- they are "control" characters. More specifically, this test will show you what happens to various non-accented
-- letters for your given collation depending on the delimiter you chose.
--=====================================================================================================================
WITH
cteBuildAllCharacters (String,Delimiter) AS
(
SELECT TOP 256
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
CHAR(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))-1)
FROM master.sys.all_columns
)
SELECT ASCII_Value = ASCII(c.Delimiter), c.Delimiter, split.ItemNumber, Item = QUOTENAME(split.Item,'"')
FROM cteBuildAllCharacters c
CROSS APPLY dbo.DelimitedSplit8K(c.String,c.Delimiter) split
ORDER BY ASCII_Value, split.ItemNumber
;
-----------------------------------------------------------------------------------------------------------------------
Other Notes:
1. Optimized for VARCHAR(8000) or less. No testing or error reporting for truncation at 8000 characters is done.
2. Optimized for single character delimiter. Multi-character delimiters should be resolved externally from this
function.
3. Optimized for use with CROSS APPLY.
4. Does not "trim" elements just in case leading or trailing blanks are intended.
5. If you don't know how a Tally table can be used to replace loops, please see the following...
http://www.sqlservercentral.com/articles/T-SQL/62867/
6. Changing this function to use NVARCHAR(MAX) will cause it to run twice as slow. It's just the nature of
VARCHAR(MAX) whether it fits in-row or not.
7. Multi-machine testing for the method of using UNPIVOT instead of 10 SELECT/UNION ALLs shows that the UNPIVOT method
is quite machine dependent and can slow things down quite a bit.
-----------------------------------------------------------------------------------------------------------------------
Credits:
This code is the product of many people's efforts including but not limited to the following:
cteTally concept originally by Itzik Ben-Gan and "decimalized" by Lynn Pettis (and others) for a bit of extra speed
and finally redacted by Jeff Moden for a different slant on readability and compactness. Hat's off to Paul White for
his simple explanations of CROSS APPLY and for his detailed testing efforts. Last but not least, thanks to
Ron "BitBucket" McCullough and Wayne Sheffield for their extreme performance testing across multiple machines and
versions of SQL Server. The latest improvement brought an additional 15-20% improvement over Rev 05. Special thanks
to "Nadrek" and "peter-757102" (aka Peter de Heer) for bringing such improvements to light. Nadrek's original
improvement brought about a 10% performance gain and Peter followed that up with the content of Rev 07.
I also thank whoever wrote the first article I ever saw on "numbers tables" which is located at the following URL
and to Adam Machanic for leading me to it many years ago.
http://sqlserver2000.databases.aspfaq.com/why-should-i-consider-using-an-auxiliary-numbers-table.html
-----------------------------------------------------------------------------------------------------------------------
Revision History:
Rev 00 - 20 Jan 2010 - Concept for inline cteTally: Lynn Pettis and others.
Redaction/Implementation: Jeff Moden
- Base 10 redaction and reduction for CTE. (Total rewrite)
Rev 01 - 13 Mar 2010 - Jeff Moden
- Removed one additional concatenation and one subtraction from the SUBSTRING in the SELECT List for that tiny
bit of extra speed.
Rev 02 - 14 Apr 2010 - Jeff Moden
- No code changes. Added CROSS APPLY usage example to the header, some additional credits, and extra
documentation.
Rev 03 - 18 Apr 2010 - Jeff Moden
- No code changes. Added notes 7, 8, and 9 about certain "optimizations" that don't actually work for this
type of function.
Rev 04 - 29 Jun 2010 - Jeff Moden
- Added WITH SCHEMABINDING thanks to a note by Paul White. This prevents an unnecessary "Table Spool" when the
function is used in an UPDATE statement even though the function makes no external references.
Rev 05 - 02 Apr 2011 - Jeff Moden
- Rewritten for extreme performance improvement especially for larger strings approaching the 8K boundary and
for strings that have wider elements. The redaction of this code involved removing ALL concatenation of
delimiters, optimization of the maximum "N" value by using TOP instead of including it in the WHERE clause,
and the reduction of all previous calculations (thanks to the switch to a "zero based" cteTally) to just one
instance of one add and one instance of a subtract. The length calculation for the final element (not
followed by a delimiter) in the string to be split has been greatly simplified by using the ISNULL/NULLIF
combination to determine when the CHARINDEX returned a 0 which indicates there are no more delimiters to be
had or to start with. Depending on the width of the elements, this code is between 4 and 8 times faster on a
single CPU box than the original code especially near the 8K boundary.
- Modified comments to include more sanity checks on the usage example, etc.
- Removed "other" notes 8 and 9 as they were no longer applicable.
Rev 06 - 12 Apr 2011 - Jeff Moden
- Based on a suggestion by Ron "Bitbucket" McCullough, additional test rows were added to the sample code and
the code was changed to encapsulate the output in pipes so that spaces and empty strings could be perceived
in the output. The first "Notes" section was added. Finally, an extra test was added to the comments above.
Rev 07 - 06 May 2011 - Peter de Heer, a further 15-20% performance enhancement has been discovered and incorporated
into this code which also eliminated the need for a "zero" position in the cteTally table.
**********************************************************************************************************************/
--===== Define I/O parameters (T-SQL parameter names must start with '@')
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
                -- for both a performance gain and prevention of accidental "overruns"
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    SELECT 1 UNION ALL
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
    SELECT s.N1,
           ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
       Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
GO
With SQL Server 2016 we can use string_split to accomplish this:
create table commasep (
id int identity(1,1)
,string nvarchar(100) )
insert into commasep (string) values ('John, Adam'), ('test1,test2,test3')
select id, [value] as String from commasep
cross apply string_split(string,',')
-- Splits @string on a single-character @delimiter and returns the trimmed pieces,
-- numbered in order of appearance.
-- Parameters:
--   @string     text to split; a NULL input returns no rows.
--   @delimiter  one-character separator.
-- Returns:
--   @out_put(column_id, value): column_id is 1-based; value is LTRIM/RTRIM-ed.
--   A trailing delimiter does not produce an extra empty row.
-- Fix: the previous version restarted each search one character past the item start,
-- so an empty element was glued to the next one ('a,,b' gave 'a' and ',b').
CREATE FUNCTION [dbo].[fn_split_string_to_column] (
    @string NVARCHAR(MAX),
    @delimiter CHAR(1)
)
RETURNS @out_put TABLE (
    [column_id] INT IDENTITY(1, 1) NOT NULL,
    [value] NVARCHAR(MAX)
)
AS
BEGIN
    DECLARE @start INT = 1,   -- 1-based position where the current item begins
            @next INT         -- position of the delimiter that ends the current item

    -- Make sure the last item is terminated by a delimiter as well.
    SET @string = CASE
                      WHEN RIGHT(@string, 1) != @delimiter
                          THEN @string + @delimiter
                      ELSE @string
                  END

    SET @next = CHARINDEX(@delimiter, @string, @start)
    WHILE @next > 0
    BEGIN
        INSERT INTO @out_put ([value])
        SELECT LTRIM(RTRIM(SUBSTRING(@string, @start, @next - @start))) AS [column]

        -- Continue right after the delimiter just consumed.
        SET @start = @next + 1
        SET @next = CHARINDEX(@delimiter, @string, @start)
    END
    RETURN
END
-- Split "first,last" in NAME into two columns.
-- SUBSTRING(x, 0, n) returns the first n-1 characters, i.e. everything before the comma.
-- The last name must start one character AFTER the comma (the previous version started
-- on the comma itself and returned ",last"); LTRIM drops the blank after the comma.
SELECT id,
       Substring(NAME, 0, Charindex(',', NAME)) AS firstname,
       Ltrim(Substring(NAME, Charindex(',', NAME) + 1, Len(NAME))) AS lastname
FROM spilt
Try this (change instances of ' ' to ',' or whatever delimiter you want to use)
-- Returns the N-th space-separated word of a string.
-- Parameters:
--   @multiwordstring  text to parse (trailing blanks are ignored).
--   @wordnumber       1-based index of the word to return.
-- Returns:
--   The requested word, or NULL when @wordnumber is NULL, below 1, or greater than the
--   number of words. Consecutive spaces yield empty words.
-- Fixes: the previous version consumed each word with REPLACE(), which removed EVERY
-- occurrence of "<word> " from the remainder ('a b a c' lost its third word), and it
-- truncated words to 50 characters. The leading word is now cut off by position.
CREATE FUNCTION dbo.Wordparser
(
    @multiwordstring VARCHAR(255),
    @wordnumber NUMERIC
)
returns VARCHAR(255)
AS
BEGIN
    DECLARE @remainingstring VARCHAR(255) = RTRIM(@multiwordstring)
    DECLARE @currentword INT = 1                              -- index of the word at the head of @remainingstring
    DECLARE @spacepos INT = CHARINDEX(' ', @remainingstring)  -- end of that word (0 = last word)

    IF @wordnumber IS NULL OR @wordnumber < 1
        RETURN NULL

    -- Drop leading words until the requested one is at the head of the string.
    WHILE @spacepos > 0 AND @currentword < @wordnumber
    BEGIN
        SET @remainingstring = SUBSTRING(@remainingstring, @spacepos + 1, 255)
        SET @currentword = @currentword + 1
        SET @spacepos = CHARINDEX(' ', @remainingstring)
    END

    -- Ran out of words before reaching the requested index.
    IF @currentword < @wordnumber
        RETURN NULL

    RETURN CASE
               WHEN @spacepos > 0 THEN LEFT(@remainingstring, @spacepos - 1)
               ELSE @remainingstring
           END
END
Example usage:
SELECT dbo.Wordparser(COLUMN, 1),
dbo.Wordparser(COLUMN, 2),
dbo.Wordparser(COLUMN, 3)
FROM TABLE
I think PARSENAME is the neat function to use for this example, as described in this article: http://www.sqlshack.com/parsing-and-rotating-delimited-data-in-sql-server-2012/
The PARSENAME function is logically designed to parse four-part object names. The nice thing about PARSENAME is that it’s not limited to parsing just SQL Server four-part object names – it will parse any function or string data that is delimited by dots.
The first parameter is the object to parse, and the second is the integer value of the object piece to return. The article is discussing parsing and rotating delimited data - company phone numbers, but it can be used to parse name/surname data also.
Example:
USE COMPANY;
SELECT PARSENAME('Whatever.you.want.parsed',3) AS 'ReturnValue';
The article also describes using a Common Table Expression (CTE) called ‘replaceChars’, to run PARSENAME against the delimiter-replaced values. A CTE is useful for returning a temporary view or result set.
After that, the UNPIVOT function has been used to convert some columns into rows; SUBSTRING and CHARINDEX functions have been used for cleaning up the inconsistencies in the data, and the LAG function (new for SQL Server 2012) has been used in the end, as it allows referencing of previous records.
We can create a function as this
-- Splits a comma-separated list into rows, trimming blanks around each value.
-- Parameters:
--   @CSVList  comma-separated text; a trailing comma is optional.
-- Returns:
--   @Table(ColumnData VARCHAR(100)): one row per value in order of appearance.
--   NULL and empty input return no rows.
-- Note: variable/parameter names use '@' (the '#' prefix is reserved for temp tables).
CREATE Function [dbo].[fn_CSVToTable]
(
    @CSVList Varchar(max)
)
RETURNS @Table TABLE (ColumnData VARCHAR(100))
AS
BEGIN
    DECLARE @Pos BIGINT = 1,      -- position of the comma ending the current value
            @OldPos BIGINT = 1    -- position where the current value starts

    -- Guarantee a terminating comma so the last value is handled by the loop.
    IF RIGHT(@CSVList, 1) <> ','
        SET @CSVList = @CSVList + ','

    WHILE @Pos < LEN(@CSVList)
    BEGIN
        SET @Pos = CHARINDEX(',', @CSVList, @OldPos)
        INSERT INTO @Table (ColumnData)
        VALUES (LTRIM(RTRIM(SUBSTRING(@CSVList, @OldPos, @Pos - @OldPos))))
        SET @OldPos = @Pos + 1
    END
    RETURN
END
We can then separate the CSV values into our respective columns using a SELECT statement.
You can use the table-valued function STRING_SPLIT, which is available only when the database compatibility level is 130 or higher. If your database compatibility level is lower than 130, SQL Server will not be able to find and execute the STRING_SPLIT function. You can change the compatibility level of a database using the following command:
ALTER DATABASE DatabaseName SET COMPATIBILITY_LEVEL = 130
Syntax
SELECT * FROM STRING_SPLIT ( string, separator )
see documentation here
I think following function will work for you:
You have to create a function in SQL first. Like this
-- Splits @str on a single-character @delimiter.
-- Parameters:
--   @str        text to split; NULL returns no rows.
--   @delimiter  one-character separator (a space is supported).
-- Returns:
--   @returnTable(idx, item): idx is the 1-based position of the item; an empty element
--   (two adjacent delimiters, or a leading/trailing delimiter) is returned as NULL.
-- Fix: the previous loop was driven by LEN(), which ignores trailing blanks; with a
-- space delimiter the last item was reached with no delimiter left and SUBSTRING was
-- called with a negative length. The loop is now driven by CHARINDEX positions.
CREATE FUNCTION [dbo].[fn_split](
    @str VARCHAR(MAX),
    @delimiter CHAR(1)
)
RETURNS @returnTable TABLE (idx INT PRIMARY KEY IDENTITY, item VARCHAR(8000))
AS
BEGIN
    DECLARE @start INT = 1,   -- where the current item begins
            @pos INT          -- delimiter that terminates the current item

    -- Sentinel delimiter so the last item is terminated too.
    SELECT @str = @str + @delimiter
    SET @pos = CHARINDEX(@delimiter, @str, @start)

    WHILE @pos > 0
    BEGIN
        -- @pos = @start means nothing lies between two delimiters: store NULL.
        INSERT @returnTable (item)
        VALUES (CASE WHEN @pos > @start THEN SUBSTRING(@str, @start, @pos - @start) END)

        SET @start = @pos + 1
        SET @pos = CHARINDEX(@delimiter, @str, @start)
    END
    RETURN
END
You can call this function, like this:
select * from fn_split('1,24,5',',')
Implementation:
Declare #test TABLE (
ID VARCHAR(200),
Data VARCHAR(200)
)
insert into #test
(ID, Data)
Values
('1','Cleo,Smith')
insert into #test
(ID, Data)
Values
('2','Paul,Grim')
select ID,
(select item from fn_split(Data,',') where idx in (1)) as Name ,
(select item from fn_split(Data,',') where idx in (2)) as Surname
from #test
The result will look like this:
Use Parsename() function
with cte as(
select 'Aria,Karimi' as FullName
Union
select 'Joe,Karimi' as FullName
Union
select 'Bab,Karimi' as FullName
)
SELECT PARSENAME(REPLACE(FullName,',','.'),2) as Name,
PARSENAME(REPLACE(FullName,',','.'),1) as Family
FROM cte
Result
Name Family
----- ------
Aria Karimi
Bab Karimi
Joe Karimi
Try this:
declare #csv varchar(100) ='aaa,bb,csda,daass';
set #csv = #csv+',';
with cte as
(
select SUBSTRING(#csv,1,charindex(',',#csv,1)-1) as val, SUBSTRING(#csv,charindex(',',#csv,1)+1,len(#csv)) as rem
UNION ALL
select SUBSTRING(a.rem,1,charindex(',',a.rem,1)-1)as val, SUBSTRING(a.rem,charindex(',',a.rem,1)+1,len(A.rem))
from cte a where LEN(a.rem)>=1
) select val from cte
This function is the fastest:
-- Returns the N-th piece of @String when split on @Separator.
-- Parameters:
--   @String        text to split.
--   @NroSubString  1-based index of the piece to return.
--   @Separator     separator, 1 to 5 characters (e.g. ',' or ', ').
-- Returns:
--   The requested piece, or an empty string when the index is NULL, below 1 or past
--   the last piece, or when @String or @Separator is NULL or @Separator is empty.
-- Fix: the start of the next piece is now advanced by the full separator length.
-- Previously it advanced by one character, so with ', ' every piece after the first
-- came back with the separator's trailing blank in front of it.
CREATE FUNCTION dbo.F_ExtractSubString
(
    @String VARCHAR(MAX),
    @NroSubString INT,
    @Separator VARCHAR(5)
)
RETURNS VARCHAR(MAX) AS
BEGIN
    -- LEN() ignores trailing blanks, so measure the separator with a sentinel appended.
    DECLARE @SepLen INT = LEN(@Separator + 'x') - 1
    DECLARE @St INT = 1, @End INT, @Ret VARCHAR(MAX) = ''

    IF @NroSubString >= 1 AND @SepLen > 0
    BEGIN
        -- Sentinel separator so the last piece is terminated like the others.
        SET @String = @String + @Separator
        SET @End = CHARINDEX(@Separator, @String, @St)

        -- Skip pieces until the requested one starts at @St and ends before @End.
        WHILE @End > 0 AND @NroSubString > 1
        BEGIN
            SET @St = @End + @SepLen
            SET @End = CHARINDEX(@Separator, @String, @St)
            SET @NroSubString = @NroSubString - 1
        END

        IF @End > 0
            SET @Ret = SUBSTRING(@String, @St, @End - @St)
    END
    RETURN @Ret
END
GO
Example usage:
SELECT dbo.F_ExtractSubString(COLUMN, 1, ', '),
dbo.F_ExtractSubString(COLUMN, 2, ', '),
dbo.F_ExtractSubString(COLUMN, 3, ', ')
FROM TABLE
I encountered a similar but more complex problem, and since this is the first thread I found regarding that issue, I decided to post my findings. I know it is a complex solution to a simple problem, but I hope it helps other people who come to this thread looking for a more complex solution. I had to split a string containing 5 numbers (column name: levelsFeed) and show each number in a separate column.
for example: 8,1,2,2,2
should be shown as :
1 2 3 4 5
-------------
8 1 2 2 2
Solution 1: using XML functions:
This is by far the slowest solution.
-- Split the comma-separated levelsFeed column into five INT columns via XML.
-- Each value is wrapped in <r>...</r>; (/H/r)[n] then picks the n-th value.
-- DISTINCT collapses the one-row-per-<r> result of the CROSS APPLY back to one row
-- per feedback. (The stray comma after FeedbackID, a syntax error, has been removed.)
SELECT Distinct FeedbackID
, S.a.value('(/H/r)[1]', 'INT') AS level1
, S.a.value('(/H/r)[2]', 'INT') AS level2
, S.a.value('(/H/r)[3]', 'INT') AS level3
, S.a.value('(/H/r)[4]', 'INT') AS level4
, S.a.value('(/H/r)[5]', 'INT') AS level5
FROM (
SELECT *,CAST (N'<H><r>' + REPLACE(levelsFeed, ',', '</r><r>') + '</r> </H>' AS XML) AS [vals]
FROM Feedbacks
) as d
CROSS APPLY d.[vals].nodes('/H/r') S(a)
Solution 2: using Split function and pivot. (the split function split a string to rows with the column name Data)
SELECT FeedbackID, [1],[2],[3],[4],[5]
FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY feedbackID ORDER BY (SELECT null)) as rn
FROM (
SELECT FeedbackID, levelsFeed
FROM Feedbacks
) as a
CROSS APPLY dbo.Split(levelsFeed, ',')
) as SourceTable
PIVOT
(
MAX(data)
FOR rn IN ([1],[2],[3],[4],[5])
)as pivotTable
Solution 3: using string manipulations functions - fastest by small margin over solution 2
SELECT FeedbackID,
SUBSTRING(levelsFeed,0,CHARINDEX(',',levelsFeed)) AS level1,
PARSENAME(REPLACE(SUBSTRING(levelsFeed,CHARINDEX(',',levelsFeed)+1,LEN(levelsFeed)),',','.'),4) AS level2,
PARSENAME(REPLACE(SUBSTRING(levelsFeed,CHARINDEX(',',levelsFeed)+1,LEN(levelsFeed)),',','.'),3) AS level3,
PARSENAME(REPLACE(SUBSTRING(levelsFeed,CHARINDEX(',',levelsFeed)+1,LEN(levelsFeed)),',','.'),2) AS level4,
PARSENAME(REPLACE(SUBSTRING(levelsFeed,CHARINDEX(',',levelsFeed)+1,LEN(levelsFeed)),',','.'),1) AS level5
FROM Feedbacks
Since levelsFeed contains 5 string values and PARSENAME handles at most four parts, I needed to use the SUBSTRING function for the first value.
I hope that my solution will help others who came to this thread looking for more complex split-to-columns methods.
Using instring function :)
-- Split "Fname,Sname" at the comma (instr / two-argument substring dialects).
-- Both columns must locate the comma: searching for a space in Fname returned the
-- first name with the comma still attached for values such as 'Cleo, Smith'.
-- Single quotes are used for the string literals, as standard SQL requires.
select Value,
substring(String,1,instr(String,',') -1) Fname,
substring(String,instr(String,',') +1) Sname
from tablename;
Used two functions,
1. substring(string, position, length) ==> returns length characters of string, starting at position
2. instr(string, pattern) ==> returns the position of pattern within string.
If we don't provide the length argument to substring, it returns everything up to the end of the string.
This worked for me
-- Splits @delimited on @delimiter by turning the text into XML and shredding it.
-- Parameters:
--   @delimited  text to split (Unicode).
--   @delimiter  separator, up to 100 characters.
-- Returns:
--   @t(val): one row per piece. Row order is whatever nodes() yields; no explicit
--   ordering is applied.
-- Fixes: '&', '<' and '>' in the input are escaped before the XML conversion (they used
-- to raise an XML parsing error), and values are read back as NVARCHAR so Unicode text
-- is not lost. Characters that are not legal in XML at all (most control characters)
-- are still unsupported.
CREATE FUNCTION [dbo].[SplitString](
    @delimited NVARCHAR(MAX),
    @delimiter NVARCHAR(100)
) RETURNS @t TABLE ( val NVARCHAR(MAX))
AS
BEGIN
    DECLARE @xml XML
    -- Escaping can make the delimiter up to 5x longer ('&' -> '&amp;').
    DECLARE @escDelimiter NVARCHAR(500)

    -- '&' must be escaped first so the entities added afterwards are left alone.
    SET @delimited = REPLACE(REPLACE(REPLACE(@delimited, N'&', N'&amp;'), N'<', N'&lt;'), N'>', N'&gt;')
    SET @escDelimiter = REPLACE(REPLACE(REPLACE(@delimiter, N'&', N'&amp;'), N'<', N'&lt;'), N'>', N'&gt;')

    SET @xml = N'<t>' + REPLACE(@delimited, @escDelimiter, N'</t><t>') + N'</t>'

    INSERT INTO @t(val)
    SELECT r.value('.','nvarchar(MAX)') as item
    FROM @xml.nodes('/t') as records(r)
    RETURN
END
mytable:
Value ColOne
--------------------
1 Cleo, Smith
The following should work if there aren't too many columns
ALTER TABLE mytable ADD ColTwo nvarchar(256);
UPDATE mytable SET ColTwo = LEFT(ColOne, Charindex(',', ColOne) - 1);
--'Cleo' = LEFT('Cleo, Smith', Charindex(',', 'Cleo, Smith') - 1)
UPDATE mytable SET ColTwo = REPLACE(ColOne, ColTwo + ',', '');
--' Smith' = REPLACE('Cleo, Smith', 'Cleo' + ',')
UPDATE mytable SET ColOne = REPLACE(ColOne, ',' + ColTwo, ''), ColTwo = LTRIM(ColTwo);
--'Cleo' = REPLACE('Cleo, Smith', ',' + ' Smith', '')
Result:
Value ColOne ColTwo
--------------------
1 Cleo Smith
DECLARE #INPUT VARCHAR (MAX)='N,A,R,E,N,D,R,A'
DECLARE #ELIMINATE_CHAR CHAR (1)=','
DECLARE #L_START INT=1
DECLARE #L_END INT=(SELECT LEN (#INPUT))
DECLARE #OUTPUT CHAR (1)
WHILE #L_START <=#L_END
BEGIN
SET #OUTPUT=(SUBSTRING (#INPUT,#L_START,1))
IF #OUTPUT!=#ELIMINATE_CHAR
BEGIN
PRINT #OUTPUT
END
SET #L_START=#L_START+1
END
You may find the solution in SQL User Defined Function to Parse a Delimited String helpful (from The Code Project).
This is the code part from this page:
CREATE FUNCTION [fn_ParseText2Table]
(@p_SourceText VARCHAR(MAX)
,@p_Delimeter VARCHAR(100)=',' --default to comma delimited.
)
RETURNS @retTable
TABLE([Position] INT IDENTITY(1,1)
,[Int_Value] INT
,[Num_Value] NUMERIC(18,3)
,[Txt_Value] VARCHAR(MAX)
,[Date_value] DATETIME
)
AS
/*
********************************************************************************
Purpose: Parse values from a delimited string
& return the result as an indexed table
Copyright 1996, 1997, 2000, 2003 Clayton Groom (Clayton_Groom@hotmail.com)
Posted to the public domain Aug, 2004
2003-06-17 Rewritten as SQL 2000 function.
Reworked to allow for delimiters > 1 character in length
and to convert Text values to numbers
2016-04-05 Added logic for date values based on "new" ISDATE() function, Updated to use XML approach, which is more efficient.

Parameters:
  @p_SourceText  delimited text to parse.
  @p_Delimeter   separator (1-100 characters); defaults to ','.
Returns one row per item:
  Position    1-based identity assigned on insert (no explicit ORDER BY is applied).
  Int_Value   item rounded to a whole number when ISNUMERIC() accepts it and it
              converts to NUMERIC/INT, otherwise NULL.
  Num_Value   item as NUMERIC(18,3) under the same rule, otherwise NULL.
  Txt_Value   item text as-is.
  Date_value  item as DATETIME when ISDATE() accepts it, otherwise NULL.
Review notes:
  - ISNUMERIC() returns 1 for strings such as '$', '.', '-' or '1e5' that cannot be
    CAST to NUMERIC; TRY_CAST (SQL Server 2012+) now yields NULL instead of failing
    the whole call.
  - '&', '<' and '>' are escaped before the XML conversion so they no longer raise an
    XML parsing error.
********************************************************************************
*/
BEGIN
    DECLARE @w_xml xml;
    DECLARE @w_text VARCHAR(MAX);
    DECLARE @w_delim VARCHAR(500); -- escaped delimiter can be up to 5x longer

    -- '&' first, so the entities introduced afterwards are not escaped again.
    SET @w_text = replace(replace(replace(@p_SourceText, '&', '&amp;'), '<', '&lt;'), '>', '&gt;');
    SET @w_delim = replace(replace(replace(@p_Delimeter, '&', '&amp;'), '<', '&lt;'), '>', '&gt;');

    SET @w_xml = N'<root><i>' + replace(@w_text, @w_delim,'</i><i>') + '</i></root>';

    INSERT INTO @retTable
    ([Int_Value]
    , [Num_Value]
    , [Txt_Value]
    , [Date_value]
    )
    SELECT CASE
               WHEN ISNUMERIC([v].[txt]) = 1
               THEN TRY_CAST(TRY_CAST([v].[txt] AS NUMERIC) AS INT)
           END AS [Int_Value]
         , CASE
               WHEN ISNUMERIC([v].[txt]) = 1
               THEN TRY_CAST([v].[txt] AS NUMERIC(18, 3))
           END AS [Num_Value]
         , [v].[txt] AS [Txt_Value]
         , CASE
               WHEN ISDATE([v].[txt]) = 1
               THEN TRY_CAST([v].[txt] AS DATETIME)
           END AS [Date_value]
    FROM @w_xml.nodes('//root/i') AS [Items]([i])
    -- Extract the item text once instead of once per output column.
    CROSS APPLY (SELECT [i].value('.', 'VARCHAR(MAX)') AS [txt]) AS [v];
    RETURN;
END;
GO
-- Returns the character position of the N-th occurrence of @delimiter in @String.
-- Parameters:
--   @delimiter  single character to look for.
--   @occurence  1-based occurrence number.
--   @String     text to search.
-- Returns:
--   The 1-based position of that occurrence, or 0 when there are fewer occurrences,
--   when @occurence is NULL or below 1, or when an argument is NULL.
-- The recursive CTE of the previous version visited every delimiter in the string and
-- carried unused columns; a loop stops as soon as the requested occurrence is found.
ALTER function get_occurance_index(@delimiter varchar(1),@occurence int,@String varchar(100))
returns int
AS Begin
    Declare @result int = 0   -- position of the most recently found delimiter
    Declare @found int = 0    -- number of delimiters found so far

    while @found < @occurence
    begin
        set @result = charindex(@delimiter, @String, @result + 1)
        if @result = 0
            break   -- no further occurrence: report 0
        set @found = @found + 1
    end

    return isnull(@result,0)
ENd
-- Demo: split each three-part CSV value into its parts using get_occurance_index.
DECLARE #data AS TABLE (data varchar(100));

INSERT INTO #data (data)
VALUES ('1,2,3'),
       ('aaa,bbbbb,cccc');

SELECT TOP 3
    -- First part: starting at 0 makes SUBSTRING return first_comma - 1 characters.
    SUBSTRING(d.data, 0, c.first_comma),
    -- Middle part: the text between the first and second comma.
    SUBSTRING(d.data, c.first_comma + 1, c.second_comma - c.first_comma - 1),
    -- Last part: LEN(data) is a safe upper bound for the remaining length.
    SUBSTRING(d.data, c.second_comma + 1, LEN(d.data)),
    d.data
FROM #data AS d
CROSS APPLY (
    SELECT dbo.get_occurance_index(',', 1, d.data) AS first_comma,
           dbo.get_occurance_index(',', 2, d.data) AS second_comma
) AS c
I found that using PARSENAME as above caused any name with a period to get nulled.
So if there was an initial or a title in the name followed by a dot they return NULL.
I found this worked for me:
-- Split "Name,Surname" at the first comma; any commas left in either half are removed.
SELECT
    REPLACE(SUBSTRING(t.FullName, 1, p.comma_pos), ',', '') AS Name,
    REPLACE(SUBSTRING(t.FullName, p.comma_pos, LEN(t.FullName)), ',', '') AS Surname
FROM Table1 AS t
CROSS APPLY (SELECT CHARINDEX(',', t.FullName) AS comma_pos) AS p
This is straightforward; you can do it with the query below:
-- Split a two-part value at its comma: everything before it, then everything after it.
DECLARE #str NVARCHAR(MAX) = 'ControlID_05436b78-04ba-9667-fa01-9ff8c1b7c235,3';
DECLARE #comma_at BIGINT = CHARINDEX(',', #str);

SELECT
    LEFT(#str, #comma_at - 1),
    RIGHT(#str, LEN(#str) - #comma_at)
-- Splits the ';'-delimited modelFileId of each contract row into up to four
-- columns (mfid1..mfid4). Only rows whose modelFileId contains a ';' are returned.
select distinct modelFileId,F4.*
from contract
-- F2: wrap every ';'-separated piece in an <x> element and convert the result to XML.
cross apply (select XmlList=convert(xml, '<x>'+replace(modelFileId,';','</x><x>')+'</x>').query('.')) F2
-- F4: read the 1st..4th <x> element; a missing element yields NULL.
-- NOTE(review): nodes('x') produces one row per <x>, but the value() paths start
-- at '/', so every one of those rows appears to carry the same four values and
-- DISTINCT collapses them again - confirm before relying on this.
cross apply (select mfid1=XmlNode.value('/x[1]','varchar(512)')
,mfid2=XmlNode.value('/x[2]','varchar(512)')
,mfid3=XmlNode.value('/x[3]','varchar(512)')
,mfid4=XmlNode.value('/x[4]','varchar(512)') from XmlList.nodes('x') F3(XmlNode)) F4
where modelFileId like '%;%'
order by modelFileId
-- Keep the rows whose PROJ_UID appears in #params and whose RES_UID appears in #res.
-- A match requires the id wrapped in commas (',<id>,') to occur inside the list
-- string, so each list needs a leading and a trailing comma.
SELECT DISTINCT
    a.PROJ_UID,
    a.PROJ_NAME,
    a.RES_UID
FROM E2E_ProjectWiseTimesheetActuals AS a
WHERE CHARINDEX(',' + CAST(a.PROJ_UID AS varchar(8000)) + ',', #params) > 0
  AND CHARINDEX(',' + CAST(a.RES_UID AS varchar(8000)) + ',', #res) > 0
I re-wrote an answer above and made it better:
CREATE FUNCTION [dbo].[CSVParser]
(
    #s VARCHAR(255),
    #idx NUMERIC
)
RETURNS VARCHAR(255)
BEGIN
    -- Returns the #idx-th (1-based) comma-separated item of #s.
    --   * NULL #s                       -> NULL
    --   * #idx NULL, < 1 or past the end -> '' (empty string)
    -- The return type is as wide as the input so long items are not truncated.

    -- Guard clauses: a NULL string never yields a comma position of 0, so
    -- without them a NULL #s with an #idx other than 1 would loop forever.
    IF #s IS NULL
        RETURN NULL
    IF #idx IS NULL OR #idx < 1
        RETURN ''

    DECLARE #comma INT
    SET #comma = CHARINDEX(',', #s)

    -- Drop leading items until the wanted one is at the front of #s.
    WHILE #idx > 1 AND #comma > 0
    BEGIN
        -- SUBSTRING (rather than RIGHT/LEN) keeps trailing spaces intact.
        SET #s = SUBSTRING(#s, #comma + 1, 255)
        SET #comma = CHARINDEX(',', #s)
        SET #idx = #idx - 1
    END

    -- Ran out of commas before reaching the requested item.
    IF #idx > 1
        RETURN ''

    -- The wanted item is followed by more items: cut it off at the comma.
    IF #comma > 0
        RETURN LEFT(#s, #comma - 1)

    -- The wanted item is the last one.
    RETURN #s
END
Example usage:
-- COLUMN and TABLE are placeholders: substitute the CSV column and its table.
-- Each call returns the 1st, 2nd and 3rd comma-separated item of the column.
SELECT dbo.CSVParser(COLUMN, 1),
dbo.CSVParser(COLUMN, 2),
dbo.CSVParser(COLUMN, 3)
FROM TABLE
The question is simple, but the problem comes up a lot :)
So I created a wrapper around string_split() that pivots the result in a more generic way. It is a table function that returns the columns (nn, value1, value2, ... , value50), which is enough for most CSV lines. If there are more values, they wrap onto the next row; nn is the row number. Set the third parameter, #columnCnt = [yourNumber], to wrap at a specific position:
alter FUNCTION fn_Split50
(
    #str varchar(max),
    #delim char(1),
    #columnCnt int = 50
)
RETURNS TABLE
AS
RETURN
-- Splits #str on #delim and lays the items out #columnCnt per row:
--   nn               = output row number (1-based)
--   value1..value50  = the items of that row, NULL where there is no item.
-- NOTE: items are numbered with ROW_NUMBER() over an unspecified order, so
-- the numbering relies on string_split returning items in input order.
WITH numbered AS (
    -- Give every item a running number.
    SELECT
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS item_no,
        parts.value AS item
    FROM string_split(#str, #delim) AS parts
),
bucketed AS (
    -- Map the running number to an output row (nn) and a column name (nnn).
    SELECT
        (item_no - 1) / #columnCnt + 1 AS nn,
        'value' + CAST(((item_no - 1) % #columnCnt) + 1 AS varchar(10)) AS nnn,
        item AS value
    FROM numbered
)
SELECT
    nn,
    value1, value2, value3, value4, value5, value6, value7, value8, value9, value10,
    value11, value12, value13, value14, value15, value16, value17, value18, value19, value20,
    value21, value22, value23, value24, value25, value26, value27, value28, value29, value30,
    value31, value32, value33, value34, value35, value36, value37, value38, value39, value40,
    value41, value42, value43, value44, value45, value46, value47, value48, value49, value50
FROM bucketed
PIVOT
(
    MAX(value)
    FOR nnn IN (
        value1, value2, value3, value4, value5, value6, value7, value8, value9, value10,
        value11, value12, value13, value14, value15, value16, value17, value18, value19, value20,
        value21, value22, value23, value24, value25, value26, value27, value28, value29, value30,
        value31, value32, value33, value34, value35, value36, value37, value38, value39, value40,
        value41, value42, value43, value44, value45, value46, value47, value48, value49, value50
    )
) AS PivotTable
Example of using:
-- 5 items with the default width of 50: a single row (nn = 1).
select * from dbo.fn_split50('zz1,aa2,ss3,dd4,ff5', ',', DEFAULT)
-- 10 items wrapped every 3 columns: four rows (nn = 1..4).
select * from dbo.fn_split50('zz1,aa2,ss3,dd4,ff5,gg6,hh7,jj8,ww9,qq10', ',', 3)
-- 20 items wrapped every 2 columns: ten rows of (value1, value2) pairs.
select * from dbo.fn_split50('zz1,11,aa2,22,ss3,33,dd4,44,ff5,55,gg6,66,hh7,77,jj8,88,ww9,99,qq10,1010', ',',2)
Hope it helps :)

Split Delimited strings and insert them into table columns SQL 2014 [duplicate]

I have a table like this
Value String
-------------------
1 Cleo, Smith
I want to separate the comma delimited string into two columns
Value Name Surname
-------------------
1 Cleo Smith
I need only two fixed extra columns
Your purpose can be solved using following query -
-- Split FullName at its first comma: the text before it is the name, the text after it the surname.
-- A FullName without a comma makes the first SUBSTRING fail with a negative length.
SELECT
    t.Value,
    SUBSTRING(t.FullName, 1, p.comma_pos - 1) AS Name,
    SUBSTRING(t.FullName, p.comma_pos + 1, LEN(t.FullName)) AS Surname
FROM Table1 AS t
CROSS APPLY (SELECT CHARINDEX(',', t.FullName) AS comma_pos) AS p
Before SQL Server 2016 (which added STRING_SPLIT) there was no ready-made split function in SQL Server, so we need to create a user-defined function.
CREATE FUNCTION Split (
    #InputString VARCHAR(8000),
    #Delimiter VARCHAR(50)
)
RETURNS #Items TABLE (
    Item VARCHAR(8000)
)
AS
-- Splits #InputString on #Delimiter and returns one row per item.
--   * NULL or empty delimiter -> comma.
--   * A delimiter made only of spaces -> every space in the input is first
--     turned into a comma, then the input is split on commas.
--   * Multi-character delimiters are supported.
--   * A NULL input returns a single NULL row.
BEGIN
    -- Test for "empty" with DATALENGTH: T-SQL pads strings before comparing,
    -- so '' = ' ' is TRUE and an equality test cannot tell the two apart.
    IF #Delimiter IS NULL OR DATALENGTH(#Delimiter) = 0
        SET #Delimiter = ','
    ELSE IF #Delimiter = ' '
    BEGIN
        SET #Delimiter = ','
        SET #InputString = REPLACE(#InputString, ' ', #Delimiter)
    END

    -- DATALENGTH, not LEN: LEN ignores trailing spaces (LEN(', ') = 1).
    DECLARE #DelimLen INT
    DECLARE #ItemList VARCHAR(8000)
    DECLARE #DelimIndex INT
    SET #DelimLen = DATALENGTH(#Delimiter)
    SET #ItemList = #InputString
    SET #DelimIndex = CHARINDEX(#Delimiter, #ItemList)

    WHILE #DelimIndex > 0
    BEGIN
        INSERT INTO #Items (Item) VALUES (LEFT(#ItemList, #DelimIndex - 1))
        -- Skip the whole delimiter, not just its first character.
        SET #ItemList = SUBSTRING(#ItemList, #DelimIndex + #DelimLen, 8000)
        SET #DelimIndex = CHARINDEX(#Delimiter, #ItemList)
    END

    -- Whatever is left is the last item (or the whole input when it held no delimiter).
    INSERT INTO #Items (Item) VALUES (#ItemList)
    RETURN
END -- End Function
GO
---- Set Permissions
--GRANT SELECT ON Split TO UserRole1
--GRANT SELECT ON Split TO UserRole2
--GO
-- Turn each "Name,Surname" value into <Names><name>..</name><name>..</name></Names>
-- and read the first and second <name> element as separate columns.
SELECT
    n.Value,
    x.xmlname.value('/Names[1]/name[1]', 'varchar(100)') AS Name,
    x.xmlname.value('/Names[1]/name[2]', 'varchar(100)') AS Surname
FROM tblnames AS n
CROSS APPLY (
    SELECT CONVERT(XML, '<Names><name>'
           + REPLACE(n.Name, ',', '</name><name>') + '</name></Names>') AS xmlname
) AS x
and also check the link below for reference
http://jahaines.blogspot.in/2009/06/converting-delimited-string-of-values.html
xml based answer is simple and clean
refer this
-- Demo: split a delimited string by converting it to XML.
DECLARE #S varchar(max) = 'ab,cd,ef,gh,ij';
DECLARE #Split char(1) = ',';
DECLARE #X xml;

-- Wrap every item in a <myvalue> element.
SET #X = CONVERT(xml, ' <root> <myvalue>' +
         REPLACE(#S, #Split, '</myvalue> <myvalue>') + '</myvalue> </root> ');

-- nodes() yields one row per <myvalue> element.
SELECT
    T.c.value('.', 'varchar(20)'),                    -- value of the current row's element
    T.c.value('(/root/myvalue)[1]', 'VARCHAR(20)'),   -- always the first element ('ab')
    T.c.value('(/root/myvalue)[2]', 'VARCHAR(20)')    -- always the second element ('cd')
FROM #X.nodes('/root/myvalue') T(c)
I think this is cool
-- PARSENAME numbers dot-separated parts from the right, so after swapping
-- ',' for '.' part 2 is the name and part 1 the surname.
-- "table" is a placeholder for the real table name.
SELECT
    value,
    PARSENAME(REPLACE(String, ',', '.'), 2) AS [Name],
    PARSENAME(REPLACE(String, ',', '.'), 1) AS [Surname]
FROM table WITH (NOLOCK)
With CROSS APPLY
-- Splits MyTable.String ("Name,Surname") into two columns with chained CROSS APPLYs.
select ParsedData.*
from MyTable mt
-- Append two commas so both CHARINDEX calls below always find a delimiter.
cross apply ( select str = mt.String + ',,' ) f1
-- p1 / p2: positions of the first and second comma.
cross apply ( select p1 = charindex( ',', str ) ) ap1
cross apply ( select p2 = charindex( ',', str, p1 + 1 ) ) ap2
-- Name: text before the first comma. Surname: text between the two commas.
cross apply ( select Name = substring( str, 1, p1-1 )
, Surname = substring( str, p1+1, p2-p1-1 )
) ParsedData
There are multiple ways to solve this and many different ways have been proposed already. Simplest would be to use LEFT / SUBSTRING and other string functions to achieve the desired result.
Sample Data
-- Sample data: one "Name, Surname" string per row.
DECLARE #tbl1 TABLE (Value INT, String VARCHAR(MAX));

INSERT INTO #tbl1 (Value, String)
VALUES (1, 'Cleo, Smith'),
       (2, 'John, Mathew');
Using String Functions like LEFT
-- Fname: text before the comma. Lname: text after it, with leading blanks trimmed.
SELECT
    t.Value,
    LEFT(t.String, p.comma_pos - 1) AS Fname,
    LTRIM(RIGHT(t.String, LEN(t.String) - p.comma_pos)) AS Lname
FROM #tbl1 AS t
CROSS APPLY (SELECT CHARINDEX(',', t.String) AS comma_pos) AS p
This approach fails if there are more than 2 items in a String.
In such a scenario, we can use a splitter and then use PIVOT or convert the string into an XML and use .nodes to get string items. XML based solution have been detailed out by aads and bvr in their solution.
The answers for this question which use splitter, all use WHILE which is inefficient for splitting. Check this performance comparison. One of the best splitters around is DelimitedSplit8K, created by Jeff Moden. You can read more about it here
Splitter with PIVOT
-- Demo: split each String with DelimitedSplit8K and pivot the first two items into columns.
DECLARE #tbl1 TABLE (Value INT,String VARCHAR(MAX))
INSERT INTO #tbl1 VALUES(1,'Cleo, Smith');
INSERT INTO #tbl1 VALUES(2,'John, Mathew');
-- The splitter returns one (ItemNumber, Item) row per item;
-- PIVOT turns ItemNumber 1 and 2 into the columns [1] and [2].
SELECT t3.Value,[1] as Fname,[2] as Lname
FROM #tbl1 as t1
CROSS APPLY [dbo].[DelimitedSplit8K](String,',') as t2
PIVOT(MAX(Item) FOR ItemNumber IN ([1],[2])) as t3
Output
Value Fname Lname
1 Cleo Smith
2 John Mathew
DelimitedSplit8K by Jeff Moden
CREATE FUNCTION [dbo].[DelimitedSplit8K]
/**********************************************************************************************************************
Purpose:
Split a given string at a given delimiter and return a list of the split elements (items).
Notes:
1. Leading and trailing delimiters are treated as if an empty string element were present.
2. Consecutive delimiters are treated as if an empty string element were present between them.
3. Except when spaces are used as a delimiter, all spaces present in each element are preserved.
Returns:
iTVF containing the following:
ItemNumber = Element position of Item as a BIGINT (not converted to INT to eliminate a CAST)
Item = Element value as a VARCHAR(8000)
Statistics on this function may be found at the following URL:
http://www.sqlservercentral.com/Forums/Topic1101315-203-4.aspx
CROSS APPLY Usage Examples and Tests:
--=====================================================================================================================
-- TEST 1:
-- This tests for various possible conditions in a string using a comma as the delimiter. The expected results are
-- laid out in the comments
--=====================================================================================================================
--===== Conditionally drop the test tables to make reruns easier for testing.
-- (this is NOT a part of the solution)
IF OBJECT_ID('tempdb..#JBMTest') IS NOT NULL DROP TABLE #JBMTest
;
--===== Create and populate a test table on the fly (this is NOT a part of the solution).
-- In the following comments, "b" is a blank and "E" is an element in the left to right order.
-- Double Quotes are used to encapsulate the output of "Item" so that you can see that all blanks
-- are preserved no matter where they may appear.
SELECT *
INTO #JBMTest
FROM ( --# & type of Return Row(s)
SELECT 0, NULL UNION ALL --1 NULL
SELECT 1, SPACE(0) UNION ALL --1 b (Empty String)
SELECT 2, SPACE(1) UNION ALL --1 b (1 space)
SELECT 3, SPACE(5) UNION ALL --1 b (5 spaces)
SELECT 4, ',' UNION ALL --2 b b (both are empty strings)
SELECT 5, '55555' UNION ALL --1 E
SELECT 6, ',55555' UNION ALL --2 b E
SELECT 7, ',55555,' UNION ALL --3 b E b
SELECT 8, '55555,' UNION ALL --2 E b
SELECT 9, '55555,1' UNION ALL --2 E E
SELECT 10, '1,55555' UNION ALL --2 E E
SELECT 11, '55555,4444,333,22,1' UNION ALL --5 E E E E E
SELECT 12, '55555,4444,,333,22,1' UNION ALL --6 E E b E E E
SELECT 13, ',55555,4444,,333,22,1,' UNION ALL --8 b E E b E E E b
SELECT 14, ',55555,4444,,,333,22,1,' UNION ALL --9 b E E b b E E E b
SELECT 15, ' 4444,55555 ' UNION ALL --2 E (w/Leading Space) E (w/Trailing Space)
SELECT 16, 'This,is,a,test.' --4 E E E E
) d (SomeID, SomeValue)
;
--===== Split the CSV column for the whole table using CROSS APPLY (this is the solution)
SELECT test.SomeID, test.SomeValue, split.ItemNumber, Item = QUOTENAME(split.Item,'"')
FROM #JBMTest test
CROSS APPLY dbo.DelimitedSplit8K(test.SomeValue,',') split
;
--=====================================================================================================================
-- TEST 2:
-- This tests for various "alpha" splits and COLLATION using all ASCII characters from 0 to 255 as a delimiter against
-- a given string. Note that not all of the delimiters will be visible and some will show up as tiny squares because
-- they are "control" characters. More specifically, this test will show you what happens to various non-accented
-- letters for your given collation depending on the delimiter you chose.
--=====================================================================================================================
WITH
cteBuildAllCharacters (String,Delimiter) AS
(
SELECT TOP 256
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
CHAR(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))-1)
FROM master.sys.all_columns
)
SELECT ASCII_Value = ASCII(c.Delimiter), c.Delimiter, split.ItemNumber, Item = QUOTENAME(split.Item,'"')
FROM cteBuildAllCharacters c
CROSS APPLY dbo.DelimitedSplit8K(c.String,c.Delimiter) split
ORDER BY ASCII_Value, split.ItemNumber
;
-----------------------------------------------------------------------------------------------------------------------
Other Notes:
1. Optimized for VARCHAR(8000) or less. No testing or error reporting for truncation at 8000 characters is done.
2. Optimized for single character delimiter. Multi-character delimiters should be resolved externally from this
function.
3. Optimized for use with CROSS APPLY.
4. Does not "trim" elements just in case leading or trailing blanks are intended.
5. If you don't know how a Tally table can be used to replace loops, please see the following...
http://www.sqlservercentral.com/articles/T-SQL/62867/
6. Changing this function to use NVARCHAR(MAX) will cause it to run twice as slow. It's just the nature of
VARCHAR(MAX) whether it fits in-row or not.
7. Multi-machine testing for the method of using UNPIVOT instead of 10 SELECT/UNION ALLs shows that the UNPIVOT method
is quite machine dependent and can slow things down quite a bit.
-----------------------------------------------------------------------------------------------------------------------
Credits:
This code is the product of many people's efforts including but not limited to the following:
cteTally concept originally by Itzik Ben-Gan and "decimalized" by Lynn Pettis (and others) for a bit of extra speed
and finally redacted by Jeff Moden for a different slant on readability and compactness. Hats off to Paul White for
his simple explanations of CROSS APPLY and for his detailed testing efforts. Last but not least, thanks to
Ron "BitBucket" McCullough and Wayne Sheffield for their extreme performance testing across multiple machines and
versions of SQL Server. The latest improvement brought an additional 15-20% improvement over Rev 05. Special thanks
to "Nadrek" and "peter-757102" (aka Peter de Heer) for bringing such improvements to light. Nadrek's original
improvement brought about a 10% performance gain and Peter followed that up with the content of Rev 07.
I also thank whoever wrote the first article I ever saw on "numbers tables" which is located at the following URL
and to Adam Machanic for leading me to it many years ago.
http://sqlserver2000.databases.aspfaq.com/why-should-i-consider-using-an-auxiliary-numbers-table.html
-----------------------------------------------------------------------------------------------------------------------
Revision History:
Rev 00 - 20 Jan 2010 - Concept for inline cteTally: Lynn Pettis and others.
Redaction/Implementation: Jeff Moden
- Base 10 redaction and reduction for CTE. (Total rewrite)
Rev 01 - 13 Mar 2010 - Jeff Moden
- Removed one additional concatenation and one subtraction from the SUBSTRING in the SELECT List for that tiny
bit of extra speed.
Rev 02 - 14 Apr 2010 - Jeff Moden
- No code changes. Added CROSS APPLY usage example to the header, some additional credits, and extra
documentation.
Rev 03 - 18 Apr 2010 - Jeff Moden
- No code changes. Added notes 7, 8, and 9 about certain "optimizations" that don't actually work for this
type of function.
Rev 04 - 29 Jun 2010 - Jeff Moden
- Added WITH SCHEMABINDING thanks to a note by Paul White. This prevents an unnecessary "Table Spool" when the
function is used in an UPDATE statement even though the function makes no external references.
Rev 05 - 02 Apr 2011 - Jeff Moden
- Rewritten for extreme performance improvement especially for larger strings approaching the 8K boundary and
for strings that have wider elements. The redaction of this code involved removing ALL concatenation of
delimiters, optimization of the maximum "N" value by using TOP instead of including it in the WHERE clause,
and the reduction of all previous calculations (thanks to the switch to a "zero based" cteTally) to just one
instance of one add and one instance of a subtract. The length calculation for the final element (not
followed by a delimiter) in the string to be split has been greatly simplified by using the ISNULL/NULLIF
combination to determine when the CHARINDEX returned a 0 which indicates there are no more delimiters to be
had or to start with. Depending on the width of the elements, this code is between 4 and 8 times faster on a
single CPU box than the original code especially near the 8K boundary.
- Modified comments to include more sanity checks on the usage example, etc.
- Removed "other" notes 8 and 9 as they were no longer applicable.
Rev 06 - 12 Apr 2011 - Jeff Moden
- Based on a suggestion by Ron "Bitbucket" McCullough, additional test rows were added to the sample code and
the code was changed to encapsulate the output in pipes so that spaces and empty strings could be perceived
in the output. The first "Notes" section was added. Finally, an extra test was added to the comments above.
Rev 07 - 06 May 2011 - Peter de Heer, a further 15-20% performance enhancement has been discovered and incorporated
into this code which also eliminated the need for a "zero" position in the cteTally table.
**********************************************************************************************************************/
--===== Define I/O parameters
(#pString VARCHAR(8000), #pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --1E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --1E+2 or 100 rows (E1 cross joined with itself)
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --1E+4 or 10,000 rows max (E2 cross joined with itself)
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns".
-- TOP is the byte length of the string (0 for a NULL string), so N runs from 1 to that length.
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
-- The leading "SELECT 1" supplies the start of the first element.
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
-- CHARINDEX returns 0 when no further delimiter exists; NULLIF turns that 0 into NULL and
-- ISNULL then substitutes 8000 so the final element runs to the end of the string.
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
;
GO
With SQL Server 2016 we can use string_split to accomplish this:
-- Demo table holding one comma-separated string per row.
CREATE TABLE commasep (
    id int IDENTITY(1,1),
    string nvarchar(100)
)

INSERT INTO commasep (string) VALUES ('John, Adam')
INSERT INTO commasep (string) VALUES ('test1,test2,test3')

-- One output row per (id, item): string_split is applied to every source row.
SELECT
    c.id,
    s.[value] AS String
FROM commasep AS c
CROSS APPLY string_split(c.string, ',') AS s
CREATE FUNCTION [dbo].[fn_split_string_to_column] (
    #string NVARCHAR(MAX),
    #delimiter CHAR(1)
)
RETURNS #out_put TABLE (
    [column_id] INT IDENTITY(1, 1) NOT NULL,
    [value] NVARCHAR(MAX)
)
AS
-- Splits #string on #delimiter and returns one trimmed item per row, numbered
-- by [column_id] in input order.
--   * Empty items between consecutive delimiters are returned as ''.
--   * A single trailing delimiter does not produce an extra empty item.
--   * A NULL #string returns no rows.
BEGIN
    DECLARE #pos BIGINT = 1,   -- start of the item currently being read (1-based)
            #next BIGINT       -- position of the delimiter that ends it

    -- Make sure the string ends with a delimiter so the last item is emitted by the loop too.
    SET #string = CASE
                      WHEN RIGHT(#string, 1) != #delimiter
                      THEN #string + #delimiter
                      ELSE #string
                  END

    -- Search from #pos itself (not #pos + 1) so that an empty item directly
    -- after a delimiter is found instead of being merged into the next one.
    SET #next = CHARINDEX(#delimiter, #string, #pos)
    WHILE #next > 0
    BEGIN
        INSERT INTO #out_put ([value])
        VALUES (LTRIM(RTRIM(SUBSTRING(#string, #pos, #next - #pos))))

        SET #pos = #next + 1
        SET #next = CHARINDEX(#delimiter, #string, #pos)
    END
    RETURN
END
-- Split NAME at its first comma.
-- firstname: text before the comma (starting at 0 makes SUBSTRING drop one character,
--            which excludes the comma itself).
-- lastname:  text after the comma; the + 1 skips the comma so it is not part of the result.
SELECT id,
       SUBSTRING(NAME, 0, CHARINDEX(',', NAME)) AS firstname,
       SUBSTRING(NAME, CHARINDEX(',', NAME) + 1, LEN(NAME)) AS lastname
FROM spilt
Try this (change instances of ' ' to ',' or whatever delimiter you want to use)
CREATE FUNCTION dbo.Wordparser
(
    #multiwordstring VARCHAR(255),
    #wordnumber NUMERIC
)
returns VARCHAR(255)
AS
-- Returns the #wordnumber-th (1-based) space-separated word of #multiwordstring,
-- or NULL when there is no such word.
BEGIN
    DECLARE #remainingstring VARCHAR(255)
    SET #remainingstring = #multiwordstring

    -- Number of words = number of spaces + 1.
    DECLARE #numberofwords NUMERIC
    SET #numberofwords = (LEN(#remainingstring) - LEN(REPLACE(#remainingstring, ' ', '')) + 1)

    -- As wide as the input so long words are not truncated.
    DECLARE #word VARCHAR(255)
    DECLARE #spacepos INT
    DECLARE #parsedwords TABLE
    (
        line NUMERIC IDENTITY(1, 1),
        word VARCHAR(255)
    )

    -- Peel words off the front while more than one word is left.
    WHILE #numberofwords > 1
    BEGIN
        SET #spacepos = CHARINDEX(' ', #remainingstring)
        SET #word = LEFT(#remainingstring, #spacepos - 1)

        INSERT INTO #parsedwords(word)
        SELECT #word

        -- Remove only the leading word and its space. A REPLACE here would also
        -- delete later repetitions of the same word ('a b a c' would become 'b c').
        SET #remainingstring = SUBSTRING(#remainingstring, #spacepos + 1, 255)
        SET #numberofwords = (LEN(#remainingstring) - LEN(REPLACE(#remainingstring, ' ', '')) + 1)
    END

    -- What is left is the last word (NULL when the input was NULL).
    INSERT INTO #parsedwords(word)
    SELECT #remainingstring

    RETURN
      (SELECT word
       FROM #parsedwords
       WHERE line = #wordnumber)
END
Example usage:
-- COLUMN and TABLE are placeholders: substitute the multi-word column and its table.
-- Each call returns the 1st, 2nd and 3rd space-separated word of the column.
SELECT dbo.Wordparser(COLUMN, 1),
dbo.Wordparser(COLUMN, 2),
dbo.Wordparser(COLUMN, 3)
FROM TABLE
I think PARSENAME is the neat function to use for this example, as described in this article: http://www.sqlshack.com/parsing-and-rotating-delimited-data-in-sql-server-2012/
The PARSENAME function is logically designed to parse four-part object names. The nice thing about PARSENAME is that it’s not limited to parsing just SQL Server four-part object names – it will parse any function or string data that is delimited by dots.
The first parameter is the object to parse, and the second is the integer value of the object piece to return. The article is discussing parsing and rotating delimited data - company phone numbers, but it can be used to parse name/surname data also.
Example:
USE COMPANY;
SELECT PARSENAME('Whatever.you.want.parsed',3) AS 'ReturnValue';
The article also describes using a Common Table Expression (CTE) called ‘replaceChars’, to run PARSENAME against the delimiter-replaced values. A CTE is useful for returning a temporary view or result set.
After that, the UNPIVOT function has been used to convert some columns into rows; SUBSTRING and CHARINDEX functions have been used for cleaning up the inconsistencies in the data, and the LAG function (new for SQL Server 2012) has been used in the end, as it allows referencing of previous records.
We can create a function as this
CREATE Function [dbo].[fn_CSVToTable]
(
    #CSVList Varchar(max)
)
RETURNS #Table TABLE (ColumnData VARCHAR(100))
AS
-- Splits a comma-separated list into one trimmed value per row.
-- A NULL or empty list returns no rows.
BEGIN
    DECLARE #comma_at BIGINT = 1,     -- position of the comma ending the current item
            #item_start BIGINT = 1;   -- position where the current item begins

    -- Terminate the list with a comma so the last item is handled like the others.
    IF RIGHT(#CSVList, 1) <> ','
        SET #CSVList = #CSVList + ',';

    -- Stop once the comma just processed is the final character of the list.
    WHILE #comma_at < LEN(#CSVList)
    BEGIN
        SET #comma_at = CHARINDEX(',', #CSVList, #item_start);

        INSERT INTO #Table (ColumnData)
        VALUES (LTRIM(RTRIM(SUBSTRING(#CSVList, #item_start, #comma_at - #item_start))));

        SET #item_start = #comma_at + 1;
    END
    RETURN
END
We can then separate the CSV values into our respective columns using a SELECT statement.
You can use the table-valued function STRING_SPLIT, which is available only at database compatibility level 130 or higher. If your database compatibility level is lower than 130, SQL Server will not be able to find and execute the STRING_SPLIT function. You can change the compatibility level of the database using the following command:
ALTER DATABASE DatabaseName SET COMPATIBILITY_LEVEL = 130
Syntax
SELECT * FROM STRING_SPLIT ( string, separator )
see documentation here
I think following function will work for you:
You have to create a function in SQL first. Like this
CREATE FUNCTION [dbo].[fn_split](
    #str VARCHAR(MAX),
    #delimiter CHAR(1)
)
RETURNS #returnTable TABLE (idx INT PRIMARY KEY IDENTITY, item VARCHAR(8000))
AS
-- Splits #str on #delimiter and returns one row per item, numbered by idx
-- in input order. Empty items are returned as NULL. A NULL #str returns no rows.
BEGIN
    DECLARE #pos BIGINT

    -- Terminate the string with a delimiter so every item, including the last,
    -- is followed by one.
    SET #str = #str + #delimiter

    -- DATALENGTH instead of LEN: LEN ignores trailing spaces, which made the
    -- remaining-length arithmetic wrong (and SUBSTRING fail with a negative
    -- length) when the delimiter is a space.
    WHILE DATALENGTH(#str) > 0
    BEGIN
        SET #pos = CHARINDEX(#delimiter, #str)

        IF #pos = 1
            -- Delimiter right at the front: empty item.
            INSERT #returnTable (item)
            VALUES (NULL)
        ELSE
            INSERT #returnTable (item)
            VALUES (SUBSTRING(#str, 1, #pos - 1))

        -- Drop the item just read together with its delimiter.
        SET #str = SUBSTRING(#str, #pos + 1, DATALENGTH(#str))
    END
    RETURN
END
You can call this function, like this:
select * from fn_split('1,24,5',',')
Implementation:
-- Demo: split "Name,Surname" values into two columns with fn_split.
DECLARE #test TABLE (
    ID VARCHAR(200),
    Data VARCHAR(200)
);

INSERT INTO #test (ID, Data) VALUES ('1', 'Cleo,Smith');
INSERT INTO #test (ID, Data) VALUES ('2', 'Paul,Grim');

-- idx is the 1-based item position returned by fn_split.
SELECT
    t.ID,
    (SELECT s.item FROM fn_split(t.Data, ',') AS s WHERE s.idx = 1) AS Name,
    (SELECT s.item FROM fn_split(t.Data, ',') AS s WHERE s.idx = 2) AS Surname
FROM #test AS t
Result will like this:
Use Parsename() function
-- Demo: PARSENAME numbers dot-separated parts from the right, so after swapping
-- ',' for '.' part 2 is the first name and part 1 the family name.
WITH cte (FullName) AS (
    SELECT 'Aria,Karimi'
    UNION
    SELECT 'Joe,Karimi'
    UNION
    SELECT 'Bab,Karimi'
)
SELECT
    PARSENAME(REPLACE(c.FullName, ',', '.'), 2) AS Name,
    PARSENAME(REPLACE(c.FullName, ',', '.'), 1) AS Family
FROM cte AS c
Result
Name Family
----- ------
Aria Karimi
Bab Karimi
Joe Karimi
Try this:
-- Demo: split a CSV string with a recursive CTE.
DECLARE #csv varchar(100) = 'aaa,bb,csda,daass';

-- Terminate the list with a comma so every item is followed by one.
SET #csv = #csv + ',';

WITH pieces AS
(
    -- Anchor: first item (val) and everything after its comma (rem).
    SELECT
        SUBSTRING(#csv, 1, CHARINDEX(',', #csv) - 1) AS val,
        SUBSTRING(#csv, CHARINDEX(',', #csv) + 1, LEN(#csv)) AS rem
    UNION ALL
    -- Recursion: peel the next item off the remainder until nothing is left.
    SELECT
        SUBSTRING(p.rem, 1, CHARINDEX(',', p.rem) - 1) AS val,
        SUBSTRING(p.rem, CHARINDEX(',', p.rem) + 1, LEN(p.rem))
    FROM pieces AS p
    WHERE LEN(p.rem) >= 1
)
SELECT val
FROM pieces
This function is the fastest:
CREATE FUNCTION dbo.F_ExtractSubString
(
    #String VARCHAR(MAX),
    #NroSubString INT,
    #Separator VARCHAR(5)
)
RETURNS VARCHAR(MAX) AS
-- Returns the #NroSubString-th (1-based) item of #String, where items are
-- separated by #Separator (1 to 5 characters, e.g. ', ').
-- Returns '' when #NroSubString is less than 1 or past the last item.
BEGIN
    DECLARE #St INT = 0,       -- start of the item found last
            #End INT = 0,      -- position of the separator that ends it
            #Next INT = 1,     -- where the search for the next separator begins
            #SepLen INT,
            #Ret VARCHAR(MAX)

    -- DATALENGTH, not LEN: LEN ignores trailing spaces, so LEN(', ') is 1.
    SET #SepLen = DATALENGTH(#Separator)

    -- Terminate the string with a separator so the last item is delimited too.
    SET #String = #String + #Separator

    WHILE #NroSubString > 0 AND CHARINDEX(#Separator, #String, #Next) > 0
    BEGIN
        SET #St = #Next
        SET #End = CHARINDEX(#Separator, #String, #Next)
        -- Skip the whole separator; advancing by one character would leave the
        -- tail of a multi-character separator at the start of the next item.
        SET #Next = #End + #SepLen
        SET #NroSubString = #NroSubString - 1
    END

    IF #NroSubString > 0
        -- Fewer items than requested.
        SET #Ret = ''
    ELSE
        SET #Ret = SUBSTRING(#String, #St, #End - #St)
    RETURN #Ret
END
GO
Example usage:
-- COLUMN and TABLE are placeholders: substitute the delimited column and its table.
-- Each call returns the 1st, 2nd and 3rd item, using ', ' (comma + space) as the separator.
SELECT dbo.F_ExtractSubString(COLUMN, 1, ', '),
dbo.F_ExtractSubString(COLUMN, 2, ', '),
dbo.F_ExtractSubString(COLUMN, 3, ', ')
FROM TABLE
I encountered a similar problem but a complex one and since this is the first thread i found regarding that issue i decided to post my finding. i know it is complex solution to a simple problem but i hope that i could help other people who go to this thread looking for a more complex solution. i had to split a string containing 5 numbers (column name: levelsFeed) and to show each number in a separate column.
for example: 8,1,2,2,2
should be shown as :
1 2 3 4 5
-------------
8 1 2 2 2
Solution 1: using XML functions:
This is the slowest solution by far.
-- Solution 1: convert levelsFeed ("8,1,2,2,2") to XML and read the five
-- <r> elements as INT columns level1..level5.
SELECT DISTINCT
    FeedbackID
    , S.a.value('(/H/r)[1]', 'INT') AS level1
    , S.a.value('(/H/r)[2]', 'INT') AS level2
    , S.a.value('(/H/r)[3]', 'INT') AS level3
    , S.a.value('(/H/r)[4]', 'INT') AS level4
    , S.a.value('(/H/r)[5]', 'INT') AS level5
FROM (
    -- Wrap every comma-separated number in an <r> element under a single <H> root.
    SELECT *, CAST(N'<H><r>' + REPLACE(levelsFeed, ',', '</r><r>') + '</r> </H>' AS XML) AS [vals]
    FROM Feedbacks
) AS d
-- nodes() yields one row per <r>; the paths above are absolute, so those rows
-- carry the same values and DISTINCT collapses them to one row per feedback.
CROSS APPLY d.[vals].nodes('/H/r') S(a)
Solution 2: using Split function and pivot. (the split function split a string to rows with the column name Data)
-- Solution 2: split levelsFeed into rows, number the rows per feedback, then
-- pivot row numbers 1..5 into the columns [1]..[5].
SELECT FeedbackID, [1],[2],[3],[4],[5]
FROM (
-- rn numbers the split items within each feedback. The ORDER BY is a constant,
-- so the numbering relies on the split function returning items in input order.
SELECT *, ROW_NUMBER() OVER (PARTITION BY feedbackID ORDER BY (SELECT null)) as rn
FROM (
SELECT FeedbackID, levelsFeed
FROM Feedbacks
) as a
-- dbo.Split is expected to return the items in a column named "data" (it is pivoted below).
CROSS APPLY dbo.Split(levelsFeed, ',')
) as SourceTable
PIVOT
(
MAX(data)
FOR rn IN ([1],[2],[3],[4],[5])
)as pivotTable
Solution 3: using string manipulations functions - fastest by small margin over solution 2
-- Solution 3: level1 is cut off with SUBSTRING; the remaining four values are
-- turned into a dot-separated name so PARSENAME (which handles at most four
-- parts, numbered from the right) can pick them out.
SELECT
    f.FeedbackID,
    SUBSTRING(f.levelsFeed, 0, CHARINDEX(',', f.levelsFeed)) AS level1,
    PARSENAME(t.dotted_tail, 4) AS level2,
    PARSENAME(t.dotted_tail, 3) AS level3,
    PARSENAME(t.dotted_tail, 2) AS level4,
    PARSENAME(t.dotted_tail, 1) AS level5
FROM Feedbacks AS f
CROSS APPLY (
    -- Everything after the first comma, with the commas replaced by dots.
    SELECT REPLACE(SUBSTRING(f.levelsFeed, CHARINDEX(',', f.levelsFeed) + 1, LEN(f.levelsFeed)), ',', '.') AS dotted_tail
) AS t
since the levelsFeed contains 5 string values i needed to use the substring function for the first string.
i hope that my solution will help other that got to this thread looking for a more complex split to columns methods
Using instring function :)
-- Split "Name,Surname" at the comma (INSTR / two-argument SUBSTRING dialect, e.g. MySQL).
-- Fname: text before the comma. Searching for the comma (not a space) keeps the
--        comma out of the result for values such as 'Cleo, Smith'.
-- Sname: text after the comma, up to the end of the string.
select Value,
       substring(String, 1, instr(String, ',') - 1) Fname,
       substring(String, instr(String, ',') + 1) Sname
from tablename;
Used two functions,
1. substring(string, position, length) ==> returns length characters of string, starting at position
2. instr(string,pattern) ==> returns position of pattern.
If we don’t provide length argument in substring it returns until end of string
This worked for me
CREATE FUNCTION [dbo].[SplitString](
    #delimited NVARCHAR(MAX),
    #delimiter NVARCHAR(100)
) RETURNS #t TABLE ( val NVARCHAR(MAX))
AS
-- Splits #delimited on #delimiter and returns one row per item.
-- The text is embedded in XML without escaping, so input containing '&' or '<'
-- makes the XML conversion fail.
BEGIN
    DECLARE #xml XML

    -- Turn "a,b,c" into <t>a</t><t>b</t><t>c</t>.
    SET #xml = N'<t>' + REPLACE(#delimited, #delimiter, N'</t><t>') + N'</t>'

    INSERT INTO #t(val)
    -- Read the items as nvarchar to match the NVARCHAR(MAX) column; reading them
    -- as varchar would lose characters outside the database code page.
    SELECT r.value('.', 'nvarchar(MAX)') as item
    FROM #xml.nodes('/t') as records(r)
    RETURN
END
mytable:
Value ColOne
--------------------
1 Cleo, Smith
The following should work if there aren't too many columns
ALTER TABLE mytable ADD ColTwo nvarchar(256);
GO
-- The new column must be added in its own batch: a batch that references
-- ColTwo is compiled before the ALTER TABLE in that same batch has run.

-- Split ColOne at its first comma in a single statement. Every expression on the
-- right-hand side sees the values from before the update, so ColOne can be
-- read and overwritten at the same time.
--'Cleo'  = LEFT('Cleo, Smith', CHARINDEX(',', 'Cleo, Smith') - 1)
--'Smith' = LTRIM(SUBSTRING('Cleo, Smith', CHARINDEX(',', 'Cleo, Smith') + 1, LEN('Cleo, Smith')))
UPDATE mytable
SET ColTwo = LTRIM(SUBSTRING(ColOne, CHARINDEX(',', ColOne) + 1, LEN(ColOne))),
    ColOne = LEFT(ColOne, CHARINDEX(',', ColOne) - 1)
-- Rows without a comma are left untouched (LEFT would fail on a length of -1).
WHERE CHARINDEX(',', ColOne) > 0;
Result:
Value ColOne ColTwo
--------------------
1 Cleo Smith
-- Prints every character of #INPUT on its own line, skipping the delimiter character.
DECLARE #INPUT VARCHAR(MAX) = 'N,A,R,E,N,D,R,A',
        #ELIMINATE_CHAR CHAR(1) = ',',
        #L_START INT = 1,
        #L_END INT,
        #OUTPUT CHAR(1);

SET #L_END = LEN(#INPUT);

-- Walk the string one character at a time.
WHILE #L_START <= #L_END
BEGIN
    SET #OUTPUT = SUBSTRING(#INPUT, #L_START, 1);

    IF #OUTPUT <> #ELIMINATE_CHAR
        PRINT #OUTPUT;

    SET #L_START += 1;
END
You may find the solution in SQL User Defined Function to Parse a Delimited String helpful (from The Code Project).
This is the code part from this page:
CREATE FUNCTION [fn_ParseText2Table]
(#p_SourceText VARCHAR(MAX)
,#p_Delimeter VARCHAR(100)=',' --default to comma delimited.
)
RETURNS #retTable
TABLE([Position] INT IDENTITY(1,1)
,[Int_Value] INT
,[Num_Value] NUMERIC(18,3)
,[Txt_Value] VARCHAR(MAX)
,[Date_value] DATETIME
)
AS
/*
********************************************************************************
Purpose: Parse values from a delimited string
& return the result as an indexed table
Copyright 1996, 1997, 2000, 2003 Clayton Groom (Clayton_Groom#hotmail.com)
Posted to the public domain Aug, 2004
2003-06-17 Rewritten as SQL 2000 function.
Reworked to allow for delimiters > 1 character in length
and to convert Text values to numbers
2016-04-05 Added logic for date values based on "new" ISDATE() function, Updated to use XML approach, which is more efficient.

Parameters:
  #p_SourceText  delimited text to split.
  #p_Delimeter   delimiter (1-100 characters); defaults to a comma.
Returns one row per item:
  [Position]    IDENTITY value assigned on insert.
  [Int_Value]   item as INT when it converts, otherwise NULL.
  [Num_Value]   item as NUMERIC(18,3) when it converts, otherwise NULL.
  [Txt_Value]   item text as extracted from the XML.
  [Date_value]  item as DATETIME when ISDATE() accepts it, otherwise NULL.
Notes:
  - ISNUMERIC() returns 1 for tokens such as '$', '.', '-' or '1e5' that cannot
    be cast to NUMERIC, so the conversions use TRY_CAST: such tokens produce
    NULL instead of aborting the whole call.
  - The source text is embedded in XML without escaping, so text containing
    '&' or '<' makes the XML conversion fail.
********************************************************************************
*/
BEGIN
    DECLARE #w_xml xml;

    -- Turn "a,b,c" into <root><i>a</i><i>b</i><i>c</i></root>.
    SET #w_xml = N'<root><i>' + replace(#p_SourceText, #p_Delimeter,'</i><i>') + '</i></root>';

    INSERT INTO #retTable
        ([Int_Value]
        , [Num_Value]
        , [Txt_Value]
        , [Date_value]
        )
    SELECT CASE
               WHEN ISNUMERIC([v].[txt]) = 1
               -- Via NUMERIC first so that '1.6' still yields an INT (rounded by the NUMERIC cast).
               THEN TRY_CAST(TRY_CAST([v].[txt] AS NUMERIC) AS INT)
           END AS [Int_Value]
         , CASE
               WHEN ISNUMERIC([v].[txt]) = 1
               THEN TRY_CAST([v].[txt] AS NUMERIC(18, 3))
           END AS [Num_Value]
         , [v].[txt] AS [Txt_Value]
         , CASE
               WHEN ISDATE([v].[txt]) = 1
               THEN TRY_CAST([v].[txt] AS DATETIME)
           END AS [Date_value]
    FROM #w_xml.nodes('//root/i') AS [Items]([i])
    -- Extract the item text once instead of once per output column.
    CROSS APPLY (SELECT [i].value('.', 'VARCHAR(MAX)') AS [txt]) AS [v];

    RETURN;
END;
GO
ALTER function get_occurance_index(#delimiter varchar(1),#occurence int,#String varchar(100))
returns int
AS
BEGIN
    -- Returns the 1-based position of the #occurence-th #delimiter inside #String,
    -- or 0 when #String has fewer than #occurence delimiters.
    -- Example: #delimiter = ',', #occurence = 2, #String = 'a,b,c' gives 4.
    DECLARE #found_at int;

    -- Walk the string one delimiter at a time: each recursion step searches
    -- from just past the previous hit and stops once CHARINDEX reports no hit.
    WITH hits (hit_no, hit_pos) AS (
        SELECT 1, CHARINDEX(#delimiter, #String)
        UNION ALL
        SELECT hit_no + 1, CHARINDEX(#delimiter, #String, hit_pos + 1)
        FROM hits
        WHERE hit_pos > 0
    )
    SELECT #found_at = hit_pos
    FROM hits
    WHERE hit_no = #occurence
      AND hit_pos > 0;

    RETURN ISNULL(#found_at, 0);
END
-- Demo: split each three-part CSV value into its parts with get_occurance_index.
declare #data as table (data varchar(100))
insert into #data values('1,2,3')
insert into #data values('aaa,bbbbb,cccc')
select top 3
       -- first part: everything before the first comma (SUBSTRING start 0 yields length - 1 characters)
       Substring(data, 0, dbo.get_occurance_index(',', 1, data)),
       -- second part: between the first and the second comma
       Substring(data,
                 dbo.get_occurance_index(',', 1, data) + 1,
                 dbo.get_occurance_index(',', 2, data) - dbo.get_occurance_index(',', 1, data) - 1),
       -- last part: len(data) is a safe upper bound for the remaining length
       Substring(data, dbo.get_occurance_index(',', 2, data) + 1, len(data)),
       data
From #data
I found that using PARSENAME as above caused any name with a period to get nulled.
So if there was an initial or a title in the name followed by a dot they return NULL.
I found this worked for me:
-- Split "Name, Surname" at the first comma without PARSENAME, so dots in names survive.
SELECT
    -- everything up to the first comma, with the comma itself removed
    REPLACE(SUBSTRING(FullName, 1, CHARINDEX(',', FullName)), ',', '') AS Name,
    -- everything after the first comma; start at +1 so a value without a comma is
    -- returned whole (starting at position 0 dropped its last character), and
    -- LTRIM removes the blank that follows the comma in 'Cleo, Smith'
    LTRIM(REPLACE(SUBSTRING(FullName, CHARINDEX(',', FullName) + 1, LEN(FullName)), ',', '')) AS Surname
FROM Table1
This is easy; you can do it with the query below:
-- Split a two-part value at its comma: LEFT takes the text before it, RIGHT the text after it.
DECLARE #str NVARCHAR(MAX)='ControlID_05436b78-04ba-9667-fa01-9ff8c1b7c235,3'
SELECT
    LEFT(#str, CHARINDEX(',', #str) - 1),
    RIGHT(#str, LEN(#str) - CHARINDEX(',', #str))
-- Split the ';'-separated modelFileId into up to four columns via XML.
-- NOTE(review): modelFileId is not XML-escaped; values containing '&' or '<' will fail.
select distinct c.modelFileId, F4.*
from contract c
cross apply (select XmlList = convert(xml, '<x>' + replace(c.modelFileId, ';', '</x><x>') + '</x>')) F2
-- Read the positional elements straight from the XML value. The former nodes('x')
-- step produced one identical row per element, which DISTINCT then had to remove.
cross apply (select mfid1 = F2.XmlList.value('(/x)[1]', 'varchar(512)')
                  , mfid2 = F2.XmlList.value('(/x)[2]', 'varchar(512)')
                  , mfid3 = F2.XmlList.value('(/x)[3]', 'varchar(512)')
                  , mfid4 = F2.XmlList.value('(/x)[4]', 'varchar(512)')) F4
where c.modelFileId like '%;%'
order by c.modelFileId
-- Keep rows whose ids appear in the comma-separated lists #params and #res.
-- NOTE(review): the ',id,' pattern presumably expects both lists to start and end with a comma — confirm with the caller.
Select distinct
       PROJ_UID,
       PROJ_NAME,
       RES_UID
from E2E_ProjectWiseTimesheetActuals
where CHARINDEX(',' + cast(PROJ_UID as varchar(8000)) + ',', #params) > 0
  and CHARINDEX(',' + cast(RES_UID as varchar(8000)) + ',', #res) > 0
I re-wrote an answer above and made it better:
CREATE FUNCTION [dbo].[CSVParser]
(
    #s VARCHAR(255),
    #idx NUMERIC
)
RETURNS VARCHAR(12)
BEGIN
    -- Returns the #idx-th (1-based) comma-separated field of #s.
    --   * NULL when #s is NULL
    --   * ''   when #idx is NULL, below 1, or past the last field
    -- The result is cut to 12 characters by the declared return type.
    IF #s IS NULL
        RETURN NULL              -- previously looped forever unless #idx was 1
    IF #idx IS NULL OR #idx < 1
        RETURN ''
    DECLARE #comma int
    SET #comma = CHARINDEX(',', #s)
    -- Drop leading fields until the requested one is at the front.
    WHILE #idx > 1 AND #comma > 0
    BEGIN
        -- SUBSTRING rather than RIGHT(#s, LEN(#s) - #comma): LEN() ignores trailing
        -- spaces, so the RIGHT form kept too few characters and shifted the fields.
        SET #s = SUBSTRING(#s, #comma + 1, 255)
        SET #comma = CHARINDEX(',', #s)
        SET #idx = #idx - 1
    END
    IF #idx > 1
        RETURN ''                -- fewer fields than requested
    IF #comma > 0
        RETURN LEFT(#s, #comma - 1)
    RETURN #s                    -- last field: no trailing comma
END
Example usage:
-- COLUMN and TABLE are placeholders for your own column and table names.
SELECT
    dbo.CSVParser(COLUMN, 1),
    dbo.CSVParser(COLUMN, 2),
    dbo.CSVParser(COLUMN, 3)
FROM TABLE
The question is simple, but the problem is a popular one :)
So I created a wrapper around string_split() that pivots the result in a more generic way. It is a table function which returns the columns (nn, value1, value2, ... , value50) - enough for most CSV lines. If there are more values, they wrap to the next row - nn is the row number. Set the third parameter #columnCnt = [yourNumber] to wrap at a specific position:
alter FUNCTION fn_Split50
(
    #str varchar(max),
    #delim char(1),
    #columnCnt int = 50
)
RETURNS TABLE
AS
RETURN
(
    -- Splits #str on #delim and pivots the items into columns value1..value50.
    -- nn is the output row number: items wrap onto a new row after #columnCnt columns.
    -- NOTE(review): item order comes from ROW_NUMBER() over an unordered string_split()
    -- result, so it presumably follows input order but is not guaranteed to.
    SELECT *
    FROM (
        SELECT
            nn    = (numbered.nn - 1) / #columnCnt + 1,                                        -- output row
            nnn   = 'value' + cast(((numbered.nn - 1) % #columnCnt) + 1 as varchar(10)),       -- output column name
            value = numbered.value
        FROM (
            SELECT
                nn = ROW_NUMBER() over (order by (select null)),
                items.value
            FROM string_split(#str, #delim) items
        ) numbered
    ) placed
    PIVOT
    (
        max(value)
        FOR nnn IN (
            value1,  value2,  value3,  value4,  value5,  value6,  value7,  value8,  value9,  value10,
            value11, value12, value13, value14, value15, value16, value17, value18, value19, value20,
            value21, value22, value23, value24, value25, value26, value27, value28, value29, value30,
            value31, value32, value33, value34, value35, value36, value37, value38, value39, value40,
            value41, value42, value43, value44, value45, value46, value47, value48, value49, value50
        )
    ) AS PivotTable
)
Example of using:
-- default width: all five items on one row
select * from dbo.fn_split50('zz1,aa2,ss3,dd4,ff5', ',', DEFAULT)
-- wrap after three columns
select * from dbo.fn_split50('zz1,aa2,ss3,dd4,ff5,gg6,hh7,jj8,ww9,qq10', ',', 3)
-- wrap after two columns: each row holds one pair
select * from dbo.fn_split50('zz1,11,aa2,22,ss3,33,dd4,44,ff5,55,gg6,66,hh7,77,jj8,88,ww9,99,qq10,1010', ',', 2)
Hope it helps :)

SQL split a colon and Space from the Description Field/Column [duplicate]

I have a table like this
Value String
-------------------
1 Cleo, Smith
I want to separate the comma delimited string into two columns
Value Name Surname
-------------------
1 Cleo Smith
I need only two fixed extra columns
Your purpose can be solved using following query -
-- Split "Name, Surname" at the first comma.
-- Searching in FullName + ',' guarantees a match, so a value without a comma becomes
-- (whole value, '') instead of raising "Invalid length parameter passed to the
-- LEFT or SUBSTRING function".
Select Value,
       Substring(FullName, 1, Charindex(',', FullName + ',') - 1) as Name,
       -- LTRIM drops the blank that follows the comma in 'Cleo, Smith'
       LTRIM(Substring(FullName, Charindex(',', FullName + ',') + 1, LEN(FullName))) as Surname
from Table1
There is no readymade Split function in sql server, so we need to create user defined function.
CREATE FUNCTION Split (
    #InputString VARCHAR(8000),
    #Delimiter VARCHAR(50)
)
RETURNS #Items TABLE (
    Item VARCHAR(8000)
)
AS
BEGIN
    -- Splits #InputString on #Delimiter and returns one row per item.
    --   * A blank delimiter means "split on spaces": spaces are turned into commas first.
    --     (String comparison ignores trailing blanks, so an empty delimiter takes this branch too.)
    --   * A NULL delimiter falls back to ','.
    --   * A string without any delimiter is returned as a single row; NULL input gives one NULL row.
    IF #Delimiter = ' '
    BEGIN
        SET #Delimiter = ','
        SET #InputString = REPLACE(#InputString, ' ', #Delimiter)
    END
    IF (#Delimiter IS NULL OR #Delimiter = '')
        SET #Delimiter = ','

    -- DATALENGTH, not LEN: LEN() would ignore a trailing blank in a delimiter such as ', '.
    DECLARE #DelimLen INT = DATALENGTH(#Delimiter)
    DECLARE #ItemStart INT = 1
    DECLARE #DelimIndex INT = CHARINDEX(#Delimiter, #InputString, #ItemStart)

    WHILE (#DelimIndex > 0)
    BEGIN
        INSERT INTO #Items VALUES (SUBSTRING(#InputString, #ItemStart, #DelimIndex - #ItemStart))
        -- Skip the whole delimiter; skipping a single character left the tail of a
        -- multi-character delimiter glued to the next item.
        SET #ItemStart = #DelimIndex + #DelimLen
        SET #DelimIndex = CHARINDEX(#Delimiter, #InputString, #ItemStart)
    END -- End WHILE

    -- Whatever follows the last delimiter (the whole string when none was found).
    INSERT INTO #Items VALUES (SUBSTRING(#InputString, #ItemStart, 8000))
    RETURN
END -- End Function
GO
---- Set Permissions
--GRANT SELECT ON Split TO UserRole1
--GRANT SELECT ON Split TO UserRole2
--GO
-- Wrap each comma-separated part of Name in a <name> element, then read the parts by position.
;WITH Split_Names AS
(
    SELECT
        n.Value,
        n.Name,
        CONVERT(XML, '<Names><name>' + REPLACE(n.Name, ',', '</name><name>') + '</name></Names>') AS xmlname
    FROM tblnames n
)
SELECT
    Value,
    xmlname.value('/Names[1]/name[1]', 'varchar(100)') AS Name,     -- first part
    xmlname.value('/Names[1]/name[2]', 'varchar(100)') AS Surname   -- second part
FROM Split_Names
and also check the link below for reference
http://jahaines.blogspot.in/2009/06/converting-delimited-string-of-values.html
xml based answer is simple and clean
refer this
-- Demo: split a delimited string by converting it to XML.
DECLARE #S varchar(max),
        #Split char(1),
        #X xml
SELECT #S = 'ab,cd,ef,gh,ij',
       #Split = ','
-- every item becomes one <myvalue> element
SELECT #X = CONVERT(xml, ' <root> <myvalue>' +
                         REPLACE(#S, #Split, '</myvalue> <myvalue>') +
                         '</myvalue> </root> ')
SELECT
    T.c.value('.', 'varchar(20)'),                      --retrieve ALL values at once
    T.c.value('(/root/myvalue)[1]', 'VARCHAR(20)'),     --retrieve index 1 only, which is the 'ab'
    T.c.value('(/root/myvalue)[2]', 'VARCHAR(20)')
FROM #X.nodes('/root/myvalue') T(c)
I think this is cool
-- Turn the comma into a dot so PARSENAME can pick the parts (numbered from the right).
SELECT
    value,
    PARSENAME(REPLACE(String, ',', '.'), 2) AS [Name],
    PARSENAME(REPLACE(String, ',', '.'), 1) AS [Surname]
FROM table WITH (NOLOCK)
With CROSS APPLY
-- Locate the first two commas step by step with CROSS APPLY, then cut out the parts.
select ParsedData.*
from MyTable mt
-- the two extra commas guarantee that p1 and p2 are always found
cross apply ( select str = mt.String + ',,' ) f1
cross apply ( select p1 = charindex( ',', str ) ) ap1
cross apply ( select p2 = charindex( ',', str, p1 + 1 ) ) ap2
cross apply ( select Name    = substring( str, 1, p1-1 )                    -- was misspelled "Nmame"
                   , Surname = ltrim( substring( str, p1+1, p2-p1-1 ) )     -- drop the blank after the comma
            ) ParsedData
There are multiple ways to solve this and many different ways have been proposed already. Simplest would be to use LEFT / SUBSTRING and other string functions to achieve the desired result.
Sample Data
-- Two sample rows in "First, Last" form.
DECLARE #tbl1 TABLE (Value INT, String VARCHAR(MAX))
INSERT INTO #tbl1
VALUES (1, 'Cleo, Smith'),
       (2, 'John, Mathew');
Using String Functions like LEFT
-- First name: text before the comma. Last name: text after it, with the leading blank trimmed.
SELECT Value,
       LEFT(String, CHARINDEX(',', String) - 1) as Fname,
       LTRIM(RIGHT(String, LEN(String) - CHARINDEX(',', String))) AS Lname
FROM #tbl1
This approach fails if there are more than 2 items in a String.
In such a scenario, we can use a splitter and then use PIVOT or convert the string into an XML and use .nodes to get string items. XML based solution have been detailed out by aads and bvr in their solution.
The answers for this question which use splitter, all use WHILE which is inefficient for splitting. Check this performance comparison. One of the best splitters around is DelimitedSplit8K, created by Jeff Moden. You can read more about it here
Splitter with PIVOT
-- Split every row with DelimitedSplit8K, then pivot item 1 and item 2 into columns.
DECLARE #tbl1 TABLE (Value INT, String VARCHAR(MAX))
INSERT INTO #tbl1
VALUES (1, 'Cleo, Smith'),
       (2, 'John, Mathew');
SELECT
    t3.Value,
    [1] as Fname,
    [2] as Lname
FROM #tbl1 as t1
CROSS APPLY [dbo].[DelimitedSplit8K](String, ',') as t2
PIVOT (
    MAX(Item) FOR ItemNumber IN ([1], [2])
) as t3
Output
Value Fname Lname
1 Cleo Smith
2 John Mathew
DelimitedSplit8K by Jeff Moden
CREATE FUNCTION [dbo].[DelimitedSplit8K]
/**********************************************************************************************************************
Purpose:
Split a given string at a given delimiter and return a list of the split elements (items).
Notes:
1. Leading and trailing delimiters are treated as if an empty string element were present.
2. Consecutive delimiters are treated as if an empty string element were present between them.
3. Except when spaces are used as a delimiter, all spaces present in each element are preserved.
Returns:
iTVF containing the following:
ItemNumber = Element position of Item as a BIGINT (not converted to INT to eliminate a CAST)
Item = Element value as a VARCHAR(8000)
Statistics on this function may be found at the following URL:
http://www.sqlservercentral.com/Forums/Topic1101315-203-4.aspx
CROSS APPLY Usage Examples and Tests:
--=====================================================================================================================
-- TEST 1:
-- This tests for various possible conditions in a string using a comma as the delimiter. The expected results are
-- laid out in the comments
--=====================================================================================================================
--===== Conditionally drop the test tables to make reruns easier for testing.
-- (this is NOT a part of the solution)
IF OBJECT_ID('tempdb..#JBMTest') IS NOT NULL DROP TABLE #JBMTest
;
--===== Create and populate a test table on the fly (this is NOT a part of the solution).
-- In the following comments, "b" is a blank and "E" is an element in the left to right order.
-- Double Quotes are used to encapsulate the output of "Item" so that you can see that all blanks
-- are preserved no matter where they may appear.
SELECT *
INTO #JBMTest
FROM ( --# & type of Return Row(s)
SELECT 0, NULL UNION ALL --1 NULL
SELECT 1, SPACE(0) UNION ALL --1 b (Empty String)
SELECT 2, SPACE(1) UNION ALL --1 b (1 space)
SELECT 3, SPACE(5) UNION ALL --1 b (5 spaces)
SELECT 4, ',' UNION ALL --2 b b (both are empty strings)
SELECT 5, '55555' UNION ALL --1 E
SELECT 6, ',55555' UNION ALL --2 b E
SELECT 7, ',55555,' UNION ALL --3 b E b
SELECT 8, '55555,' UNION ALL --2 b B
SELECT 9, '55555,1' UNION ALL --2 E E
SELECT 10, '1,55555' UNION ALL --2 E E
SELECT 11, '55555,4444,333,22,1' UNION ALL --5 E E E E E
SELECT 12, '55555,4444,,333,22,1' UNION ALL --6 E E b E E E
SELECT 13, ',55555,4444,,333,22,1,' UNION ALL --8 b E E b E E E b
SELECT 14, ',55555,4444,,,333,22,1,' UNION ALL --9 b E E b b E E E b
SELECT 15, ' 4444,55555 ' UNION ALL --2 E (w/Leading Space) E (w/Trailing Space)
SELECT 16, 'This,is,a,test.' --E E E E
) d (SomeID, SomeValue)
;
--===== Split the CSV column for the whole table using CROSS APPLY (this is the solution)
SELECT test.SomeID, test.SomeValue, split.ItemNumber, Item = QUOTENAME(split.Item,'"')
FROM #JBMTest test
CROSS APPLY dbo.DelimitedSplit8K(test.SomeValue,',') split
;
--=====================================================================================================================
-- TEST 2:
-- This tests for various "alpha" splits and COLLATION using all ASCII characters from 0 to 255 as a delimiter against
-- a given string. Note that not all of the delimiters will be visible and some will show up as tiny squares because
-- they are "control" characters. More specifically, this test will show you what happens to various non-accented
-- letters for your given collation depending on the delimiter you chose.
--=====================================================================================================================
WITH
cteBuildAllCharacters (String,Delimiter) AS
(
SELECT TOP 256
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
CHAR(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))-1)
FROM master.sys.all_columns
)
SELECT ASCII_Value = ASCII(c.Delimiter), c.Delimiter, split.ItemNumber, Item = QUOTENAME(split.Item,'"')
FROM cteBuildAllCharacters c
CROSS APPLY dbo.DelimitedSplit8K(c.String,c.Delimiter) split
ORDER BY ASCII_Value, split.ItemNumber
;
-----------------------------------------------------------------------------------------------------------------------
Other Notes:
1. Optimized for VARCHAR(8000) or less. No testing or error reporting for truncation at 8000 characters is done.
2. Optimized for single character delimiter. Multi-character delimiters should be resolved externally from this
function.
3. Optimized for use with CROSS APPLY.
4. Does not "trim" elements just in case leading or trailing blanks are intended.
5. If you don't know how a Tally table can be used to replace loops, please see the following...
http://www.sqlservercentral.com/articles/T-SQL/62867/
6. Changing this function to use NVARCHAR(MAX) will cause it to run twice as slow. It's just the nature of
VARCHAR(MAX) whether it fits in-row or not.
7. Multi-machine testing for the method of using UNPIVOT instead of 10 SELECT/UNION ALLs shows that the UNPIVOT method
is quite machine dependent and can slow things down quite a bit.
-----------------------------------------------------------------------------------------------------------------------
Credits:
This code is the product of many people's efforts including but not limited to the following:
cteTally concept originally by Iztek Ben Gan and "decimalized" by Lynn Pettis (and others) for a bit of extra speed
and finally redacted by Jeff Moden for a different slant on readability and compactness. Hat's off to Paul White for
his simple explanations of CROSS APPLY and for his detailed testing efforts. Last but not least, thanks to
Ron "BitBucket" McCullough and Wayne Sheffield for their extreme performance testing across multiple machines and
versions of SQL Server. The latest improvement brought an additional 15-20% improvement over Rev 05. Special thanks
to "Nadrek" and "peter-757102" (aka Peter de Heer) for bringing such improvements to light. Nadrek's original
improvement brought about a 10% performance gain and Peter followed that up with the content of Rev 07.
I also thank whoever wrote the first article I ever saw on "numbers tables" which is located at the following URL
and to Adam Machanic for leading me to it many years ago.
http://sqlserver2000.databases.aspfaq.com/why-should-i-consider-using-an-auxiliary-numbers-table.html
-----------------------------------------------------------------------------------------------------------------------
Revision History:
Rev 00 - 20 Jan 2010 - Concept for inline cteTally: Lynn Pettis and others.
Redaction/Implementation: Jeff Moden
- Base 10 redaction and reduction for CTE. (Total rewrite)
Rev 01 - 13 Mar 2010 - Jeff Moden
- Removed one additional concatenation and one subtraction from the SUBSTRING in the SELECT List for that tiny
bit of extra speed.
Rev 02 - 14 Apr 2010 - Jeff Moden
- No code changes. Added CROSS APPLY usage example to the header, some additional credits, and extra
documentation.
Rev 03 - 18 Apr 2010 - Jeff Moden
- No code changes. Added notes 7, 8, and 9 about certain "optimizations" that don't actually work for this
type of function.
Rev 04 - 29 Jun 2010 - Jeff Moden
- Added WITH SCHEMABINDING thanks to a note by Paul White. This prevents an unnecessary "Table Spool" when the
function is used in an UPDATE statement even though the function makes no external references.
Rev 05 - 02 Apr 2011 - Jeff Moden
- Rewritten for extreme performance improvement especially for larger strings approaching the 8K boundary and
for strings that have wider elements. The redaction of this code involved removing ALL concatenation of
delimiters, optimization of the maximum "N" value by using TOP instead of including it in the WHERE clause,
and the reduction of all previous calculations (thanks to the switch to a "zero based" cteTally) to just one
instance of one add and one instance of a subtract. The length calculation for the final element (not
followed by a delimiter) in the string to be split has been greatly simplified by using the ISNULL/NULLIF
combination to determine when the CHARINDEX returned a 0 which indicates there are no more delimiters to be
had or to start with. Depending on the width of the elements, this code is between 4 and 8 times faster on a
single CPU box than the original code especially near the 8K boundary.
- Modified comments to include more sanity checks on the usage example, etc.
- Removed "other" notes 8 and 9 as they were no longer applicable.
Rev 06 - 12 Apr 2011 - Jeff Moden
- Based on a suggestion by Ron "Bitbucket" McCullough, additional test rows were added to the sample code and
the code was changed to encapsulate the output in pipes so that spaces and empty strings could be perceived
in the output. The first "Notes" section was added. Finally, an extra test was added to the comments above.
Rev 07 - 06 May 2011 - Peter de Heer, a further 15-20% performance enhancement has been discovered and incorporated
into this code which also eliminated the need for a "zero" position in the cteTally table.
**********************************************************************************************************************/
--===== Define I/O parameters
-- #pString    = the string to split
-- #pDelimiter = the single character to split on
(#pString VARCHAR(8000), #pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000
-- (ROW_NUMBER() starts at 1; E4 supplies at most 10,000 rows)...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
-- (a NULL #pString gives TOP (0), i.e. no tally rows at all)
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
-- The leading "SELECT 1" is the start of the first element, which has no delimiter before it.
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
-- CHARINDEX returns 0 when no delimiter follows; NULLIF turns that into NULL and ISNULL
-- substitutes 8000 so SUBSTRING reads to the end of the string.
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
;
GO
With SQL Server 2016 we can use string_split to accomplish this:
-- Demo table holding comma-separated strings.
create table commasep (
    id     int identity(1,1),
    string nvarchar(100)
)
insert into commasep (string)
values ('John, Adam'),
       ('test1,test2,test3')
-- one output row per separated value, keyed by the source row id
select c.id,
       s.[value] as String
from commasep c
cross apply string_split(c.string, ',') s
CREATE FUNCTION [dbo].[fn_split_string_to_column] (
    #string NVARCHAR(MAX),
    #delimiter CHAR(1)
)
RETURNS #out_put TABLE (
    [column_id] INT IDENTITY(1, 1) NOT NULL,
    [value] NVARCHAR(MAX)
)
AS
BEGIN
    -- Splits #string on #delimiter and returns the trimmed items, numbered in input order.
    -- Empty items (leading or consecutive delimiters) come back as ''.
    -- A single trailing delimiter does not produce an extra item; NULL input yields no rows.
    DECLARE #start INT = 1,   -- first character of the current item
            #pos INT          -- position of the delimiter that ends it

    -- Make sure the last item is terminated by a delimiter as well.
    SET #string = CASE
                      WHEN RIGHT(#string, 1) != #delimiter
                      THEN #string + #delimiter
                      ELSE #string
                  END

    SET #pos = CHARINDEX(#delimiter, #string, #start)
    WHILE #pos > 0
    BEGIN
        INSERT INTO #out_put ([value])
        SELECT LTRIM(RTRIM(SUBSTRING(#string, #start, #pos - #start))) AS [column]

        -- Resume right behind the delimiter just consumed. The earlier arithmetic searched
        -- one position too far and swallowed the second of two consecutive delimiters,
        -- returning ',b' for 'a,,b'.
        SET #start = #pos + 1
        SET #pos = CHARINDEX(#delimiter, #string, #start)
    END
    RETURN
END
-- Split NAME at the first comma.
SELECT id,
       -- start position 0 makes SUBSTRING return the characters before the comma
       Substring(NAME, 0, Charindex(',', NAME)) AS firstname,
       -- start one past the comma (the comma itself used to be part of lastname) and trim the blank after it
       Ltrim(Substring(NAME, Charindex(',', NAME) + 1, Len(NAME) + 1)) AS lastname
FROM spilt
Try this (change instances of ' ' to ',' or whatever delimiter you want to use)
CREATE FUNCTION dbo.Wordparser
(
    #multiwordstring VARCHAR(255),
    #wordnumber NUMERIC
)
returns VARCHAR(255)
AS
BEGIN
    -- Returns the #wordnumber-th (1-based) space-separated word of #multiwordstring,
    -- or NULL when there is no such word or an argument is NULL.
    -- Trailing blanks of the input are ignored.
    --
    -- Words are located by position. The earlier version removed each parsed word with
    -- REPLACE(), which deletes every occurrence of that word and therefore corrupted
    -- inputs with repeated words (e.g. 'a a b' lost its second 'a').
    DECLARE #text VARCHAR(255) = RTRIM(#multiwordstring)
    DECLARE #start INT = 1                               -- first character of the current word
    DECLARE #space INT = CHARINDEX(' ', #text, #start)   -- blank that ends the current word (0 = last word)
    DECLARE #current INT = 1                             -- 1-based number of the current word

    WHILE #space > 0 AND #current < #wordnumber
    BEGIN
        SET #start = #space + 1
        SET #space = CHARINDEX(' ', #text, #start)
        SET #current = #current + 1
    END

    IF #wordnumber IS NULL OR #current <> #wordnumber
        RETURN NULL

    RETURN SUBSTRING(#text, #start, CASE WHEN #space > 0 THEN #space - #start ELSE 255 END)
END
Example usage:
-- COLUMN and TABLE are placeholders for your own column and table names.
SELECT
    dbo.Wordparser(COLUMN, 1),
    dbo.Wordparser(COLUMN, 2),
    dbo.Wordparser(COLUMN, 3)
FROM TABLE
I think PARSENAME is the neat function to use for this example, as described in this article: http://www.sqlshack.com/parsing-and-rotating-delimited-data-in-sql-server-2012/
The PARSENAME function is logically designed to parse four-part object names. The nice thing about PARSENAME is that it’s not limited to parsing just SQL Server four-part object names – it will parse any function or string data that is delimited by dots.
The first parameter is the object to parse, and the second is the integer value of the object piece to return. The article is discussing parsing and rotating delimited data - company phone numbers, but it can be used to parse name/surname data also.
Example:
USE COMPANY;
-- PARSENAME counts parts from the right: part 3 of this four-part value is 'you'.
SELECT
    PARSENAME('Whatever.you.want.parsed', 3) AS 'ReturnValue';
The article also describes using a Common Table Expression (CTE) called ‘replaceChars’, to run PARSENAME against the delimiter-replaced values. A CTE is useful for returning a temporary view or result set.
After that, the UNPIVOT function has been used to convert some columns into rows; SUBSTRING and CHARINDEX functions have been used for cleaning up the inconsistencies in the data, and the LAG function (new for SQL Server 2012) has been used in the end, as it allows referencing of previous records.
We can create a function as this
CREATE Function [dbo].[fn_CSVToTable]
(
    #CSVList Varchar(max)
)
RETURNS #Table TABLE (ColumnData VARCHAR(100))
AS
BEGIN
    -- Returns one trimmed row per comma-separated value in #CSVList.

    -- Terminate the list with a comma so that every value, including the last, ends at one.
    IF RIGHT(#CSVList, 1) <> ','
        SET #CSVList = #CSVList + ','

    DECLARE #CommaAt BIGINT = 1    -- position of the most recently found comma
    DECLARE #ItemStart BIGINT = 1  -- first character of the value being read

    WHILE #CommaAt < LEN(#CSVList)
    BEGIN
        SET #CommaAt = CHARINDEX(',', #CSVList, #ItemStart)

        INSERT INTO #Table
        VALUES (LTRIM(RTRIM(SUBSTRING(#CSVList, #ItemStart, #CommaAt - #ItemStart))))

        SET #ItemStart = #CommaAt + 1
    END
    RETURN
END
We can then separate the CSV values into our respective columns using a SELECT statement
You can use the table-valued function STRING_SPLIT, which is available only at compatibility level 130 or higher. If your database compatibility level is lower than 130, SQL Server will not be able to find and execute the STRING_SPLIT function. You can change the compatibility level of the database using the following command:
-- DatabaseName is a placeholder for your own database.
ALTER DATABASE DatabaseName
SET COMPATIBILITY_LEVEL = 130
Syntax
SELECT * FROM STRING_SPLIT ( string, separator )
see documentation here
I think following function will work for you:
You have to create a function in SQL first. Like this
CREATE FUNCTION [dbo].[fn_split](
    #str VARCHAR(MAX),
    #delimiter CHAR(1)
)
RETURNS #returnTable TABLE (idx INT PRIMARY KEY IDENTITY, item VARCHAR(8000))
AS
BEGIN
    -- Splits #str on #delimiter and returns the items numbered by idx (1-based).
    -- Empty items are returned as NULL; an empty input string gives a single NULL row;
    -- NULL input gives no rows.
    --
    -- Items are located by position. The earlier version re-cut the string and measured
    -- it with LEN(), which ignores trailing blanks: with a space delimiter the appended
    -- terminator was not counted and SUBSTRING was called with length -1.
    DECLARE #start INT = 1,   -- first character of the current item
            #pos INT          -- position of the delimiter that ends it

    -- Terminate the last item with a delimiter as well.
    SELECT #str = #str + #delimiter

    SET #pos = CHARINDEX(#delimiter, #str, #start)
    WHILE #pos > 0
    BEGIN
        INSERT #returnTable (item)
        VALUES (CASE
                    WHEN #pos = #start THEN NULL     -- nothing between two delimiters
                    ELSE SUBSTRING(#str, #start, #pos - #start)
                END)
        SET #start = #pos + 1
        SET #pos = CHARINDEX(#delimiter, #str, #start)
    END
    RETURN
END
You can call this function, like this:
-- returns idx 1..3 with the items '1', '24' and '5'
select *
from fn_split('1,24,5', ',')
Implementation:
-- Demo: turn "Name,Surname" values into two columns with fn_split.
Declare #test TABLE (
    ID VARCHAR(200),
    Data VARCHAR(200)
)
insert into #test (ID, Data)
Values ('1', 'Cleo,Smith')
insert into #test (ID, Data)
Values ('2', 'Paul,Grim')

-- Split each row once and pick item 1 and item 2 from the result;
-- a missing item stays NULL.
select t.ID,
       parts.Name,
       parts.Surname
from #test t
outer apply (
    select max(case when s.idx = 1 then s.item end) as Name,
           max(case when s.idx = 2 then s.item end) as Surname
    from fn_split(t.Data, ',') s
) parts
The result will look like this:
Use Parsename() function
-- Sample names; PARSENAME numbers the dot-separated parts from the right.
with cte as (
    select 'Aria,Karimi' as FullName
    Union
    select 'Joe,Karimi' as FullName
    Union
    select 'Bab,Karimi' as FullName
)
SELECT
    PARSENAME(REPLACE(FullName, ',', '.'), 2) as Name,     -- part before the comma
    PARSENAME(REPLACE(FullName, ',', '.'), 1) as Family    -- part after the comma
FROM cte
Result
Name Family
----- ------
Aria Karimi
Bab Karimi
Joe Karimi
Try this:
-- Split a CSV string with a recursive CTE: each step peels off the first value (val)
-- and carries the remainder (rem) forward until nothing is left.
declare #csv varchar(100) ='aaa,bb,csda,daass';
set #csv = #csv+',';   -- terminate the last value with a comma as well
with cte as
(
    -- anchor: first value and the rest of the string
    select SUBSTRING(#csv, 1, charindex(',', #csv, 1) - 1) as val,
           SUBSTRING(#csv, charindex(',', #csv, 1) + 1, len(#csv)) as rem
    UNION ALL
    -- recursion: first value of the remainder and what follows it
    select SUBSTRING(prev.rem, 1, charindex(',', prev.rem, 1) - 1) as val,
           SUBSTRING(prev.rem, charindex(',', prev.rem, 1) + 1, len(prev.rem))
    from cte prev
    where LEN(prev.rem) >= 1
)
select val from cte
This function is the fastest:
CREATE FUNCTION dbo.F_ExtractSubString
(
    #String VARCHAR(MAX),
    #NroSubString INT,
    #Separator VARCHAR(5)
)
RETURNS VARCHAR(MAX) AS
BEGIN
    -- Returns the #NroSubString-th (1-based) item of #String, where items are
    -- separated by #Separator (1 to 5 characters). Returns '' when the item does
    -- not exist, #NroSubString is below 1, or #String / #Separator is NULL or empty.
    DECLARE #SepLen INT = DATALENGTH(#Separator),  -- DATALENGTH: LEN() would ignore the blank in ', '
            #St INT = 1,                           -- first character of the current item
            #End INT,                              -- position of the separator that ends it
            #Ret VARCHAR(MAX)

    -- Terminate the last item with a separator as well.
    SET #String = #String + #Separator
    SET #End = CHARINDEX(#Separator, #String, #St)

    WHILE #End > 0 AND #NroSubString > 1
    BEGIN
        -- Skip the whole separator; skipping one character left the rest of a
        -- multi-character separator at the start of the next item.
        SET #St = #End + #SepLen
        SET #End = CHARINDEX(#Separator, #String, #St)
        SET #NroSubString = #NroSubString - 1
    END

    IF #End > 0 AND #NroSubString = 1
        SET #Ret = SUBSTRING(#String, #St, #End - #St)
    ELSE
        SET #Ret = ''
    RETURN #Ret
END
GO
Example usage:
-- COLUMN and TABLE are placeholders for your own column and table names.
SELECT
    dbo.F_ExtractSubString(COLUMN, 1, ', '),
    dbo.F_ExtractSubString(COLUMN, 2, ', '),
    dbo.F_ExtractSubString(COLUMN, 3, ', ')
FROM TABLE
I encountered a similar but more complex problem, and since this is the first thread I found on the issue, I decided to post my findings. I know it is a complex solution to a simple problem, but I hope it helps other people who come to this thread looking for a more complex solution. I had to split a string containing 5 numbers (column name: levelsFeed) and show each number in a separate column.
for example: 8,1,2,2,2
should be shown as :
1 2 3 4 5
-------------
8 1 2 2 2
Solution 1: using XML functions:
This is the slowest solution by far.
-- Solution 1: convert levelsFeed to XML (one <r> element per number) and read the
-- five elements by position.
-- Fixes: the stray comma after FeedbackID was a syntax error, and the former
-- CROSS APPLY ... nodes('/H/r') only produced five identical rows per feedback
-- for DISTINCT to remove again.
SELECT Distinct d.FeedbackID
     , d.[vals].value('(/H/r)[1]', 'INT') AS level1
     , d.[vals].value('(/H/r)[2]', 'INT') AS level2
     , d.[vals].value('(/H/r)[3]', 'INT') AS level3
     , d.[vals].value('(/H/r)[4]', 'INT') AS level4
     , d.[vals].value('(/H/r)[5]', 'INT') AS level5
FROM (
    SELECT *, CAST (N'<H><r>' + REPLACE(levelsFeed, ',', '</r><r>') + '</r> </H>' AS XML) AS [vals]
    FROM Feedbacks
) as d
Solution 2: using Split function and pivot. (the split function split a string to rows with the column name Data)
-- Solution 2: split levelsFeed into rows, number the rows per feedback, pivot them into columns.
-- NOTE(review): rn is assigned over an unordered set, so it presumably follows the split order — confirm.
SELECT
    FeedbackID,
    [1], [2], [3], [4], [5]
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY feedbackID ORDER BY (SELECT null)) as rn
    FROM (
        SELECT FeedbackID, levelsFeed
        FROM Feedbacks
    ) as a
    CROSS APPLY dbo.Split(levelsFeed, ',')
) as SourceTable
PIVOT
(
    MAX(data)
    FOR rn IN ([1], [2], [3], [4], [5])
) as pivotTable
Solution 3: using string manipulations functions - fastest by small margin over solution 2
-- Solution 3: level1 is cut off with SUBSTRING; the remaining four values are turned
-- into a dot-separated name so PARSENAME (parts numbered from the right) can read them.
SELECT
    f.FeedbackID,
    SUBSTRING(f.levelsFeed, 0, CHARINDEX(',', f.levelsFeed)) AS level1,
    PARSENAME(tail.dotted, 4) AS level2,
    PARSENAME(tail.dotted, 3) AS level3,
    PARSENAME(tail.dotted, 2) AS level4,
    PARSENAME(tail.dotted, 1) AS level5
FROM Feedbacks f
-- everything after the first comma, with the commas replaced by dots
CROSS APPLY (
    SELECT REPLACE(SUBSTRING(f.levelsFeed, CHARINDEX(',', f.levelsFeed) + 1, LEN(f.levelsFeed)), ',', '.') AS dotted
) tail
Since levelsFeed contains 5 string values and PARSENAME handles at most four parts, I needed to use the SUBSTRING function for the first value.
I hope that my solution will help others who come to this thread looking for a more complex way to split values into columns.
Using instring function :)
-- Split "Fname,Sname" at the comma using INSTR.
select Value,
       -- search for the comma (the space searched before left the comma attached to Fname)
       substring(String, 1, instr(String, ',') - 1) Fname,
       -- without a length argument SUBSTRING returns everything up to the end of the string
       substring(String, instr(String, ',') + 1) Sname
from tablename;
Used two functions,
1. substring(string, position, length) ==> returns length characters of string, starting at position
2. instr(string,pattern) ==> returns position of pattern.
If we don’t provide length argument in substring it returns until end of string
This worked for me
CREATE FUNCTION [dbo].[SplitString](
    #delimited NVARCHAR(MAX),
    #delimiter NVARCHAR(100)
) RETURNS #t TABLE ( val NVARCHAR(MAX))
AS
BEGIN
    -- Splits #delimited on #delimiter (one or more characters) and returns one row per item.
    -- NOTE(review): the input is not XML-escaped; text containing '&' or '<' will make
    -- the XML conversion fail.
    DECLARE #xml XML
    -- every item becomes one <t> element
    SET #xml = N'<t>' + REPLACE(#delimited, #delimiter, N'</t><t>') + N'</t>'
    INSERT INTO #t(val)
    -- Read the items back as nvarchar: going through varchar replaced characters
    -- outside the database code page, although input and output are both NVARCHAR.
    SELECT r.value('.', 'nvarchar(MAX)') as item
    FROM #xml.nodes('/t') as records(r)
    RETURN
END
mytable:
Value ColOne
--------------------
1 Cleo, Smith
The following should work if there aren't too many columns
ALTER TABLE mytable ADD ColTwo nvarchar(256);
GO
-- The new column must exist before a statement that references it is compiled,
-- hence the batch separator above.
-- Every SET expression sees the value ColOne had before the update, so both halves
-- can be cut from the original text in a single statement.
-- Rows without a comma are left untouched (LEFT would otherwise fail with a negative length).
UPDATE mytable
SET ColTwo = LTRIM(SUBSTRING(ColOne, Charindex(',', ColOne) + 1, LEN(ColOne))),
--'Smith' = LTRIM(' Smith'), the text after the comma in 'Cleo, Smith'
    ColOne = LEFT(ColOne, Charindex(',', ColOne) - 1)
--'Cleo' = LEFT('Cleo, Smith', Charindex(',', 'Cleo, Smith') - 1)
WHERE Charindex(',', ColOne) > 0;
Result:
Value ColOne ColTwo
--------------------
1 Cleo Smith
-- Walk through #INPUT one character at a time and print every character
-- except the one to eliminate (the comma).
DECLARE #INPUT VARCHAR (MAX)='N,A,R,E,N,D,R,A'
DECLARE #ELIMINATE_CHAR CHAR (1)=','
DECLARE #L_START INT=1                       -- current position
DECLARE #L_END INT=(SELECT LEN (#INPUT))     -- last position
DECLARE #OUTPUT CHAR (1)                     -- character at the current position
WHILE #L_START <=#L_END
BEGIN
    SET #OUTPUT=(SUBSTRING (#INPUT,#L_START,1))
    IF #OUTPUT!=#ELIMINATE_CHAR
        PRINT #OUTPUT
    SET #L_START=#L_START+1
END
You may find the solution in SQL User Defined Function to Parse a Delimited String helpful (from The Code Project).
This is the code part from this page:
CREATE FUNCTION [fn_ParseText2Table]
(#p_SourceText VARCHAR(MAX)
,#p_Delimeter VARCHAR(100)=',' --default to comma delimited.
)
RETURNS #retTable
TABLE([Position] INT IDENTITY(1,1)
     ,[Int_Value] INT
     ,[Num_Value] NUMERIC(18,3)
     ,[Txt_Value] VARCHAR(MAX)
     ,[Date_value] DATETIME
     )
AS
/*
********************************************************************************
Purpose: Parse values from a delimited string
         & return the result as an indexed table
Copyright 1996, 1997, 2000, 2003 Clayton Groom (Clayton_Groom#hotmail.com)
Posted to the public domain Aug, 2004
2003-06-17 Rewritten as SQL 2000 function.
           Reworked to allow for delimiters > 1 character in length
           and to convert Text values to numbers
2016-04-05 Added logic for date values based on "new" ISDATE() function, Updated to use XML approach, which is more efficient.

Parameters:
  #p_SourceText - the delimited string to split
  #p_Delimeter  - the delimiter (one or more characters), ',' by default
Returns one row per element:
  Position   - IDENTITY value assigned as the rows are inserted
               (NOTE(review): presumably follows element order; there is no
               explicit ORDER BY, so confirm before relying on it)
  Int_Value  - element as INT (rounded via NUMERIC), NULL when not convertible
  Num_Value  - element as NUMERIC(18,3), NULL when not convertible
  Txt_Value  - element text, unchanged
  Date_value - element as DATETIME when ISDATE() accepts it, otherwise NULL
A NULL source string yields no rows.
********************************************************************************
*/
BEGIN
    DECLARE #w_xml xml;
    -- Turn every delimiter into an element boundary so the XML engine does the split.
    -- NOTE(review): the text is not XML-escaped; input containing '&' or '<'
    -- will make this conversion fail.
    SET #w_xml = N'<root><i>' + replace(#p_SourceText, #p_Delimeter,'</i><i>') + '</i></root>';
    INSERT INTO #retTable
           ([Int_Value]
          , [Num_Value]
          , [Txt_Value]
          , [Date_value]
           )
    SELECT
           -- TRY_CAST instead of ISNUMERIC()+CAST: ISNUMERIC() also accepts
           -- '$', '-', '1e5' and similar, which made the plain CAST raise an error.
           TRY_CAST(TRY_CAST([v].[Txt] AS NUMERIC) AS INT) AS [Int_Value]
         , TRY_CAST([v].[Txt] AS NUMERIC(18, 3)) AS [Num_Value]
         , [v].[Txt] AS [Txt_Value]
           -- ISDATE() guard kept on purpose: a bare conversion would turn '' into 1900-01-01.
         , CASE
               WHEN ISDATE([v].[Txt]) = 1
               THEN CAST([v].[Txt] AS DATETIME)
           END AS [Date_value]
    FROM #w_xml.nodes('/root/i') AS [Items]([i])
    -- Extract the element text once instead of once per output column.
    CROSS APPLY (SELECT [i].value('.', 'VARCHAR(MAX)') AS [Txt]) AS [v];
    RETURN;
END;
GO
ALTER function get_occurance_index(#delimiter varchar(1),#occurence int,#String varchar(100))
returns int
AS
Begin
    -- Returns the 1-based position of the #occurence-th #delimiter inside #String,
    -- or 0 when the string holds fewer occurrences, #occurence is below 1,
    -- or an argument is NULL.
    Declare #found int = 0   -- position of the most recent match
    Declare #seen int = 0    -- number of matches consumed so far
    while #seen < #occurence
    begin
        set #found = charindex(#delimiter, #String, #found + 1)
        if isnull(#found, 0) = 0
            return 0         -- ran out of delimiters before reaching the requested one
        set #seen = #seen + 1
    end
    return #found
ENd
-- Demo: split each row into its first three comma-separated parts.
DECLARE @data AS TABLE (data varchar(100));
INSERT INTO @data (data) VALUES ('1,2,3');
INSERT INTO @data (data) VALUES ('aaa,bbbbb,cccc');

SELECT TOP 3
    SUBSTRING(d.data, 0, i.first_comma),                                       -- first part: starting at 0 yields first_comma - 1 characters
    SUBSTRING(d.data, i.first_comma + 1, i.second_comma - i.first_comma - 1),  -- text between the first two commas
    SUBSTRING(d.data, i.second_comma + 1, LEN(d.data)),                        -- remainder: the length can never exceed LEN(data)
    d.data
FROM @data AS d
-- Look the two comma positions up once per row instead of four times.
-- NOTE: a row with fewer than two commas makes the second SUBSTRING length
-- negative, which SQL Server rejects; the sample rows all have two.
CROSS APPLY (
    SELECT
        dbo.get_occurance_index(',', 1, d.data) AS first_comma,
        dbo.get_occurance_index(',', 2, d.data) AS second_comma
) AS i
I found that using PARSENAME as above caused any name with a period to get nulled.
So if there was an initial or a title in the name followed by a dot they return NULL.
I found this worked for me:
-- Split "FullName" at its first comma: Name is the text up to the comma,
-- Surname is the text from the comma onwards; commas are stripped from both.
SELECT
    REPLACE(LEFT(t.FullName, c.comma_pos), ',', '') AS Name,
    REPLACE(SUBSTRING(t.FullName, c.comma_pos, LEN(t.FullName)), ',', '') AS Surname
FROM Table1 AS t
CROSS APPLY (SELECT CHARINDEX(',', t.FullName) AS comma_pos) AS c
This is easy to do with the query below:
-- Split @str at its first comma into (text before, text after).
DECLARE @str NVARCHAR(MAX)='ControlID_05436b78-04ba-9667-fa01-9ff8c1b7c235,3'
DECLARE @comma BIGINT = CHARINDEX(',', @str)
SELECT
    -- Without a comma LEFT(@str, -1) would raise; return the whole text instead.
    CASE WHEN @comma > 0 THEN LEFT(@str, @comma - 1) ELSE @str END,
    -- Everything after the comma; NULL when there is no comma.
    CASE WHEN @comma > 0 THEN RIGHT(@str, LEN(@str) - @comma) END
-- Split the ';'-separated modelFileId of each contract into up to four columns.
-- The values are read straight from the per-row XML document. The earlier
-- nodes('x') step produced one identical row per list element, which DISTINCT
-- then had to remove again.
-- NOTE: modelFileId values containing &, < or > are not valid XML content and
-- will make the CONVERT fail.
SELECT DISTINCT
    c.modelFileId,
    F4.*
FROM contract AS c
CROSS APPLY (
    SELECT XmlList = CONVERT(xml, '<x>' + REPLACE(c.modelFileId, ';', '</x><x>') + '</x>')
) AS F2
CROSS APPLY (
    SELECT
        mfid1 = F2.XmlList.value('(/x)[1]', 'varchar(512)'),
        mfid2 = F2.XmlList.value('(/x)[2]', 'varchar(512)'),
        mfid3 = F2.XmlList.value('(/x)[3]', 'varchar(512)'),
        mfid4 = F2.XmlList.value('(/x)[4]', 'varchar(512)')
) AS F4
WHERE c.modelFileId LIKE '%;%'
ORDER BY c.modelFileId
-- Keep rows whose PROJ_UID appears in the comma-separated list @params and
-- whose RES_UID appears in the comma-separated list @res.
-- Each id is searched as ',id,' so that 12 does not match 112. The lists are
-- wrapped in commas here so the first and last items match as well; lists that
-- already carry leading/trailing commas keep working.
SELECT DISTINCT
    PROJ_UID,
    PROJ_NAME,
    RES_UID
FROM E2E_ProjectWiseTimesheetActuals
WHERE CHARINDEX(',' + CAST(PROJ_UID AS varchar(8000)) + ',', ',' + @params + ',') > 0
  AND CHARINDEX(',' + CAST(RES_UID AS varchar(8000)) + ',', ',' + @res + ',') > 0
I re-wrote an answer above and made it better:
-- Returns the @idx-th (1-based) comma-separated item of @s.
--   * '' when @idx is NULL, below 1, or beyond the last item.
--   * NULL when @s is NULL.
-- The return type is as wide as the input so items longer than 12 characters
-- are no longer silently cut off.
CREATE FUNCTION [dbo].[CSVParser]
(
    @s VARCHAR(255),
    @idx NUMERIC
)
RETURNS VARCHAR(255)
AS
BEGIN
    -- Guard first: with NULL text every CHARINDEX below would be NULL.
    IF @s IS NULL
        RETURN NULL;

    DECLARE @start INT = 1;                    -- first character of the current item
    DECLARE @comma INT = CHARINDEX(',', @s);   -- comma that ends the current item (0 = none)

    -- Skip whole items until the requested one is current. Positions are
    -- tracked inside the untouched string, so trailing spaces (which LEN()
    -- ignores) cannot shift the item boundaries.
    WHILE @idx > 1 AND @comma > 0
    BEGIN
        SET @start = @comma + 1;
        SET @comma = CHARINDEX(',', @s, @start);
        SET @idx = @idx - 1;
    END

    IF @idx = 1
    BEGIN
        IF @comma > 0
            RETURN SUBSTRING(@s, @start, @comma - @start);
        RETURN SUBSTRING(@s, @start, 255);     -- last item: everything that is left
    END

    RETURN '';
END
Example usage:
-- Template only: COLUMN and TABLE stand for your own column and table names.
-- Returns the first three comma-separated items of that column for every row.
SELECT dbo.CSVParser(COLUMN, 1),
dbo.CSVParser(COLUMN, 2),
dbo.CSVParser(COLUMN, 3)
FROM TABLE
The question is simple, but the problem is a popular one :)
So I created a wrapper for string_split() that pivots the result in a more generic way. It is a table function that returns the values (nn, value1, value2, ... , value50) - enough for most CSV lines. If there are more values, they wrap onto the next row - nn indicates the row number. Set the third parameter @columnCnt = [yourNumber] to wrap at a specific position:
-- Splits @str on @delim and pivots the items into rows of up to 50 columns.
--   nn                 1-based output row number
--   value1 .. value50  the items of that row; unused columns are NULL
-- @columnCnt items are placed per row (default 50). Only value1..value50
-- exist, so with @columnCnt above 50 the extra items of each row are not
-- returned; @columnCnt = 0 raises a divide-by-zero error.
-- NOTE(review): items are numbered in the order string_split() returns them.
-- Microsoft documents that order as not guaranteed to match the input unless
-- the ordinal option of newer versions is used - confirm for your version.
alter FUNCTION fn_Split50
(
    @str varchar(max),
    @delim char(1),
    @columnCnt int = 50
)
RETURNS TABLE
AS
RETURN
(
    SELECT *
    FROM (
        SELECT
            nn = (numbered.nn - 1) / @columnCnt + 1,                                    -- output row
            nnn = 'value' + cast(((numbered.nn - 1) % @columnCnt) + 1 as varchar(10)),  -- output column name
            numbered.value
        FROM (
            SELECT
                nn = ROW_NUMBER() over (order by (select null)),
                value
            FROM string_split(@str, @delim)
        ) numbered
    ) bb
    PIVOT
    (
        max(value)
        FOR nnn IN (
            value1, value2, value3, value4, value5, value6, value7, value8, value9, value10,
            value11, value12, value13, value14, value15, value16, value17, value18, value19, value20,
            value21, value22, value23, value24, value25, value26, value27, value28, value29, value30,
            value31, value32, value33, value34, value35, value36, value37, value38, value39, value40,
            value41, value42, value43, value44, value45, value46, value47, value48, value49, value50
        )
    ) AS PivotTable
)
Example usage:
-- Default width: all five items fit into a single row (value1..value5).
select * from dbo.fn_split50('zz1,aa2,ss3,dd4,ff5', ',', DEFAULT)
-- Wrap after three items: ten items are spread over four rows.
select * from dbo.fn_split50('zz1,aa2,ss3,dd4,ff5,gg6,hh7,jj8,ww9,qq10', ',', 3)
-- Wrap after two items: twenty items become ten two-column rows.
select * from dbo.fn_split50('zz1,11,aa2,22,ss3,33,dd4,44,ff5,55,gg6,66,hh7,77,jj8,88,ww9,99,qq10,1010', ',',2)
I hope this helps :)

How to parse a string and create several columns from it?

I have a varchar(max) field containing Name Value pairs, in every line I have Name UnderScore Value.
I need to do a query against it so that it returns the Name, Value pairs in two columns (so by parsing the text, removing the underscore and the "new line" char.
So from this
select NameValue from Table
where I get this text:
Name1_Value1
Name2_Value2
Name3_Value3
I would like to have this output
Names Values
===== ======
Name1 Value1
Name2 Value2
Name3 Value3
-- Split "Name_Value" at the first underscore into two columns.
-- [Table] is the placeholder table name used in the question. Values and Table
-- are reserved words in T-SQL, so both are bracketed.
SELECT
    -- A row without an underscore would pass a negative length to SUBSTRING
    -- and raise; treat such a row as a name with no value.
    CASE WHEN p.pos > 0 THEN SUBSTRING(t.NameValue, 1, p.pos - 1) ELSE t.NameValue END AS Names,
    CASE WHEN p.pos > 0 THEN SUBSTRING(t.NameValue, p.pos + 1, LEN(t.NameValue)) END AS [Values]
FROM [Table] AS t
CROSS APPLY (SELECT CHARINDEX('_', t.NameValue) AS pos) AS p
EDIT:
Something like this put in a function or stored procedure combined with a temp table should work for more than one line, depending on the line delimiter you should also remove CHAR(13) before you start:
-- Split a multi-line "Name_Value" text into one (Names, Values) row per line.
-- Lines are separated by CHAR(10); CHAR(13) is removed first so Windows line
-- endings work too. Lines without an underscore (including a trailing empty
-- line) are skipped instead of raising an error.
DECLARE @helper varchar(max)     -- text still to be processed
DECLARE @current varchar(max)    -- line being processed
DECLARE @eol bigint              -- position of the next line feed (0 = none)
DECLARE @sep bigint              -- position of the underscore in @current
DECLARE @pairs TABLE (line_no int IDENTITY(1,1), Names varchar(max), [Values] varchar(max))

-- NAMEVALUE is a placeholder: assign the NameValue text to parse here.
SET @helper = REPLACE(NAMEVALUE, CHAR(13), '')

WHILE @helper <> ''
BEGIN
    -- Cut the next line off the front of @helper.
    SET @eol = CHARINDEX(CHAR(10), @helper)
    IF @eol > 0
    BEGIN
        SET @current = LEFT(@helper, @eol - 1)
        SET @helper = SUBSTRING(@helper, @eol + 1, LEN(@helper))
    END
    ELSE
    BEGIN
        SET @current = @helper
        SET @helper = ''
    END

    SET @sep = CHARINDEX('_', @current)
    IF @sep > 0
        INSERT INTO @pairs (Names, [Values])
        VALUES (LEFT(@current, @sep - 1), SUBSTRING(@current, @sep + 1, LEN(@current)))
END

-- One result set with both columns, in input order.
SELECT Names, [Values]
FROM @pairs
ORDER BY line_no
-- Split @text on @Delimiter and return one row per piece:
--   CHARBegin  1-based position where the piece starts
--   Res        the piece itself, without the delimiter
--   RN         1-based piece number
-- The text after the last delimiter is returned as the final piece.
DECLARE @text NVARCHAR(MAX)= '***[ddd]***
dfdf
fdfdfdfdfdf
***[fff]***
4545445
45454
***[ahaASSDAD]***
DFDFDF
***[SOME TEXT]***
'
DECLARE @Delimiter VARCHAR(1000)= CHAR(13) + CHAR(10) ;
-- DATALENGTH counts trailing spaces, which LEN would ignore.
DECLARE @delimLen INT = DATALENGTH(@Delimiter) ;
DECLARE @textLen INT = DATALENGTH(@text) / 2 ;   -- nvarchar stores two bytes per character
WITH numbers
AS (
    -- Ad-hoc numbers table: enough rows only while the text is shorter than
    -- the square of the row count of sys.objects.
    SELECT ROW_NUMBER() OVER ( ORDER BY o.object_id, o2.object_id ) Number
    FROM sys.objects o
    CROSS JOIN sys.objects o2
),
starts
AS (
    -- A piece starts at position 1 and right after every delimiter.
    SELECT 1 AS PieceStart
    UNION ALL
    SELECT n.Number + @delimLen
    FROM numbers n
    WHERE n.Number <= @textLen
      AND SUBSTRING(@text, n.Number, @delimLen) = @Delimiter
)
-- No recursion is involved, so the default 100-level recursion limit does not
-- restrict the number of pieces.
SELECT s.PieceStart AS CHARBegin ,
       -- The piece ends at the next delimiter, or at the end of the text.
       SUBSTRING(@text, s.PieceStart,
                 ISNULL(NULLIF(CHARINDEX(@Delimiter, @text, s.PieceStart), 0), @textLen + 1)
                 - s.PieceStart) AS Res ,
       ROW_NUMBER() OVER ( ORDER BY s.PieceStart ) AS RN
FROM starts s
ORDER BY s.PieceStart
Here is an example that you can use:
-- Set up the demo table: an id plus a "CODE name" text column.
CREATE TABLE demo (
    dID  int,
    dRec varchar(100)
);
-- Load the two sample records.
INSERT INTO demo (dID, dRec)
VALUES (1, 'BCQP1 Sam'),
       (2, 'BCQP2 LD');
-- Show what was stored.
SELECT dID, dRec FROM demo;
Then I want to show in one single row both rows combined and display only the values from the left removing the name on the right side up to the space character.
/*
Combines the leading code of every demo.dRec (the text before the first space)
into one space-separated string, returned as a single row.

The STUFF() function puts a string in another string, from an initial position;
here it removes the leading separator.
The LEFT() function returns the left part of a character string with the specified number of characters.
The CHARINDEX() string function returns the starting position of the specified expression in a character string.
*/
SELECT DISTINCT
    STUFF((
        -- Searching in dRec + ' ' means a value without any space is taken
        -- whole instead of making LEFT() fail with a negative length.
        SELECT ' ' + LEFT(dt1.dRec, CHARINDEX(' ', dt1.dRec + ' ') - 1)
        FROM demo AS dt1
        ORDER BY dt1.dRec
        -- TYPE + value() returns the text as-is; plain FOR XML PATH('') would
        -- entity-encode characters such as & and <.
        FOR XML PATH(''), TYPE
    ).value('.', 'varchar(max)'), 1, 1, '') AS [Convined values]
-- The outer table only decides whether a row is returned at all (none when
-- demo is empty); DISTINCT collapses the identical rows to one.
FROM demo AS dt2
As you can see here when you run the function the output will be:
BCQP1 BCQP2
On the top of the script I explained what each function is used for (STUFF(), LEFT(), CHARINDEX() functions) I also used DISTINCT in order to eliminate duplicate values.
NOTE: dt stands for "demo table", I used the same table and use two alias dt1 and dt2, and dRec stands for "demo Record"
If you want to learn more about STUFF() Function here is a link:
https://www.mssqltips.com/sqlservertip/2914/rolling-up-multiple-rows-into-a-single-row-and-column-for-sql-server-data/
With a recursive CTE you will run into the recursion limit if there are more than 100 items:
Msg 530, Level 16, State 1, Line 20 The statement terminated. The
maximum recursion 100 has been exhausted before statement completion.
-- Demonstration: the recursive split below fails with Msg 530 for this input.
-- The list holds 104 items (103 commas), so the recursive member "res" needs
-- more than the default limit of 100 recursion levels.
-- To lift the limit, append OPTION (MAXRECURSION 0) to the final SELECT.
DECLARE @text NVARCHAR(MAX)
SET @text = '100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203'
DECLARE @Delimiter VARCHAR(1000)= ',';
WITH numbers
AS ( -- ad-hoc numbers table built from the system catalog
    SELECT ROW_NUMBER() OVER ( ORDER BY o.object_id, o2.object_id ) Number
    FROM sys.objects o
    CROSS JOIN sys.objects o2
),
c AS ( -- position (CHARBegin) and sequence number (RN) of every delimiter
    SELECT Number CHARBegin ,
        ROW_NUMBER() OVER ( ORDER BY number ) RN
    FROM numbers
    WHERE SUBSTRING(@text, Number, LEN(@Delimiter)) = @Delimiter
),
res
AS ( -- anchor: text up to and including the first delimiter
    SELECT CHARBegin ,
        CAST(LEFT(@text, charbegin) AS NVARCHAR(MAX)) Res ,
        RN
    FROM c
    WHERE rn = 1
    UNION ALL
    -- recursive step: text from the previous delimiter up to the next one
    SELECT c.CHARBegin ,
        CAST(SUBSTRING(@text, res.CHARBegin,
            c.CHARBegin - res.CHARBegin) AS NVARCHAR(MAX)) ,
        c.RN
    FROM c
    JOIN res ON c.RN = res.RN + 1
)
SELECT *
FROM res

Find non-ASCII characters in varchar columns using SQL Server

How can rows with non-ASCII characters be returned using SQL Server?
If you can show how to do it for one column would be great.
I am doing something like this now, but it is not working
-- The asker's attempt (reported as not working): find rows whose Line holds a
-- character outside the set "! through ~, plus space".
-- NOTE(review): no collation is forced here, so the !-~ range presumably
-- follows the column's collation order rather than character codes - compare
-- the COLLATE Latin1_General_BIN variant in the answers.
select *
from Staging.APARMRE1 as ar
where ar.Line like '%[^!-~ ]%'
For extra credit, if it can span all varchar columns in a table, that would be outstanding! In this solution, it would be nice to return three columns:
The identity field for that record. (This will allow the whole record to be reviewed with another query.)
The column name
The text with the invalid character
Id | FieldName | InvalidText |
----+-----------+-------------------+
25 | LastName | Solís |
56 | FirstName | François |
100 | Address1 | 123 Ümlaut street |
Invalid characters would be any outside the range of SPACE (decimal 32) through ~ (decimal 127)
Here is a solution for the single column search using PATINDEX.
It also displays the StartPosition, InvalidCharacter and ASCII code.
-- For every row of staging.APARMRE1 whose Line contains a character other than
-- space or the !..~ range (compared by binary collation), report the position
-- of the first such character, the character itself and its ASCII code.
SELECT
    src.line,
    hit.pos AS [Position],
    SUBSTRING(src.line, hit.pos, 1) AS [InvalidCharacter],
    ASCII(SUBSTRING(src.line, hit.pos, 1)) AS [ASCIICode]
FROM staging.APARMRE1 AS src
-- Locate the first offending character once and reuse the position.
CROSS APPLY (
    SELECT PATINDEX('%[^ !-~]%' COLLATE Latin1_General_BIN, src.Line) AS pos
) AS hit
WHERE hit.pos > 0
I've been running this bit of code with success
-- Find nvarchar values that do not survive a conversion to varchar, i.e. that
-- contain characters the varchar code page cannot represent.
declare @UnicodeData table (
    data nvarchar(500)
)
insert into
    @UnicodeData (data)
values
    (N'Horse�')
    ,(N'Dog')
    ,(N'Cat')
select
    data
from
    @UnicodeData
where
    -- The binary collation makes this an exact comparison, so any character
    -- changed by the cast shows up as a difference.
    data collate LATIN1_GENERAL_BIN != cast(data as varchar(max))
Which works well for known columns.
For extra credit, I wrote this quick script to search all nvarchar columns in a given table for Unicode characters.
-- Build and run one query that checks every nvarchar column of @table for
-- values that do not survive a conversion to varchar. Result columns:
--   FieldName         the bracketed column name
--   InvalidCharacter  the offending value
declare
    @sql nvarchar(max)                 -- nvarchar so Unicode identifiers survive
    ,@table sysname = N'mytable'       -- enter your table here

select
    @sql = stuff((
        -- One SELECT per column, each prefixed with ' union all '. Identifiers
        -- go through QUOTENAME so names containing ] or spaces stay valid, and
        -- the name is quote-escaped where it is used as a string literal.
        select
            N' union all select FieldName = ''' + replace(quotename(c.COLUMN_NAME), N'''', N'''''') + N''''
            + N', InvalidCharacter = ' + quotename(c.COLUMN_NAME)
            + N' from ' + quotename(c.TABLE_SCHEMA) + N'.' + quotename(c.TABLE_NAME)
            + N' where ' + quotename(c.COLUMN_NAME)
            + N' collate LATIN1_GENERAL_BIN != cast(' + quotename(c.COLUMN_NAME) + N' as varchar(max))'
        from
            INFORMATION_SCHEMA.COLUMNS c
        where
            c.DATA_TYPE = 'nvarchar'
            and c.TABLE_NAME = @table
        order by
            c.TABLE_SCHEMA
            ,c.COLUMN_NAME
        -- Aggregating with FOR XML avoids "select @sql = @sql + ...", whose
        -- result over several rows is not guaranteed.
        for xml path(''), type
    ).value('.', 'nvarchar(max)'), 1, 11, N'')   -- drop the leading ' union all ' (11 characters)

-- check
-- print @sql
-- @sql stays NULL when the table has no nvarchar columns.
if @sql is not null
    exec sys.sp_executesql @sql
I'm not a fan of dynamic SQL but it does have its uses for exploratory queries like this.
try something like this:
-- Sample data: char(182) marks the "bad" character in the test values.
DECLARE @YourTable table (PK int, col1 varchar(20), col2 varchar(20), col3 varchar(20));
INSERT @YourTable (PK, col1, col2, col3) VALUES (1, 'ok','ok','ok');
INSERT @YourTable (PK, col1, col2, col3) VALUES (2, 'BA'+char(182)+'D','ok','ok');
INSERT @YourTable (PK, col1, col2, col3) VALUES (3, 'ok',char(182)+'BAD','ok');
INSERT @YourTable (PK, col1, col2, col3) VALUES (4, 'ok','ok','B'+char(182)+'AD');
INSERT @YourTable (PK, col1, col2, col3) VALUES (5, char(182)+'BAD','ok',char(182)+'BAD');
INSERT @YourTable (PK, col1, col2, col3) VALUES (6, 'BAD'+char(182),'B'+char(182)+'AD','BAD'+char(182)+char(182)+char(182));
--if you have a Numbers table use that, other wise make one using a CTE
WITH AllNumbers AS
( SELECT 1 AS Number
  UNION ALL
  SELECT Number+1
  FROM AllNumbers
  WHERE Number<1000
)
-- Returns one row per (pk, column) whose value holds a character with an ASCII
-- code below 32 or above 127.
SELECT DISTINCT   -- a value with several bad characters must be listed once
    y.PK AS pk,
    v.BadValueColumn,
    CONVERT(varchar(20), v.BadValue) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM @YourTable y
-- Unpivot the three columns so the character test is written only once.
CROSS APPLY (VALUES ('Col1', y.col1), ('Col2', y.col2), ('Col3', y.col3)) AS v (BadValueColumn, BadValue)
-- One row per character position of the value.
INNER JOIN AllNumbers n ON n.Number <= LEN(v.BadValue)
WHERE ASCII(SUBSTRING(v.BadValue, n.Number, 1))<32 OR ASCII(SUBSTRING(v.BadValue, n.Number, 1))>127
ORDER BY pk, BadValueColumn
OPTION (MAXRECURSION 1000);   -- AllNumbers recurses up to 1000, above the default limit of 100
OUTPUT:
pk BadValueColumn BadValue
----------- -------------- --------------------
2 Col1 BA¶D
3 Col2 ¶BAD
4 Col3 B¶AD
5 Col1 ¶BAD
5 Col3 ¶BAD
6 Col1 BAD¶
6 Col2 B¶AD
6 Col3 BAD¶¶¶
(8 row(s) affected)
This script searches for non-ascii characters in one column. It generates a string of all valid characters, here code point 32 to 127. Then it searches for rows that don't match the list:
-- Build the list of valid characters (codes 32..127), each one prefixed with
-- the LIKE escape character '|', then return rows containing any other character.
-- The list is 96 characters x 2 = 192 characters long. A varchar(128) buffer
-- silently cut it off half-way, so characters from code 96 upwards were
-- reported as invalid.
declare @str varchar(256);
declare @i int;
set @str = '';
set @i = 32;
while @i <= 127
begin
    set @str = @str + '|' + char(@i);
    set @i = @i + 1;
end;
select col1
from YourTable
where col1 like '%[^' + @str + ']%' escape '|';
running the various solutions on some real world data - 12M rows varchar length ~30, around 9k dodgy rows, no full text index in play, the patIndex solution is the fastest, and it also selects the most rows.
(pre-ran km. to set the cache to a known state, ran the 3 processes, and finally ran km again - the last 2 runs of km gave times within 2 seconds)
patindex solution by Gerhard Weiss -- Runtime 0:38, returns 9144 rows
-- Benchmark query 1: rows whose dodgyColumn contains any character other than
-- space or the !..~ range, compared with a binary collation.
select dodgyColumn from myTable fcc
WHERE patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,dodgyColumn ) >0
the substring-numbers solution by MT. -- Runtime 1:16, returned 8996 rows
-- Benchmark query 2: joins every row to a numbers table and tests characters
-- one by one; it returns one row per offending character (no DISTINCT).
-- NOTE(review): the join uses number < len(...), so the last character position
-- is never tested; together with the 127 (rather than 126) upper bound this
-- presumably explains why it returns fewer rows than the patindex query - confirm.
select dodgyColumn from myTable fcc
INNER JOIN dbo.Numbers32k dn ON dn.number<(len(fcc.dodgyColumn ))
WHERE ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))<32
OR ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))>127
udf solution by Deon Robertson -- Runtime 3:47, returns 7316 rows
-- Benchmark query 3: rows for which the scalar UDF returns 1 when called with 1
-- as its second argument.
-- NOTE(review): dbo.udf_test_ContainsNonASCIIChars is not defined in this text;
-- presumably a test copy of udf_ContainsNonASCIIChars - confirm.
select dodgyColumn
from myTable
where dbo.udf_test_ContainsNonASCIIChars(dodgyColumn , 1) = 1
There is a user-defined function available on the web called 'Parse Alphanumeric'. Search for "UDF parse alphanumeric" and you should find the code for it. This user-defined function removes all characters that don't fall within 0-9, a-z, and A-Z.
-- Rows whose last_name is changed by udf_parsealpha, i.e. last names that
-- contain at least one character the function strips.
-- NOTE(review): udf_parsealpha is an external function not defined here;
-- SQL Server requires scalar UDFs to be called with their schema (e.g. dbo.) - confirm.
Select * from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name) <> ar.last_name
That should bring back any records that have a last_name with invalid characters. Your bonus-points question is more of a challenge, but I think a CASE expression could handle it. This is pseudo-code; I'm not entirely sure it will work.
-- For each row with at least one offending field, report the id, the name of
-- the first offending field (checked in the order last name, first name,
-- Address1) and that field's value. Only the first offending field of a row
-- is reported.
-- Address1 is now compared with itself; it was compared with last_name, which
-- flagged almost every row.
-- NOTE(review): udf_parsealpha is an external function; SQL Server requires
-- scalar UDFs to be called with their schema (e.g. dbo.) - confirm.
Select id, case when udf_parsealpha(ar.last_name) <> ar.last_name then 'last name'
                when udf_parsealpha(ar.first_name) <> ar.first_name then 'first name'
                when udf_parsealpha(ar.Address1) <> ar.Address1 then 'Address1'
           end,
           case when udf_parsealpha(ar.last_name) <> ar.last_name then ar.last_name
                when udf_parsealpha(ar.first_name) <> ar.first_name then ar.first_name
                when udf_parsealpha(ar.Address1) <> ar.Address1 then ar.Address1
           end
from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name) <> ar.last_name or
      udf_parsealpha(ar.first_name) <> ar.first_name or
      udf_parsealpha(ar.Address1) <> ar.Address1
I wrote this in the forum post box...so I'm not quite sure if that'll function as is, but it should be close. I'm not quite sure how it will behave if a single record has two fields with invalid chars either.
As an alternative, you should be able to change the from clause away from a single table and into a subquery that looks something like:
-- Unpivot the text columns into (id, fieldname, value) rows and keep those
-- whose value is changed by udf_parsealpha. A row with several offending
-- fields yields one output row per field.
-- NOTE(review): udf_parsealpha is an external function; SQL Server requires
-- scalar UDFs to be called with their schema (e.g. dbo.) - confirm.
select src.id, src.fieldname, src.[value]
from (
    select ar.id, 'last_name' as fieldname, ar.last_name as [value]
    from Staging.APARMRE1 ar
    -- The branches differ by fieldname and id is the record's identity field,
    -- so there are no duplicates to remove: UNION ALL avoids the
    -- de-duplication step of UNION.
    union all
    select ar.id, 'first_name' as fieldname, ar.first_name as [value]
    from Staging.APARMRE1 ar
    ---(and repeat unions for each field)
) as src   -- T-SQL requires an alias on a derived table
where udf_parsealpha(src.[value]) <> src.[value]
The benefit here is that for every additional column you only need to extend the union, whereas the CASE version of this script needs the comparison repeated three times per column.
To find which field has invalid characters:
-- Serialize the whole table as XML; per the error shown below, a character that
-- XML does not allow makes the statement fail and the message names the field.
SELECT * FROM Staging.APARMRE1 FOR XML AUTO, TYPE
You can test it with this query:
-- Self-contained test: a literal containing char(31) is serialized as XML,
-- which raises the Msg 6841 error quoted below.
SELECT top 1 'char 31: '+char(31)+' (hex 0x1F)' field
from sysobjects
FOR XML AUTO, TYPE
The result will be:
Msg 6841, Level 16, State 1, Line 3 FOR XML could not serialize the
data for node 'field' because it contains a character (0x001F) which
is not allowed in XML. To retrieve this data using FOR XML, convert it
to binary, varbinary or image data type and use the BINARY BASE64
directive.
This is very useful when you write XML files and get invalid-character errors while validating them.
Here is a UDF I built to detect columns with extended ASCII characters. It is quick, and you can extend the character set you want to check. The second parameter lets you switch between flagging anything outside the standard character set and allowing an extended set:
-- Returns 1 when @string contains a character outside the printable ASCII
-- range 32..126, otherwise 0 (also 0 for NULL or empty input).
--   @checkExtendedCharset = 1     : tab (9) and the listed extended codes
--                                   (accented letters etc.) are accepted too.
--   @checkExtendedCharset = 0/NULL: every code outside 32..126 is reported.
-- Characters are tested through ascii() after conversion to varchar(1).
-- NOTE(review): characters that the varchar code page cannot represent are
-- presumably converted to '?' and therefore not reported - confirm.
create function [dbo].[udf_ContainsNonASCIIChars]
(
    @string nvarchar(4000),
    @checkExtendedCharset bit
)
returns bit
as
begin
    -- Character positions are 1-based: start at 1 and include position
    -- len(@string), so the last character is checked as well.
    declare @pos int = 1;
    declare @len int = len(@string);
    declare @char varchar(1);
    declare @code int;

    while @pos <= @len
    begin
        select @char = substring(@string, @pos, 1);
        select @code = ascii(@char);

        if @code < 32 or @code > 126
        begin
            -- Outside plain ASCII: acceptable only in extended mode and only
            -- when the code is on the allow-list.
            if isnull(@checkExtendedCharset, 0) = 0
               or @code not in (9,124,130,138,142,146,150,154,158,160,170,176,180,181,183,184,185,186,192,193,194,195,196,197,199,200,201,202,203,204,205,206,207,209,210,211,212,213,214,216,217,218,219,220,221,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,248,249,250,251,252,253,254,255)
                return 1;
        end

        select @pos = @pos + 1;
    end

    return 0;
end
USAGE:
-- Addresses containing characters outside ASCII 32..126 that are not on the
-- extended allow-list (second argument = 1).
-- Scalar UDFs must be called with their schema name, hence the dbo. prefix.
select Address1
from PropertyFile_English
where dbo.udf_ContainsNonASCIIChars(Address1, 1) = 1

Resources