Scramble/Obfuscate email values in SQL Server - sql-server

I would like to improve the following function to randomize the letters in an email column.
So far I have the following function but the output is not what I expected:
CREATE VIEW dbo.vwRandom
AS
SELECT RAND() as RandomValue;
GO
CREATE FUNCTION dbo.Character_Scramble
(
#OrigVal varchar(MAX)
)
RETURNS varchar(MAX)
WITH ENCRYPTION
AS
BEGIN
-- Variables used
DECLARE #NewVal VARCHAR(MAX);
DECLARE #OrigLen INT;
DECLARE #CurrLen INT;
DECLARE #LoopCt INT;
DECLARE #Rand INT;
-- Set variable default values
SET #NewVal = '';
SET #OrigLen = DATALENGTH(#OrigVal);
SET #CurrLen = #OrigLen;
SET #LoopCt = 1;
-- Loop through the characters passed
WHILE #LoopCt <= #OrigLen
BEGIN
-- Current length of possible characters
SET #CurrLen = DATALENGTH(CHARINDEX('#', #OrigVal));
-- Random position of character to use
SELECT
#Rand = Convert(int,(((1) - #CurrLen) *
RandomValue + #CurrLen))
FROM
dbo.vwRandom;
-- Assembles the value to be returned
SET #NewVal =
SUBSTRING(#OrigVal,#Rand,1) + #NewVal;
-- Removes the character from available options
SET #OrigVal =
Replace(#OrigVal,SUBSTRING(#Origval,#Rand,1),'');
-- Advance the loop="color:black">
SET #LoopCt = #LoopCt + 1;
END
-- Returns new value
Return LOWER(#NewVal);
END
GO
The output returned by the function is:
SELECT dbo.Character_Scramble('waltero.lukase#gmail.com')
-- output: vmgoli.#cuares
The output I want would be to respect the length of the word and the position of the symbols (., #, _, etc.).
SELECT dbo.Character_Scramble('waltero.lukase#gmail.com')
-- pkderso.modefk#poajf.lpd
Any help would help me enormously.
Thank you!

Maybe not exactly what you are doing, but the solution randomize the sequence of the alphabet set a-z and then use translate() to translate from normal sequence (a to z) to the randomize sequence
Due to string_agg() constraint, it cannot have two order by in the same scope, 2 separate CTE is used to generate the #normal and #scramble
declare #original varchar(100) = 'waltero.lukase#gmail.com';
declare #normal varchar(26),
#scramble varchar(26);
with chars as
(
select *
from
(
values ('a'), ('b'), ('c'), ('d'), ('e'), ('f'), ('g'), ('h'), ('i'), ('j'),
('k'), ('l'), ('m'), ('n'), ('o'), ('p'), ('q'), ('r'), ('s'), ('t'),
('u'), ('v'), ('w'), ('x'), ('y'), ('z')
) a (a)
),
normal as
(
-- normal ordering : a to z
select normal = string_agg(a, '') within group (order by a)
from chars
),
scramble as
(
-- randomize ordering
select scramble = string_agg(a, '') within group (order by newid())
from chars
)
select #normal = normal,
#scramble = scramble
from normal cross join scramble
select original = #original,
scrambled = translate(#original, #normal, #scramble);
UPDATE:
to apply this to your table, just add the FROM to the end of the query and change #original to your table name
with chars as
. . .
select original = email,
scrambled = translate(email, #normal, #scramble)
from yourtable
db<>fiddle demo

Related

Searching for multiple patterns in a string in T-SQL

In t-sql my dilemma is that I have to parse a potentially long string (up to 500 characters) for any of over 230 possible values and remove them from the string for reporting purposes. These values are a column in another table and they're all upper case and 4 characters long with the exception of two that are 5 characters long.
Examples of these values are:
USFRI
PROME
AZCH
TXJS
NYDS
XVIV. . . . .
Example of string before:
"Offered to XVIV and USFRI as back ups. No response as of yet."
Example of string after:
"Offered to and as back ups. No response as of yet."
Pretty sure it will have to be a UDF but I'm unable to come up with anything other than stripping ALL the upper case characters out of the string with PATINDEX which is not the objective.
This is unavoidably cludgy but one way is to split your string into rows, once you have a set of words the rest is easy; Simply re-aggregate while ignoring the matching values*:
with t as (
select 'Offered to XVIV and USFRI as back ups. No response as of yet.' s
union select 'Another row AZCH and TXJS words.'
), v as (
select * from (values('USFRI'),('PROME'),('AZCH'),('TXJS'),('NYDS'),('XVIV'))v(v)
)
select t.s OriginalString, s.Removed
from t
cross apply (
select String_Agg(j.[value], ' ') within group(order by Convert(tinyint,j.[key])) Removed
from OpenJson(Concat('["',replace(s, ' ', '","'),'"]')) j
where not exists (select * from v where v.v = j.[value])
)s;
* Requires a fully-supported version of SQL Server.
build a function to do the cleaning of one sentence, then call that function from your query, something like this SELECT Col1, dbo.fn_ReplaceValue(Col1) AS cleanValue, * FROM MySentencesTable. Your fn_ReplaceValue will be something like the code below, you could also create the table variable outside the function and pass it as parameter to speed up the process, but this way is all self contained.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION fn_ReplaceValue(#sentence VARCHAR(500))
RETURNS VARCHAR(500)
AS
BEGIN
DECLARE #ResultVar VARCHAR(500)
DECLARE #allValues TABLE (rowID int, sValues VARCHAR(15))
DECLARE #id INT = 0
DECLARE #ReplaceVal VARCHAR(10)
DECLARE #numberOfValues INT = (SELECT COUNT(*) FROM MyValuesTable)
--Populate table variable with all values
INSERT #allValues
SELECT ROW_NUMBER() OVER(ORDER BY MyValuesCol) AS rowID, MyValuesCol
FROM MyValuesTable
SET #ResultVar = #sentence
WHILE (#id <= #numberOfValues)
BEGIN
SET #id = #id + 1
SET #ReplaceVal = (SELECT sValue FROM #allValues WHERE rowID = #id)
SET #ResultVar = REPLACE(#ResultVar, #ReplaceVal, SPACE(0))
END
RETURN #ResultVar
END
GO
I suggest creating a table (either temporary or permanent), and loading these 230 string values into this table. Then use it in the following delete:
DELETE
FROM yourTable
WHERE col IN (SELECT col FROM tempTable);
If you just want to view your data sans these values, then use:
SELECT *
FROM yourTable
WHERE col NOT IN (SELECT col FROM tempTable);

How to check if a list of comma separated string values match an integer?

Below I've added the SQL query.
I wanted retrieve the list of records that match a condition. I pass integer values into #ClassID and #SectionID parameters, The problem is ce.Class_ID and ce.Section_ID are lists of comma-separated string values.
SELECT ce.ID AS CircularEntryCount
FROM dbo.CircularEntry ce
WHERE ce.AcademicYearID = 1
AND (ce.Circular_Date = #CurrentDate OR CAST(ce.Created_Date AS date) = #CurrentDate)
AND (ce.CircularApplicableForID = 1 OR ce.CircularApplicableForID = 3)
AND (ce.Class_ID = #ClassID OR ce.Class_ID = '0')
AND (ce.Section_ID = #SectionID OR ce.Section_ID = '0')
PS: I used split string function to split the values into individual columns and compared the same with the parameters, but it shows.
Error converting data type nvarchar to bigint
(
#List nvarchar(2000),
#SplitOn nvarchar(1)
)
RETURNS #RtnValue table (
Id int identity(1,1),
Value nvarchar(100)
)
AS
BEGIN
While (Charindex(#SplitOn,#List)>0)
Begin
Insert Into #RtnValue (value)
Select
Value = ltrim(rtrim(Substring(#List,1,Charindex(#SplitOn,#List)-1)))
Set #List = Substring(#List,Charindex(#SplitOn,#List)+len(#SplitOn),len(#List))
End
Insert Into #RtnValue (Value)
Select Value = ltrim(rtrim(#List))
Return
EN
The correct solution is to fix the problem - which means changing the structure of the database to not store delimited strings at all, but instead normalize the data and use foreign keys.
For more information, read Is storing a delimited list in a database column really that bad?, and not only the accepted answer by Bill Karwin, but other answers as well.
In case you can't change the database structure, you can use a workaround using like:
SELECT ce.ID AS CircularEntryCount
FROM dbo.CircularEntry ce
WHERE ce.AcademicYearID = 1
AND (ce.Circular_Date = #CurrentDate OR CAST(ce.Created_Date AS date) = #CurrentDate)
AND (ce.CircularApplicableForID = 1 OR ce.CircularApplicableForID = 3)
AND (','+ ce.Class_ID +',' LIKE '%,'+ CAST(#ClassID as varchar(20)) +'%,' OR ce.Class_ID = '0')
AND (','+ ce.Section_ID +',' LIKE '%,'+ CAST(#SectionID as varchar(20)) +'%,' OR ce.Section_ID = '0')
Note the cast to varchar(20) - bigint's min value contains a minus sign and 19 digits. If the data type of #ClassID or #SectionID is int, you can cast to varchar(11) instead.

TSQL: How insert separator between each character in a string

I have a string like this:
Apple
I want to include a separator after each character so the end result will turn out like this:
A,p,p,l,e
In C#, we have one liner method to achieve the above with Regex.Replace('Apple', ".{1}", "$0,");
I can only think of looping each character with charindex to append the separator but seems a little complicated. Is there any elegant way and simpler way to achieve this?
Thanks HABO for the suggestions. I'm able to generate the result that I want using the code but takes a little bit of time to really understand how the code work.
After some searching, I manage to found one useful article to insert empty spaces between each character and it's easier for me to understand.
I modify the code a little to define and include desire separator instead of fixing it to space as the separator:
DECLARE #pos INT = 2 -- location where we want first space
DECLARE #result VARCHAR(100) = 'Apple'
DECLARE #separator nvarchar(5) = ','
WHILE #pos < LEN(#result)+1
BEGIN
SET #result = STUFF(#result, #pos, 0, #separator);
SET #pos = #pos+2;
END
select #result; -- Output: A,p,p,l,e
Reference
In following SQL scripts, I get each character using SUBSTRING() function using with a number table (basically I used spt_values view here for simplicity) and then I concatenate them via two different methods, you can choose one
If you are using SQL Server 2017, we have a new SQL string aggregation function
First script uses string_agg function
declare #str nvarchar(max) = 'Apple'
SELECT
string_agg( substring(#str,number,1) , ',') Within Group (Order By number)
FROM master..spt_values n
WHERE
Type = 'P' and
Number between 1 and len(#str)
If you are working with a previous version, you can use string concatenation using FOR XML Path and SQL Stuff function as follows
declare #str nvarchar(max) = 'Apple'
; with cte as (
SELECT
number,
substring(#str,number,1) as L
FROM master..spt_values n
WHERE
Type = 'P' and
Number between 1 and len(#str)
)
SELECT
STUFF(
(
SELECT
',' + L
FROM cte
order by number
FOR XML PATH('')
), 1, 1, ''
)
Both solution yields the same result, I hope it helps
If you have SQL Server 2017 and a copy of ngrams8k it's ultra simple:
declare #word varchar(100) = 'apple';
select newString = string_agg(token, ',') within group (order by position)
from dbo.ngrams8k(#word,1);
For pre-2017 systems it's almost as simple:
declare #word varchar(100) = 'apple';
select newstring =
( select token + case len(#word)+1-position when 1 then '' else ',' end
from dbo.ngrams8k(#word,1)
order by position
for xml path(''))
One ugly way to do it is to split the string into characters, ideally using a numbers table, and reassemble it with the desired separator.
A less efficient implementation uses recursion in a CTE to split the characters and insert the separator between pairs of characters as it goes:
declare #Sample as VarChar(20) = 'Apple';
declare #Separator as Char = ',';
with Characters as (
select 1 as Position, Substring( #Sample, 1, 1 ) as Character
union all
select Position + 1,
case when Position & 1 = 1 then #Separator else Substring( #Sample, Position / 2 + 1, 1 ) end
from Characters
where Position < 2 * Len( #Sample ) - 1 )
select Stuff( ( select Character + '' from Characters order by Position for XML Path( '' ) ), 1, 0, '' ) as Result;
You can replace the select Stuff... line with select * from Characters; to see what's going on.
Try this
declare #var varchar(50) ='Apple'
;WITH CTE
AS
(
SELECT
SeqNo = 1,
MyStr = #var,
OpStr = CAST('' AS VARCHAR(50))
UNION ALL
SELECT
SeqNo = SeqNo+1,
MyStr = MyStR,
OpStr = CAST(ISNULL(OpStr,'')+SUBSTRING(MyStR,SeqNo,1)+',' AS VARCHAR(50))
FROM CTE
WHERE SeqNo <= LEN(#var)
)
SELECT
OpStr = LEFT(OpStr,LEN(OpStr)-1)
FROM CTE
WHERE SeqNo = LEN(#Var)+1

How to find UPPER case characters in a string and replace them with a space with SQL or SSRS

I have a column which has string values with mixed upper and lower case characters like (AliBabaSaidHello). I want to use this column values for my SSRS table cell headers like (Ali Baba Said Hello). First, I like to find each UPPER case letter and add space to it.
Ascii 65-90 tip was helpful for creating below code for a function:
declare #Reset bit;
declare #Ret varchar(8000);
declare #i int;
declare #c char(1);
select #Reset = 1, #i=1, #Ret = '';
while (#i <= len('AliBabaSaidHello'))
select #c= substring('AliBabaSaidHello',#i,1),
#Reset = case when ascii(#c) between 65 and 90 then 1 else 0 end,
#Ret = #Ret + case when #Reset=1 then ' ' + #c else #c end,
#i = #i +1
select #Ret
Thanks all, after Reading all the answers, I created this flexible and very efficient function:
FUNCTION dbo.UDF_DelimitersForCases (#string NVARCHAR(MAX), #Delimiter char(1))
RETURNS NVARCHAR(MAX)
AS
BEGIN
DECLARE #len INT = LEN(#string)
,#iterator INT = 2 --Don't put space to left of first even if it's a capital
;
WHILE #iterator <= LEN(#string)
BEGIN
IF PATINDEX('[ABCDEFGHIJKLMNOPQRSTUVWXYZ]',SUBSTRING(#string,#iterator,1) COLLATE Latin1_General_CS_AI) <> 0
BEGIN
SET #string = STUFF(#string,#iterator,0,#Delimiter);
SET #iterator += 1;
END
;
SET #iterator += 1;
END
RETURN #string;
END
;
GO
Example:
SELECT dbo.udf_DelimitersForCases('AliBabaSaidHello','_');
Returns "Ali_Baba_Said_Hello" (no quotes).
get chars one by one like "A" , "l" , "i", and look whether returning value of method ascii('&i_char') is between 65 and 90, those are "capital letters".
( ascii('A')=65(capital), ascii('l')=108(non-capital), ascii('i')=105(non-capital) )
Use case sensitive collation for your qry and combine with like for each of character. When you itterate characters you can easily replace upper characters for upper char + space.
WHERE SourceText COLLATE Latin1_General_CS_AI like '[A-Z]'
-- or for variable #char COLLATE Latin1_General_CS_AI = upper(#char)
The important in Latin1_General_CS_AI where "CS" is Case sensitive.
If you want to make this reusable for some reason, here's the code to make a user function to call.
DROP FUNCTION IF EXISTS udf_SpacesforCases;
GO
CREATE FUNCTION udf_SpacesForCases (#string NVARCHAR(MAX))
RETURNS NVARCHAR(MAX)
AS
BEGIN
DECLARE #len INT = LEN(#string)
,#iterator INT = 2 --Don't put space to left of first even if it's a capital
;
WHILE #iterator <= LEN(#string)
BEGIN
IF PATINDEX('[ABCDEFGHIJKLMNOPQRSTUVWXYZ]',SUBSTRING(#string,#iterator,1) COLLATE Latin1_General_CS_AI) <> 0
BEGIN
SET #string = STUFF(#string,#iterator,0,' ');
SET #iterator += 1;
END
;
SET #iterator += 1;
END
RETURN #string;
END
;
GO
SELECT dbo.udf_SpacesForCases('AliBabaSaidHello');
Any solution that involves a scalar user defined function and/or a loop will not perform as well as a set-based solution. This is a cake walk using using NGrams8K:
DECLARE #string varchar(1000) = 'AliBabaSaidHello';
SELECT newString =
( SELECT
CASE
WHEN ASCII(token) BETWEEN 65 AND 90 AND position > 1 THEN ' '+token ELSE token
END+''
FROM dbo.NGrams8k(#string, 1)
FOR XML PATH(''));
Returns: "Ali Baba Said Hello" (no quotes).
Note that the there is not a space before the first character. Alternatively, a set-based solution that doesn't use the function would look like this:
WITH
E1(N) AS (SELECT 1 FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))t(c)),
iTally(N) AS
(
SELECT TOP (LEN(#string)) ROW_NUMBER() OVER (ORDER BY (SELECT 1))
FROM E1 a, E1 b, E1 c, E1 d
),
nGrams(NewString) AS
(
SELECT
CASE WHEN ASCII(SUBSTRING(#string, N, 1)) BETWEEN 65 AND 90 AND N > 1
THEN ' '+SUBSTRING(#string, N, 1) ELSE SUBSTRING(#string, N, 1)
END+''
FROM iTally
FOR XML PATH('')
)
SELECT NewString
FROM nGrams;
The APL approach is to split the input into characters, pad the characters as needed, then reassemble the string. In T-SQL it would look rather like this:
-- Sample data.
declare #Samples as Table ( Sample VarChar(32) );
insert into #Samples ( Sample ) values ( 'AliBabaSaidHello' ), ( 'MeshMuscleShirt' );
select * from #Samples;
-- Stuff it.
with
Ten ( Number ) as ( select Number from ( values (0), (1), (2), (3), (4), (5), (6), (7), (8), (9) ) as Digits( Number ) ),
TenUp2 ( Number ) as ( select 42 from Ten as L cross join Ten as R ),
TenUp4 ( Number ) as ( select 42 from TenUp2 as L cross join TenUp2 as R ),
Numbers ( Number ) as ( select Row_Number() over ( order by ( select NULL ) ) from TenUp4 ),
Characters ( Sample, Number, PaddedCh ) as (
select S.Sample, N.Number, PC.PaddedCh
from #Samples as S inner join
Numbers as N on N.Number <= Len( S.Sample ) cross apply
( select SubString( S.Sample, N.Number, 1 ) as Ch ) as SS cross apply
( select case when N.Number > 1 and ASCII( 'A' ) <= ASCII( SS.Ch ) and ASCII( SS.Ch ) <= ASCII( 'Z' ) then ' ' + Ch else Ch end as PaddedCh ) as PC )
select S.Sample,
( select PaddedCh from Characters where Sample = S.Sample order by Number for XML path(''), type).value('.[1]', 'VarChar(max)' ) as PaddedSample
from #Samples as S
order by Sample;
Another option (quite verbose) might be:
SELECT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE('AliBabaSaidHello' COLLATE Latin1_General_CS_AS,'A',' A'),'B',' B'),'C',' C'),'D',' D'),'E',' E'),'F',' F'),'G',' G'),'H',' H'),'I',' I'),'J',' J'),'K',' K'),'L',' L'),'M',' M'),'N',' N'),'O',' O'),'P',' P'),'Q',' Q'),'R',' R'),'S',' S'),'T',' T'),'U',' U'),'V',' V'),'W',' W'),'X',' X'),'Y',' Y'),'Z',' Z')

User defined function replacing WHERE col IN(...)

I have created a user defined function to gain performance with queries containing 'WHERE col IN (...)' like this case:
SELECT myCol1, myCol2
FROM myTable
WHERE myCol3 IN (100, 200, 300, ..., 4900, 5000);
The queries are generated from an web application and are in some cases much more complex.
The function definition looks like this:
CREATE FUNCTION [dbo].[udf_CSVtoIntTable]
(
#CSV VARCHAR(MAX),
#Delimiter CHAR(1) = ','
)
RETURNS
#Result TABLE
(
[Value] INT
)
AS
BEGIN
DECLARE #CurrStartPos SMALLINT;
SET #CurrStartPos = 1;
DECLARE #CurrEndPos SMALLINT;
SET #CurrEndPos = 1;
DECLARE #TotalLength SMALLINT;
-- Remove space, tab, linefeed, carrier return
SET #CSV = REPLACE(#CSV, ' ', '');
SET #CSV = REPLACE(#CSV, CHAR(9), '');
SET #CSV = REPLACE(#CSV, CHAR(10), '');
SET #CSV = REPLACE(#CSV, CHAR(13), '');
-- Add extra delimiter if needed
IF NOT RIGHT(#CSV, 1) = #Delimiter
SET #CSV = #CSV + #Delimiter;
-- Get total string length
SET #TotalLength = LEN(#CSV);
WHILE #CurrStartPos < #TotalLength
BEGIN
SET #CurrEndPos = CHARINDEX(#Delimiter, #CSV, #CurrStartPos);
INSERT INTO #Result
VALUES (CAST(SUBSTRING(#CSV, #CurrStartPos, #CurrEndPos - #CurrStartPos) AS INT));
SET #CurrStartPos = #CurrEndPos + 1;
END
RETURN
END
The function is intended to be used like this (or as an INNER JOIN):
SELECT myCol1, myCol2
FROM myTable
WHERE myCol3 IN (
SELECT [Value]
FROM dbo.udf_CSVtoIntTable('100, 200, 300, ..., 4900, 5000', ',');
Do anyone have some optimiztion idears of my function or other ways to improve performance in my case?
Is there any drawbacks that I have missed?
I am using MS SQL Server 2005 Std and .NET 2.0 framework.
I'm not sure of the performance increase, but I would use it as an inner join and get away from the inner select statement.
Using a UDF in a WHERE clause or (worse) a subquery is asking for trouble. The optimizer sometimes gets it right, but often gets it wrong and evaluates the function once for every row in your query, which you don't want.
If your parameters are static (they appear to be) and you can issue a multistatement batch, I'd load the results of your UDF into a table variable, then use a join against the table variable to do your filtering. This should work more reliably.
that loop will kill performance!
create a table like this:
CREATE TABLE Numbers
(
Number int not null primary key
)
that has rows containing values 1 to 8000 or so and use this function:
CREATE FUNCTION [dbo].[FN_ListAllToNumberTable]
(
#SplitOn char(1) --REQUIRED, the character to split the #List string on
,#List varchar(8000) --REQUIRED, the list to split apart
)
RETURNS
#ParsedList table
(
RowNumber int
,ListValue varchar(500)
)
AS
BEGIN
/*
DESCRIPTION: Takes the given #List string and splits it apart based on the given #SplitOn character.
A table is returned, one row per split item, with a columns named "RowNumber" and "ListValue".
This function workes for fixed or variable lenght items.
Empty and null items will be included in the results set.
PARAMETERS:
#List varchar(8000) --REQUIRED, the list to split apart
#SplitOn char(1) --OPTIONAL, the character to split the #List string on, defaults to a comma ","
RETURN VALUES:
a table, one row per item in the list, with a column name "ListValue"
TEST WITH:
----------
SELECT * FROM dbo.FN_ListAllToNumTable(',','1,12,123,1234,54321,6,A,*,|||,,,,B')
DECLARE #InputList varchar(200)
SET #InputList='17;184;75;495'
SELECT
'well formed list',LEFT(#InputList,40) AS InputList,h.Name
FROM Employee h
INNER JOIN dbo.FN_ListAllToNumTable(';',#InputList) dt ON h.EmployeeID=dt.ListValue
WHERE dt.ListValue IS NOT NULL
SET #InputList='17;;;184;75;495;;;'
SELECT
'poorly formed list join',LEFT(#InputList,40) AS InputList,h.Name
FROM Employee h
INNER JOIN dbo.FN_ListAllToNumTable(';',#InputList) dt ON h.EmployeeID=dt.ListValue
SELECT
'poorly formed list',LEFT(#InputList,40) AS InputList, ListValue
FROM dbo.FN_ListAllToNumTable(';',#InputList)
**/
/*this will return empty rows, and row numbers*/
INSERT INTO #ParsedList
(RowNumber,ListValue)
SELECT
ROW_NUMBER() OVER(ORDER BY number) AS RowNumber
,LTRIM(RTRIM(SUBSTRING(ListValue, number+1, CHARINDEX(#SplitOn, ListValue, number+1)-number - 1))) AS ListValue
FROM (
SELECT #SplitOn + #List + #SplitOn AS ListValue
) AS InnerQuery
INNER JOIN Numbers n ON n.Number < LEN(InnerQuery.ListValue)
WHERE SUBSTRING(ListValue, number, 1) = #SplitOn
RETURN
END /*Function FN_ListAllToNumTable*/
I have other versions that do not return empty or null rows, ones that return just the item and not the row number, etc. Look in the header comment to see how to use this as part of a JOIN, which is much faster than in a where clause.
The CLR solution did not give me an good performance so I will use a recursive query. So here is the definition of the SP I will use (mostly based on Erland Sommarskogs examples):
CREATE FUNCTION [dbo].[priudf_CSVtoIntTable]
(
#CSV VARCHAR(MAX),
#Delimiter CHAR(1) = ','
)
RETURNS
#Result TABLE
(
[Value] INT
)
AS
BEGIN
-- Remove space, tab, linefeed, carrier return
SET #CSV = REPLACE(#CSV, ' ', '');
SET #CSV = REPLACE(#CSV, CHAR(9), '');
SET #CSV = REPLACE(#CSV, CHAR(10), '');
SET #CSV = REPLACE(#CSV, CHAR(13), '');
WITH csvtbl(start, stop) AS
(
SELECT start = CONVERT(BIGINT, 1),
stop = CHARINDEX(#Delimiter, #CSV + #Delimiter)
UNION ALL
SELECT start = stop + 1,
stop = CHARINDEX(#Delimiter, #CSV + #Delimiter, stop + 1)
FROM csvtbl
WHERE stop > 0
)
INSERT INTO #Result
SELECT CAST(SUBSTRING(#CSV, start, CASE WHEN stop > 0 THEN stop - start ELSE 0 END) AS INT) AS [Value]
FROM csvtbl
WHERE stop > 0
OPTION (MAXRECURSION 1000)
RETURN
END
Thank for the input, I have to admit that I have made som bad research before I started my work. I found that Erland Sommarskog has written a lot of this problem on his webpage, after your responeses and after reading his page I decided that I will try to make a CLR to solve this.
I tried a recursive query, this resulted in good performance but I will try CLR function anyway.

Resources