Finding a word after specific string - sql management studio 2012 - sql-server

I have a text column with fields as per below (small sample, there are many variations):
INSERT INTO #retention1 VALUES ('hello Action=Refer non-action=non-refer')
INSERT INTO #retention1 VALUES ('bye Action=Follow non-action=non-refer')
INSERT INTO #retention1 VALUES ('hello non-action=non-refer Action=compare')
I need to find the word after "Action="
example ANSWER :
Entry 1: Refer
Entry 2: Follow
Entry 3: Compare
If all the words after "Action=" were the same length, I would be able to do it. Unfortunately, the length differs between entries and is not known in advance. The word after "Action=" is almost always different; it is not limited to the 3 variations above.
Any ideas suggestions would be highly appreciated.
This is the code I used for the "Refer" example only which works:
BEGIN
DECLARE #P_string nvarchar (100),
#P_variable nvarchar (100)/*,
#P_return_null nvarchar(100) = 'Y'*/
set #p_string = 'hello Action=Refer non-action=non-refer'
set #p_variable = 'Action'
select substring(#p_string, charindex(upper(#P_variable),upper(#P_string)) +len(#p_variable)+1,5) as trying
END;

Try this:
BEGIN
    -- Extracts the word that follows "<name>=" in a space-separated list of
    -- name=value pairs. Yields NULL when the name does not occur.
    DECLARE @P_string   nvarchar(100),
            @P_variable nvarchar(100),
            @padded     nvarchar(102),
            @key        nvarchar(102),
            @start      int,
            @valueStart int,
            @ret        nvarchar(100);

    SET @P_string   = 'hello Action=Refer non-action=non-refer';
    SET @P_variable = 'Action';

    -- Pad the input with one space on each side and search for ' <name>='.
    -- The leading space stops 'Action=' from matching inside 'non-action=',
    -- and the trailing space guarantees the value has a terminator even when
    -- it is the last token of the string.
    SET @padded = N' ' + @P_string + N' ';
    SET @key    = N' ' + @P_variable + N'=';

    -- UPPER() on both sides keeps the search case-insensitive under any collation.
    SET @start = CHARINDEX(UPPER(@key), UPPER(@padded));

    IF @start > 0
    BEGIN
        -- @key ends with '=', so LEN() (which ignores trailing spaces) is exact here.
        SET @valueStart = @start + LEN(@key);
        SET @ret = SUBSTRING(@padded,
                             @valueStart,
                             CHARINDEX(N' ', @padded, @valueStart) - @valueStart);
    END

    SELECT @ret AS trying;
END;

The code you are looking for should first look for the string Action and then for a space character after that word. After that you have all you need to manipulate your source string.
This should work:
-- Finds the value that follows "<name>=" by computing its start (@idx1) and
-- the position of the space that ends it (@idx2). Both stay NULL, and the
-- final SELECT returns NULL, when the name does not occur.
DECLARE
    @P_string   nvarchar(100),
    @P_variable nvarchar(100),
    @idx1 int,
    @idx2 int;

SET @P_string   = 'hello Action=Refer non-action=non-refer';
SET @P_variable = 'Action';

-- Search for ' <name>=' in ' ' + string so that 'Action=' cannot match inside
-- 'non-action='. A hit at position p of the padded string means the name
-- starts at position p of the original string, so the value starts at
-- p + LEN(name) + 1 (the +1 skips the '=').
SET @idx1 = NULLIF(CHARINDEX(LOWER(N' ' + @P_variable + N'='),
                             LOWER(N' ' + @P_string)), 0)
            + LEN(@P_variable) + 1;

-- Assigned in its own statement: the order in which one SELECT evaluates
-- several variable assignments is not guaranteed. Appending a space ensures a
-- terminator is found even when the value is the last token.
IF @idx1 IS NOT NULL
    SET @idx2 = CHARINDEX(N' ', @P_string + N' ', @idx1);

SELECT @idx1, @idx2;

SELECT SUBSTRING(
    @P_string,
    @idx1,
    @idx2 - @idx1) AS trying;
EDIT
After more thoroughly reviewing the requirements, I decided to tailor a rCTE structure that I use for similar purposes. Here it goes.
-- Sample data: one free-text column holding space-separated name=value pairs.
CREATE TABLE #retention1 (
    ID  int,
    txt nvarchar(100)
);

INSERT INTO #retention1 (ID, txt) VALUES (1, 'hello Action=Refer non-action=non-refer');
INSERT INTO #retention1 (ID, txt) VALUES (2, 'bye Action=Follow non-action=non-refer');
INSERT INTO #retention1 (ID, txt) VALUES (3, 'hello non-action=non-refer Action=compare');

-- Recursive CTE that walks each txt left to right and emits one row per
-- space-separated token. EndIdx is the number of characters consumed so far
-- (delimiters included); Result is the token itself WITHOUT the delimiter.
-- Recursion depth equals the number of tokens in a row, so the default
-- MAXRECURSION of 100 is enough for an nvarchar(100) column.
;WITH T AS (
    -- Anchor: one seed row per source row, nothing consumed yet.
    SELECT
        ID,
        Row      = 0,
        StartIdx = CAST(0 AS int),
        EndIdx   = CAST(0 AS int),
        Result   = CAST('' AS nvarchar(max))
    FROM #retention1
    UNION ALL
    SELECT
        r1.ID,
        Row + 1,
        StartIdx = CAST(newstartidx AS int),
        EndIdx   = CAST(EndIdx + newendidx AS int),
        Result   = CAST(newtoken AS nvarchar(max))
    FROM
        T
        JOIN #retention1 r1
            ON r1.ID = T.ID
        -- First character not consumed yet.
        CROSS APPLY (
            SELECT newstartidx = EndIdx + 1
        ) calc1
        -- Remainder of the text from that character on.
        CROSS APPLY (
            SELECT newtxt = substring(r1.txt, newstartidx, len(r1.txt))
        ) calc2
        -- Position of the next delimiter inside the remainder (0 = none left).
        CROSS APPLY (
            SELECT patidx = charindex(' ', newtxt)
        ) calc3
        -- Characters consumed by this step: the token plus its delimiter,
        -- or the whole remainder for the last token.
        CROSS APPLY (
            SELECT newendidx = CASE
                WHEN patidx = 0 THEN len(newtxt)
                ELSE patidx END
        ) calc4
        -- The token itself; patidx - 1 drops the delimiter so values do not
        -- come back with a trailing space.
        CROSS APPLY (
            SELECT newtoken = substring(
                r1.txt,
                newstartidx,
                CASE WHEN patidx = 0 THEN newendidx ELSE patidx - 1 END)
        ) calc5
    -- Stop once nothing is left to consume.
    WHERE newendidx > 0
)
-- Split every token that contains '=' into Name and Value.
-- Filter on Name = 'Action' to keep only the Action values.
SELECT
    ID,
    Name  = left(Result, eqIdx - 1),
    Value = substring(Result, eqIdx + 1, len(Result) - eqIdx)
FROM
    T
    -- NULLIF turns "no '=' found" into NULL, so LEFT/SUBSTRING yield NULL
    -- instead of failing on a negative length if they are evaluated before
    -- the WHERE filter.
    OUTER APPLY (
        SELECT eqIdx = NULLIF(charindex('=', Result), 0)
    ) calc6
WHERE
    Row != 0
    AND eqIdx IS NOT NULL
ORDER BY ID, Row
Since there are more than one expressions to parse in the table, you would have problems referencing them without an identifier. So, i added ID to your temporary table.
The output from CTE also contains ID which you can use as a reference to #retention1.ID.

String handling galore with a little cheating: Extending #p_string with a space at the begining and the end.
DECLARE #P_string nvarchar (100), #P_variable nvarchar (100)
set #p_variable = ' Action='
set #p_string = ' hello Action=Refer non-action=non-refer '
select substring(substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)),charindex('=',substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))+1,CHARINDEX(' ', substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))-LEN(#P_variable))
set #p_string = ' bye Action=Follow non-action=non-refer '
select substring(substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)),charindex('=',substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))+1,CHARINDEX(' ', substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))-LEN(#P_variable))
set #p_string = ' hello non-action=non-refer Action=compare '
select substring(substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)),charindex('=',substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))+1,CHARINDEX(' ', substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))-LEN(#P_variable))

Related

How to make this IsSimilar(varchar,varchar) function more performant

I need to implement a function to look for similar names. It's for the development of a new system that has migrated the data of a previous system.
One of the features would be that on account creation, it would try and look for person records that are there with a similar name and propose them.
Examples for similar names for "John Johnson" could be:
John Johnson
Jon Jonsen
John & Jane Johnson-Peters
Fam. Johnsen
J. Johnson
To achieve this I've created some SQL functions and functional wise they work:
[dbo].[Levenshtein]: A copy of the top rated answer from this question
[dbo].[SplitString]: Splits a name string on '/', '\', '&', '+' and '-'
CREATE FUNCTION [dbo].[SplitString](
#s nvarchar(4000)
)
RETURNS #table TABLE ([value] VARCHAR(4000))
WITH SCHEMABINDING
AS
BEGIN
DECLARE #repl VARCHAR(4000) = REPLACE(REPLACE(REPLACE(REPLACE(#s, '/', '-'),'&', '-'),'+', '-'),'\', '-');
INSERT INTO #table
SELECT TRIM(value) FROM STRING_SPLIT(#repl, '-', NULL)
WHERE RTRIM(value) <> '';
RETURN
END
[dbo].[IsSimilar]: Takes 2 strings, calls the split function and checks if any part of the splits are similar, meaning within Levenshtein distance 5, an initial, or 'Fam'
CREATE FUNCTION [dbo].[IsSimilar](
#s nvarchar(4000)
, #t nvarchar(4000)
)
RETURNS BIT
WITH SCHEMABINDING
AS
BEGIN
DECLARE #sT TABLE (idx int IDENTITY(1,1), [value] VARCHAR(4000))
DECLARE #tT TABLE (idx int IDENTITY(1,1), [value] VARCHAR(4000))
DECLARE #sNrOfWords INT,
#tNrOfWords INT,
#sCount INT = 1,
#tCount INT = 1,
#sVal VARCHAR(4000),
#tVal VARCHAR(4000)
IF (#s = 'fam' OR #s = 'fam.' OR #t = 'fam' OR #t = 'fam.')
return 1
INSERT INTO #sT SELECT [value] FROM [dbo].[SplitString](#s)
INSERT INTO #tT SELECT [value] FROM [dbo].[SplitString](#t)
SET #sNrOfWords = (SELECT COUNT([value]) FROM #sT)
SET #tNrOfWords = (SELECT COUNT([value]) FROM #tT)
IF (#sNrOfWords > 0 AND #tNrOfWords > 0)
BEGIN
WHILE (#sCount <= (SELECT MAX(idx) FROM #sT))
BEGIN
SET #sVal = (SELECT [value] FROM #sT WHERE idx = #sCount)
WHILE (#tCount <= (SELECT MAX(idx) FROM #tT))
BEGIN
SET #tVal = (SELECT [value] FROM #tT WHERE idx = #tCount)
IF (((LEN(#sVal) = 1 OR LEN(#tVal) = 1 OR SUBSTRING(#sVal, 2, 1) IN ('.', ' ') OR SUBSTRING(#tVal, 2, 1) IN ('.', ' ')) AND SUBSTRING(#sVal, 1, 1) = SUBSTRING(#tVal, 1, 1)) OR ((SELECT [dbo].[Levenshtein](#sVal,#tVal, 5)) IS NOT NULL))
RETURN 1
SET #tCount = #tCount + 1
END
SET #sCount = #sCount + 1
SET #tCount = 1
END
END
RETURN 0
END
Sadly this solution isn't the most performant :(
It takes 12-13s to find this record based on my misspelled name, which of course is too long.
The full table is only 512 records at the moment.
Any help on getting this more performant?
I know looping isn't recommended in SQL, so there is probably something to gain there.
I'm not a DBA or SQL specialist, no idea how to write that differently without the loops.
Didn't think I could use a join as there's no equality.
After implementing the suggestions in the comments on the OP, I managed to cut down the same SELECT statement from 12-13s to about 1s, which is a lot more acceptable.
The SplitString has been changed to an inline function:
Create FUNCTION [dbo].[SplitString](
#s nvarchar(4000)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN(
SELECT TRIM(value) AS value FROM STRING_SPLIT(REPLACE(REPLACE(REPLACE(REPLACE(#s, '/', '-'),'&', '-'),'+', '-'),'\', '-'), '-', NULL)
WHERE RTRIM(value) <> ''
);
Cutting down on the variables and using a Join statement for the IsSimilar function gives a boost as well:
-- Returns 1 when two name strings are considered similar, otherwise 0.
--
-- Rules, in order:
--   1. If either input is exactly 'fam' or 'fam.', the names are similar.
--   2. Otherwise both inputs are split into parts with dbo.SplitString and the
--      names are similar when ANY pair of parts satisfies one of:
--        a. one of the two parts is an initial (length 1, or second character
--           is '.' or ' ') and both parts start with the same character;
--        b. neither part is an initial and dbo.Levenshtein(part, part, 5)
--           returns a non-NULL value.
--   3. NULL or blank inputs yield 0.
CREATE FUNCTION [dbo].[IsSimilar](
    @s nvarchar(4000)
    , @t nvarchar(4000)
)
RETURNS BIT
AS
BEGIN
    IF (@s = 'fam' OR @s = 'fam.' OR @t = 'fam' OR @t = 'fam.')
        RETURN 1

    IF (LEN(TRIM(@s)) > 0 AND LEN(TRIM(@t)) > 0)
    BEGIN
        RETURN (
            SELECT IIF(
                EXISTS (
                    SELECT [sT].[value]
                    FROM [dbo].[SplitString](@s) AS sT
                    INNER JOIN [dbo].[SplitString](@t) AS tT
                        ON (
                            -- (a) initial versus name: compare first characters only
                            (
                                LEN([sT].[value]) = 1
                                OR LEN([tT].[value]) = 1
                                OR SUBSTRING([sT].[value], 2, 1) IN ('.', ' ')
                                OR SUBSTRING([tT].[value], 2, 1) IN ('.', ' ')
                            )
                            AND SUBSTRING([sT].[value], 1, 1) = SUBSTRING([tT].[value], 1, 1)
                        )
                        OR (
                            -- (b) two full names: only now pay for the edit distance
                            NOT (
                                SUBSTRING([sT].[value], 2, 1) IN ('.', ' ')
                                OR SUBSTRING([tT].[value], 2, 1) IN ('.', ' ')
                            )
                            AND [dbo].[Levenshtein]([sT].[value], [tT].[value], 5) IS NOT NULL
                        )
                ),
                1,
                0)
        )
    END

    RETURN 0
END
I don't know how much this boost will hold up to real big data, but in this case that'll not be the case as Person records get linked to Account records with every new account creation and only Person records with AccountID = NULL will be considered.

subquery with table value function not working

DECLARE #temp AS TABLE (id INT, NAME VARCHAR(20) )
DECLARE #str VARCHAR(20) = '1,2'
INSERT INTO #temp (id, NAME)
VALUES (1, ''), (2, ''), (2, '')
SELECT *
FROM #temp a
WHERE id IN ((SELECT String FROM dbo.FN_SplitStrings(#str,',')))
I'm getting the following error while running this
Conversion failed when converting the varchar value '1,2' to data type
int.
Code:
-- Splits @StringToSplit on @Separator and returns one row per piece.
--
-- Columns:
--   String      the piece (empty string for adjacent separators)
--   StartIndex  1-based position of the piece inside @StringToSplit
--
-- Implemented as a recursive CTE, so an input with more than 100 separators
-- needs OPTION (MAXRECURSION n) on the calling query.
CREATE function [dbo].[FN_SplitStrings]
(
    @StringToSplit varchar(8000),
    @Separator varchar(128)
)
RETURNS TABLE
AS
RETURN
with sep as
(
    -- LEN() ignores trailing spaces, which would make a ' ' separator have
    -- length 0 and the recursion never advance; the sentinel avoids that.
    select len(@Separator + '.') - 1 as SepLen
),
indices as
(
    -- S = start of the current piece, E = start of the next piece.
    -- When no further separator exists CHARINDEX returns 0, so E = SepLen.
    -- The seed is the first real piece: no row ever carries the whole
    -- unsplit string, so comparing String with an int column cannot hit a
    -- value such as '1,2'.
    select
        1 as S,
        charindex(@Separator, @StringToSplit, 1) + sep.SepLen as E
    from
        sep
    union all
    select
        i.E,
        charindex(@Separator, @StringToSplit, i.E) + sep.SepLen
    from
        indices i
        cross join sep
    -- Continue only while the previous step actually found a separator.
    where i.E > sep.SepLen
)
select
    substring(@StringToSplit, i.S, case when i.E > sep.SepLen
        then i.E - i.S - sep.SepLen
        else len(@StringToSplit) - i.S + 1 end) as String,
    i.S as StartIndex
from
    indices i
    cross join sep
Try this. This splitting can be used without a function
DECLARE #temp AS TABLE
(
id INT,
NAME VARCHAR(20)
)
DECLARE #str VARCHAR(20)='1,2'
INSERT INTO #temp
( id, NAME )
VALUES ( 1, '' ),
( 2, ''),
( 2, '')
SELECT * FROM #temp a
WHERE id IN
(
SELECT LTRIM(RTRIM(Split.a.value('.', 'VARCHAR(100)'))) 'KeyWords'
FROM
(
-- To change ',' to any other delimeter, just change ',' before '</M><M>' to your desired one
SELECT CAST ('<M>' + REPLACE(#str, ',', '</M><M>') + '</M>' AS XML) AS Data
) AS A
CROSS APPLY Data.nodes ('/M') AS Split(a)
)
Click here to view the result
EDIT:
You had some problems in function for splitting.
Function
ALTER FUNCTION dbo.FN_SplitStrings(#StringToSplit varchar(8000),#Separator char(1))
RETURNS table
AS
RETURN (
WITH splitter_cte AS (
SELECT CHARINDEX(#Separator, #StringToSplit) as pos, 0 as lastPos
UNION ALL
SELECT CHARINDEX(#Separator, #StringToSplit, pos + 1), pos
FROM splitter_cte
WHERE pos > 0
)
SELECT SUBSTRING(#StringToSplit, lastPos + 1,
case when pos = 0 then 80000
else pos - lastPos -1 end) as String
FROM splitter_cte
)
Query
DECLARE #temp AS TABLE (id INT, NAME VARCHAR(20) )
DECLARE #str VARCHAR(20) = '1,2'
INSERT INTO #temp (id, NAME)
VALUES (1, ''), (2, ''), (2, '')
SELECT *
FROM #temp a
WHERE id IN ((SELECT String FROM dbo.FN_SplitStrings(#str,',')))
Click here to view result
The reason is a mix in data types and the fact that your function is an inline table valued functions which means that it is embedded into the query before query optimization takes place.
If you remove where S > 0 from your function and execute it with 1,2, the result of the function is:
String
------
1,2
1
2
Notice the first row where the value is 1,2.
When the optimizer does its job with your query the comparison against the column id is done before the where clause of the function is evaluated. In that comparison you have an implicit cast to int and 1,2 can not be casted to an int.
To fix this you can make sure that the String column of your split function is always an int (and perhaps changing the name of the column in the process).
select
cast(substring(#StringToSplit,S, case when E > len(#Separator)
then e-s-len(#Separator)
else len(#StringToSplit) - s + 1
end) as int) String ,

Remove leading zero in a temp variable

I have declared a temp variable in SQL. It contains the string A001, A011, A002, A14, A158.
I want to remove the leading zeroes to get the following output:
A1, A2, A11, A14, A158
You can achieve what you're after with some string manipulation.
So if you take the first character from A010:
DECLARE #SomeValue nvarchar(10) = 'A010'
SELECT LEFT(#SomeValue, 1) -- gives you 'A'
Then get the numeric part, by removing the first character and convert it to an integer:
SELECT CONVERT(INT, SUBSTRING(#SomeValue, 2, LEN(#SomeValue) - 1)) -- returns 10
This will remove the leading zeroes for you. You then just need to convert it back to a string and append it on to the first character.
Full working solution:
DECLARE #T1 TABLE ( SomeValue NVARCHAR(10) )
INSERT INTO #T1
( SomeValue )
VALUES ( 'A001' ),
( 'A011' ),
( 'A002' ),
( 'A14' ),
( 'A158' ),
( 'A010' )
SELECT SomeValue AS Original ,
LEFT(SomeValue, 1)
+ CONVERT(NVARCHAR(10),
CONVERT(INT, SUBSTRING(SomeValue, 2, LEN(SomeValue)-1))) RemoveLeadingZeroes
FROM #T1
Use this: (I used XML to replace extract each word and then replaced the 0's by casting 0010 to 10 and combined the values A + 10 to get A10 and again combined the each word to form A1, A2, A14..)
DECLARE #string VARCHAR(MAX),
#Split CHAR(1),
#X xml
SELECT #string = 'A001, A011, A002, A14, A158',
#Split = ', '
SELECT #X = CONVERT(xml,'<root><s>' + REPLACE(#string,#Split,'</s><s>') + '</s></root>')
SELECT
STUFF(((SELECT ', ' + CAST(Result AS VARCHAR(100)) FROM
(SELECT SUBSTRING(LTRIM(Result), 1, 1) +
CAST(CAST(SUBSTRING(LTRIM(Result), 2, LEN(LTRIM(Result))) AS INT) AS VARCHAR) AS Result
FROM
(SELECT T.c.value('.','varchar(max)') AS Result
FROM #X.nodes('/root/s') T(c)) AS NewResult)
AS NewResult1
FOR xml path(''), type).value('.','varchar(max)')), 1,2,'')
AS FinalResult
SqlFiddle: Demo

SQL Server - find nth occurrence in a string

I have a table column that contains values such as abc_1_2_3_4.gif or zzz_12_3_3_45.gif etc.
I want to find the index of each underscore _ in the above values. There will only ever be four underscores but given that they can be in any position in the string, how can I achieve this?
I've tried the substring and charindex function, but I can only reliably get hold of the first one. Any ideas?
You can use the same function inside for the position +1
charindex('_', [TEXT], (charindex('_', [TEXT], 1))+1)
Here the +1 moves the start of the search just past the previous match; nest one more CHARINDEX call for each further occurrence you want to find.
One way (2k8);
select 'abc_1_2_3_4.gif  ' as img into #T
insert #T values ('zzz_12_3_3_45.gif')
;with T as (
select 0 as row, charindex('_', img) pos, img from #T
union all
select pos + 1, charindex('_', img, pos + 1), img
from T
where pos > 0
)
select
img, pos
from T
where pos > 0
order by img, pos
>>>>
img pos
abc_1_2_3_4.gif 4
abc_1_2_3_4.gif 6
abc_1_2_3_4.gif 8
abc_1_2_3_4.gif 10
zzz_12_3_3_45.gif 4
zzz_12_3_3_45.gif 7
zzz_12_3_3_45.gif 9
zzz_12_3_3_45.gif 11
Update
;with T(img, starts, pos) as (
select img, 1, charindex('_', img) from #t
union all
select img, pos + 1, charindex('_', img, pos + 1)
from t
where pos > 0
)
select
*, substring(img, starts, case when pos > 0 then pos - starts else len(img) end) token
from T
order by img, starts
>>>
img starts pos token
abc_1_2_3_4.gif   1 4 abc
abc_1_2_3_4.gif   5 6 1
abc_1_2_3_4.gif   7 8 2
abc_1_2_3_4.gif   9 10 3
abc_1_2_3_4.gif   11 0 4.gif  
zzz_12_3_3_45.gif 1 4 zzz
zzz_12_3_3_45.gif 5 7 12
zzz_12_3_3_45.gif 8 9 3
zzz_12_3_3_45.gif 10 11 3
zzz_12_3_3_45.gif 12 0 45.gif
You can use the CHARINDEX and specify the starting location:
DECLARE #x VARCHAR(32) = 'MS-SQL-Server';
SELECT
STUFF(STUFF(#x,3 , 0, '/'), 8, 0, '/') InsertString
,CHARINDEX('-',LTRIM(RTRIM(#x))) FirstIndexOf
,CHARINDEX('-',LTRIM(RTRIM(#x)), (CHARINDEX('-', LTRIM(RTRIM(#x)) )+1)) SecondIndexOf
,CHARINDEX('-',#x,CHARINDEX('-',#x, (CHARINDEX('-',#x)+1))+1) ThirdIndexOf
,CHARINDEX('-',REVERSE(LTRIM(RTRIM(#x)))) LastIndexOf;
GO
You can use the following function to split the values by a delimiter. It'll return a table and to find the nth occurrence just make a select on it! Or change it a little for it to return what you need instead of the table.
-- Splits @RowData on the delimiter @SplitOn and returns the trimmed pieces.
--
-- Returns: one row per piece; Id numbers the pieces 1, 2, 3, ... in the order
-- they appear, Data is the piece with leading/trailing spaces removed.
-- A NULL or empty delimiter returns the whole input as a single piece.
CREATE FUNCTION dbo.Split
(
    @RowData nvarchar(2000),
    @SplitOn nvarchar(5)
)
RETURNS @RtnValue table
(
    Id int identity(1,1),
    Data nvarchar(100)
)
AS
BEGIN
    -- LEN() ignores trailing spaces; the sentinel gives the true delimiter length.
    Declare @SepLen int
    Set @SepLen = Len(@SplitOn + N'.') - 1

    Declare @Pos int
    Set @Pos = Charindex(@SplitOn, @RowData)

    While (@Pos > 0)
    Begin
        Insert Into @RtnValue (data)
        Select
            Data = ltrim(rtrim(Substring(@RowData, 1, @Pos - 1)))

        -- Skip the WHOLE delimiter, not just one character, so that
        -- multi-character delimiters do not leak into the next piece.
        Set @RowData = Substring(@RowData, @Pos + @SepLen, 2000)
        Set @Pos = Charindex(@SplitOn, @RowData)
    End

    -- Whatever remains after the last delimiter is the final piece.
    Insert Into @RtnValue (data)
    Select Data = ltrim(rtrim(@RowData))

    Return
END
GO
DECLARE #str AS VARCHAR(100)
SET #str='1,2 , 3, 4, 5,6'
SELECT COALESCE(LTRIM(CAST(('<X>'+REPLACE(#str,',' ,'</X><X>')+'</X>') AS XML).value('(/X)[1]', 'varchar(128)')), ''),
COALESCE(LTRIM(CAST(('<X>'+REPLACE(#str,',' ,'</X><X>')+'</X>') AS XML).value('(/X)[2]', 'varchar(128)')), ''),
COALESCE(LTRIM(CAST(('<X>'+REPLACE(#str,',' ,'</X><X>')+'</X>') AS XML).value('(/X)[3]', 'varchar(128)')), ''),
COALESCE(LTRIM(CAST(('<X>'+REPLACE(#str,',' ,'</X><X>')+'</X>') AS XML).value('(/X)[4]', 'varchar(128)')), ''),
COALESCE(LTRIM(CAST(('<X>'+REPLACE(#str,',' ,'</X><X>')+'</X>') AS XML).value('(/X)[5]', 'varchar(128)')), ''),
COALESCE(LTRIM(CAST(('<X>'+REPLACE(#str,',' ,'</X><X>')+'</X>') AS XML).value('(/X)[6]', 'varchar(128)')), ''),
COALESCE(LTRIM(CAST(('<X>'+REPLACE(#str,',' ,'</X><X>')+'</X>') AS XML).value('(/X)[7]', 'varchar(128)')), ''),
COALESCE(LTRIM(CAST(('<X>'+REPLACE(#str,',' ,'</X><X>')+'</X>') AS XML).value('(/X)[8]', 'varchar(128)')), ''),
COALESCE(LTRIM(CAST(('<X>'+REPLACE(#str,',' ,'</X><X>')+'</X>') AS XML).value('(/X)[9]', 'varchar(128)')), '')
You can look for the four underscore in this way:
-- Reports the positions of the first four underscores of every value in
-- #test, one result set per row. A position is 0 when that underscore
-- does not exist.
create table #test
( t varchar(50) );

insert into #test (t) values
( 'abc_1_2_3_4.gif'),
('zzz_12_3_3_45.gif');

declare @t varchar(50);
declare @t1 int;
declare @t2 int;
declare @t3 int;
declare @t4 int;

DECLARE t_cursor CURSOR LOCAL FAST_FORWARD
FOR SELECT t FROM #test;

OPEN t_cursor;
FETCH NEXT FROM t_cursor into @t;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Each search starts one character after the previous hit. The CASE stops
    -- the chain once an underscore is missing; otherwise a start position of
    -- 0 + 1 would silently find the FIRST underscore again.
    set @t1 = charindex( '_', @t );
    set @t2 = case when @t1 > 0 then charindex( '_', @t , @t1+1) else 0 end;
    set @t3 = case when @t2 > 0 then charindex( '_', @t , @t2+1) else 0 end;
    set @t4 = case when @t3 > 0 then charindex( '_', @t , @t3+1) else 0 end;

    select @t, @t1, @t2, @t3, @t4;

    FETCH NEXT FROM t_cursor into @t;
END

-- Release the cursor so the script can be re-run in the same session.
CLOSE t_cursor;
DEALLOCATE t_cursor;
you can test it here.
Or in this simple way:
select
charindex( '_', t ) as first,
charindex( '_', t, charindex( '_', t ) + 1 ) as second,
...
from
#test
You can try peeling the variable/array, assuming distinctness in your list
-- Splits a delimited string into @array by repeatedly "peeling" the leading
-- token off @VALUE. The id column numbers the tokens in order, so the nth
-- token is the row with id = n. Duplicate tokens are handled correctly.
declare @array table ----table of values
(
id int identity(1,1)
,value nvarchar(max)
)
DECLARE @VALUE NVARCHAR(MAX)='val1_val2_val3_val4_val5_val6_val7'----string array
DECLARE @CURVAL NVARCHAR(MAX) ---current value
DECLARE @DELIM NVARCHAR(1)='_' ---delimiter
DECLARE @BREAKPT INT ---current index of the delimiter

-- The loop ends through the BREAK in the ELSE branch once no delimiter is left.
WHILE 1 = 1
BEGIN
    SET @BREAKPT = CHARINDEX(@DELIM, @VALUE) ---set the current index

    IF @BREAKPT <> 0
    BEGIN
        SET @CURVAL = SUBSTRING(@VALUE, 1, @BREAKPT - 1) ---current value

        -- Remove ONLY the leading token and its delimiter. REPLACE would strip
        -- every later occurrence of the same "token + delimiter" text as well
        -- and lose data whenever a token repeats.
        SET @VALUE = STUFF(@VALUE, 1, @BREAKPT, N'')

        INSERT INTO @array (value) ---insert data
        SELECT @CURVAL
    END
    ELSE
    BEGIN
        SET @CURVAL = @VALUE ---current value now last value

        INSERT INTO @array (value) ---insert data
        SELECT @CURVAL

        BREAK ---break loop
    END
END

select * from @array ---find nth occurrence given the id
-- Returns the position of the SECOND 'O' in @VAR (8 for 'HELLO WORLD'),
-- or 0 when there are fewer than two occurrences.
DECLARE @VAR VARCHAR(20)
DECLARE @FIRST INT
SET @VAR = 'HELLO WORLD'
SET @FIRST = CHARINDEX('O', @VAR)

-- Search again starting one character after the first hit. The original
-- limited the search to a window whose LENGTH was derived from the first
-- position, which misses a second occurrence that lies further away.
SELECT CASE
           WHEN @FIRST > 0 THEN CHARINDEX('O', @VAR, @FIRST + 1)
           ELSE 0
       END
MySQL supports a SUBSTRING_INDEX function that returns the part of a string before the nth occurrence of a delimiter. A similar user-defined function could be written to achieve this. Example in the link.
Alternatively, you could call the CHARINDEX function repeatedly to report the location of each _, each time starting at the position of the previously found instance + 1, until 0 is returned.
Edit: NM Charindex is the correct function
A simple sample to do this with xml conversion:
SELECT 'A|B|C'
, concat('<x>', REPLACE('A|B|C', '|', '</x><x>'), '</x>')
, cast(concat('<x>', REPLACE('A|B|C', '|', '</x><x>'), '</x>') as xml).query('/x[2]')
, cast(concat('<x>', REPLACE('A|B|C', '|', '</x><x>'), '</x>') as xml).value('/x[2]',
'varchar');
And here a translation for your sample:
SELECT gifname
,cast(concat('<x>', REPLACE(gifname, '_', '</x><x>'), '</x>') as xml).query('/x[2]') as xmlelement
, cast(concat('<x>', REPLACE(gifname, '_', '</x><x>'), '</x>') as xml).value('/x[2]', 'varchar(10)') as result
FROM (
SELECT 'abc_1_2_3_4.gif' as gifname
UNION ALL
SELECT 'zzz_12_3_3_45.gif'
) tmp
I did this creating several separate custom functions, one for each position of the searched character i.e. 2nd, 3rd:
-- Returns the 1-based position of the 2nd occurrence of @SEARCHCHAR in
-- @SEARCHSTRING, 0 when there are fewer than two occurrences, and NULL when
-- either argument is NULL.
CREATE FUNCTION [dbo].[fnCHARPOS2]
(@SEARCHCHAR VARCHAR(255),
@SEARCHSTRING VARCHAR(255))
RETURNS INT
AS
BEGIN
    DECLARE @first INT
    SET @first = CHARINDEX(@SEARCHCHAR, @SEARCHSTRING)

    -- Look again starting one character after the first hit. When there is no
    -- first hit, return it unchanged (0 or NULL) instead of searching again.
    RETURN CASE
               WHEN @first > 0 THEN CHARINDEX(@SEARCHCHAR, @SEARCHSTRING, @first + 1)
               ELSE @first
           END;
END
GO
-- Returns the 1-based position of the 3rd occurrence of @SEARCHCHAR in
-- @SEARCHSTRING, 0 when there are fewer than three occurrences, and NULL when
-- either argument is NULL.
CREATE FUNCTION [dbo].[fnCHARPOS3]
(@SEARCHCHAR VARCHAR(255),
@SEARCHSTRING VARCHAR(255))
RETURNS INT
AS
BEGIN
    DECLARE @first INT
    DECLARE @second INT

    SET @first = CHARINDEX(@SEARCHCHAR, @SEARCHSTRING)

    -- Each step continues only after a real hit. Without the guard a missing
    -- second occurrence (position 0) restarts the search at position 1 and
    -- the function reports the FIRST occurrence as if it were the third.
    SET @second = CASE
                      WHEN @first > 0 THEN CHARINDEX(@SEARCHCHAR, @SEARCHSTRING, @first + 1)
                      ELSE @first
                  END

    RETURN CASE
               WHEN @second > 0 THEN CHARINDEX(@SEARCHCHAR, @SEARCHSTRING, @second + 1)
               ELSE @second
           END;
END
GO
You can then pass in as a parameter the character you are searching for and the string you are searching in:
So if you were searching for 'f' and wanted to know position of 1st 3 occurences:
select
database.dbo.fnCHARPOS2('f',tablename.columnname),
database.dbo.fnCHARPOS3('f',tablename.columnname)
from tablename
It worked for me!
I decided to use a recursive function because for me it was easier to follow the logic. Note that SQL Server has a default function recursion limit of 32, so this is only good for smaller workloads.
create function dbo._charindex_nth (
#FindThis varchar(8000),
#InThis varchar(max),
#StartFrom int,
#NthOccurence tinyint
)
returns bigint
as
begin
/*
Recursive helper used by dbo.charindex_nth to return the position of the nth occurance of #FindThis in #InThis
Who When What
PJR 160421 Initial
*/
declare #Pos bigint
if isnull(#NthOccurence, 0) <= 0 or isnull(#StartFrom, 0) <= 0
begin
select #Pos = 0
end else begin
if #NthOccurence = 1
begin
select #Pos = charindex(#FindThis, #InThis, #StartFrom)
end else begin
select #Pos = dbo._charindex_nth(#FindThis, #InThis, nullif(charindex(#FindThis, #InThis, #StartFrom), 0) + 1, #NthOccurence - 1)
end
end
return #Pos
end
create function dbo.charindex_nth (
#FindThis varchar(8000),
#InThis varchar(max),
#NthOccurence tinyint
)
returns bigint
as
begin
/*
Returns the position of the nth occurance of #FindThis in #InThis
Who When What
PJR 160421 Initial
*/
return dbo._charindex_nth(#FindThis, #InThis, 1, #NthOccurence)
end
declare #val varchar(max) = 'zzz_12_3_3_45.gif'
select dbo.charindex_nth('_', #val, 1) Underscore1
, dbo.charindex_nth('_', #val, 2) Underscore2
, dbo.charindex_nth('_', #val, 3) Underscore3
, dbo.charindex_nth('_', #val, 4) Underscore4
DECLARE #T AS TABLE(pic_name VARCHAR(100));
INSERT INTO #T VALUES ('abc_1_2_3_4.gif'),('zzz_12_3_3_45.gif');
SELECT A.pic_name, P1.D, P2.D, P3.D, P4.D
FROM #T A
CROSS APPLY (SELECT NULLIF(CHARINDEX('_', A.pic_name),0) AS D) P1
CROSS APPLY (SELECT NULLIF(CHARINDEX('_', A.pic_name, P1.D+1), 0) AS D) P2
CROSS APPLY (SELECT NULLIF(CHARINDEX('_', A.pic_name, P2.D+1),0) AS D) P3
CROSS APPLY (SELECT NULLIF(CHARINDEX('_', A.pic_name, P3.D+1),0) AS D) P4
I've used a function to grab the "nth" element from a delimited string field with great success. Like mentioned above, it's not a "fast" way of dealing with things but it sure as heck is convenient.
create function GetArrayIndex(#delimited nvarchar(max), #index int, #delimiter nvarchar(100) = ',') returns nvarchar(max)
as
begin
declare #xml xml, #result nvarchar(max)
set #xml = N'<root><r>' + replace(#delimited, #delimiter,'</r><r>') + '</r></root>'
select #result = r.value('.','varchar(max)')
from #xml.nodes('//root/r[sql:variable("#index")]') as records(r)
return #result
end
I was toying with a faster way to do this than simply iterating through the string.
CREATE FUNCTION [ssf_GetNthSeparatorPosition] ( #TargetString VARCHAR(MAX)
, #Sep VARCHAR(25)
, #n INTEGER )
RETURNS INTEGER
/****************************************************************************************
--#############################################################################
-- Returns the position of the Nth Charactor sequence
-- 1234567890123456789
-- Declare #thatString varchar(max) = 'hi,there,jay,yo'
Select dbo.ssf_GetNthSeparatorPosition(#thatString, ',', 3) --would return 13
--############################################################################
****************************************************************************************/
AS
BEGIN
DECLARE #Retval INTEGER = 0
DECLARE #CurPos INTEGER = 0
DECLARE #LenSep INTEGER = LEN(#Sep)
SELECT #CurPos = CHARINDEX(#Sep, #TargetString)
IF ISNULL(#LenSep, 0) > 0
AND #CurPos > 0
BEGIN
SELECT #CurPos = 0
;with lv0 AS (SELECT 0 g UNION ALL SELECT 0)
,lv1 AS (SELECT 0 g FROM lv0 a CROSS JOIN lv0 b) -- 4
,lv2 AS (SELECT 0 g FROM lv1 a CROSS JOIN lv1 b) -- 16
,lv3 AS (SELECT 0 g FROM lv2 a CROSS JOIN lv2 b) -- 256
,lv4 AS (SELECT 0 g FROM lv3 a CROSS JOIN lv3 b) -- 65,536
,lv5 AS (SELECT 0 g FROM lv4 a CROSS JOIN lv4 b) -- 4,294,967,296
,Tally (n) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM lv5),
results
AS ( SELECT n - LEN(#Sep) AS Nth
, row_number() OVER ( ORDER BY n ) - 1 AS Position
FROM Tally t
WHERE n BETWEEN 1
AND DATALENGTH(#TargetString) + DATALENGTH(#Sep)
AND SUBSTRING(#Sep + #TargetString, n, LEN(#Sep)) = #Sep)
SELECT #CurPos = Nth
FROM results
WHERE results.Position = #n
END
RETURN #CurPos
END
GO
-- Prints the position of the nth occurrence of @character in @a, or a
-- message with the actual number of occurrences when there are fewer than n.
declare @a nvarchar(50)='Enter Your string '
declare @character char='e'
declare @nthoccurence int = 2
declare @i int = 1
declare @j int =0
-- Occurrence count = characters removed by REPLACE. DATALENGTH (2 bytes per
-- nvarchar character) is used instead of LEN because LEN ignores trailing
-- spaces and would miscount when the text ends in spaces.
declare @count int = (datalength(@a) - datalength(replace(@a, @character, ''))) / 2

if (@count >= @nthoccurence)
begin
    -- Hop from hit to hit; @j always holds the position of the latest hit.
    while (@i <= @nthoccurence)
    begin
        set @j = CHARINDEX(@character, @a, @j + 1)
        set @i = @i + 1
    end
    print @j
end
else
begin
    Print 'you have only '+convert(nvarchar ,@count)+' occurrences of '+@character
end
-- Splits a value of the form 'A-B-C' into its three hyphen-separated parts.
-- Any part that cannot be determined (fewer than two hyphens) is NULL.
DECLARE @x VARCHAR(32) = 'MS-SQL-Server';

-- Positions of the first and second hyphen; NULL when missing so that the
-- SUBSTRING calls below return NULL instead of failing on a negative length.
DECLARE @p1 INT = NULLIF(CHARINDEX('-', @x), 0);
DECLARE @p2 INT = CASE
                      WHEN @p1 IS NOT NULL THEN NULLIF(CHARINDEX('-', @x, @p1 + 1), 0)
                  END;

SELECT
    SUBSTRING(@x, 1, @p1 - 1) A,
    -- Length is the distance between the two hyphens, not the position of the first one.
    SUBSTRING(@x, @p1 + 1, @p2 - @p1 - 1) B,
    -- Starts right after the second hyphen; the original derived the start
    -- from the REVERSEd string, which is only right when the first and last
    -- parts happen to line up.
    SUBSTRING(@x, @p2 + 1, LEN(@x) - @p2) C
A B C
MS SQL Server
Inspired by Alex K's reply One way (2k8), I have created a script for a Token Function for the SQL Server for returning a specific token from a string.
I needed this for refactoring an SSIS package to T-SQL without having to implement Alex's solution manually a number of times.
My function has one disadvantage: It returns the token value as a table (one column, one row) instead of as a varchar value. If anyone has a solution for this, please let me know.
DROP FUNCTION [RDW].[token]
GO
create function [RDW].[token] (#string varchar(8000), #split varchar(50), #returnIndex int)
returns table
as
return with T(img, starts, pos, [index]) as (
select #string, 1, charindex(#split, #string), 0
union all
select #string, pos + 1, charindex(#split, #string, pos + 1), [index]+1
from t
where pos > 0
)
select substring(img, starts, case when pos > 0 then pos - starts else len(img) end) token
from T
where [index] = #returnIndex
GO
Try this
CREATE FUNCTION [dbo].[CHARINDEX2] (
#expressionToFind VARCHAR(MAX),
#expressionToSearch VARCHAR(MAX),
#occurrenceIndex INT,
#startLocation INT = 0
)
RETURNS INT
AS BEGIN
IF #occurrenceIndex < 1 BEGIN
RETURN CAST('The argument #occurrenceIndex must be a positive integer.' AS INT)
END
IF #startLocation < 0 BEGIN
RETURN CAST('The argument #startLocation must be a non negative integer.' AS INT)
END
DECLARE #returnIndex INT
SET #returnIndex = CHARINDEX(#expressionToFind, #expressionToSearch, #startLocation)
IF (#occurrenceIndex = 1) BEGIN
RETURN #returnIndex
END
DECLARE #target_length INT
SET #target_length = LEN(#expressionToFind)
SET #occurrenceIndex += -1
WHILE (#occurrenceIndex > 0 AND #returnIndex > 0) BEGIN
SET #returnIndex = CHARINDEX(#expressionToFind, #expressionToSearch, #returnIndex + #target_length);
SET #occurrenceIndex += -1
END
RETURN #returnIndex
END
GO
I use this function:
-- Splits @StringToSplit on @Delimiter by turning it into an XML fragment
-- (<X>piece</X><X>piece</X>...) and shredding the nodes.
--
-- Returns: one row per piece in [Item] (empty pieces included), numbered by
-- the identity column [ID].
-- Limitation: only '&' is escaped; input containing '<' still makes the XML
-- cast fail.
CREATE FUNCTION [pub].[SplitString]
( @StringToSplit nvarchar(MAX),
@Delimiter as nvarchar(10)
)
-- Example of use:
-- select * from [pub].[SplitString] ('one;two;three;;for & six;', ';')
RETURNS
@returnList TABLE ([Item] [nvarchar] (4000), [ID] [int] IDENTITY(1,1))
AS
BEGIN
    DECLARE @xml as xml

    -- The delimiter is replaced FIRST so that a delimiter such as ';' cannot
    -- match inside the entity inserted afterwards; the inserted tags contain
    -- no '&', so escaping afterwards leaves them intact.
    -- The entity is assembled from two literals so that it survives tools
    -- that decode HTML entities in source text (which is how this replace
    -- had degraded into the no-op '&' -> '&').
    SET @xml =
        cast
        ( (N'<X>' + replace(replace(@StringToSplit, @Delimiter, N'</X><X>'), N'&', N'&' + N'amp;') + N'</X>'
        ) as xml
        )

    INSERT INTO @returnList([Item])
    SELECT
        N.value('.', 'nvarchar(max)') as [Item]
    FROM
        @xml.nodes('X') as T(N)

    RETURN
END
This does not directly answer the question, but as I have found this while looking for a solution for extracting multiple values from a single string, with a specific delimiter, I post it anyway in case it could help someone.
SQL Server ships with the function PARSENAME that can extract up to 4 elements in a dot separated string, from the right :
SELECT PARSENAME('1234.5437.43278.532', 2) as third_element
--43278
You could use this function to retrieve the 2nd to the 5th element of the file name, by selecting from the second element to the end of the filename, without the extension.
Note that the code to remove the file extension (the 4th position from the right) is hardcoded here, so it's better to change it if you have some mix with .jpg and .jpeg file extensions for instance.
-- Splits file names shaped like <name>_<n1>_<n2>_<n3>_<n4>.<ext> into five columns.
-- The first element is taken with LEFT(); the remaining four are extracted with
-- PARSENAME after turning the underscores into dots.
DECLARE @t TABLE (
    c_filename VARCHAR(1000)
);

INSERT INTO @t (c_filename)
VALUES
    ('abc_1_2_3_4.gif'),
    ('abcdefgh_1231_78432_398389_12144.png');

SELECT
    LEFT(
        f.c_filename,
        CHARINDEX('_', f.c_filename) - 1
    ) AS first_element,
    PARSENAME(p.dotted_tail, 4) AS second_element, -- change the number to get the nth element (counted from the right)
    PARSENAME(p.dotted_tail, 3) AS third_element,
    PARSENAME(p.dotted_tail, 2) AS fourth_element,
    PARSENAME(p.dotted_tail, 1) AS fifth_element
FROM @t AS f
CROSS APPLY (
    /* PARSENAME only works with 4 elements, so we remove:
       - the 1st element (everything up to and including the first '_')
       - the file extension: the trailing 4 characters ('.' plus a 3-letter
         extension) are hard-coded, adjust the "- 4" for other extension lengths */
    SELECT
        REPLACE(
            SUBSTRING(
                f.c_filename,
                CHARINDEX('_', f.c_filename) + 1,
                LEN(f.c_filename) - CHARINDEX('_', f.c_filename) - 4
            ),
            '_',
            '.'
        ) AS dotted_tail
) AS p;
+---------------+----------------+---------------+----------------+---------------+
| first_element | second_element | third_element | fourth_element | fifth_element |
+---------------+----------------+---------------+----------------+---------------+
| abc | 1 | 2 | 3 | 4 |
| abcdefgh | 1231 | 78432 | 398389 | 12144 |
+---------------+----------------+---------------+----------------+---------------+

Find non-ASCII characters in varchar columns using SQL Server

How can rows with non-ASCII characters be returned using SQL Server?
If you can show how to do it for one column, that would be great.
I am doing something like this now, but it is not working
-- Asker's attempt (reported as not working): return rows where Line contains
-- at least one character outside the set "! through ~, plus space".
-- NOTE(review): no binary collation is applied here, so the !-~ range is presumably
-- evaluated in the column collation's sort order rather than code-point order - confirm.
select *
from Staging.APARMRE1 as ar
where ar.Line like '%[^!-~ ]%'
For extra credit, if it can span all varchar columns in a table, that would be outstanding! In this solution, it would be nice to return three columns:
The identity field for that record. (This will allow the whole record to be reviewed with another query.)
The column name
The text with the invalid character
Id | FieldName | InvalidText |
----+-----------+-------------------+
25 | LastName | Solís |
56 | FirstName | François |
100 | Address1 | 123 Ümlaut street |
Invalid characters would be any outside the range of SPACE (decimal 32) through ~ (decimal 126)
Here is a solution for the single column search using PATINDEX.
It also displays the StartPosition, InvalidCharacter and ASCII code.
-- For each row that contains a character outside "space through ~", report the
-- position of the first such character, the character itself and its ASCII code.
-- The binary collation makes the range compare by code point.
SELECT
    hit.line,
    hit.pos AS [Position],
    SUBSTRING(hit.line, hit.pos, 1) AS [InvalidCharacter],
    ASCII(SUBSTRING(hit.line, hit.pos, 1)) AS [ASCIICode]
FROM (
    -- Locate the first offending character once per row.
    SELECT
        Line AS line,
        PATINDEX('%[^ !-~]%' COLLATE Latin1_General_BIN, Line) AS pos
    FROM staging.APARMRE1
) AS hit
WHERE hit.pos > 0
I've been running this bit of code with success
-- Demo: find nvarchar values that do not survive a conversion to varchar.
-- A table variable (note the @ prefix) holds the sample rows.
DECLARE @UnicodeData TABLE (
    data nvarchar(500)
);

INSERT INTO @UnicodeData (data)
VALUES
    (N'Horse�'),
    (N'Dog'),
    (N'Cat');

-- A row is returned when the value differs (under a binary comparison) from its
-- own varchar conversion, i.e. the conversion changed at least one character.
SELECT
    u.data
FROM @UnicodeData AS u
WHERE u.data COLLATE LATIN1_GENERAL_BIN <> CAST(u.data AS varchar(max));
Which works well for known columns.
For extra credit, I wrote this quick script to search all nvarchar columns in a given table for Unicode characters.
-- Builds and runs one query that scans every nvarchar column of @table for values
-- that change when converted to varchar. Output columns: FieldName, InvalidCharacter.
DECLARE
    @sql nvarchar(max) = N'',      -- nvarchar so non-ASCII identifiers are not mangled
    @table sysname = N'mytable';   -- enter your table here

WITH ColumnData AS (
    SELECT
        -- QUOTENAME escapes any ']' inside an identifier, unlike manual '[' + name + ']'.
        ColumnName = QUOTENAME(c.COLUMN_NAME),
        TableName = QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME)
    FROM INFORMATION_SCHEMA.COLUMNS AS c
    WHERE c.DATA_TYPE = 'nvarchar'
      AND c.TABLE_NAME = @table
)
SELECT
    @sql = @sql
        -- Put the separator in front of every branch except the first, so the
        -- generated SQL is valid whatever order the rows are concatenated in.
        + CASE WHEN @sql = N'' THEN N'' ELSE N' union all ' + CHAR(13) END
        -- The column name is also emitted as a string literal, so double any quotes in it.
        + N'select FieldName = ''' + REPLACE(cd.ColumnName, N'''', N'''''') + N''''
        + N', InvalidCharacter = ' + cd.ColumnName
        + N' from ' + cd.TableName
        + N' where ' + cd.ColumnName + N' collate LATIN1_GENERAL_BIN != cast(' + cd.ColumnName + N' as varchar(max)) '
FROM ColumnData AS cd;

-- check
-- print @sql
EXEC (@sql);
I'm not a fan of dynamic SQL but it does have its uses for exploratory queries like this.
try something like this:
-- Demo: list every (row, column) whose value contains a character with an ASCII
-- code below 32 or above 127, by testing each character position separately.
DECLARE @YourTable TABLE (PK int, col1 varchar(20), col2 varchar(20), col3 varchar(20));

INSERT INTO @YourTable (PK, col1, col2, col3)
VALUES
    (1, 'ok', 'ok', 'ok'),
    (2, 'BA' + CHAR(182) + 'D', 'ok', 'ok'),
    (3, 'ok', CHAR(182) + 'BAD', 'ok'),
    (4, 'ok', 'ok', 'B' + CHAR(182) + 'AD'),
    (5, CHAR(182) + 'BAD', 'ok', CHAR(182) + 'BAD'),
    (6, 'BAD' + CHAR(182), 'B' + CHAR(182) + 'AD', 'BAD' + CHAR(182) + CHAR(182) + CHAR(182));

--if you have a Numbers table use that, otherwise make one using a CTE
WITH AllNumbers AS (
    SELECT 1 AS Number
    UNION ALL
    SELECT Number + 1
    FROM AllNumbers
    WHERE Number < 1000
)
-- DISTINCT collapses the one-row-per-bad-character join result to one row per
-- (pk, column), which is what the UNION of three separate queries did before.
SELECT DISTINCT
    y.PK AS pk,
    c.BadValueColumn,
    CONVERT(varchar(20), c.BadValue) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM @YourTable AS y
-- Unpivot the three columns so the character test is written only once.
CROSS APPLY (
    VALUES
        ('Col1', y.col1),
        ('Col2', y.col2),
        ('Col3', y.col3)
) AS c (BadValueColumn, BadValue)
INNER JOIN AllNumbers AS n
    ON n.Number <= LEN(c.BadValue)
WHERE ASCII(SUBSTRING(c.BadValue, n.Number, 1)) < 32
   OR ASCII(SUBSTRING(c.BadValue, n.Number, 1)) > 127
ORDER BY pk, BadValueColumn
OPTION (MAXRECURSION 1000); -- the CTE recurses 999 times, above the default limit of 100
OUTPUT:
pk BadValueColumn BadValue
----------- -------------- --------------------
2 Col1 BA¶D
3 Col2 ¶BAD
4 Col3 B¶AD
5 Col1 ¶BAD
5 Col3 ¶BAD
6 Col1 BAD¶
6 Col2 B¶AD
6 Col3 BAD¶¶¶
(8 row(s) affected)
This script searches for non-ascii characters in one column. It generates a string of all valid characters, here code point 32 to 127. Then it searches for rows that don't match the list:
-- Builds a LIKE character class containing every character from code 32 to 127,
-- each one preceded by the escape character '|', then returns the rows of
-- YourTable whose col1 contains at least one character outside that class.
-- 96 characters x 2 (escape + character) = 192 characters, so the buffer must be
-- larger than 128 or the assignment silently truncates the class.
DECLARE @str varchar(256) = '';
DECLARE @i int = 32;

WHILE @i <= 127
BEGIN
    SET @str = @str + '|' + CHAR(@i);
    SET @i = @i + 1;
END;

SELECT col1
FROM YourTable
WHERE col1 LIKE '%[^' + @str + ']%' ESCAPE '|';
running the various solutions on some real world data - 12M rows varchar length ~30, around 9k dodgy rows, no full text index in play, the patIndex solution is the fastest, and it also selects the most rows.
(pre-ran km. to set the cache to a known state, ran the 3 processes, and finally ran km again - the last 2 runs of km gave times within 2 seconds)
patindex solution by Gerhard Weiss -- Runtime 0:38, returns 9144 rows
-- Benchmark query 1: PATINDEX with a binary collation; keeps rows where a
-- character outside "space through ~" is found.
SELECT fcc.dodgyColumn
FROM myTable AS fcc
WHERE PATINDEX('%[^ !-~]%' COLLATE Latin1_General_BIN, fcc.dodgyColumn) > 0
the substring-numbers solution by MT. -- Runtime 1:16, returned 8996 rows
-- Benchmark query 2: numbers-table approach; the join yields one row per character
-- position whose ASCII code is below 32 or above 127.
-- NOTE(review): the join condition is number < LEN(...), not <=, so the final
-- character of each value is presumably never inspected - confirm whether <= was intended.
select dodgyColumn from myTable fcc
INNER JOIN dbo.Numbers32k dn ON dn.number<(len(fcc.dodgyColumn ))
WHERE ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))<32
OR ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))>127
udf solution by Deon Robertson -- Runtime 3:47, returns 7316 rows
-- Benchmark query 3: scalar UDF called once per row with its second argument set to 1.
SELECT t.dodgyColumn
FROM myTable AS t
WHERE dbo.udf_test_ContainsNonASCIIChars(t.dodgyColumn, 1) = 1
There is a user-defined function available on the web called 'Parse Alphanumeric'. Google "UDF parse alphanumeric" and you should find the code for it. This user-defined function removes all characters that don't fit between 0-9, a-z, and A-Z.
-- Returns the rows whose last_name differs from its udf_parsealpha() result,
-- i.e. the function removed at least one character from it.
-- SQL Server requires a schema prefix when calling a scalar UDF;
-- assumes udf_parsealpha was created in dbo - TODO confirm.
SELECT ar.*
FROM Staging.APARMRE1 AS ar
WHERE dbo.udf_parsealpha(ar.last_name) <> ar.last_name
That should bring back any records that have a last_name with invalid chars for you... though your bonus points question is a bit more of a challenge, I think a case statement could handle it. This is a bit of pseudo-code; I'm not entirely sure if it'd work.
-- Reports, for each row with at least one offending field, the first offending
-- field (checked in the order last_name, first_name, Address1) and its text.
-- Each field is compared with its own udf_parsealpha() result.
-- SQL Server requires a schema prefix when calling a scalar UDF;
-- assumes udf_parsealpha was created in dbo - TODO confirm.
SELECT
    ar.id,
    CASE
        WHEN dbo.udf_parsealpha(ar.last_name) <> ar.last_name THEN 'last name'
        WHEN dbo.udf_parsealpha(ar.first_name) <> ar.first_name THEN 'first name'
        WHEN dbo.udf_parsealpha(ar.Address1) <> ar.Address1 THEN 'Address1'
    END AS FieldName,
    CASE
        WHEN dbo.udf_parsealpha(ar.last_name) <> ar.last_name THEN ar.last_name
        WHEN dbo.udf_parsealpha(ar.first_name) <> ar.first_name THEN ar.first_name
        WHEN dbo.udf_parsealpha(ar.Address1) <> ar.Address1 THEN ar.Address1
    END AS InvalidText
FROM Staging.APARMRE1 AS ar
WHERE dbo.udf_parsealpha(ar.last_name) <> ar.last_name
   OR dbo.udf_parsealpha(ar.first_name) <> ar.first_name
   OR dbo.udf_parsealpha(ar.Address1) <> ar.Address1
I wrote this in the forum post box...so I'm not quite sure if that'll function as is, but it should be close. I'm not quite sure how it will behave if a single record has two fields with invalid chars either.
As an alternative, you should be able to change the from clause away from a single table and into a subquery that looks something like:
-- Unpivots the text columns into (id, fieldname, value) rows and keeps the rows
-- whose value differs from its udf_parsealpha() result.
-- SQL Server requires a schema prefix when calling a scalar UDF;
-- assumes udf_parsealpha was created in dbo - TODO confirm.
SELECT
    f.id,
    f.fieldname,
    f.value
FROM (
    SELECT id, 'last_name' AS fieldname, last_name AS value
    FROM Staging.APARMRE1
    -- The branches carry different fieldname literals, so they cannot collide;
    -- UNION ALL skips the duplicate-removal step (assumes id is unique per row - confirm).
    UNION ALL
    SELECT id, 'first_name' AS fieldname, first_name AS value
    FROM Staging.APARMRE1
    ---(and repeat UNION ALL branches for each field)
) AS f -- a derived table must have an alias in T-SQL
WHERE dbo.udf_parsealpha(f.value) <> f.value
The benefit here is that for every additional column you only need to extend the union statement, whereas in the case-statement version of this script you need to repeat the comparison three times for every column.
To find which field has invalid characters:
-- Serialize every column of every row to XML.
SELECT *
FROM Staging.APARMRE1
FOR XML AUTO, TYPE
You can test it with this query:
SELECT top 1 'char 31: '+char(31)+' (hex 0x1F)' field -- one column named "field" whose text embeds CHAR(31)
from sysobjects -- presumably used only as a convenient source for a single row (TOP 1)
FOR XML AUTO, TYPE -- serializing the row to XML is the step the error message below refers to
The result will be:
Msg 6841, Level 16, State 1, Line 3 FOR XML could not serialize the
data for node 'field' because it contains a character (0x001F) which
is not allowed in XML. To retrieve this data using FOR XML, convert it
to binary, varbinary or image data type and use the BINARY BASE64
directive.
This is very useful when you write XML files and get invalid-character errors when validating them.
Here is a UDF I built to detect columns with extended ASCII characters. It is quick, and you can extend the character set you want to check. The second parameter allows you to switch between checking anything outside the standard character set or allowing an extended set:
-- Returns 1 when @string contains a character outside printable ASCII (codes 32-126),
-- otherwise 0. A NULL @string returns 0.
--
-- @string               text to inspect
-- @checkExtendedCharset 1 = additionally accept the extended single-byte codes listed
--                       below (tab plus a set of codes in the 130-255 range);
--                       any other value = accept codes 32-126 only
create function [dbo].[udf_ContainsNonASCIIChars]
(
    @string nvarchar(4000),
    @checkExtendedCharset bit
)
returns bit
as
begin
    declare @pos int = 1;             -- SUBSTRING is 1-based
    declare @len int = len(@string);  -- LEN ignores trailing spaces, which are plain ASCII anyway
    declare @nchar nchar(1);          -- kept as Unicode so unmappable characters are not turned into '?'
    declare @ansi varchar(1);
    declare @code int;
    declare @allowed bit;
    declare @return bit = 0;

    -- <= so that the last character is inspected as well
    while @pos <= @len
    begin
        set @nchar = substring(@string, @pos, 1);
        set @code = unicode(@nchar);

        if @code < 32 or @code > 126
        begin
            set @allowed = 0;
            set @ansi = cast(@nchar as varchar(1));

            -- The extended list holds single-byte code-page values, so it only applies
            -- when the character converts to varchar and back without being changed.
            if @checkExtendedCharset = 1
               and cast(@ansi as nchar(1)) = @nchar collate Latin1_General_BIN
               and ascii(@ansi) in (9,124,130,138,142,146,150,154,158,160,170,176,180,181,183,184,185,186,192,193,194,195,196,197,199,200,201,202,203,204,205,206,207,209,210,211,212,213,214,216,217,218,219,220,221,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,248,249,250,251,252,253,254,255)
            begin
                set @allowed = 1;
            end

            if @allowed = 0
            begin
                -- First offending character found: no need to look further.
                set @return = 1;
                break;
            end
        end

        set @pos = @pos + 1;
    end

    return @return;
end
USAGE:
-- Rows whose Address1 contains a character outside both printable ASCII and the
-- extended set (second argument = 1). A scalar UDF must be called with its schema
-- prefix; the function is created as [dbo].[udf_ContainsNonASCIIChars].
SELECT Address1
FROM PropertyFile_English
WHERE dbo.udf_ContainsNonASCIIChars(Address1, 1) = 1

Resources