How to replace a character in a string using T-SQL - sql-server

In my table column, i have below sample data
Test1 145, Area 1
Test2 146,
Test3 145, Area 2, Plot 10
What I want to achieve is to remove the "," from the string, but only if it is the last character. If there are more characters after the ",", the string should be left as it is.
In the example above, the replace would only work in line 2.
The expected output would be as below:
Test1 145, Area 1
Test2 146
Test3 145, Area 2, Plot 10
In line 2 above, "," has been replaced with empty space.
I have tried this Replace(column1, ', ', '') AS ColName but this replaces all the "," in Test1 and Test3.

You can try this:
-- Strip a single trailing comma from a scalar value; any other value is
-- returned unchanged. Local variables take the @ sigil in T-SQL.
DECLARE @value VARCHAR(1024) = 'Test2 146,';
-- RIGHT(..., 1) inspects only the final character, so interior commas are untouched.
SELECT IIF(RIGHT(@value, 1) = ',', LEFT(@value, LEN(@value) - 1), @value);
For column it looks like below:
-- Same trailing-comma removal applied to every row of a column.
-- @DataSource is a table variable (declared with DECLARE ... TABLE), so it
-- takes the @ sigil rather than the # used for temp tables.
DECLARE @DataSource TABLE
(
    [value] VARCHAR(1024)
);

INSERT INTO @DataSource ([value])
VALUES ('Test1 145, Area 1')
      ,('Test2 146,')
      ,('Test3 145, Area 2, Plot 10');

-- Only rows whose last character is a comma are shortened by one character.
SELECT IIF(RIGHT([value], 1) = ',', LEFT([value], LEN([value]) - 1), [value])
FROM @DataSource;

You can also do this with LIKE and IIF:
-- Removes a trailing comma from every row of YourTable.
-- [Column] stands in for the real column name; it is bracketed because
-- COLUMN is a reserved keyword in T-SQL and cannot be used bare.
SELECT IIF(t.[Column] LIKE '%,', LEFT(t.[Column], LEN(t.[Column]) - 1), t.[Column]) AS new_val
FROM YourTable t
For older versions: you can use a CASE expression, since IIF is only available in SQL Server 2012 and later (link by @gotqn).
-- CASE form of the trailing-comma removal, usable where IIF is unavailable.
-- [Column] stands in for the real column name; it is bracketed because
-- COLUMN is a reserved keyword in T-SQL and cannot be used bare.
SELECT CASE WHEN t.[Column] LIKE '%,'
            THEN LEFT(t.[Column], LEN(t.[Column]) - 1)
            ELSE t.[Column]
       END AS new_val
FROM YourTable t

I am pretty sure IIF isn't available in SQL Server 2005. This is basically the same logic as the previous answer using CASE instead.
-- CASE-based trailing-comma removal that does not depend on IIF.
-- Local variables take the @ sigil in T-SQL.
declare @MyString varchar(50)
set @MyString = 'Test2 146,'
select
    case
        -- rtrim lets a comma followed only by blanks still count as trailing
        when right(rtrim(@MyString), 1) = ',' then
            -- keep everything up to, but not including, that comma
            substring(@MyString, 1, len(rtrim(@MyString)) - 1)
        else
            @MyString
    end

Something like this:
-- Returns Column1 with its last character deleted when the value matches
-- LIKE '%,'; otherwise returns Column1 as is.
-- STUFF(column1, LEN(column1), 1, '') deletes one character at position LEN(column1).
-- NOTE(review): the snippet has no FROM clause; append the table that owns Column1.
SELECT CASE
WHEN Column1 LIKE '%,' THEN STUFF(column1, LEN(column1), 1, '')
ELSE Column1
END

This shows one way to do it.
-- Walks three sample values through the same "strip one trailing comma" step,
-- printing each value before and after, then shows the single-expression form.
DECLARE @test VARCHAR(30);

-- Sample 1: commas only in the middle, so the value must come back unchanged.
SET @test = 'Test1, 145, Area 1';
SELECT @test;
-- The search starts at the last non-blank position, so CHARINDEX can only
-- succeed when that final character is itself a comma.
IF CHARINDEX(',', @test, LEN(RTRIM(@test))) > 0
BEGIN
    -- Drop just the final character; REPLACE(@test, ',', '') would also strip
    -- every interior comma from a value such as 'A, B,'.
    SET @test = LEFT(@test, LEN(@test) - 1);
END
SELECT @test;

-- Sample 2: ends with a comma, so exactly that comma is removed.
SET @test = 'Test2 146,';
SELECT @test;
IF CHARINDEX(',', @test, LEN(RTRIM(@test))) > 0
BEGIN
    SET @test = LEFT(@test, LEN(@test) - 1);
END
SELECT @test;

-- Sample 3: several interior commas and no trailing one; unchanged.
SET @test = 'Test3 145, Area 2, Plot 10';
SELECT @test;
IF CHARINDEX(',', @test, LEN(RTRIM(@test))) > 0
BEGIN
    SET @test = LEFT(@test, LEN(@test) - 1);
END
SELECT @test;

-- How to work into a SELECT statement
SET @test = 'Test2 146,';
SELECT CASE WHEN CHARINDEX(',', @test, LEN(RTRIM(@test))) > 0 THEN SUBSTRING(@test, 1, LEN(@test) - 1) ELSE @test END AS 'Converted Value';

Related

How to trim leading zeros of each value in a concatenated field in SQL Server

I have a requirement as below.
Input:
00113|0267|0139
Expected Output:
113|267|139
Input is directly coming from a file and it's not a generated field
As of now I am using the code below to split the field into multiple values, then trimming each value using the PATINDEX and SUBSTRING functions, and finally concatenating them again:
Substring(col, 1,charindex('|',col)-1) part1,
Substring(col,charindex('|',col)+1,(charindex('|', col, charindex('|', col, 1)+1)-charindex('|',col)-1)) part2,
Substring(col,charindex('|', col, charindex('|', col, 1)+1)+1,len(col)) part3
Let me know if there is any better way to achieve this without using these many string parsing functions
try the following:
-- Removes the zeros that directly follow a '|' (or start the string) from @str.
-- @str is expected to be declared and populated by the caller, e.g.
--   declare @str varchar(100) = '00113|0267|0139'
-- Local variables take the @ sigil in T-SQL.
declare @i int = 0
-- '|' is prepended for the search so a zero at the very start is found as '|0' too.
while (patindex('%|0%', '|'+@str) > 0)
begin
    -- Each pass deletes the first '|0' zero and, as a side effect of working on
    -- the prefixed copy, leaves one extra '|' at the front of @str.
    set @str = ( replace(substring('|'+ @str, 1, patindex('%|0%', '|'+@str)+1 ), '|0', '|') + substring(@str, patindex('%|0%', '|'+@str)+1, len(@str)) )
    -- @i counts those extra leading pipes so they can be removed afterwards.
    set @i += 1
end
-- Strip the @i pipes accumulated at the front.
select stuff(@str, 1, @i, '')
you can also use string_split like below:
-- Splits @str on '|', converts each piece to int (which drops leading zeros),
-- and glues the pieces back together with '|'. @str is supplied by the caller.
-- nullif turns an empty piece into NULL so the int conversion does not yield 0.
-- NOTE(review): STRING_SPLIT does not guarantee the order of its output rows,
-- so the reassembled pieces are not guaranteed to keep their original order.
select STUFF((SELECT '|' + convert(varchar(100), convert(int, nullif(value, '')))
              FROM string_split(@str, '|') t
              FOR XML PATH ('')) , 1, 1, '') Grouped_Value
please find the db<>fiddle here.
one option is to simply use replace()
Example
-- Removes leading zeros from each '|'-separated segment using nested REPLACE.
-- @YourTable is a table variable, so it takes the @ sigil.
Declare @YourTable Table ([SomeCol] varchar(50))
Insert Into @YourTable ([SomeCol]) Values
 ('00113|0267|0139')

-- A '|' is prepended so the first segment is treated like the others; each
-- REPLACE pass removes one zero after every '|', so three passes strip at most
-- three leading zeros per segment. STUFF then removes the prepended '|'.
Select SomeCol
      ,NewVal = stuff(replace(replace(replace('|'+SomeCol,'|0','|'),'|0','|'),'|0','|'),1,1,'')
 From @YourTable
Returns
SomeCol NewVal
00113|0267|0139 113|267|139

Need to calculate the sum of second using a function from a string variable using SQL

There is a column named as duration in a table called Adventurous.The column has values as below.In the column Suffix of 'H' is hours,Suffix of 'M' is minutes and Suffix of 'S' is seconds.How can we select the hours, minutes and seconds and convert all into seconds i.e sum of all the hours minutes and seconds in the form of seconds.
Duration
--------
PT10M13S
PT13M22S
PT1H2M18S
PT11S
i tried using substring and charindex as below and tried to create a function but i am getting error:
Declare #Duration varchar(30) ='PT16H13M42S', #Dur varchar(10)
Declare #hours int
declare #mins int
declare #secs int
declare #len int
select #len = len(substring (#Duration, 3, len(#Duration))), #Dur=substring (#Duration, 3, len(#Duration))
select #hours = charindex('H', #Dur)
select substring(#Dur, 1, #hours-1)
select #Duration=substring (#Dur, #hours+1, len(#Dur))
select #mins = charindex('M', #Duration)
select substring(#Duration, 1, #mins-1)
select #Dur=substring (#Duration, #mins+1, len(#Duration))
select #secs= charindex('S', #Dur)
select substring(#Dur, 1, #Secs-1)
select #len, #Dur, #Duration
example PT1H2M18S= 1*3600+2*60+18=3738
Try this:
-- Converts durations of the form 'PT[<n>H][<n>M][<n>S]' to a total in seconds.
-- Any of the H / M / S components may be absent; a missing component counts
-- as 0 instead of making SUBSTRING fail on a negative length.
DECLARE @t TABLE (duration varchar(50));
INSERT INTO @t (duration) VALUES ('PT1H2M18S');

SELECT
      COALESCE(CONVERT(int, SUBSTRING(d.duration, pos.body,   NULLIF(pos.h, 0) - pos.body)),   0) * 3600
    + COALESCE(CONVERT(int, SUBSTRING(d.duration, st.m_start, NULLIF(pos.m, 0) - st.m_start)), 0) * 60
    + COALESCE(CONVERT(int, SUBSTRING(d.duration, st.s_start, NULLIF(pos.s, 0) - st.s_start)), 0) AS total_seconds
FROM @t AS d
-- Marker positions; CHARINDEX returns 0 for a marker that is absent, which
-- NULLIF above turns into NULL so the whole component becomes NULL, then 0.
CROSS APPLY (
    SELECT
        CHARINDEX('PT', d.duration) + 2 AS body,
        CHARINDEX('H', d.duration)      AS h,
        CHARINDEX('M', d.duration)      AS m,
        CHARINDEX('S', d.duration)      AS s
) AS pos
-- Each component starts right after the nearest preceding marker that exists.
CROSS APPLY (
    SELECT
        CASE WHEN pos.h > 0 THEN pos.h + 1 ELSE pos.body END AS m_start,
        CASE WHEN pos.m > 0 THEN pos.m + 1
             WHEN pos.h > 0 THEN pos.h + 1
             ELSE pos.body
        END AS s_start
) AS st;
Another possible approach is to transform Duration text input into a valid T-SQL expression ('PT1H2M18S' will be transformed into '1*3600+2*60+18*1+0'). After that, consider next two options:
Generate and execute a dynamic statement, which will evaluate each expression or
Define a function to make the calculations
Input:
CREATE TABLE #Data (
Duration varchar(50)
)
INSERT INTO #Data
(Duration)
VALUES
('PT10M13S'),
('PT13M22S'),
('PT1H2M18S'),
('PT100H'),
('PT11S')
Dynamic statement:
-- Builds one 'UNION ALL SELECT ...' branch per row of the temp table #Data,
-- turning e.g. 'PT1H2M18S' into the arithmetic expression '1*3600+2*60+18*1+0',
-- then executes the combined statement. @stm is a local variable (@ sigil);
-- #Data is a temp table and keeps its #.
-- NOTE(review): Duration is concatenated into the statement unescaped, so this
-- is only safe when #Data holds trusted values.
DECLARE @stm nvarchar(max)
SET @stm = N''
SELECT @stm = @stm +
    CONCAT(
        'UNION ALL SELECT ''',
        Duration,
        ''' AS [Duration], ',
        REPLACE(REPLACE(REPLACE(REPLACE(Duration, 'H', '*3600+'), 'M', '*60+'), 'S', '*1+'), 'PT', ''),
        '0 AS [Seconds] '
    )
FROM #Data
-- Remove the 10 characters of the leading 'UNION ALL ' from the first branch.
SET @stm = STUFF(@stm, 1, 10, N'')
EXEC (@stm)
User-defined function:
-- Evaluates a sum of integer products such as '1*3600+2*60+18*1+0'.
--   @expression : terms of the form <int>*<int>, each followed by '+'.
--                 Whatever follows the last '+' is ignored.
--   returns     : the sum of the products of all '+'-terminated terms.
CREATE FUNCTION [udfCalculateHMS] (@expression varchar(100))
RETURNS int
AS
BEGIN
    DECLARE @result int
    DECLARE @s varchar(100)
    --
    SET @result = 0
    WHILE (CHARINDEX('+', @expression) > 0) BEGIN
        -- Peel the next term (text before the first '+') off the front.
        SET @s = SUBSTRING(@expression, 1, CHARINDEX('+', @expression) - 1)
        SET @expression = STUFF(@expression, 1, CHARINDEX('+', @expression), '')
        -- Multiply the two operands on either side of '*' and accumulate.
        SET @result = @result +
            CONVERT(int, SUBSTRING(@s, 1, CHARINDEX('*', @s) - 1)) *
            CONVERT(int, STUFF(@s, 1, CHARINDEX('*', @s), ''))
    END
    -- Return value
    RETURN @result
END
SELECT
Duration,
dbo.udfCalculateHMS(CONCAT(REPLACE(REPLACE(REPLACE(REPLACE(Duration, 'H', '*3600+'), 'M', '*60+'), 'S', '*1+'), 'PT', ''), '0')) AS Seconds
FROM #Data
Output:
Duration Seconds
PT10M13S 613
PT13M22S 802
PT1H2M18S 3738
PT100H 360000
PT11S 11
This is how I would move across the string to pull out the correct integer values. The number of characters to offset may change depending on whether you can have varying numbers of characters per hour, minute and second. But the principle should get you going.
-- Splits a duration into its hour / minute / second substrings, each taken
-- from between its marker and the preceding one. Local variables take the
-- @ sigil in T-SQL.
-- NOTE(review): all three markers (H, M, S) must be present; when one is
-- missing CHARINDEX returns 0 and the computed SUBSTRING length goes negative.
Declare @Duration varchar(30) ='PT16H13M42S'
select * from
(values(substring(@Duration,CHARINDEX('PT',@Duration)+2,(CHARINDEX('H',@Duration)-CHARINDEX('PT',@Duration))-2),
        substring(@Duration,CHARINDEX('H',@Duration)+1,(CHARINDEX('M',@Duration)-CHARINDEX('H',@Duration))-1),
        substring(@Duration,CHARINDEX('M',@Duration)+1,(CHARINDEX('S',@Duration)-CHARINDEX('M',@Duration))-1))) duration ([Hours], [Minutes], [Seconds]);
Throwing in an answer using Tally Table and mostly reliable ISNUMERIC SQL function
This should be good for small datasets. I also assume that you have valid numbers i.e. hour part are not >24, minute part or seconds part are not >60
create table #t(duration nvarchar(max));
insert into #t values
('PT10M13S')
,('PT13M22S')
,('PT1H2M18S')
,('PT11S')
select
totalseconds= sum(m.factor* case when ISNUMERIC(substring(duration, r-2,2))=1 then substring(duration, r-2,2) else substring(duration, r-1,1) end ),
duration from #t
cross join
(
select r=row_number() over (order by (select NULL))-1
from sys.objects s1 cross join sys.objects s2
)t
join
(values('S',1),('M',60),('H',3600)) m(part,factor)
on r<=len(duration) and substring(duration, r,1) =m.part
group by duration
drop table #t
PS: See this SO link which suggests that scalar UDF are faster than ISNUMERIC
Fastest way to check if a character is a digit?

Replacing Part of a String without Replace/Stuff

This is my string: 11.0000.0101.000.000.0101.000.000
I need to replace ONLY the first "0101" with "101." I can't use replace, as it replaces the 2nd instance of 0101 as well.
I tried
stuff(string, 9, 3, '101')
but since the replacement string is shorter than the existing string, I end up with
11.0000.1011.000.000.0101.000.000
What can I use besides REPLACE or STUFF? Thanks!
-- Replaces only the first occurrence of '0101' with '101' in each row.
-- @t is a table variable, so it takes the @ sigil.
declare @t table(s varchar(100))
insert into @t (s) values
('11.0000.0101.000.000.0101.000.000'), ('abc');

select case charindex('0101', s)
           -- not found: return the row untouched
           when 0 then s
           -- overwrite the 4 matched characters with the 3-character replacement
           else stuff(s, charindex('0101', s), 4, '101')
       end as new_s
from @t;
Your expression was almost right.
Just tell it to replace 4 characters of the original string instead of 3 :
stuff(string, 9, 4, '101')
But this will only work if your string has always the same positions.
you can do something like:
replace (stuff(string,9,3,'#101'),'#','')
If you need replace all 0101 to 101 go with below code
DECLARE #TempData TABLE(Data VARCHAR(1000));
INSERT INTO #TempData VALUES
('11.0000.0101.000.000.0101.000.000');
;WITH Cte
AS (
SELECT CASE
WHEN DATA = '0101'
THEN '101'
ELSE CAST(DATA AS VARCHAR(10))
END AS DATA
FROM (
SELECT Split.a.value('.', 'VARCHAR(1000)') AS Data
FROM (
SELECT CAST('<S>' + REPLACE(Data, '.', '</S><S>') + '</S>' AS XML) AS Data
FROM #TempData
) AS A
CROSS APPLY Data.nodes('/S') AS Split(a)
) Dt
)
SELECT STUFF((
SELECT '.' + DATA
FROM cte
FOR XML PATH('')
), 1, 1, '') AS ExpectedResult
OutPut
ExpectedResult
11.0000.101.000.000.101.000.000
Yet another option if you don't know the 1st observation
-- Replaces the first occurrence of @Find in @S with @Repl.
-- Local variables take the @ sigil in T-SQL.
Declare @S varchar(50) = '11.0000.0101.000.000.0101.000.000'
Declare @Find varchar(25) = '0101'
Declare @Repl varchar(25) = '101'
-- When @Find is absent CHARINDEX returns 0, STUFF then yields NULL, and
-- isnull falls back to the original string.
Select isnull(stuff(@S, charindex(@Find,@S), len(@Find), @Repl),@S)
Returns
11.0000.101.000.000.0101.000.000

Finding a word after specific string - sql management studio 2012

I have a text column with fields as per below (small sample, there are many variations):
INSERT INTO #retention1 VALUES ('hello Action=Refer non-action=non-refer')
INSERT INTO #retention1 VALUES ('bye Action=Follow non-action=non-refer')
INSERT INTO #retention1 VALUES ('hello non-action=non-refer Action=compare')
I need to find the word after "Action="
example ANSWER :
Entry 1: Refer
Entry 2: Follow
Entry 3: Compare
If all the words after "Action=" was the same length then I am able to do it. Unfortunately the length is unknown of all variations. The word after action is almost always different not only the 3 variations above.
Any ideas suggestions would be highly appreciated.
This is the code I used for the "Refer" example only which works:
BEGIN
DECLARE #P_string nvarchar (100),
#P_variable nvarchar (100)/*,
#P_return_null nvarchar(100) = 'Y'*/
set #p_string = 'hello Action=Refer non-action=non-refer'
set #p_variable = 'Action'
select substring(#p_string, charindex(upper(#P_variable),upper(#P_string)) +len(#p_variable)+1,5) as trying
END;
Try this:
-- Extracts the word that follows '<@P_variable>=' in @P_string.
-- The key is searched for as a whole token (preceded by a space or at the very
-- start), so 'Action' is not matched inside 'non-action'. A value that runs to
-- the end of the string is returned in full. Yields NULL when the key is absent.
BEGIN
    DECLARE @ret nvarchar(100), @P_string nvarchar(100),
            @P_variable nvarchar(100), @result nvarchar(100)
    SET @P_string = 'hello Action=Refer non-action=non-refer'
    SET @P_variable = 'Action'
    -- Pad the haystack with a leading space so a key at position 1 is still
    -- preceded by a space; upper() keeps the search case-insensitive.
    -- NULLIF turns "not found" (0) into NULL, which propagates to the result.
    SELECT @ret = SUBSTRING(
                      @P_string,
                      NULLIF(CHARINDEX(' ' + UPPER(@P_variable) + '=', ' ' + UPPER(@P_string)), 0)
                          + LEN(@P_variable) + 1,
                      100)
    -- Append a space so the last word also has a terminator to stop at.
    SET @result = LEFT(@ret, CHARINDEX(' ', @ret + ' ') - 1)
    SELECT @result
END;
The code you are looking for should first look for the string Action and then for a space character after that word. After that you have all you need to manipulate your source string.
This should work:
-- Finds the start (@idx1) and end (@idx2) of the word following
-- '<@P_variable>=' in @P_string and returns the text between them.
DECLARE
    @P_string nvarchar (100),
    @P_variable nvarchar (100),
    @idx1 int,
    @idx2 int
SET @P_string = 'hello Action=Refer non-action=non-refer'
SET @P_variable = 'Action'
-- Two separate assignments: @idx2 depends on @idx1, and the evaluation order of
-- several assignments inside one SELECT is not guaranteed.
-- The key is matched as a whole token (leading space, trailing '=') so it is
-- not found inside 'non-action'; NULLIF makes "not found" yield NULL.
SET @idx1 = NULLIF(CHARINDEX(' ' + LOWER(@P_variable) + '=', ' ' + LOWER(@P_string)), 0)
            + LEN(@P_variable) + 1
-- A space is appended so a value at the very end of the string still has an end.
SET @idx2 = CHARINDEX(' ', @P_string + ' ', @idx1)
SELECT @idx1, @idx2
SELECT SUBSTRING(
    @P_string,
    @idx1,
    @idx2 - @idx1) as trying
EDIT
After more thoroughly reviewing the requirements, I decided to tailor a rCTE structure that I use for similar purposes. Here it goes.
CREATE TABLE #retention1 (
ID int,
txt nvarchar (100)
)
INSERT INTO #retention1 VALUES (1, 'hello Action=Refer non-action=non-refer')
INSERT INTO #retention1 VALUES (2, 'bye Action=Follow non-action=non-refer')
INSERT INTO #retention1 VALUES (3, 'hello non-action=non-refer Action=compare')
;WITH T AS (
SELECT
ID,
Row = 0,
StartIdx = CAST(0 as int),
EndIdx = CAST(0 as int),
Result = CAST('' as nvarchar(max))
FROM #retention1
UNION ALL
SELECT
r1.ID,
Row + 1,
StartIdx = CAST(newstartidx AS int),
EndIdx = CAST(EndIdx + newendidx as int),
Result = CAST(newtoken as nvarchar(max))
FROM
T
JOIN #retention1 r1
ON r1.ID = T.ID
CROSS APPLY(
SELECT newstartidx = EndIdx + 1
) calc1
CROSS APPLY(
SELECT newtxt = substring(r1.txt, newstartidx, len(r1.txt))
) calc2
CROSS APPLY(
SELECT patidx = charindex(' ', newtxt)
) calc3
CROSS APPLY(
SELECT newendidx = CASE
WHEN patidx = 0 THEN len(newtxt)
ELSE patidx END
) calc4
CROSS APPLY(
SELECT newtoken = substring(r1.txt, newstartidx, newendidx)
) calc5
WHERE newendidx > 0
)
SELECT
ID,
--Result
Name = left(Result, eqIdx - 1),
Value = substring(Result, eqIdx + 1, len(Result) - eqIdx + 1)
FROM
T
OUTER APPLY (
SELECT eqIdx = charindex('=', Result)
) calc6
WHERE
Row != 0
AND eqIdx != 0
ORDER BY ID
Since there are more than one expressions to parse in the table, you would have problems referencing them without an identifier. So, i added ID to your temporary table.
The output from CTE also contains ID which you can use as a reference to #retention1.ID.
String handling galore with a little cheating: Extending #p_string with a space at the begining and the end.
DECLARE #P_string nvarchar (100), #P_variable nvarchar (100)
set #p_variable = ' Action='
set #p_string = ' hello Action=Refer non-action=non-refer '
select substring(substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)),charindex('=',substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))+1,CHARINDEX(' ', substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))-LEN(#P_variable))
set #p_string = ' bye Action=Follow non-action=non-refer '
select substring(substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)),charindex('=',substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))+1,CHARINDEX(' ', substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))-LEN(#P_variable))
set #p_string = ' hello non-action=non-refer Action=compare '
select substring(substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)),charindex('=',substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))+1,CHARINDEX(' ', substring(#p_string, charindex(upper(#P_variable),upper(#P_string))+1, len(#p_string)-len(#p_variable)))-LEN(#P_variable))

Find non-ASCII characters in varchar columns using SQL Server

How can rows with non-ASCII characters be returned using SQL Server?
If you can show how to do it for one column would be great.
I am doing something like this now, but it is not working
select *
from Staging.APARMRE1 as ar
where ar.Line like '%[^!-~ ]%'
For extra credit, if it can span all varchar columns in a table, that would be outstanding! In this solution, it would be nice to return three columns:
The identity field for that record. (This will allow the whole record to be reviewed with another query.)
The column name
The text with the invalid character
Id | FieldName | InvalidText |
----+-----------+-------------------+
25 | LastName | Solís |
56 | FirstName | François |
100 | Address1 | 123 Ümlaut street |
Invalid characters would be any outside the range of SPACE (32₁₀) through ~ (127₁₀)
Here is a solution for the single column search using PATINDEX.
It also displays the StartPosition, InvalidCharacter and ASCII code.
-- Returns each row of staging.APARMRE1 whose Line contains a character that
-- is not a space and not in the range '!' through '~', together with the
-- position, the character itself and its ASCII code for the first such hit.
-- The pattern match is evaluated once per row and reused by every column.
select
    line,
    hit.pos as [Position],
    substring(line, hit.pos, 1) as [InvalidCharacter],
    ascii(substring(line, hit.pos, 1)) as [ASCIICode]
from staging.APARMRE1
cross apply (
    -- binary collation is applied to the pattern for the range comparison
    select patindex('%[^ !-~]%' COLLATE Latin1_General_BIN, Line) as pos
) as hit
where hit.pos > 0
I've been running this bit of code with success
declare #UnicodeData table (
data nvarchar(500)
)
insert into
#UnicodeData
values
(N'Horse�')
,(N'Dog')
,(N'Cat')
select
data
from
#UnicodeData
where
data collate LATIN1_GENERAL_BIN != cast(data as varchar(max))
Which works well for known columns.
For extra credit, I wrote this quick script to search all nvarchar columns in a given table for Unicode characters.
declare
#sql varchar(max) = ''
,#table sysname = 'mytable' -- enter your table here
;with ColumnData as (
select
RowId = row_number() over (order by c.COLUMN_NAME)
,c.COLUMN_NAME
,ColumnName = '[' + c.COLUMN_NAME + ']'
,TableName = '[' + c.TABLE_SCHEMA + '].[' + c.TABLE_NAME + ']'
from
INFORMATION_SCHEMA.COLUMNS c
where
c.DATA_TYPE = 'nvarchar'
and c.TABLE_NAME = #table
)
select
#sql = #sql + 'select FieldName = ''' + c.ColumnName + ''', InvalidCharacter = [' + c.COLUMN_NAME + '] from ' + c.TableName + ' where ' + c.ColumnName + ' collate LATIN1_GENERAL_BIN != cast(' + c.ColumnName + ' as varchar(max)) ' + case when c.RowId <> (select max(RowId) from ColumnData) then ' union all ' else '' end + char(13)
from
ColumnData c
-- check
-- print #sql
exec (#sql)
I'm not a fan of dynamic SQL but it does have its uses for exploratory queries like this.
try something like this:
DECLARE #YourTable table (PK int, col1 varchar(20), col2 varchar(20), col3 varchar(20));
INSERT #YourTable VALUES (1, 'ok','ok','ok');
INSERT #YourTable VALUES (2, 'BA'+char(182)+'D','ok','ok');
INSERT #YourTable VALUES (3, 'ok',char(182)+'BAD','ok');
INSERT #YourTable VALUES (4, 'ok','ok','B'+char(182)+'AD');
INSERT #YourTable VALUES (5, char(182)+'BAD','ok',char(182)+'BAD');
INSERT #YourTable VALUES (6, 'BAD'+char(182),'B'+char(182)+'AD','BAD'+char(182)+char(182)+char(182));
--if you have a Numbers table use that, other wise make one using a CTE
WITH AllNumbers AS
( SELECT 1 AS Number
UNION ALL
SELECT Number+1
FROM AllNumbers
WHERE Number<1000
)
SELECT
pk, 'Col1' BadValueColumn, CONVERT(varchar(20),col1) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM #YourTable y
INNER JOIN AllNumbers n ON n.Number <= LEN(y.col1)
WHERE ASCII(SUBSTRING(y.col1, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col1, n.Number, 1))>127
UNION
SELECT
pk, 'Col2' BadValueColumn, CONVERT(varchar(20),col2) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM #YourTable y
INNER JOIN AllNumbers n ON n.Number <= LEN(y.col2)
WHERE ASCII(SUBSTRING(y.col2, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col2, n.Number, 1))>127
UNION
SELECT
pk, 'Col3' BadValueColumn, CONVERT(varchar(20),col3) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM #YourTable y
INNER JOIN AllNumbers n ON n.Number <= LEN(y.col3)
WHERE ASCII(SUBSTRING(y.col3, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col3, n.Number, 1))>127
order by 1
OPTION (MAXRECURSION 1000);
OUTPUT:
pk BadValueColumn BadValue
----------- -------------- --------------------
2 Col1 BA¶D
3 Col2 ¶BAD
4 Col3 B¶AD
5 Col1 ¶BAD
5 Col3 ¶BAD
6 Col1 BAD¶
6 Col2 B¶AD
6 Col3 BAD¶¶¶
(8 row(s) affected)
This script searches for non-ascii characters in one column. It generates a string of all valid characters, here code point 32 to 127. Then it searches for rows that don't match the list:
-- Finds rows of YourTable whose col1 contains a character outside code
-- points 32..127 by building a LIKE character class of all valid characters.
-- Every character is prefixed with the escape character '|', so the class is
-- 96 * 2 = 192 characters long; varchar(128) would silently truncate it and
-- make code points 96..127 look invalid, hence varchar(256).
declare @str varchar(256);
declare @i int;
set @str = '';
set @i = 32;
while @i <= 127
begin
    -- escape each character so ], ^, -, % and _ are taken literally
    set @str = @str + '|' + char(@i);
    set @i = @i + 1;
end;
select col1
from YourTable
where col1 like '%[^' + @str + ']%' escape '|';
running the various solutions on some real world data - 12M rows varchar length ~30, around 9k dodgy rows, no full text index in play, the patIndex solution is the fastest, and it also selects the most rows.
(pre-ran km. to set the cache to a known state, ran the 3 processes, and finally ran km again - the last 2 runs of km gave times within 2 seconds)
patindex solution by Gerhard Weiss -- Runtime 0:38, returns 9144 rows
select dodgyColumn from myTable fcc
WHERE patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,dodgyColumn ) >0
the substring-numbers solution by MT. -- Runtime 1:16, returned 8996 rows
select dodgyColumn from myTable fcc
INNER JOIN dbo.Numbers32k dn ON dn.number<(len(fcc.dodgyColumn ))
WHERE ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))<32
OR ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))>127
udf solution by Deon Robertson -- Runtime 3:47, returns 7316 rows
select dodgyColumn
from myTable
where dbo.udf_test_ContainsNonASCIIChars(dodgyColumn , 1) = 1
There is a user-defined function available on the web called 'Parse Alphanumeric'. Google "UDF parse alphanumeric" and you should find the code for it. This user-defined function removes all characters that don't fit between 0-9, a-z, and A-Z.
Select * from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name) <> ar.last_name
That should bring back any records that have a last_name with invalid chars for you... though your bonus points question is a bit more of a challenge, I think a case statement could handle it. This is a bit of pseudo-code; I'm not entirely sure if it'd work.
-- For each row with at least one offending field, reports the first of
-- last_name / first_name / Address1 whose cleaned value differs from the
-- stored value, along with that stored value.
-- Address1 is compared with itself, not with last_name, so a clean Address1
-- is not reported merely because it differs from the last name.
Select id,
    case when udf_parsealpha(ar.last_name) <> ar.last_name then 'last name'
         when udf_parsealpha(ar.first_name) <> ar.first_name then 'first name'
         when udf_parsealpha(ar.Address1) <> ar.Address1 then 'Address1'
    end,
    case when udf_parsealpha(ar.last_name) <> ar.last_name then ar.last_name
         when udf_parsealpha(ar.first_name) <> ar.first_name then ar.first_name
         when udf_parsealpha(ar.Address1) <> ar.Address1 then ar.Address1
    end
from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name) <> ar.last_name or
      udf_parsealpha(ar.first_name) <> ar.first_name or
      udf_parsealpha(ar.Address1) <> ar.Address1
I wrote this in the forum post box...so I'm not quite sure if that'll function as is, but it should be close. I'm not quite sure how it will behave if a single record has two fields with invalid chars either.
As an alternative, you should be able to change the from clause away from a single table and into a subquery that looks something like:
-- Unpivots the columns to check into (id, fieldname, value) rows and keeps
-- the rows whose cleaned value differs from the stored value.
-- SQL Server requires an alias on a derived table, hence "as fields".
select id,fieldname,value from (
    Select id,'last_name' as 'fieldname', last_name as 'value'
    from Staging.APARMRE1 ar
    Union
    Select id,'first_name' as 'fieldname', first_name as 'value'
    from Staging.APARMRE1 ar
    ---(and repeat unions for each field)
) as fields
where udf_parsealpha(value) <> value
The benefit here is that for every column you only need to extend the union statement, whereas in the case statement version of this script you need to write that comparison three times for every column.
To find which field has invalid characters:
SELECT * FROM Staging.APARMRE1 FOR XML AUTO, TYPE
You can test it with this query:
SELECT top 1 'char 31: '+char(31)+' (hex 0x1F)' field
from sysobjects
FOR XML AUTO, TYPE
The result will be:
Msg 6841, Level 16, State 1, Line 3 FOR XML could not serialize the
data for node 'field' because it contains a character (0x001F) which
is not allowed in XML. To retrieve this data using FOR XML, convert it
to binary, varbinary or image data type and use the BINARY BASE64
directive.
It is very useful when you write xml files and get error of invalid characters when validate it.
Here is a UDF I built to detect columns with extended ASCII characters. It is quick and you can extend the character set you want to check. The second parameter allows you to switch between checking anything outside the standard character set or allowing an extended set:
-- Returns 1 when @string contains a character whose ASCII code is below 32 or
-- above 126, otherwise 0.
--   @string               : text to inspect.
--   @checkExtendedCharset : when 1, characters whose code is in the allow-list
--                           below are accepted as well; any other value flags
--                           every character outside 32..126.
-- Positions are 1-based and the scan runs through the last character
-- (starting at 0 with "< len" skips the final character).
-- NOTE(review): each character is narrowed to varchar(1) before ascii() is
-- applied, so the code seen depends on the database code page - confirm that
-- this is acceptable for characters the code page cannot represent.
create function [dbo].[udf_ContainsNonASCIIChars]
(
    @string nvarchar(4000),
    @checkExtendedCharset bit
)
returns bit
as
begin
    declare @pos int = 1;
    declare @char varchar(1);
    declare @return bit = 0;
    while @pos <= len(@string)
    begin
        select @char = substring(@string, @pos, 1)
        if ascii(@char) < 32 or ascii(@char) > 126
        begin
            if @checkExtendedCharset = 1
            begin
                if ascii(@char) not in (9,124,130,138,142,146,150,154,158,160,170,176,180,181,183,184,185,186,192,193,194,195,196,197,199,200,201,202,203,204,205,206,207,209,210,211,212,213,214,216,217,218,219,220,221,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,248,249,250,251,252,253,254,255)
                begin
                    -- outside both the standard range and the allow-list: stop scanning
                    select @return = 1;
                    break;
                end
            end
            else
            begin
                -- strict mode: anything outside 32..126 is a hit
                select @return = 1;
                break;
            end
        end
        select @pos = @pos + 1
    end
    return @return;
end
USAGE:
-- Lists addresses containing characters outside the extended allow-list.
-- A scalar user-defined function must be called with its schema name (dbo.).
select Address1
from PropertyFile_English
where dbo.udf_ContainsNonASCIIChars(Address1, 1) = 1

Resources