split numbers based on special characters - sql-server

I have a column as below:
numbers
---------
225/271-2001
5565/2233-123
1392551-6,X117
The rule is: split the number on the dash (-) and the slash (/). The comma should be removed (if present). The 'X' is an extension and must remain as is. I need to separate the values as shown below.
I understand, both the patterns are different. However, I can consider this separate while writing the query.
I would like to split these numbers as below:
number1 | number2
---------------------------
2252001 2712001
5565123 2233123
1392551X117 1392556X117
I tried with below query:
-- Attempt from the question.
-- Column 3: overwrites the tail of the text before the first '-' with the text
--           after the last '-' (for '225/271-2001' this yields '2252001').
-- Column 4: returns everything after the first '/', with the '-' still in it.
-- NOTE(review): the SELECT list reads column `number`, but the WHERE clause
-- filters on `numbers`; one of the two names is presumably a typo.
SELECT
ID, number
, STUFF(
-- string being edited: everything before the first '-'
LEFT(number,CHARINDEX('-',number)-1),
-- start position: length up to and including '-' minus the length of the suffix
LEN(LEFT(number,CHARINDEX('-',number)))-LEN(RIGHT(number,CHARINDEX('-',REVERSE(number))-1)),
-- number of characters to overwrite: length of the suffix after the last '-'
LEN(RIGHT(number,CHARINDEX('-',REVERSE(number))-1)),
-- replacement text: the suffix after the last '-'
RIGHT(number,CHARINDEX('-',REVERSE(number))-1)
)
, SUBSTRING(number,CHARINDEX('/',number)+1,LEN(number)) AS [2ndnumber]
FROM [tablename]
-- Only rows shaped exactly like ddd/ddd-dddd pass this filter.
WHERE numbers LIKE '[0-9][0-9][0-9]/[0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]'
However, I'm not getting the result as expected. What am I doing wrong?

You have different rules for numbers with / and ,. So you have to divide your query. Here's one way:
-- Demo data: one row per input shape from the question.
-- (Table variables must be named with a leading '@'.)
declare @t table (numbers varchar(20))
insert into @t (numbers)
values ('225/271-2001')
     , ('5565/2233-123')
     , ('1392551-6,X117')

-- Shape 'A/B-S': emit A+S and B+S.
--   ci1 = position of '/', ci2 = position of '-'.
select
      number1 = left(numbers, ci1 - 1) + right(numbers, len(numbers) - ci2)
    , number2 = substring(numbers, ci1 + 1, ci2 - ci1 - 1) + right(numbers, len(numbers) - ci2)
from
    @t
    cross apply (select ci1 = charindex('/', numbers), ci2 = charindex('-', numbers)) a
where
    charindex('/', numbers) > 0

union all

-- Shape 'A-R,X': emit A+X, and A with its last len(R) digits replaced by R, +X.
--   ci1 = position of '-', ci2 = position of ','.
select
      left(numbers, ci1 - 1) + right(numbers, len(numbers) - ci2)
    , left(numbers, ci1 - 1 - (ci2 - ci1 - 1))          -- A without its last len(R) digits
      + substring(numbers, ci1 + 1, ci2 - ci1 - 1)      -- R
      + right(numbers, len(numbers) - ci2)              -- X
from
    @t
    cross apply (select ci1 = charindex('-', numbers), ci2 = charindex(',', numbers)) a
where
    charindex(',', numbers) > 0
Output
number1 number2
----------------------------
2252001 2712001
5565123 2233123
1392551X117 1392556X117

Try below, it may help.
-- Sample data covering both input shapes ('#/#-#' and '#-#,#').
CREATE TABLE #temp (ID INT, number VARCHAR(200));
INSERT INTO #temp (ID, number) VALUES (1, '225/271-2001');
INSERT INTO #temp (ID, number) VALUES (2, '5565/2233-123');
INSERT INTO #temp (ID, number) VALUES (3, '1392551-6,X117');
INSERT INTO #temp (ID, number) VALUES (4, '13925515-65,X119');

-- Delimiter positions are computed once per row (CROSS APPLY) and both output
-- columns use the SAME pattern tests, so a row can never be treated as '#/#-#'
-- for one column and fall through to ELSE for the other.
SELECT
    t.ID,
    t.number,
    CASE
        -- Pattern '#/#-#': text before '/' + text after '-'
        WHEN p.slash_pos > 0 AND p.dash_pos > p.slash_pos
            THEN CAST(SUBSTRING(t.number, 1, p.slash_pos - 1) AS VARCHAR(50))
               + CAST(SUBSTRING(t.number, p.dash_pos + 1, LEN(t.number)) AS VARCHAR(50))
        -- Pattern '#-#,#': text before '-' + text after ','
        WHEN p.dash_pos > 0 AND p.comma_pos > p.dash_pos
            THEN CAST(SUBSTRING(t.number, 1, p.dash_pos - 1) AS VARCHAR(50))
               + CAST(SUBSTRING(t.number, p.comma_pos + 1, LEN(t.number)) AS VARCHAR(50))
        ELSE t.number
    END AS [1st number],
    CASE
        -- Pattern '#/#-#': text after '/' with the '-' removed
        WHEN p.slash_pos > 0 AND p.dash_pos > p.slash_pos
            THEN CAST(REPLACE(SUBSTRING(t.number, p.slash_pos + 1, LEN(t.number)), '-', '') AS VARCHAR(50))
        -- Pattern '#-#,#': leading number minus its last k digits, then the k
        -- replacement digits and the extension (k = digits between '-' and ',').
        -- Assumes k does not exceed the length of the leading number; otherwise
        -- LEFT receives a negative length and raises an error.
        WHEN p.dash_pos > 0 AND p.comma_pos > p.dash_pos
            THEN LEFT(CAST(SUBSTRING(t.number, 1, p.dash_pos - 1) AS VARCHAR(50)),
                      (p.dash_pos - 1) - (p.comma_pos - p.dash_pos - 1))
               + CAST(REPLACE(SUBSTRING(t.number, p.dash_pos + 1, LEN(t.number)), ',', '') AS VARCHAR(50))
        ELSE t.number
    END AS [2nd number]
FROM #temp AS t
CROSS APPLY (
    SELECT CHARINDEX('/', t.number) AS slash_pos,
           CHARINDEX('-', t.number) AS dash_pos,
           CHARINDEX(',', t.number) AS comma_pos
) AS p
OUTPUT:
ID number 1st number 2nd number
----- ------------------- --------------- ---------------
1 225/271-2001 2252001 2712001
2 5565/2233-123 5565123 2233123
3 1392551-6,X117 1392551X117 1392556X117
4 13925515-65,X119 13925515X119 13925565X119

This query should give the expected result. Simple query using xml format.
-- Sample data for the XML-based split.
CREATE TABLE #temp(ID INT, number varchar(200))
INSERT INTO #temp VALUES(1,'225/271-2001')
INSERT INTO #temp VALUES(2,'5565/2233-123')
INSERT INTO #temp VALUES(3,'1392551-6,X117')
-- Split_Names turns each value into <Names><name>A</name><name>B</name><name>S</name></Names>
-- so the three parts can be read by position.
;WITH Split_Names (ID,xmlname)
AS
(
SELECT ID,
-- No comma: the value is already in 'A/B-S' form.
-- '-' is first replaced by '<?name><name>' so that the following '/' replacement
-- does not touch the closing tag; the '?' placeholder is turned into '/' last.
xmlname= CASE WHEN CHARINDEX(',',number)=0
THEN CONVERT(xml,'<Names><name>'
+ REPLACE(REPLACE(REPLACE(number,'-', '<?name><name>'),'/', '</name><name>'),'?','/') + '</name></Names>')
-- Comma present ('A-R,X'): first rebuild the value as 'A/A''-X', where A'' is A with
-- its last character replaced by the single character after '-', then apply the
-- same tag substitution as above.
-- NOTE(review): only ONE character after '-' is used (LEFT(...-2) and SUBSTRING(...,1)),
-- so a multi-digit replacement such as '13925515-65,X119' is presumably not handled.
WHEN CHARINDEX(',',number)>0
THEN CONVERT(xml,'<Names><name>'
+ REPLACE(REPLACE(REPLACE(
REPLACE(REPLACE(concat(LEFT(number,(CHARINDEX('-',number)-1)),CAST('/' as char(1)),LEFT(number,(CHARINDEX('-',number)-2)),SUBSTRING(number,CHARINDEX('-',number)+1,1),',',substring(number,CHARINDEX(',',number)+1,LEN(number))),'-','/'),',','-')
,'-', '<?name><name>'),'/', '</name><name>'),'?','/') + '</name></Names>')
END
FROM #temp
)
-- NUMBER1 = first part + shared suffix; NUMBER2 = second part + shared suffix.
select xmlname.value('/Names[1]/name[1]','varchar(100)')+ xmlname.value('/Names[1]/name[3]','varchar(100)') NUMBER1,
xmlname.value('/Names[1]/name[2]','varchar(100)')+ xmlname.value('/Names[1]/name[3]','varchar(100)') NUMBER2
from Split_Names
drop table #temp

Related

Split string by using CHARINDEX() in RIGHT() or SUBSTRING() return incorrect result

I want to split a column into two. I want to select values from where cell value has '(' So here is my requirement:
Input Strings:
col: mystr
----------
123(0)
233 (123)
23 (A)
2 (122)
Required Output:
Output
-------
(0)
(123)
(A)
(122)
I have done following:
-- Question's attempt: take the rightmost (LEN(mystr) - position of '(' + 1) characters.
-- When '(' is absent the CASE substitutes LEN(mystr), so the count becomes
-- LEN - LEN + 1 = 1 and the last character is returned instead of an empty string.
SELECT right(mystr,LEN(mystr)-
CASE WHEN CHARINDEX('(',mystr)=0 THEN LEN(mystr)
ELSE CHARINDEX('(',mystr) END
+ 1)
FROM docs
How it works: I want to find the index of the first '(' and then select everything from that position onward. CHARINDEX() counts from left to right, whereas RIGHT() counts from the end of the string, so instead of:
select right(mystr,CHARINDEX('(',mystr))
I subtracted index from total length LEN(mystr)-CHARINDEX('(',mystr).
I then hit the case where '(' is not found and CHARINDEX() returns 0. To make the whole term 0 in that case, I used:
CASE WHEN CHARINDEX('(',mystr)=0 THEN LEN(mystr)
ELSE CHARINDEX('(',mystr) END
Here first element is not selected so I added +1 to whole term but it results in an extra value:
mystr Out without +1, Out with +1, Out with +1 moved inside else; desired
----- ------------ ----------- ----------------- -------
112 '' 2 '' ''
1(0) 0) (0) ) (0)
1 (12) 12) (12) ) (12)
I have also tried with substring() but it has same issue:
-- Question's SUBSTRING variant of the same idea.
-- Start: position of '(' (or LEN(mystr) when '(' is absent).
-- Length: LEN(mystr) - start + 1, which is 1 when '(' is absent, so the last
-- character is returned for such rows -- the same problem as the RIGHT() version.
SELECT substring(mystr,
CASE WHEN CHARINDEX('(',mystr)=0 THEN LEN(mystr)
ELSE CHARINDEX('(',mystr) END,
LEN(mystr)-CASE WHEN CHARINDEX('(',mystr)=0 THEN LEN(mystr)
ELSE CHARINDEX('(',mystr)END +1) FROM docs
If open to a Table-Valued Functions, consider the following:
Tired of extracting and parsing strings (left(), right(), charindex(), ...), I modified a parse function to accept two non-like delimiters.
Example
-- Demo data, including one row without parentheses.
-- (Table variables must be named with a leading '@'.)
Declare @YourTable table (mystr varchar(50))
Insert Into @YourTable (mystr) values
 ('122'),
 ('123(0)'),
 ('233 (123)'),
 ('23 (A)'),
 ('2 (122)')

-- OUTER APPLY keeps rows for which the function returns nothing;
-- IsNull(...) turns the resulting NULL into an empty string.
Select A.*
      ,NewVal = IsNull('('+B.RetVal+')','') -- Adding back the ()'s
 From  @YourTable A
 Outer Apply [dbo].[tvf-Str-Extract](A.mystr,'(',')') B
Returns
mystr NewVal
122
123(0) (0)
233 (123) (123)
23 (A) (A)
2 (122) (122)
The UDF if Interested
-- Inline table-valued function: returns every piece of @String that sits between
-- an occurrence of @Delimiter1 and the next @Delimiter2.
--   @String     text to search
--   @Delimiter1 opening delimiter
--   @Delimiter2 closing delimiter
-- Result columns:
--   RetSeq  1-based sequence number of the match (ordered by position)
--   RetPos  position in @String of the first character after @Delimiter1
--   RetVal  the text between the two delimiters
-- (Parameters must be named with a leading '@'.)
CREATE FUNCTION [dbo].[tvf-Str-Extract] (@String varchar(max),@Delimiter1 varchar(100),@Delimiter2 varchar(100))
Returns Table
As
Return (
    -- cte1: 10 rows; cross-joined six times in cte2 to number up to 1,000,000 positions.
    with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
    -- cte2: one row per character position of @String.
         cte2(N) As (Select Top (IsNull(DataLength(@String),0)) Row_Number() over (Order By (Select NULL)) From (Select N=1 From cte1 N1,cte1 N2,cte1 N3,cte1 N4,cte1 N5,cte1 N6) A ),
    -- cte3: position 1 plus the position just past every occurrence of @Delimiter1.
         cte3(N) As (Select 1 Union All Select t.N+DataLength(@Delimiter1) From cte2 t Where Substring(@String,t.N,DataLength(@Delimiter1)) = @Delimiter1),
    -- cte4: segment length = distance to the next @Delimiter1 (8000 when there is none).
         cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(@Delimiter1,@String,s.N),0)-S.N,8000) From cte3 S)
    Select RetSeq = Row_Number() over (Order By N)
          ,RetPos = N
          ,RetVal = left(RetVal,charindex(@Delimiter2,RetVal)-1)
     From (
            Select *,RetVal = Substring(@String, N, L)
             From cte4
          ) A
    -- Keep only segments that contain @Delimiter2 after at least one character.
     Where charindex(@Delimiter2,RetVal)>1
)
/*
Max Length of String 1MM characters
Declare @String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[tvf-Str-Extract] (@String,'[[',']]')
*/
Try this:
-- Returns the text from the first '(' to the end of @x, or NULL when there is no '('.
-- (Local variables must be named with a leading '@'.)
DECLARE @x NVARCHAR(20) = '123(A)';
SELECT CASE
           WHEN CHARINDEX('(', @x) = 0 THEN NULL
           ELSE RIGHT(@x, LEN(@x) - CHARINDEX('(', @x) + 1)
       END AS x
In your case there is nothing after the last ), so you can simply pass a sufficiently large number as the count of characters to take:
-- Demo data. (Table variables must be named with a leading '@'.)
DECLARE @DataSource TABLE
(
    [value] VARCHAR(48)
);
INSERT INTO @DataSource ([value])
VALUES ('123(0)')
      ,('233 (123)')
      ,('23 (A)')
      ,('2 (122)');

-- Everything from the first '(' to the end of the string; '' when there is no '('.
-- 100 is simply "more characters than the column can hold" (the column is VARCHAR(48)).
SELECT CASE
           WHEN CHARINDEX('(', [value]) <> 0
               THEN SUBSTRING([value], CHARINDEX('(', [value]), 100)
           ELSE ''
       END
FROM @DataSource;
If there are values after the final ):
-- Demo data with trailing text after the closing parenthesis.
-- (Table variables must be named with a leading '@'.)
DECLARE @DataSource TABLE
(
    [value] VARCHAR(48)
);
INSERT INTO @DataSource ([value])
VALUES ('123(0) test')
      ,('233 (123) test 12')
      ,('23 (A)')
      ,('2 (122) sometthing');

-- Returns the first '(...)' group including both parentheses.
-- The closing ')' is searched for only AFTER the opening '(', and rows without a
-- well-formed pair return '' instead of passing a negative length to SUBSTRING
-- (which raises an error).
SELECT CASE
           WHEN o.open_pos > 0 AND c.close_pos > o.open_pos
               THEN SUBSTRING(ds.[value], o.open_pos, c.close_pos - o.open_pos + 1)
           ELSE ''
       END
FROM @DataSource AS ds
CROSS APPLY (SELECT CHARINDEX('(', ds.[value]) AS open_pos) AS o
CROSS APPLY (SELECT CHARINDEX(')', ds.[value], o.open_pos + 1) AS close_pos) AS c;

Regular expressions in TSQL

In cells of column e_vis_name I have organization structure where divisions divided with \ symbol, e.g.
Moscow\Direction
Yaroslavl\Sales
Omsk\Commercial center\Sales
I need to cut everything after first \ symbol to get the following result:
Moscow
Yaroslavl
Omsk
How can I do it?
You can use a combination of LEFT and CHARINDEX like this:
SELECT LEFT(colname, CHARINDEX('\', colname)-1) FROM table
EDIT: In the case where you don't have a \ symbol, if you just want to grab the whole column instead you can do this:
-- First path segment of colname:
--   NULL input      -> ''
--   contains a '\'  -> the text before the first '\'
--   no '\'          -> the whole value
SELECT
    CASE
        WHEN colname IS NULL THEN ''
        WHEN CHARINDEX('\', colname) > 0 THEN LEFT(colname, CHARINDEX('\', colname) - 1)
        ELSE colname
    END
FROM table
This says, "If there is a \, then take the characters up to that point, otherwise take the whole column. And if the column is NULL then just set an empty string."
I'm sure you can adapt this to your purposes.
There are many ways to achieve what you need; several examples follow.
--------------------------------------------------------------------------------
-- TABLE VARIABLE WITH DATA SAMPLE
-- (table variables must be named with a leading '@'; run the whole script as one batch)
DECLARE @table AS TABLE ( Division VARCHAR(100) )
INSERT INTO @table ( Division )
VALUES ( 'Moscow\Direction' )
     , ( 'Yaroslavl\Sales' )
     , ( 'Omsk\Commercial center\Sales' )
     , ( 'Voronezh' )
--------------------------------------------------------------------------------
-- variant using PARSENAME
-- '\' is mapped to '.' and the string reversed so that part 1 is the first segment.
SELECT REVERSE(PARSENAME(REVERSE(REPLACE(Division, '\', '.')), 1)) AS Town
FROM @table AS T
-------------------------------------------------------------------------------
-- variant using SUBSTRING AND CHARINDEX
-- no '\' -> take the whole value
SELECT SUBSTRING(Division, 1,
                 CASE WHEN CHARINDEX('\', Division) = 0 THEN LEN(Division)
                      ELSE CHARINDEX('\', Division) - 1
                 END) AS Town
FROM @table AS T
--------------------------------------------------------------------------------
-- variant using SUBSTRING AND PATINDEX
SELECT SUBSTRING(Division, 1,
                 CASE WHEN PATINDEX('%\%', Division) = 0 THEN LEN(Division)
                      ELSE PATINDEX('%\%', Division) - 1
                 END) AS Town
FROM @table AS T
--------------------------------------------------------------------------------
-- variant using LEFT AND PATINDEX
SELECT LEFT(Division,
            CASE WHEN PATINDEX('%\%', Division) = 0 THEN LEN(Division)
                 ELSE PATINDEX('%\%', Division) - 1
            END) AS Town
FROM @table AS T
--------------------------------------------------------------------------------
-- variant using LEFT AND CHARINDEX
SELECT LEFT(Division,
            CASE WHEN CHARINDEX('\', Division) = 0 THEN LEN(Division)
                 ELSE CHARINDEX('\', Division) - 1
            END) AS Town
FROM @table AS T
--------------------------------------------------------------------------------
-- variant using recursive cte, substring, top with ties by Row_number()
-- tally yields 1..100; a '\' is appended so values without one still match,
-- and TOP 1 WITH TIES keeps the first '\' position of every Division.
;
WITH tally
AS (SELECT n = 1
    UNION ALL
    SELECT n = n + 1
    FROM tally
    WHERE n < 100)
SELECT TOP 1 WITH TIES SUBSTRING(A.Division, 1, B.n - 1) AS Town
FROM @table AS A
JOIN tally AS B ON SUBSTRING(A.Division + '\', B.n, 1) = '\'
ORDER BY ROW_NUMBER() OVER (PARTITION BY A.Division ORDER BY B.n)
--------------------------------------------------------------------------------
-- variant using recursive cte, substring, row_number, subquery
;
WITH tally
AS (SELECT n = 1
    UNION ALL
    SELECT n = n + 1
    FROM tally
    WHERE n < 100)
SELECT T.TOWN
FROM (SELECT SUBSTRING(A.Division, 1, B.n - 1) AS TOWN,
             ROW_NUMBER() OVER (PARTITION BY A.Division ORDER BY B.n) AS RN
      FROM @table AS A
      JOIN tally AS B ON SUBSTRING(A.Division + '\', B.n, 1) = '\'
     ) AS T
WHERE RN = 1
As some alternatives:
Using LEFT :
REPLACE(LEFT(e_vis_name, CHARINDEX('\', e_vis_name + '\', 1)), '\', '')
Using SUBSTRING :
REPLACE(SUBSTRING(e_vis_name, 1, CHARINDEX('\', e_vis_name + '\', 1)), '\', '')
Using STUFF :
STUFF(e_vis_name + '\', CHARINDEX('\', e_vis_name + '\', 1), 512, '')
Using PARSENAME :
REVERSE(PARSENAME(REVERSE(REPLACE(e_vis_name, '\','.')), 1))
-- or REPLACE(REVERSE(PARSENAME(REPLACE(REVERSE(REPLACE(e_vis_name, '.', CHAR(8))), '\','.'), 1)), CHAR(8), '.')
or
PARSENAME(REPLACE(e_vis_name, '\','.'), LEN(e_vis_name) - LEN(REPLACE(e_vis_name, '\', '')) + 1)
-- or REPLACE(PARSENAME(REPLACE(REPLACE(e_vis_name, '.', CHAR(8)), '\','.'), LEN(e_vis_name) - LEN(REPLACE(e_vis_name, '\', '')) + 1) , CHAR(8), '.')

Find missing integers in a list of Values

Currently, I have 12 rows with column Named 'Value'. The sample like this (just sample data, real data will be more):
Value
1
2
3
4
6
7
8
9
10
11
12
14
What I want is select them to get result like this:
Result Result_Miss
1-4, 6-12, 14 5, 13
I want to avoid using a cursor to work row-by-row.
Dynamic, set-based approach using CTEs to hunt down the missing values, and write out the ranges available based on those missing values.
--(I can't seem to get SqlFiddle to work with CTE's or I'd post one up here)--
Reworked to be more dynamic for number of records:
This works provided the value 1 is always present in your set of values.
-- Sample data: integers with single-value gaps (5, 7 and 13 are absent).
CREATE TABLE #OneTen
(
Value INT NOT NULL
);
INSERT INTO #OneTen
VALUES (1), (2), (3), (4), (6), (8), (9), (10), (11), (12), (14);
-- ExpectedActual: pairs each value (Actual) with its 1-based rank (Expected).
WITH ExpectedActual AS
(
SELECT ot.Value AS Actual, ROW_NUMBER() OVER (ORDER BY Value) AS Expected
FROM #OneTen AS ot
)
-- DegreesOff: Change = Actual - Expected; rows in one unbroken run share the same Change.
, DegreesOff AS
(
SELECT ea.Expected, ea.Actual, (ea.Actual - ea.Expected) AS Change
FROM ExpectedActual AS ea
)
-- Missing: one row per run. Missing is the value immediately before the run's
-- first Actual (MIN(Expected) + Change - 1), or 0 for the run that starts at rank 1.
-- A final sentinel row (max Actual + 1) closes the last run.
-- NOTE(review): the sentinel's RowNumber is MAX(Change) + 2, which lines up with the
-- per-run row numbers only when every gap is exactly one value wide; wider gaps
-- look unsupported -- confirm before relying on this for real data.
, Missing AS
(
SELECT CASE
WHEN MIN(do.Expected) = 1 THEN 0
ELSE MIN(do.Expected) + do.Change - 1
END AS Missing
, ROW_NUMBER() OVER (ORDER BY MIN(do.Expected)) AS RowNumber
FROM DegreesOff AS do
GROUP BY do.Change
UNION ALL
SELECT MAX(do.Actual + 1), MAX(do.Change + 2) --Adding Last Value 1 higher than Actual so the code below that takes mNext.Missing - 1 brings it down to the proper value:
--Change + 2 to account for 0 plus being 1 higher
FROM DegreesOff AS do
)
-- Result: each run is rendered as "start-end" (or just "start" for a single value),
-- where start = Missing + 1 and end = next row's Missing - 1.
-- The casts use NVARCHAR(4), so values are limited to four characters.
SELECT STUFF((
SELECT ', ' + CASE
WHEN m.Missing + 1 = mNext.Missing - 1 THEN CAST(m.Missing + 1 AS NVARCHAR(4))
ELSE CAST(m.Missing + 1 AS NVARCHAR(4)) + '-' + CAST(mNext.Missing - 1 AS NVARCHAR(4))
END
FROM Missing AS m
LEFT JOIN Missing AS mNext ON m.RowNumber = mNext.RowNumber - 1
FOR XML PATH('')), 1, 2, '') AS Result
-- Result_Miss: for every run after the first, the value just before it.
, STUFF((
SELECT ', ' + CAST(MIN(do.Expected + do.Change - 1) AS NVARCHAR(4))
FROM DegreesOff AS do
WHERE do.Change > 0
GROUP BY do.Change
FOR XML PATH('')), 1, 2, '') AS Result_Miss
Try the following script:
DDL
-- Sample table: a single non-nullable integer column.
CREATE TABLE Numbers (Value INT NOT NULL);

-- Sample rows; 5 and 11 are intentionally absent.
INSERT INTO Numbers (Value)
VALUES (1), (2), (3), (4),
       (6), (7), (8), (9), (10),
       (12), (13);
Script
-- Lists the ranges of values present in Numbers (Result) and the values missing
-- between MIN(Value) and MAX(Value) (Result_Miss).
-- (Local variables and table variables must be named with a leading '@'.)
DECLARE @MinValue INT;
DECLARE @MaxValue INT;
DECLARE @Temp TABLE (MissingValues INT);
DECLARE @MissingValues VARCHAR(MAX);   -- MAX so a long list is not silently truncated

SELECT @MinValue = MIN(Value),
       @MaxValue = MAX(Value)
FROM Numbers;

-- Generate every integer in [@MinValue, @MaxValue] and keep those absent from Numbers.
WITH CTE AS
(
    SELECT @MinValue AS Value
    UNION ALL
    SELECT Value + 1
    FROM CTE
    WHERE Value + 1 <= @MaxValue
)
INSERT INTO @Temp (MissingValues)
SELECT CTE.Value
FROM CTE
LEFT JOIN Numbers AS N
    ON CTE.Value = N.Value
WHERE N.Value IS NULL
OPTION (MAXRECURSION 1000);   -- raise this if the value span exceeds ~1000

-- Comma-separated list of the missing values, in ascending order.
SELECT @MissingValues =
    STUFF((SELECT ',' + CAST(MissingValues AS VARCHAR(11))
           FROM @Temp
           ORDER BY MissingValues
           FOR XML PATH('')), 1, 1, '');

-- Sentinels just outside the data so the first and last ranges have boundaries.
INSERT INTO @Temp (MissingValues)
SELECT @MinValue - 1
UNION ALL
SELECT @MaxValue + 1;

-- Each pair of neighbouring boundaries encloses one range of present values.
WITH CTE AS
(
    SELECT MissingValues,
           ROW_NUMBER() OVER (ORDER BY MissingValues ASC) AS RN
    FROM @Temp
)
, Ranges AS
(
    SELECT T1.MissingValues + 1 AS RangeStart,
           T2.MissingValues - 1 AS RangeEnd
    FROM CTE AS T1
    INNER JOIN CTE AS T2
        ON T1.RN = T2.RN - 1
    -- Two adjacent missing values enclose nothing; without this filter they
    -- would be printed as a reversed range such as "6-5".
    WHERE T2.MissingValues - T1.MissingValues > 1
)
SELECT STUFF((SELECT ',' + CAST(R.RangeStart AS VARCHAR(11))
                     -- a single present value is printed as "14", not "14-14"
                     + CASE WHEN R.RangeEnd > R.RangeStart
                            THEN '-' + CAST(R.RangeEnd AS VARCHAR(11))
                            ELSE ''
                       END
              FROM Ranges AS R
              ORDER BY R.RangeStart
              FOR XML PATH('')), 1, 1, '') AS Result,
       @MissingValues AS Result_Miss;

Find non-ASCII characters in varchar columns using SQL Server

How can rows with non-ASCII characters be returned using SQL Server?
If you can show how to do it for one column would be great.
I am doing something like this now, but it is not working
select *
from Staging.APARMRE1 as ar
where ar.Line like '%[^!-~ ]%'
For extra credit, if it can span all varchar columns in a table, that would be outstanding! In this solution, it would be nice to return three columns:
The identity field for that record. (This will allow the whole record to be reviewed with another query.)
The column name
The text with the invalid character
Id | FieldName | InvalidText |
----+-----------+-------------------+
25 | LastName | Solís |
56 | FirstName | François |
100 | Address1 | 123 Ümlaut street |
Invalid characters would be any outside the range of SPACE (decimal 32) through ~ (decimal 127)
Here is a solution for the single column search using PATINDEX.
It also displays the StartPosition, InvalidCharacter and ASCII code.
-- Rows of staging.APARMRE1 whose Line contains a character outside ' ' and '!'..'~'.
-- The binary collation makes the bracket range compare by code point.
-- The first offending position is computed once and reused for every output column.
SELECT
    src.line,
    hit.pos                               AS [Position],
    SUBSTRING(src.line, hit.pos, 1)       AS [InvalidCharacter],
    ASCII(SUBSTRING(src.line, hit.pos, 1)) AS [ASCIICode]
FROM staging.APARMRE1 AS src
CROSS APPLY (
    SELECT PATINDEX('%[^ !-~]%' COLLATE Latin1_General_BIN, src.Line) AS pos
) AS hit
WHERE hit.pos > 0
I've been running this bit of code with success
-- Demo data: one value with a character that does not survive conversion to varchar.
-- (Table variables must be named with a leading '@'.)
declare @UnicodeData table (
    data nvarchar(500)
)
insert into @UnicodeData (data)
values
     (N'Horse�')
    ,(N'Dog')
    ,(N'Cat')

-- A row is reported when converting it to varchar changes it, compared under a
-- binary collation so that the comparison is exact.
select
    data
from
    @UnicodeData
where
    data collate LATIN1_GENERAL_BIN != cast(data as varchar(max))
Which works well for known columns.
For extra credit, I wrote this quick script to search all nvarchar columns in a given table for Unicode characters.
-- Builds and runs one "select ... union all select ..." statement that checks every
-- nvarchar column of @table for values that change when cast to varchar.
-- (Local variables must be named with a leading '@'.)
declare
     @sql nvarchar(max) = N''
    ,@table sysname = N'mytable' -- enter your table here

;with ColumnData as (
    select
         -- QUOTENAME escapes any ']' inside the identifier, unlike manual '[' + name + ']'
         ColumnName = quotename(c.COLUMN_NAME)
        ,TableName = quotename(c.TABLE_SCHEMA) + N'.' + quotename(c.TABLE_NAME)
    from
        INFORMATION_SCHEMA.COLUMNS c
    where
        c.DATA_TYPE = 'nvarchar'
        and c.TABLE_NAME = @table
)
select
    -- Every fragment is PREFIXED with 'union all ' and the first prefix is removed
    -- afterwards, so the result is valid regardless of the order in which rows
    -- are concatenated.
    @sql = @sql
        + N'union all '
        + N'select FieldName = N''' + replace(c.ColumnName, N'''', N'''''') + N''''
        + N', InvalidCharacter = ' + c.ColumnName
        + N' from ' + c.TableName
        + N' where ' + c.ColumnName + N' collate LATIN1_GENERAL_BIN != cast(' + c.ColumnName + N' as varchar(max)) '
        + nchar(13)
from
    ColumnData c

-- Nothing to run when the table has no nvarchar columns.
if @sql <> N''
begin
    set @sql = stuff(@sql, 1, 10, N'')   -- drop the leading 'union all ' (10 characters)
    -- check
    -- print @sql
    exec (@sql)
end
I'm not a fan of dynamic SQL but it does have its uses for exploratory queries like this.
try something like this:
-- Demo data: char(182) is planted at various positions in the three columns.
-- (Table variables must be named with a leading '@'.)
DECLARE @YourTable table (PK int, col1 varchar(20), col2 varchar(20), col3 varchar(20));
INSERT @YourTable (PK, col1, col2, col3) VALUES (1, 'ok','ok','ok');
INSERT @YourTable (PK, col1, col2, col3) VALUES (2, 'BA'+char(182)+'D','ok','ok');
INSERT @YourTable (PK, col1, col2, col3) VALUES (3, 'ok',char(182)+'BAD','ok');
INSERT @YourTable (PK, col1, col2, col3) VALUES (4, 'ok','ok','B'+char(182)+'AD');
INSERT @YourTable (PK, col1, col2, col3) VALUES (5, char(182)+'BAD','ok',char(182)+'BAD');
INSERT @YourTable (PK, col1, col2, col3) VALUES (6, 'BAD'+char(182),'B'+char(182)+'AD','BAD'+char(182)+char(182)+char(182));

--if you have a Numbers table use that, other wise make one using a CTE
WITH AllNumbers AS
(   SELECT 1 AS Number
    UNION ALL
    SELECT Number+1
        FROM AllNumbers
        WHERE Number<1000
)
-- Each branch joins a column to one number per character position and keeps the
-- rows where that character's code is below 32 or above 127.
-- UNION (not UNION ALL) is deliberate: a value with several bad characters matches
-- once per character and must be reported only once.
SELECT
    pk, 'Col1' BadValueColumn, CONVERT(varchar(20),col1) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
    FROM @YourTable y
        INNER JOIN AllNumbers n ON n.Number <= LEN(y.col1)
    WHERE ASCII(SUBSTRING(y.col1, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col1, n.Number, 1))>127
UNION
SELECT
    pk, 'Col2' BadValueColumn, CONVERT(varchar(20),col2) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
    FROM @YourTable y
        INNER JOIN AllNumbers n ON n.Number <= LEN(y.col2)
    WHERE ASCII(SUBSTRING(y.col2, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col2, n.Number, 1))>127
UNION
SELECT
    pk, 'Col3' BadValueColumn, CONVERT(varchar(20),col3) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
    FROM @YourTable y
        INNER JOIN AllNumbers n ON n.Number <= LEN(y.col3)
    WHERE ASCII(SUBSTRING(y.col3, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col3, n.Number, 1))>127
ORDER BY pk   -- by name rather than by ordinal position
OPTION (MAXRECURSION 1000);
OUTPUT:
pk BadValueColumn BadValue
----------- -------------- --------------------
2 Col1 BA¶D
3 Col2 ¶BAD
4 Col3 B¶AD
5 Col1 ¶BAD
5 Col3 ¶BAD
6 Col1 BAD¶
6 Col2 B¶AD
6 Col3 BAD¶¶¶
(8 row(s) affected)
This script searches for non-ascii characters in one column. It generates a string of all valid characters, here code point 32 to 127. Then it searches for rows that don't match the list:
-- Builds a LIKE character class of all valid characters (code points 32..127),
-- each preceded by the escape character '|', then returns rows containing any
-- character outside that class.
-- (Local variables must be named with a leading '@'.)
-- The list needs 96 code points x 2 characters = 192 characters; a varchar(128)
-- would silently cut it off after code point 95 and flag every later character
-- (including all lowercase letters) as invalid.
declare @str varchar(256);
declare @i int;
set @str = '';
set @i = 32;
while @i <= 127
begin
    set @str = @str + '|' + char(@i);
    set @i = @i + 1;
end;
select col1
from YourTable
where col1 like '%[^' + @str + ']%' escape '|';
Running the various solutions on some real-world data (12M rows, varchar length ~30, around 9k dodgy rows, no full-text index in play), the PATINDEX solution is the fastest, and it also selects the most rows.
(pre-ran km. to set the cache to a known state, ran the 3 processes, and finally ran km again - the last 2 runs of km gave times within 2 seconds)
patindex solution by Gerhard Weiss -- Runtime 0:38, returns 9144 rows
select dodgyColumn from myTable fcc
WHERE patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,dodgyColumn ) >0
the substring-numbers solution by MT. -- Runtime 1:16, returned 8996 rows
-- Benchmark variant of the substring-numbers approach, kept as it was run.
-- NOTE(review): the join uses dn.number < LEN(...), so the final character of each
-- value is never tested; the answer this is adapted from joins on <= LEN(...).
-- That may contribute to the lower row count reported for this run -- confirm.
-- There is no DISTINCT/UNION here, so a value yields one row per offending character.
select dodgyColumn from myTable fcc
INNER JOIN dbo.Numbers32k dn ON dn.number<(len(fcc.dodgyColumn ))
WHERE ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))<32
OR ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))>127
udf solution by Deon Robertson -- Runtime 3:47, returns 7316 rows
select dodgyColumn
from myTable
where dbo.udf_test_ContainsNonASCIIChars(dodgyColumn , 1) = 1
There is a user defined function available on the web 'Parse Alphanumeric'. Google UDF parse alphanumeric and you should find the code for it. This user defined function removes all characters that doesn't fit between 0-9, a-z, and A-Z.
Select * from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name) <> ar.last_name
That should bring back any records that have a last_name with invalid characters. Your bonus-points question is more of a challenge, but I think a CASE expression could handle it. This is pseudo-code; I'm not entirely sure it will work as written.
-- Reports, per row, the FIRST of last_name / first_name / Address1 whose value is
-- changed by udf_parsealpha, together with that value.
-- Each column is compared with ITSELF after parsing (Address1 was previously
-- compared against last_name, a copy-paste slip).
Select id,
       case when udf_parsealpha(ar.last_name)  <> ar.last_name  then 'last name'
            when udf_parsealpha(ar.first_name) <> ar.first_name then 'first name'
            when udf_parsealpha(ar.Address1)   <> ar.Address1   then 'Address1'
       end,
       case when udf_parsealpha(ar.last_name)  <> ar.last_name  then ar.last_name
            when udf_parsealpha(ar.first_name) <> ar.first_name then ar.first_name
            when udf_parsealpha(ar.Address1)   <> ar.Address1   then ar.Address1
       end
from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name)  <> ar.last_name
   or udf_parsealpha(ar.first_name) <> ar.first_name
   or udf_parsealpha(ar.Address1)   <> ar.Address1
I wrote this in the forum post box...so I'm not quite sure if that'll function as is, but it should be close. I'm not quite sure how it will behave if a single record has two fields with invalid chars either.
As an alternative, you should be able to change the from clause away from a single table and into a subquery that looks something like:
-- Unpivots the columns to check into (id, fieldname, value) rows and keeps the
-- rows whose value is changed by udf_parsealpha.
-- SQL Server requires an alias on a derived table, hence "as fields".
select id,fieldname,value from (
    Select id,'last_name' as 'fieldname', last_name as 'value'
    from Staging.APARMRE1 ar
    Union
    Select id,'first_name' as 'fieldname', first_name as 'value'
    from Staging.APARMRE1 ar
    ---(and repeat unions for each field)
) as fields
where udf_parsealpha(value) <> value
The benefit here is that for every additional column you only need to extend the union statement, whereas the CASE version of this script needs the comparison repeated three times per column.
To find which field has invalid characters:
SELECT * FROM Staging.APARMRE1 FOR XML AUTO, TYPE
You can test it with this query:
SELECT top 1 'char 31: '+char(31)+' (hex 0x1F)' field
from sysobjects
FOR XML AUTO, TYPE
The result will be:
Msg 6841, Level 16, State 1, Line 3 FOR XML could not serialize the
data for node 'field' because it contains a character (0x001F) which
is not allowed in XML. To retrieve this data using FOR XML, convert it
to binary, varbinary or image data type and use the BINARY BASE64
directive.
It is very useful when you write xml files and get error of invalid characters when validate it.
Here is a UDF I built to detect columns with extended ASCII characters. It is quick, and you can extend the character set you want to check. The second parameter allows you to switch between checking anything outside the standard character set or allowing an extended set:
-- Returns 1 when @string contains a character whose ASCII code is below 32 or
-- above 126 and that is not excused by the extended set; otherwise 0.
--   @string               text to inspect (NULL yields 0)
--   @checkExtendedCharset 1 = also accept the extended codes listed below;
--                         any other value = flag every code outside 32..126
-- (Parameters and local variables must be named with a leading '@'.)
create function [dbo].[udf_ContainsNonASCIIChars]
(
    @string nvarchar(4000),
    @checkExtendedCharset bit
)
returns bit
as
begin
    -- SUBSTRING positions are 1-based: scan 1..LEN inclusive so that the first
    -- AND the last character are both examined.
    declare @pos int = 1;
    declare @len int = len(@string);
    declare @char varchar(1);
    declare @code int;

    while @pos <= @len
    begin
        set @char = substring(@string, @pos, 1);
        set @code = ascii(@char);

        if @code < 32 or @code > 126
        begin
            -- Outside the standard range: acceptable only when the extended set
            -- is enabled and the code is part of it.
            if isnull(@checkExtendedCharset, 0) <> 1
               or @code not in (9,124,130,138,142,146,150,154,158,160,170,176,180,181,183,184,185,186,192,193,194,195,196,197,199,200,201,202,203,204,205,206,207,209,210,211,212,213,214,216,217,218,219,220,221,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,248,249,250,251,252,253,254,255)
                return 1;
        end

        set @pos = @pos + 1;
    end

    return 0;
end
USAGE:
-- Rows whose Address1 contains a character outside the accepted (extended) set.
-- Scalar user-defined functions must be called with their schema name; the
-- function is created as [dbo].[udf_ContainsNonASCIIChars].
select Address1
from PropertyFile_English
where dbo.udf_ContainsNonASCIIChars(Address1, 1) = 1

Converting an integer to a 0-padded string

In SQL Server 2008, I want to represent an integer as a 3-character string - so:
3 becomes '003'
5 becomes '005'
107 becomes '107'
How can I do this?
/* Method 1 Using RIGHT function */
SELECT RIGHT('000' + CAST(NumericColumn AS VARCHAR(3)), 3) PaddedCnumericColumn
FROM MyTable
/* Method 2 Using RIGHT AND REPLICATE function */
SELECT RIGHT(REPLICATE('0', 3) + CAST(NumericColumn AS VARCHAR(3)), 3) PaddedCnumericColumn
FROM MyTable
You can try this
-- Demo data. (Table variables must be named with a leading '@'.)
DECLARE @Table TABLE(
    Val INT
)
INSERT INTO @Table (Val) SELECT 1
INSERT INTO @Table (Val) SELECT 10
INSERT INTO @Table (Val) SELECT 100

-- Left-pad to three characters with as many zeros as the value is short.
-- Only 0..999 can be represented as three digits; negative values would
-- otherwise yield malformed strings such as '0-5'.
SELECT REPLICATE('0', 3 - LEN(CAST(Val AS VARCHAR(3)))) + CAST(Val AS VARCHAR(3))
FROM @Table
WHERE Val BETWEEN 0 AND 999

Resources