Counting number of Populated Columns by Row - sql-server

I wanted to know if there is a way of counting the number of populated columns per row of a table.
For example if I have the simple table below Called Customer:
**Name** **Customer** **DOB** **Order number** **Populated Columns**
ABC Ltd Jo Blogg 2/1/78 123 3
Umbrella Co A Sherman 232 2
Nike 14/5/98 1
What I want is a query which will give me an extra column with a number saying how many columns have a value in them.
Any ideas?

Can be done via trivial check on NULL (and empty strings for such columns):
SELECT
[Name]
, [Customer]
, [DOB]
, [Order number]
, CASE WHEN ISNULL([Name], '') != '' THEN 1 ELSE 0 END
+ CASE WHEN ISNULL([Customer], '') != '' THEN 1 ELSE 0 END
+ CASE WHEN [DOB] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [Order number] IS NOT NULL THEN 1 ELSE 0 END AS [Populated Columns]
This will work nicely for a fixed and known number of columns.
Such an approach can be perhaps more universal if columns list fetched from the metadata. As a downside - this requires a dynamic SQL.
Below is an example for SQL Server 2017 and higher:
DECLARE #_SQL NVARCHAR(max)
DECLARE #_TableName sysname = 'Table1'
SELECT #_SQL =
'SELECT '
+ STRING_AGG(QUOTENAME(COLUMN_NAME), ',
')
+ ', '
+ STRING_AGG('
CASE WHEN ['+COLUMN_NAME+'] IS NOT NULL THEN 1 ELSE 0 END', ' +')
+ ' AS [Populated Columns]
FROM ' + QUOTENAME(MIN(TABLE_SCHEMA)) + '.' + QUOTENAME(MIN(TABLE_NAME))
FROM INFORMATION_SCHEMA.COLUMNs
WHERE TABLE_NAME = #_TableName
EXEC sys.sp_executesql #_SQL
It will generate and execute a code:
SELECT
[Col1],
[Col2],
[Col3],
CASE WHEN [Col1] IS NOT NULL THEN 1 ELSE 0 END +
CASE WHEN [Col2] IS NOT NULL THEN 1 ELSE 0 END +
CASE WHEN [Col3] IS NOT NULL THEN 1 ELSE 0 END AS [Populated Columns]
FROM [dbo].[Table1]
In older versions, such result is achievable but with other string aggregation workarounds, like XML STUFF or SQLCLR functions...

Just thought of sharing another approach using UNPIVOT to calculate the same, assuming that you will have a primary key/identity in your table.
declare #tmp table (id int, [Name] varchar(100), Customer varchar(100), dob datetime, orderno int)
insert into #tmp select 1, 'name1','c1',getdate(),123
insert into #tmp select 2,'name2',null,getdate(),123
insert into #tmp select 3,'name3',null,null,null
SELECT t.*,
t1.notpopulated
FROM #tmp t
INNER JOIN (SELECT 4 - Count(*) AS NotPopulated,
id
FROM
(SELECT id,
u.x,
u.y
FROM (SELECT id,
Cast([name]AS VARCHAR(100)) [name],
Cast(customer AS VARCHAR(100)) AS customer,
Cast(dob AS VARCHAR(100)) AS dob1,
Cast(orderno AS VARCHAR(100)) orderno
FROM #tmp) AS s
UNPIVOT ( [y]
FOR [x] IN ([name],
[Customer],
dob1,
[orderno]) ) u) t
GROUP BY id) t1
ON t1.id = t.id
Online Demo

Related

return value at a position from STRING_SPLIT in SQL Server 2016

Can I return a value at a particular position with the STRING_SPLIT function in SQL Server 2016 or higher?
I know the order from a select is not guaranteed, but is it with STRING_SPLIT?
DROP TABLE IF EXISTS #split
SELECT 'z_y_x' AS splitIt
INTO #split UNION
SELECT 'a_b_c'
SELECT * FROM #split;
WITH cte
AS (
SELECT ROW_NUMBER() OVER ( PARTITION BY s.splitIt ORDER BY s.splitIt ) AS position,
s.splitIt,
value
FROM #split s
CROSS APPLY STRING_SPLIT(s.splitIt, '_')
)
SELECT * FROM cte WHERE position = 2
Will this always return the value at the 2nd element? b for a_b_c and y for z_y_x?
I don't understand why Microsoft doesn't return a position indicator column alongside the value for this function.
There is - starting with v2016 - a solution via FROM OPENJSON():
DECLARE #str VARCHAR(100) = 'val1,val2,val3';
SELECT *
FROM OPENJSON('["' + REPLACE(#str,',','","') + '"]');
The result
key value type
0 val1 1
1 val2 1
2 val3 1
The documentation tells clearly:
When OPENJSON parses a JSON array, the function returns the indexes of the elements in the JSON text as keys.
For your case this was:
SELECT 'z_y_x' AS splitIt
INTO #split UNION
SELECT 'a_b_c'
DECLARE #delimiter CHAR(1)='_';
SELECT *
FROM #split
CROSS APPLY OPENJSON('["' + REPLACE(splitIt,#delimiter,'","') + '"]') s
WHERE s.[key]=1; --zero based
Let's hope, that future versions of STRING_SPLIT() will include this information
UPDATE Performance tests, compare with popular Jeff-Moden-splitter
Try this out:
USE master;
GO
CREATE DATABASE dbTest;
GO
USE dbTest;
GO
--Jeff Moden's splitter
CREATE FUNCTION [dbo].[DelimitedSplit8K](#pString VARCHAR(8000), #pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
;
GO
--Avoid first call bias
SELECT * FROM dbo.DelimitedSplit8K('a,b,c',',');
GO
--Table to keep the results
CREATE TABLE Results(ID INT IDENTITY,ResultSource VARCHAR(100),durationMS INT, RowsCount INT);
GO
--Table with strings to split
CREATE TABLE dbo.DelimitedItems(ID INT IDENTITY,DelimitedNString nvarchar(4000),DelimitedString varchar(8000));
GO
--Get rows wiht randomly mixed strings of 100 items
--Try to play with the count of rows (count behind GO) and the count with TOP
INSERT INTO DelimitedItems(DelimitedNString)
SELECT STUFF((
SELECT TOP 100 ','+REPLACE(v.[name],',',';')
FROM master..spt_values v
WHERE LEN(v.[name])>0
ORDER BY NewID()
FOR XML PATH('')),1,1,'')
--Keep it twice in varchar and nvarchar
UPDATE DelimitedItems SET DelimitedString=DelimitedNString;
GO 500 --create 500 differently mixed rows
--The tests
DECLARE #d DATETIME2;
SET #d = SYSUTCDATETIME();
SELECT DI.ID, DS.Item, DS.ItemNumber
INTO #TEMP
FROM dbo.DelimitedItems DI
CROSS APPLY dbo.DelimitedSplit8K(DI.DelimitedNString,',') DS;
INSERT INTO Results(ResultSource,RowsCount,durationMS)
SELECT 'delimited8K with NVARCHAR(4000)'
,(SELECT COUNT(*) FROM #TEMP) AS RowCountInTemp
,DATEDIFF(MILLISECOND,#d,SYSUTCDATETIME()) AS Duration_NV_ms_delimitedSplit8K
SET #d = SYSUTCDATETIME();
SELECT DI.ID, DS.Item, DS.ItemNumber
INTO #TEMP2
FROM dbo.DelimitedItems DI
CROSS APPLY dbo.DelimitedSplit8K(DI.DelimitedString,',') DS;
INSERT INTO Results(ResultSource,RowsCount,durationMS)
SELECT 'delimited8K with VARCHAR(8000)'
,(SELECT COUNT(*) FROM #TEMP2) AS RowCountInTemp
,DATEDIFF(MILLISECOND,#d,SYSUTCDATETIME()) AS Duration_V_ms_delimitedSplit8K
SET #d = SYSUTCDATETIME();
SELECT DI.ID, OJ.[Value] AS Item, OJ.[Key] AS ItemNumber
INTO #TEMP3
FROM dbo.DelimitedItems DI
CROSS APPLY OPENJSON('["' + REPLACE(DI.DelimitedNString,',','","') + '"]') OJ;
INSERT INTO Results(ResultSource,RowsCount,durationMS)
SELECT 'OPENJSON with NVARCHAR(4000)'
,(SELECT COUNT(*) FROM #TEMP3) AS RowCountInTemp
,DATEDIFF(MILLISECOND,#d,SYSUTCDATETIME()) AS Duration_NV_ms_OPENJSON
SET #d = SYSUTCDATETIME();
SELECT DI.ID, OJ.[Value] AS Item, OJ.[Key] AS ItemNumber
INTO #TEMP4
FROM dbo.DelimitedItems DI
CROSS APPLY OPENJSON('["' + REPLACE(DI.DelimitedString,',','","') + '"]') OJ;
INSERT INTO Results(ResultSource,RowsCount,durationMS)
SELECT 'OPENJSON with VARCHAR(8000)'
,(SELECT COUNT(*) FROM #TEMP4) AS RowCountInTemp
,DATEDIFF(MILLISECOND,#d,SYSUTCDATETIME()) AS Duration_V_ms_OPENJSON
GO
SELECT * FROM Results;
GO
--Clean up
DROP TABLE #TEMP;
DROP TABLE #TEMP2;
DROP TABLE #TEMP3;
DROP TABLE #TEMP4;
USE master;
GO
DROP DATABASE dbTest;
Results:
200 items in 500 rows
1220 delimited8K with NVARCHAR(4000)
274 delimited8K with VARCHAR(8000)
417 OPENJSON with NVARCHAR(4000)
443 OPENJSON with VARCHAR(8000)
100 items in 500 rows
421 delimited8K with NVARCHAR(4000)
140 delimited8K with VARCHAR(8000)
213 OPENJSON with NVARCHAR(4000)
212 OPENJSON with VARCHAR(8000)
100 items in 5 rows
10 delimited8K with NVARCHAR(4000)
5 delimited8K with VARCHAR(8000)
3 OPENJSON with NVARCHAR(4000)
4 OPENJSON with VARCHAR(8000)
5 items in 500 rows
32 delimited8K with NVARCHAR(4000)
30 delimited8K with VARCHAR(8000)
28 OPENJSON with NVARCHAR(4000)
24 OPENJSON with VARCHAR(8000)
--unlimited length (only possible with OPENJSON)
--Wihtout a TOP clause while filling
--results in about 500 items in 500 rows
1329 OPENJSON with NVARCHAR(4000)
1117 OPENJSON with VARCHAR(8000)
Facit:
the popular splitter function does not like NVARCHAR
the function is limited to strings within 8k byte volumen
Only the case with many items and many rows in VARCHAR lets the splitter function be ahead.
In all other cases OPENJSON seems to be more or less faster...
OPENJSON can deal with (almost) unlimited counts
OPENJSON demands for v2016
Everybody is waiting for STRING_SPLIT with the position
UPDATE Added STRING_SPLIT to the test
In the meanwhile I re-run the test with two more test sections using STRING_SPLIT(). As position I had to return a hardcoded value as this function does not return the part's index.
In all tested cases OPENJSON was close with STRING_SPLIT and often faster:
5 items in 1000 rows
250 delimited8K with NVARCHAR(4000)
124 delimited8K with VARCHAR(8000) --this function is best with many rows in VARCHAR
203 OPENJSON with NVARCHAR(4000)
204 OPENJSON with VARCHAR(8000)
235 STRING_SPLIT with NVARCHAR(4000)
234 STRING_SPLIT with VARCHAR(8000)
200 items in 30 rows
140 delimited8K with NVARCHAR(4000)
31 delimited8K with VARCHAR(8000)
47 OPENJSON with NVARCHAR(4000)
31 OPENJSON with VARCHAR(8000)
47 STRING_SPLIT with NVARCHAR(4000)
31 STRING_SPLIT with VARCHAR(8000)
100 items in 10.000 rows
8145 delimited8K with NVARCHAR(4000)
2806 delimited8K with VARCHAR(8000) --fast with many rows!
5112 OPENJSON with NVARCHAR(4000)
4501 OPENJSON with VARCHAR(8000)
5028 STRING_SPLIT with NVARCHAR(4000)
5126 STRING_SPLIT with VARCHAR(8000)
The simple answer is, no. Microsoft so far have refused to provide Ordinal position as part of the return dataset in STRING_SPLIT. You'll need to use a different solution I'm afraid. For example Jeff Moden's DelimitedSplit8k.
(Yes, I realise this is more or less a link only answer, however, pasting Jeff's solution here would effectively be plagiarism).
If you were to use Jeff's solution, then you would be able to do something like:
SELECT *
FROM dbo.DelimitedSplit8K('a,b,c,d,e,f,g,h,i,j,k',',') DS
WHERE ItemNumber = 2;
Of course, you'd likely be passing column rather than a literal string.
I just extended #Shnugo's answer if the splitted text would contain line breaks, unicode and other non json compatible characters, to use
STRING_ESCAPE
My Test code with pipe as separator instead comma:
DECLARE #Separator VARCHAR(5) = STRING_ESCAPE('|', 'json'); -- here pipe or use any other separator (even ones escaped by json)
DECLARE #LongText VARCHAR(MAX) = 'Albert says: "baby, listen!"|ve Çağrı söylüyor: "Elma"|1st Line' + CHAR(13) + CHAR(10) + '2nd line';
SELECT * FROM OPENJSON('["' + REPLACE(STRING_ESCAPE(#LongText, 'json'), #Separator ,'","') + '"]'); -- ok
-- SELECT * FROM OPENJSON('["' + REPLACE(#LongText, #Separator ,'","') + '"]'); -- fails with: JSON text is not properly formatted. ...
Updated due to comment from Simon Zeinstra
I didn't want to deal with OPENJSON, but still wanted to get string_split() value by index.
The performance was not an issue in my case.
I used CTE (Common Table Expression)
Assume you have string str = "part1 part2 part3".
WITH split_res_list as
(
SELECT value FROM STRING_SPLIT('part1 part2 part3', ' ')
),
split_res_list_with_index as
(
SELECT [value],
ROW_NUMBER() OVER (ORDER BY [value] ASC) as [RowNumber]
FROM split_res_list
)
SELECT * FROM split_res_list_with_index WHERE RowNumber = 2
BUT: please be aware that the order of 3 parts is changed according to ORDER BY condition!
The output for the second row with "part2" value:
Using STRING_SPLIT:
STRING_SPLIT ( string , separator [ , enable_ordinal ] )
enable_ordinal
An int or bit expression that serves as a flag to enable or disable the ordinal output column. A value of 1 enables the ordinal column. If enable_ordinal is omitted, NULL, or has a value of 0, the ordinal column is disabled.
The enable_ordinal argument and ordinal output column are currently only supported in Azure SQL Database, Azure SQL Managed Instance, and Azure Synapse Analytics (serverless SQL pool only).
Query:
SELECT value FROM STRING_SPLIT('part1_part2_part3', '_', 1) WHERE ordinal = 2;
Here is my workaround. I will follow the Question waiting for a better answer:
UPDATED: Original code did not take into consideration if a word contains another.
UPDATE 2: Performance was horrible in production so i have to think another way. you have it at the end as option 2, implementation for table.
UPDATE 3: Added code for UDF in the implementation in a string.
Implementation in a string:
declare #a as nvarchar(100) = 'Lorem ipsum dolor dol ol sit amet. D Lorem DO ipsum DOL dolor sit amet. DOLORES ipsum';
WITH T AS (
SELECT T1.value
,charindex(' ' + T1.value + ' ',' ' + #a + ' ' ,0) AS INDX
,RN = ROW_NUMBER() OVER (PARTITION BY value order BY value)
FROM STRING_SPLIT(#a, ' ') AS T1
WHERE T1.value <> ''
),
R (VALUE,INDX,RN) AS (
SELECT *
FROM T
WHERE T.RN = 1
UNION ALL
SELECT T.VALUE
,charindex(' ' + T.value + ' ',' ' + #a + ' ',R.INDX + 1) AS INDX
,T.RN
FROM T
JOIN R
ON T.value = R.VALUE
AND T.RN = R.RN + 1
)
SELECT * FROM R ORDER BY INDX
result:
tableOfResults
UDF:
CREATE FUNCTION DBO.UDF_get_word(#string nvarchar(100),#wordNumber int)
returns nvarchar(100)
AS
BEGIN
DECLARE #searchedWord nvarchar(100);
WITH T AS (
SELECT T1.value
,charindex(' ' + T1.value + ' ',' ' + #string + ' ' ,0) AS INDX
,RN = ROW_NUMBER() OVER (PARTITION BY value order BY value)
FROM STRING_SPLIT(#string, ' ') AS T1
WHERE T1.value <> ''
),
R (VALUE,INDX,RN) AS (
SELECT *
FROM T
WHERE T.RN = 1
UNION ALL
SELECT T.VALUE
,charindex(' ' + T.value + ' ',' ' + #string + ' ',R.INDX + 1) AS INDX
,T.RN
FROM T
JOIN R
ON T.value = R.VALUE
AND T.RN = R.RN + 1
)
SELECT #searchedWord = (value) FROM ( SELECT *, ORD = ROW_NUMBER() OVER (ORDER BY INDX) FROM R )AS TBL WHERE ORD = #wordNumber
RETURN #searchedword
END
GO
Modification for a column in a table, OPTION 1:
WITH T AS (
SELECT T1.stringToBeSplit
,T1.column1 --column1 is an example of column where stringToBeSplit is the same for more than one record. better to be avoid but if you need to added here it is how just follow column1 over the code
,T1.column2
,T1.value
,T1.column3
/*,...any other column*/
,charindex(' ' + T1.value + ' ',' ' + T1.stringToBeSplit + ' ' ,0) AS INDX
,RN = ROW_NUMBER() OVER (PARTITION BY t1.column1, T1.stringToBeSplit, T1.value order BY T1.column1, T1.T1.stringToBeSplit, T1.value) --any column that create duplicates need to be added here as example i added column1
FROM (SELECT TOP 10 * FROM YourTable D CROSS APPLY string_split(D.stringToBeSplit,' ')) AS T1
WHERE T1.value <> ''
),
R (stringToBeSplit, column1, column2, value, column3, INDX, RN) AS (
SELECT stringToBeSplit, column1, column2, value, column3, INDX, RN
FROM T
WHERE T.RN = 1
UNION ALL
SELECT T.stringToBeSplit, T.column1, column2, T.value, T.column3
,charindex(' ' + T.value + ' ',' ' + T.stringToBeSplit + ' ',R.INDX + 1) AS INDX
,T.RN
FROM T
JOIN R
ON T.value = R.VALUE AND T.COLUMN1 = R.COLUMN1 --any column that create duplicates need to be added here as exapmle i added column1
AND T.RN = R.RN + 1
)
SELECT * FROM R ORDER BY column1, stringToBeSplit, INDX
Modification for a column in a table, OPTION 2 (max performance i could get, main action came from removing the join and finding a way of properly execute (and stop) the recursive loop of the CTE, from 1.30 for 1000 lines to 2 sec for 30K lines of strings of similar type and length):
WITH T AS (
SELECT T1.stringToBeSplit --no extracolumns this time
,T1.value
,charindex(' ' + T1.value + ' ',' ' + T1.stringToBeSplit + ' ' ,0) AS INDX
,RN = ROW_NUMBER() OVER (PARTITION BY T1.stringToBeSplit,T1.value order BY T1.stringToBeSplit,T1.value) --from clause use distinct and where if possible
FROM (SELECT DISTINCT stringToBeSplit, VALUE FROM [your table] D CROSS APPLY string_split(D.stringToBeSplit,' ') WHERE [your filter]) AS T1
WHERE T1.value <> ''
),
R (stringToBeSplit, value, INDX, RN) AS (
SELECT stringToBeSplit, value, INDX, RN
FROM T
WHERE T.RN = 1
UNION ALL
SELECT R.stringToBeSplit, R.value
,charindex(' ' + R.value + ' ',' ' + R.stringToBeSplit + ' ',R.INDX + 1) AS INDX
,R.RN + 1
FROM R
WHERE charindex(' ' + R.value + ' ',' ' + R.stringToBeSplit + ' ',R.INDX + 1) <> 0
)
SELECT * FROM R ORDER BY stringToBeSplit, INDX
For getting the word ordinal instead of SELECT * FROM R USE:
SELECT stringToBeSplit ,value , ROW_NUMBER() OVER (PARTITION BY stringToBeSplit order BY [indX]) AS ORD FROM R
if instead of having one RW per word you prefer one column:
select * FROM (SELECT [name 1],value , ROW_NUMBER() OVER (PARTITION BY [name 1] order BY [indX]) AS ORD FROM R ) as R2
pivot (MAX(VALUE) FOR ORD in ([1],[2],[3]) ) AS PIV
if you don't want to specify the number of columns QUOTNAME() like in this link, in my case i only need first 4 words rest are irrelevant for the moment. Below the code from the page in case link fail:
DECLARE
#columns NVARCHAR(MAX) = '',
#sql NVARCHAR(MAX) = '';
-- select the category names
SELECT
#columns+=QUOTENAME(category_name) + ','
FROM
production.categories
ORDER BY
category_name;
-- remove the last comma
SET #columns = LEFT(#columns, LEN(#columns) - 1);
-- construct dynamic SQL
SET #sql ='
SELECT * FROM
(
SELECT
category_name,
model_year,
product_id
FROM
production.products p
INNER JOIN production.categories c
ON c.category_id = p.category_id
) t
PIVOT(
COUNT(product_id)
FOR category_name IN ('+ #columns +')
) AS pivot_table;';
-- execute the dynamic SQL
EXECUTE sp_executesql #sql;
Last but not least i'm really looking forward to know if there is an easier way with same performance either in SQL server or in C#. i just think everything that does not use external info should stay in the Server and run as query or batch but not sure to be honest as i heard the contrary (specially from people that use panda) but no one have convince me just yet.
This works
Example:
String = "pos1-pos2-pos3"
REVERSE(PARSENAME(REPLACE(REVERSE(String), '-', '.'), 1))
With 1 Returns "pos1"
With 2 will return "pos2"...

SQL Pivot table without aggregate

I have a number of text files that are in a format similar to what is shown below.
ENTRY,1,000000,Widget 4000,1,,,2,,
FIELD,Type,A
FIELD,Component,Widget 4000
FIELD,Vendor,Acme
ENTRY,2,000000,PRODUCT XYZ,1,,,3,
FIELD,Type,B
FIELD,ItemAssembly,ABCD
FIELD,Component,Product XYZ - 123
FIELD,Description1,Product
FIELD,Description2,XYZ-123
FIELD,Description3,Alternate Part #440
FIELD,Vendor,Contoso
They have been imported into a table with VARCHAR(MAX) as the only field. Each ENTRY is a "new" item, and all the subsequent FIELD rows are properties of that item. The data next to the FIELD is the column name of the property. The data to the right of the property is the data I want to display.
The desired output would be:
ENTRY Type Component Vendor ItemAssembly Description1
1,000000,Widget 4000 A Widget 4000 Acme
2,000000,Product XYZ B Product XYZ-123 Contoso ABCD Product
I've got the column names using the code below (there are several tables that I have UNIONed together to list all the property names).
select #cols =
STUFF (
(select Distinct ', ' + QUOTENAME(ColName) from
(SELECT
SUBSTRING(ltrim(textFileData),CHARINDEX(',', textFileData, 1)+1,CHARINDEX(',', textFileData, CHARINDEX(',', textFileData, 1)+1)- CHARINDEX(',', textFileData, 1)-1) as ColName
FROM [MyDatabase].[dbo].[MyTextFile]
where
(LEFT(textFileData,7) LIKE #c)
UNION
....
) A
FOR XML PATH(''), TYPE).value('.','NVARCHAR(MAX)'),1,1,'')
Is a Pivot table the best way to do this? No aggregation is needed. Is there a better way to accomplish this? I want to list out data next to the FIELD name in a column format.
Thanks!
Here is the solution in SQL fiddle:
http://sqlfiddle.com/#!3/8f0b0/8
Prepare raw data in format (entry, field, value), use dynamic SQL to make pivot on unknown column count.
MAX() for string is enough to simulate "without aggregate" behavior in this case.
create table t(data varchar(max))
insert into t values('ENTRY,1,000000,Widget 4000,1,,,2,,')
insert into t values('FIELD,Type,A')
insert into t values('FIELD,Component,Widget 4000')
insert into t values('FIELD,Vendor,Acme ')
insert into t values('ENTRY,2,000000,PRODUCT XYZ,1,,,3,')
insert into t values('FIELD,Type,B')
insert into t values('FIELD,ItemAssembly,ABCD')
insert into t values('FIELD,Component,Product XYZ - 123')
insert into t values('FIELD,Description1,Product ')
insert into t values('FIELD,Description2,XYZ-123 ')
insert into t values('FIELD,Description3,Alternate Part #440')
insert into t values('FIELD,Vendor,Contoso');
create type preparedtype as table (entry varchar(max), field varchar(max), value varchar(max))
declare #prepared preparedtype
;with identified as
(
select
row_number ( ) over (order by (select 1)) as id,
substring(data, 1, charindex(',', data) - 1) as type,
substring(data, charindex(',', data) + 1, len(data)) as data
from t
)
, tree as
(
select
id,
(select max(id)
from identified
where type = 'ENTRY'
and id <= i.id) as parentid,
type,
data
from identified as i
)
, pivotsrc as
(
select
p.data as entry,
substring(c.data, 1, charindex(',', c.data) - 1) as field,
substring(c.data, charindex(',', c.data) + 1, len(c.data)) as value
from tree as p
inner join tree as c on c.parentid = p.id
where p.id = p.parentid
and c.parentid <> c.id
)
insert into #prepared
select * from pivotsrc
declare #dynamicPivotQuery as nvarchar(max)
declare #columnName as nvarchar(max)
select #columnName = ISNULL(#ColumnName + ',','')
+ QUOTENAME(field)
from (select distinct field from #prepared) AS fields
set #dynamicPivotQuery = N'select * from #prepared
pivot (max(value) for field in (' + #columnName + ')) as result'
exec sp_executesql #DynamicPivotQuery, N'#prepared preparedtype readonly', #prepared
Here your are, this comes back exactly as you need it. I love tricky SQL :-). This is a real ad-hoc singel-statement call.
DECLARE #tbl TABLE(OneCol VARCHAR(MAX));
INSERT INTO #tbl
VALUES('ENTRY,1,000000,Widget 4000,1,,,2,,')
,('FIELD,Type,A')
,('FIELD,Component,Widget 4000')
,('FIELD,Vendor,Acme ')
,('ENTRY,2,000000,PRODUCT XYZ,1,,,3,')
,('FIELD,Type,B')
,('FIELD,ItemAssembly,ABCD')
,('FIELD,Component,Product XYZ - 123')
,('FIELD,Description1,Product ')
,('FIELD,Description2,XYZ-123 ')
,('FIELD,Description3,Alternate Part #440')
,('FIELD,Vendor,Contoso');
WITH OneColumn AS
(
SELECT ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS inx
,CAST('<root><r>' + REPLACE(OneCol,',','</r><r>') + '</r></root>' AS XML) AS Split
FROM #tbl AS tbl
)
,AsParts AS
(
SELECT inx
,Each.part.value('/root[1]/r[1]','varchar(max)') AS Part1
,Each.part.value('/root[1]/r[2]','varchar(max)') AS Part2
,Each.part.value('/root[1]/r[3]','varchar(max)') AS Part3
,Each.part.value('/root[1]/r[4]','varchar(max)') AS Part4
,Each.part.value('/root[1]/r[5]','varchar(max)') AS Part5
FROM OneColumn
CROSS APPLY Split.nodes('/root') AS Each(part)
)
,TheEntries AS
(
SELECT DISTINCT *
FROM AsParts
WHERE Part1='ENTRY'
)
SELECT TheEntries.Part2 + ',' + TheEntries.Part3 + ',' + TheEntries.Part4 AS [ENTRY]
,MyFields.AsXML.value('(fields[1]/field[Part2="Type"])[1]/Part3[1]','varchar(max)') AS [Type]
,MyFields.AsXML.value('(fields[1]/field[Part2="Component"])[1]/Part3[1]','varchar(max)') AS Component
,MyFields.AsXML.value('(fields[1]/field[Part2="Vendor"])[1]/Part3[1]','varchar(max)') AS Vendor
,MyFields.AsXML.value('(fields[1]/field[Part2="ItemAssembly"])[1]/Part3[1]','varchar(max)') AS ItemAssembly
,MyFields.AsXML.value('(fields[1]/field[Part2="Description1"])[1]/Part3[1]','varchar(max)') AS Description1
FROM TheEntries
CROSS APPLY
(
SELECT *
FROM AsParts AS ap
WHERE ap.Part1='FIELD' AND ap.inx>TheEntries.inx
AND ap.inx < ISNULL((SELECT TOP 1 nextEntry.inx FROM TheEntries AS nextEntry WHERE nextEntry.inx>TheEntries.inx ORDER BY nextEntry.inx DESC),10000000)
ORDER BY ap.inx
FOR XML PATH('field'), ROOT('fields'),TYPE
) AS MyFields(AsXML)

SQL: iterating through a list that contains some ranges

I am trying to get information for products that have an ID that is contained in a list. The problem is that the list contains some single values and some range values:
PX03 - PX069, PX20, PX202, PX25 - PX270, PX250 - PX2509, PX251, PX2511 -
PX2513
Basically what I am looking for is some way to take a list or string containing both values and ranges and the end output is a table or list that has all of the values within the ranges individually so that I can loop through them.
I have a stored procedure that loops through all the ID's in the main products table that use the 'PX' prefix, but the table has all ids (i.e. PX 1 - 9999, LX 00001 - 99999) and I only want to search through those contained in the above list. I could write it out all the id's individually but some of the ranges contain many values, which would be time consuming to go through.
My idea was to create a separate table containing this list, in which there would be three columns: an identity column, and then one column each for the beginning and end of the range. Any items that do not have a range would just have the same value for beginning and end range, i.e.:
----------------------------------
rownum | range_start | range_end|
----------------------------------
1 PX03 PX069
2 PX20 PX20
3 PX202 PX202
4 PX25 PX25
5 PX250 PX2509
and then populating a table using something like:
SELECT id from product_table
WHERE id BETWEEN listtable.range_start AND listtable.range_end
where product_table is my original table with the product id's and their information and listtable is the new table I just created. This would give me:
id|
---
PX03
PX030
PX031
PX032
PX033
.
.
.
PX067
PX068
PX069
PX20
PX202
PX25
PX250
PX251
etc.
but I am thinking I would need to iterate through the list and I am not sure how to do that. Any ideas, hints or suggestions?
UPDATE
After creating the table using the solution given by #asantaballa, it was as simple as using an inner join:
SELECT d.id
FROM product_table d
INNER JOIN #RangeTable r
ON d.id BETWEEN r.RangeFrom AND r.RangeTo
See if this works for you for the part about converting the string to a table.
Declare #StrList Varchar(1000) = 'PX03 - PX069, PX20, PX202, PX25 - PX270, PX250 - PX2509, PX251, PX2511 - PX2513'
Declare #RangeTable Table (RangeFrom VarChar(32), RangeTo VarChar(32))
Select #StrList = Replace(#StrList,' ', '') + ','
Declare #StrListItem Varchar(32)
While CHARINDEX(',', #StrList) > 0
Begin
Select #StrListItem = SUBSTRING(#StrList,1,CHARINDEX(',', #StrList) - 1)
Declare
#RangeFrom VarChar(32)
, #RangeTo VarChar(32)
If CHARINDEX('-', #StrListItem) = 0
Begin
Select
#RangeFrom = #StrListItem
, #RangeTo = #StrListItem
End
Else
Begin
Select
#RangeFrom = SUBSTRING(#StrListItem, 1, CHARINDEX('-', #StrListItem) - 1)
, #RangeTo = SUBSTRING(#StrListItem, CHARINDEX('-', #StrListItem) + 1, LEN(#StrListItem) - CHARINDEX('-', #StrListItem))
End
Insert Into #RangeTable (RangeFrom, RangeTo) Values (#RangeFrom, #RangeTo)
Select #StrList = SUBSTRING(#StrList, CHARINDEX(',', #StrList) + 1, LEN(#StrList) - CHARINDEX(',', #StrList))
End
Select * From #RangeTable
Here is your string and product_table
DECLARE #STR VARCHAR(100) = 'PX03 - PX069, PX20, PX202, PX25 - PX270, PX250 - PX2509, PX251, PX2511 - PX2513'
SELECT * INTO #product_table
FROM
(
SELECT 'PX4' PRODID
UNION ALL
SELECT 'PX26'
UNION ALL
SELECT 'PX75'
UNION ALL
SELECT 'PX77'
)TAB
Now create a table to hold the value
CREATE TABLE #listtable(ROWNUM int IDENTITY(1,1),range_start VARCHAR(100),range_end VARCHAR(100))
Now insert the splitted value to the table.
INSERT INTO #listtable
SELECT
ISNULL(PARSENAME(REPLACE(Split.a.value('.', 'VARCHAR(100)'),'-','.'),2),Split.a.value('.', 'VARCHAR(100)')) 'range_start' ,
PARSENAME(REPLACE(Split.a.value('.', 'VARCHAR(100)'),'-','.'),1) 'range_end'
FROM
(
SELECT CAST ('<M>' + REPLACE(#STR, ',', '</M><M>') + '</M>' AS XML) AS Data
) AS A
CROSS APPLY Data.nodes ('/M') AS Split(a)
Since Id is string, you need a function to extract numbers from Id(function created by God of SQL Server - Pinal Dave)
CREATE FUNCTION dbo.udf_GetNumeric
(#strAlphaNumeric VARCHAR(256))
RETURNS VARCHAR(256)
AS
BEGIN
DECLARE #intAlpha INT
SET #intAlpha = PATINDEX('%[^0-9]%', #strAlphaNumeric)
BEGIN
WHILE #intAlpha > 0
BEGIN
SET #strAlphaNumeric = STUFF(#strAlphaNumeric, #intAlpha, 1, '' )
SET #intAlpha = PATINDEX('%[^0-9]%', #strAlphaNumeric )
END
END
RETURN ISNULL(#strAlphaNumeric,0)
END
First of all keep in mind that we will not get PX1,PX2,PX3,PX4 if you give id BETWEEN listtable.range_start AND listtable.range_end because those are of varchar type and not numbers. So we need to extract numbers from each PX and get the values between them and append PX.
Here is the query which filters the IDs in product_table which are in the range between listtable
;WITH CTE AS
(
SELECT ROWNUM,CAST(dbo.udf_GetNumeric(range_start)AS INT) NUMBERS,
CAST(dbo.udf_GetNumeric(range_end)AS INT) RTO1
FROM #listtable
UNION ALL
SELECT T.ROWNUM,NUMBERS+1,RTO1
FROM #listtable T
JOIN CTE ON CTE.ROWNUM = T.ROWNUM
WHERE NUMBERS < RTO1
)
SELECT PRODID IDS--,ROWNUM,NUMBERS NUMS,'PX'+CAST(NUMBERS AS VARCHAR(10)) IDS2
FROM CTE
JOIN #product_table ON PRODID='PX'+CAST(NUMBERS AS VARCHAR(10))
ORDER BY NUMBERS
option (MaxRecursion 0)
SQL FIDDLE

How to query data when primary key exists twice as foreign key in another table?

I am am trying to query data but it shows the same record twice how to distinct?
Table 1: UserBasics
user_Id , user_Fullname , user_Zip , user_Need
--------------------------------------------------------------------
10 Alberto Cesaro 98001 Sales, Marketing & Public Relations
Table 2: UserProfession
Prof_ID , Company , Designation
----------------------------------
10 Young's Marketing Manager
10 Young's Regional Manager
My procedure:
CREATE PROC P #Zip VARCHAR(20)=NULL,
#Company VARCHAR(200)=NULL,
#Designation VARCHAR(100)=NULL,
#Interest VARCHAR(200)=NULL,
#CurrentID VARCHAR(200)=NULL
--#JobFunc varchar(200)=NULL
AS
BEGIN
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;
SELECT ub.user_Id,
ub.user_Fullname,
up.Designation,
up.Company
FROM UserBasics UB
INNER JOIN UserProfession up
ON ub.user_Id = up.Prof_ID
WHERE ( #Zip IS NULL
OR ub.user_Zip LIKE '%' + #Zip + '%' )
AND ( #Interest IS NULL
OR ub.user_Need LIKE '%' + #Interest + '%' )
AND ( #Company IS NULL
OR up.Company LIKE '%' + #Company + '%' )
AND ( ub.user_Id != #CurrentID )
AND ( #Designation IS NULL
OR up.Designation LIKE '%' + #Designation + '%' )
END
as above is using a stored procedure just to make condition and variable clear
How to show every user data distinct hopes for your suggestion ?
Thanks !
EDIT:
Out put should be look alike,
10 Alberto Cesaro Marketing Manager,Regional Manager Young's
EDIT second time :
i have done with company name but there is some problem if i wanna use filter on user profession table column then how would i relate it ?? my query ,
SELECT
user_Id, user_Fullname,
STUFF(
(SELECT ', ' + Designation
FROM UserProfession as up
WHERE Prof_ID = a.user_Id
FOR XML PATH (''))
, 1, 1, '') Designation,
STUFF(
(SELECT ', ' + Company
FROM UserProfession
WHERE Prof_ID = a.user_Id
FOR XML PATH (''))
, 1, 1, '') AS Company
FROM UserBasics AS a
where (#Zip is null or a.user_Zip like '%'+#Zip+'%') and
(#Interest is null or a.user_Need like '%'+#Interest+'%') and
-- (#JobFunc is null or m.mentor_jobFunction= #JobFunc) and
(#Company is null or up.Company like '%'+#Company+'%') and
(a.user_Id != #CurrentID) and
(#Designation is null or up.Designation like '%'+#Designation+'%')
--where a.user_Zip like '%90005%'
-- WHERE clause here
GROUP BY user_Id, user_Fullname
-- As i am getting error on Company and Designation in where clause Hopes for last suggestion ???
SELECT
user_Id, user_Fullname,
STUFF(
(SELECT ', ' + Designation
FROM UserProfession
WHERE Prof_ID = a.user_Id
FOR XML PATH (''))
, 1, 1, '') AS DesignationList
FROM UserBasics AS a
-- WHERE clause here
GROUP BY user_Id, user_Fullname
SQLFiddle Demo

Find non-ASCII characters in varchar columns using SQL Server

How can rows with non-ASCII characters be returned using SQL Server?
If you can show how to do it for one column would be great.
I am doing something like this now, but it is not working
select *
from Staging.APARMRE1 as ar
where ar.Line like '%[^!-~ ]%'
For extra credit, if it can span all varchar columns in a table, that would be outstanding! In this solution, it would be nice to return three columns:
The identity field for that record. (This will allow the whole record to be reviewed with another query.)
The column name
The text with the invalid character
Id | FieldName | InvalidText |
----+-----------+-------------------+
25 | LastName | Solís |
56 | FirstName | François |
100 | Address1 | 123 Ümlaut street |
Invalid characters would be any outside the range of SPACE (3210) through ~ (12710)
Here is a solution for the single column search using PATINDEX.
It also displays the StartPosition, InvalidCharacter and ASCII code.
select line,
patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,Line) as [Position],
substring(line,patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,Line),1) as [InvalidCharacter],
ascii(substring(line,patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,Line),1)) as [ASCIICode]
from staging.APARMRE1
where patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,Line) >0
I've been running this bit of code with success
declare #UnicodeData table (
data nvarchar(500)
)
insert into
#UnicodeData
values
(N'Horse�')
,(N'Dog')
,(N'Cat')
select
data
from
#UnicodeData
where
data collate LATIN1_GENERAL_BIN != cast(data as varchar(max))
Which works well for known columns.
For extra credit, I wrote this quick script to search all nvarchar columns in a given table for Unicode characters.
declare
#sql varchar(max) = ''
,#table sysname = 'mytable' -- enter your table here
;with ColumnData as (
select
RowId = row_number() over (order by c.COLUMN_NAME)
,c.COLUMN_NAME
,ColumnName = '[' + c.COLUMN_NAME + ']'
,TableName = '[' + c.TABLE_SCHEMA + '].[' + c.TABLE_NAME + ']'
from
INFORMATION_SCHEMA.COLUMNS c
where
c.DATA_TYPE = 'nvarchar'
and c.TABLE_NAME = #table
)
select
#sql = #sql + 'select FieldName = ''' + c.ColumnName + ''', InvalidCharacter = [' + c.COLUMN_NAME + '] from ' + c.TableName + ' where ' + c.ColumnName + ' collate LATIN1_GENERAL_BIN != cast(' + c.ColumnName + ' as varchar(max)) ' + case when c.RowId <> (select max(RowId) from ColumnData) then ' union all ' else '' end + char(13)
from
ColumnData c
-- check
-- print #sql
exec (#sql)
I'm not a fan of dynamic SQL but it does have its uses for exploratory queries like this.
try something like this:
DECLARE #YourTable table (PK int, col1 varchar(20), col2 varchar(20), col3 varchar(20));
INSERT #YourTable VALUES (1, 'ok','ok','ok');
INSERT #YourTable VALUES (2, 'BA'+char(182)+'D','ok','ok');
INSERT #YourTable VALUES (3, 'ok',char(182)+'BAD','ok');
INSERT #YourTable VALUES (4, 'ok','ok','B'+char(182)+'AD');
INSERT #YourTable VALUES (5, char(182)+'BAD','ok',char(182)+'BAD');
INSERT #YourTable VALUES (6, 'BAD'+char(182),'B'+char(182)+'AD','BAD'+char(182)+char(182)+char(182));
--if you have a Numbers table use that, other wise make one using a CTE
WITH AllNumbers AS
( SELECT 1 AS Number
UNION ALL
SELECT Number+1
FROM AllNumbers
WHERE Number<1000
)
SELECT
pk, 'Col1' BadValueColumn, CONVERT(varchar(20),col1) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM #YourTable y
INNER JOIN AllNumbers n ON n.Number <= LEN(y.col1)
WHERE ASCII(SUBSTRING(y.col1, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col1, n.Number, 1))>127
UNION
SELECT
pk, 'Col2' BadValueColumn, CONVERT(varchar(20),col2) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM #YourTable y
INNER JOIN AllNumbers n ON n.Number <= LEN(y.col2)
WHERE ASCII(SUBSTRING(y.col2, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col2, n.Number, 1))>127
UNION
SELECT
pk, 'Col3' BadValueColumn, CONVERT(varchar(20),col3) AS BadValue --make the XYZ in convert(varchar(XYZ), ...) the largest value of col1, col2, col3
FROM #YourTable y
INNER JOIN AllNumbers n ON n.Number <= LEN(y.col3)
WHERE ASCII(SUBSTRING(y.col3, n.Number, 1))<32 OR ASCII(SUBSTRING(y.col3, n.Number, 1))>127
order by 1
OPTION (MAXRECURSION 1000);
OUTPUT:
pk BadValueColumn BadValue
----------- -------------- --------------------
2 Col1 BA¶D
3 Col2 ¶BAD
4 Col3 B¶AD
5 Col1 ¶BAD
5 Col3 ¶BAD
6 Col1 BAD¶
6 Col2 B¶AD
6 Col3 BAD¶¶¶
(8 row(s) affected)
This script searches for non-ascii characters in one column. It generates a string of all valid characters, here code point 32 to 127. Then it searches for rows that don't match the list:
declare #str varchar(128);
declare #i int;
set #str = '';
set #i = 32;
while #i <= 127
begin
set #str = #str + '|' + char(#i);
set #i = #i + 1;
end;
select col1
from YourTable
where col1 like '%[^' + #str + ']%' escape '|';
running the various solutions on some real world data - 12M rows varchar length ~30, around 9k dodgy rows, no full text index in play, the patIndex solution is the fastest, and it also selects the most rows.
(pre-ran km. to set the cache to a known state, ran the 3 processes, and finally ran km again - the last 2 runs of km gave times within 2 seconds)
patindex solution by Gerhard Weiss -- Runtime 0:38, returns 9144 rows
select dodgyColumn from myTable fcc
WHERE patindex('%[^ !-~]%' COLLATE Latin1_General_BIN,dodgyColumn ) >0
the substring-numbers solution by MT. -- Runtime 1:16, returned 8996 rows
select dodgyColumn from myTable fcc
INNER JOIN dbo.Numbers32k dn ON dn.number<(len(fcc.dodgyColumn ))
WHERE ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))<32
OR ASCII(SUBSTRING(fcc.dodgyColumn , dn.Number, 1))>127
udf solution by Deon Robertson -- Runtime 3:47, returns 7316 rows
select dodgyColumn
from myTable
where dbo.udf_test_ContainsNonASCIIChars(dodgyColumn , 1) = 1
There is a user defined function available on the web 'Parse Alphanumeric'. Google UDF parse alphanumeric and you should find the code for it. This user defined function removes all characters that doesn't fit between 0-9, a-z, and A-Z.
Select * from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name) <> ar.last_name
That should bring back any records that have a last_name with invalid chars for you...though your bonus points question is a bit more of a challenge, but I think a case statement could handle it. This is a bit psuedo code, I'm not entirely sure if it'd work.
Select id, case when udf_parsealpha(ar.last_name) <> ar.last_name then 'last name'
when udf_parsealpha(ar.first_name) <> ar.first_name then 'first name'
when udf_parsealpha(ar.Address1) <> ar.last_name then 'Address1'
end,
case when udf_parsealpha(ar.last_name) <> ar.last_name then ar.last_name
when udf_parsealpha(ar.first_name) <> ar.first_name then ar.first_name
when udf_parsealpha(ar.Address1) <> ar.last_name then ar.Address1
end
from Staging.APARMRE1 ar
where udf_parsealpha(ar.last_name) <> ar.last_name or
udf_parsealpha(ar.first_name) <> ar.first_name or
udf_parsealpha(ar.Address1) <> ar.last_name
I wrote this in the forum post box...so I'm not quite sure if that'll function as is, but it should be close. I'm not quite sure how it will behave if a single record has two fields with invalid chars either.
As an alternative, you should be able to change the from clause away from a single table and into a subquery that looks something like:
select id,fieldname,value from (
Select id,'last_name' as 'fieldname', last_name as 'value'
from Staging.APARMRE1 ar
Union
Select id,'first_name' as 'fieldname', first_name as 'value'
from Staging.APARMRE1 ar
---(and repeat unions for each field)
)
where udf_parsealpha(value) <> value
Benefit here is for every column you'll only need to extend the union statement here, while you need to put that comparisson three times for every column in the case statement version of this script
To find which field has invalid characters:
SELECT * FROM Staging.APARMRE1 FOR XML AUTO, TYPE
You can test it with this query:
SELECT top 1 'char 31: '+char(31)+' (hex 0x1F)' field
from sysobjects
FOR XML AUTO, TYPE
The result will be:
Msg 6841, Level 16, State 1, Line 3 FOR XML could not serialize the
data for node 'field' because it contains a character (0x001F) which
is not allowed in XML. To retrieve this data using FOR XML, convert it
to binary, varbinary or image data type and use the BINARY BASE64
directive.
It is very useful when you write xml files and get error of invalid characters when validate it.
Here is a UDF I built to detectc columns with extended ascii charaters. It is quick and you can extended the character set you want to check. The second parameter allows you to switch between checking anything outside the standard character set or allowing an extended set:
create function [dbo].[udf_ContainsNonASCIIChars]
(
#string nvarchar(4000),
#checkExtendedCharset bit
)
returns bit
as
begin
declare #pos int = 0;
declare #char varchar(1);
declare #return bit = 0;
while #pos < len(#string)
begin
select #char = substring(#string, #pos, 1)
if ascii(#char) < 32 or ascii(#char) > 126
begin
if #checkExtendedCharset = 1
begin
if ascii(#char) not in (9,124,130,138,142,146,150,154,158,160,170,176,180,181,183,184,185,186,192,193,194,195,196,197,199,200,201,202,203,204,205,206,207,209,210,211,212,213,214,216,217,218,219,220,221,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,248,249,250,251,252,253,254,255)
begin
select #return = 1;
select #pos = (len(#string) + 1)
end
else
begin
select #pos = #pos + 1
end
end
else
begin
select #return = 1;
select #pos = (len(#string) + 1)
end
end
else
begin
select #pos = #pos + 1
end
end
return #return;
end
USAGE:
select Address1
from PropertyFile_English
where udf_ContainsNonASCIIChars(Address1, 1) = 1

Resources