SQL Server Conversion failed varchar to int - sql-server

I have a table (no.1) which has 10 columns. One of them clm01 is integer and not allowed with null values.
There is a second table (no.2) which has many columns. One of them is string type clm02. An example of this column data is 1,2,3.
I'd like to make a query like:
select *
from table1 t1, table2 t2
where t1.clm01 not in (t2.clm2)
For example in table1 I have 5 records with values in clm01 1,2,3,4,5 and in table2 I've got 1 record with value in clm02 = 1,2,3
So I would like with the query to return only the record with the value 4 and 5 in the clm01.
Instead I get:
Conversion failed when converting the varchar value '1,2,3' to data type int
Any ideas?

Use STRING_SPLIT() function to split the comma separated values, if you are using SQL Server 2016.
SELECT *
FROM table1 t1
WHERE t1.clm1 NOT IN (SELECT Value FROM table2 t2
CROSS APPLY STRING_SPLIT(t2.clm2,','))
If you are using any lower versions of SQL server write a UDF to split string and use the function in CROSS APPLY clause.
CREATE FUNCTION [dbo].[SplitString]
(
#string NVARCHAR(MAX),
#delimiter CHAR(1)
)
RETURNS #output TABLE(Value NVARCHAR(MAX)
)
BEGIN
DECLARE #start INT, #end INT
SELECT #start = 1, #end = CHARINDEX(#delimiter, #string)
WHILE #start < LEN(#string) + 1 BEGIN
IF #end = 0
SET #end = LEN(#string) + 1
INSERT INTO #output (Value)
VALUES(SUBSTRING(#string, #start, #end - #start))
SET #start = #end + 1
SET #end = CHARINDEX(#delimiter, #string, #start)
END
RETURN
END

I decided to give you a couple of options but this really is a duplicate question I see pretty often.
There are two main ways of going about the problem.
1) Use LIKE to and compare the strings but you actually have to build strings a little oddly to do it:
SELECT *
FROM
#Table1 t1
WHERE
NOT EXISTS (SELECT *
FROM #Table2 t2
WHERE ',' + t2.clm02 + ',' LIKE '%,' + CAST(t1.clm01 AS VARCHAR(15)) + ',%')
What you see is ,1,2,3, is like %,clm01value,% you must add the delimiter to the strings for this to work properly and you have to cast/convert clm01 to a char datatype. There are drawbacks to this solution but if your data sets are straight forward it could work for you.
2) Split the comma delimited string to rows and then use a left join, not exists, or not in. here is a method to convert your csv to xml and then split
;WITH cteClm02Split AS (
SELECT
clm02
FROM
(SELECT
CAST('<X>' + REPLACE(clm02,',','</X><X>') + '</X>' AS XML) as xclm02
FROM
#Table2) t
CROSS APPLY (SELECT t.n.value('.','INT') clm02
FROM
t.xclm02.nodes('X') as t(n)) ca
)
SELECT t1.*
FROM
#Table1 t1
LEFT JOIN cteClm02Split t2
ON t1.clm01 = t2.clm02
WHERE
t2.clm02 IS NULL
OR use NOT EXISTS with same cte
SELECT t1.*
FROM
#Table1 t1
WHERE
NOT EXISTS (SELECT * FROM cteClm02Split t2 WHERE t1.clm01 = t2.clm02)
There are dozens of other ways to split delimited strings and you can choose whatever way works for you.
Note: I am not showing IN/NOT IN as an answer because I don't recommend the use of it. If you do use it make sure that you are never comparing a NULL in the select etc. Here is another good post concerning performance etc. NOT IN vs NOT EXISTS
here are the table variables that were used:
DECLARE #Table1 AS TABLE (clm01 INT)
DECLARE #Table2 AS TABLE (clm02 VARCHAR(15))
INSERT INTO #Table1 VALUES (1),(2),(3),(4),(5)
INSERT INTO #Table2 VALUES ('1,2,3')

Related

Substring is slow with while loop in SQL Server

One of my table column stores ~650,000 characters (each value of the column contains entire table). I know its bad design however, Client will not be able to change it.
I am tasked to convert the column into multiple columns.
I chose to use dbo.DelimitedSplit8K function
Unfortunately, it can only handle 8k characters at max.
So I decided to split the column into 81 8k batches using while loop and store the same in a variable table (temp or normal table made no improvement)
DECLARE #tab1 table ( serialnumber int, etext nvarchar(1000))
declare #scriptquan int = (select MAX(len (errortext)/8000) from mytable)
DECLARE #Counter INT
DECLARE #A bigint = 1
DECLARE #B bigint = 8000
SET #Counter=1
WHILE ( #Counter <= #scriptquan + 1)
BEGIN
insert into #tab1 select ItemNumber, Item from dbo.mytable cross apply dbo.DelimitedSplit8K(substring(errortext, #A, #B), CHAR(13)+CHAR(10))
SET #A = #A + 8000
SET #B = #B + 8000
SET #Counter = #Counter + 1
END
This followed by using below code
declare #tab2 table (Item nvarchar(max),itemnumber int, Colseq varchar(10)) -- declare table variable
;with cte as (
select [etext] ,ItemNumber, Item from #tab1 -- insert table name
cross apply dbo.DelimitedSplit8K(etext,' ')) -- insert table columns name that contains text
insert into #tab2 Select Item,itemnumber, 'a'+ cast (ItemNumber as varchar) colseq
from cte -- insert values to table variable
;WITH Tbl(item, colseq) AS(
select item, colseq from #tab2
),
CteRn AS(
SELECT item, colseq,
Rn = ROW_NUMBER() OVER(PARTITION BY colseq ORDER BY colseq)
FROM Tbl
)
SELECT
a1 Time,a2 Number,a3 Type,a4 Remarks
FROM CteRn r
PIVOT(
MAX(item)
FOR colseq IN(a1,a2,a3,a4)
)p
where a3 = 'error'
gives the desired output. However, just the loop takes 15 minutes to complete and overall query completes by 27 minutes. Is there any way I can make it faster? Total row count in my table is 2. So I don't think Index can help.
Client uses Azure SQL Database so I can't choose PowerShell or Python to accomplish this either.
Please let me know if more information is needed. I tried my best to mention everything I could.

remove duplicates from comma or pipeline operator string

I have been looking into this for a while now and I cannot find a way to remove duplicate strings from a comma-separated as well as pipeline seperated string in SQL Server.
Given the string
test1,test2,test1|test2,test3|test4,test4|test4
does anyone know how would you return test1,test2,test3,test4?
Approach
The following approach can be used to de-duplicate a delimited list of values.
Use the REPLACE() function to convert different delimiters into the same delimiter.
Use the REPLACE() function to inject XML closing and opening tags to create an XML fragment
Use the CAST(expr AS XML) function to convert the above fragment into the XML data type
Use OUTER APPLY to apply the table-valued function nodes() to split the XML fragment into its constituent XML tags. This returns each XML tag on a separate row.
Extract just the value from the XML tag using the value() function, and returns the value using the specified data type.
Append a comma after the above-mentioned value.
Note that these values are returned on separate rows. The usage of the DISTINCT keyword now removes duplicate rows (i.e. values).
Use the FOR XML PATH('') clause to concatenate the values across multiple rows into a single row.
Query
Putting the above approach in query form:
SELECT DISTINCT PivotedTable.PivotedColumn.value('.','nvarchar(max)') + ','
FROM (
-- This query returns the following in theDataXml column:
-- <tag>test1</tag><tag>test2</tag><tag>test1</tag><tag>test2</tag><tag>test3</tag><tag>test4</tag><tag>test4</tag><tag>test4</tag>
-- i.e. it has turned the original delimited data into an XML fragment
SELECT
DataTable.DataColumn AS DataRaw
, CAST(
'<tag>'
-- First replace commas with pipes to have only a single delimiter
-- Then replace the pipe delimiters with a closing and opening tag
+ replace(replace(DataTable.DataColumn, ',','|'), '|','</tag><tag>')
-- Add a final set of closing tags
+ '</tag>'
AS XML) AS DataXml
FROM ( SELECT 'test1,test2,test1|test2,test3|test4,test4|test4' AS DataColumn) AS DataTable
) AS x
OUTER APPLY DataXml.nodes('tag') AS PivotedTable(PivotedColumn)
-- Running the query without the following line will return the data in separate rows
-- Running the query with the following line returns the rows concatenated, i.e. it returns:
-- test1,test2,test3,test4,
FOR XML PATH('')
Input & Result
Given the input:
test1,test2,test1|test2,test3|test4,test4|test4
The above query will return the result:
test1,test2,test3,test4,
Notice the trailing comma at the end. I'll leave it as an exercise to you to remove that.
EDIT: Count of Duplicates
OP requested in a comment "how do i get t5he count of duplicates as well? in a seperate column".
The simplest way would be to use the above query but remove the last line FOR XML PATH(''). Then, counting all values and distinct values returned by the SELECT expression in the above query (i.e. PivotedTable.PivotedColumn.value('.','nvarchar(max)')). The difference between the count of all values and the count of distinct values is the count of duplicate values.
SELECT
COUNT(PivotedTable.PivotedColumn.value('.','nvarchar(max)')) AS CountOfAllValues
, COUNT(DISTINCT PivotedTable.PivotedColumn.value('.','nvarchar(max)')) AS CountOfUniqueValues
-- The difference of the previous two counts is the number of duplicate values
, COUNT(PivotedTable.PivotedColumn.value('.','nvarchar(max)'))
- COUNT(DISTINCT PivotedTable.PivotedColumn.value('.','nvarchar(max)')) AS CountOfDuplicateValues
FROM (
-- This query returns the following in theDataXml column:
-- <tag>test1</tag><tag>test2</tag><tag>test1</tag><tag>test2</tag><tag>test3</tag><tag>test4</tag><tag>test4</tag><tag>test4</tag>
-- i.e. it has turned the original delimited data into an XML fragment
SELECT
DataTable.DataColumn AS DataRaw
, CAST(
'<tag>'
-- First replace commas with pipes to have only a single delimiter
-- Then replace the pipe delimiters with a closing and opening tag
+ replace(replace(DataTable.DataColumn, ',','|'), '|','</tag><tag>')
-- Add a final set of closing tags
+ '</tag>'
AS XML) AS DataXml
FROM ( SELECT 'test1,test2,test1|test2,test3|test4,test4|test4' AS DataColumn) AS DataTable
) AS x
OUTER APPLY DataXml.nodes('tag') AS PivotedTable(PivotedColumn)
For the same input shown above, the output of this query is:
CountOfAllValues CountOfUniqueValues CountOfDuplicateValues
---------------- ------------------- ----------------------
8 4 4
Solution to your problem is as given below :
DECLARE #Data_String AS VARCHAR(1000), #Result as varchar(1000)=''
SET #Data_String = 'test1,test2,test1|test2,test3|test4,test4|test4'
SET #Data_String = REPLACE(#Data_String,'|',',')
SELECT #Result=#Result+col+',' from(
SELECT DISTINCT t.c.value('.','varchar(100)') col from(
SELECT cast('<A>'+replace(#Data_String,',','</A><A>')+'</A>' as xml)col1)data cross apply col1.nodes('/A') as t(c))Data
SELECT LEFT(#Result,LEN(#Result)-1)
Result
test1,test2,test3,test4
DECLARE #string AS VARCHAR(1000)
SET #string = 'test1,test2,test1|test2,test3|test4,test4|test4'
SET #string = REPLACE(#string,'|',',')
DECLARE #t TABLE (val VARCHAR(MAX))
DECLARE #xml XML
SET #xml = N'<root><r>' + REPLACE(#string, ',', '</r><r>') + '</r></root>'
INSERT INTO #t(val) SELECT r.value('.','VARCHAR(MAX)') as Item FROM #xml.nodes('//root/r') AS RECORDS(r)
;WITH cte
AS (SELECT ROW_NUMBER() OVER (PARTITION BY val ORDER BY val desc) RN
FROM #t)
DELETE FROM cte
WHERE RN > 1
Try Following SQL Script :
declare #List nvarchar(max)='test1,test2,test1|test2,test3|test4,test4|test4';
declare #Delimiter CHAR(1) =','
declare #XML AS XML
declare #result varchar(max)
set #List=Replace(#List,'|',',')
--Select #List
SET #XML = CAST(('<X>'+REPLACE(#List,#Delimiter ,'</X><X>')+'</X>') AS XML)
DECLARE #temp TABLE (Data nvarchar(100))
INSERT INTO #temp
SELECT N.value('.', 'nvarchar(100)') AS Data FROM #XML.nodes('X') AS T(N)
--SELECT distinct * FROM #temp
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
Select distinct Data into #temp from #temp
SET #result = ''
select #result = #result + Data + ', ' from #temp
select SUBSTRING(#result, 0, LEN(#result))
I just tried following script working perfectly :
declare #List VARCHAR(MAX)='test1,test2,test1|test2,test3|test4,test4|test4'
declare #Delim CHAR=','
DECLARE #ParsedList TABLE
(
Item VARCHAR(MAX)
)
DECLARE #list1 VARCHAR(MAX), #Pos INT, #rList VARCHAR(MAX)
set #List=Replace(#List,'|',',')
SET #list = LTRIM(RTRIM(#list)) + #Delim
SET #pos = CHARINDEX(#delim, #list, 1)
WHILE #pos > 0
BEGIN
SET #list1 = LTRIM(RTRIM(LEFT(#list, #pos - 1)))
IF #list1 <> ''
INSERT INTO #ParsedList VALUES (CAST(#list1 AS VARCHAR(MAX)))
SET #list = SUBSTRING(#list, #pos+1, LEN(#list))
SET #pos = CHARINDEX(#delim, #list, 1)
END
SELECT #rlist = COALESCE(#rlist+',','') + item
FROM (SELECT DISTINCT Item FROM #ParsedList) t
Select #rlist

Remove some characters from string sql [duplicate]

I've got dirty data in a column with variable alpha length. I just want to strip out anything that is not 0-9.
I do not want to run a function or proc. I have a script that is similar that just grabs the numeric value after text, it looks like this:
Update TableName
set ColumntoUpdate=cast(replace(Columnofdirtydata,'Alpha #','') as int)
where Columnofdirtydata like 'Alpha #%'
And ColumntoUpdate is Null
I thought it would work pretty good until I found that some of the data fields I thought would just be in the format Alpha # 12345789 are not.
Examples of data that needs to be stripped
AB ABCDE # 123
ABCDE# 123
AB: ABC# 123
I just want the 123. It is true that all data fields do have the # prior to the number.
I tried substring and PatIndex, but I'm not quite getting the syntax correct or something. Anyone have any advice on the best way to address this?
See this blog post on extracting numbers from strings in SQL Server. Below is a sample using a string in your example:
DECLARE #textval NVARCHAR(30)
SET #textval = 'AB ABCDE # 123'
SELECT LEFT(SUBSTRING(#textval, PATINDEX('%[0-9.-]%', #textval), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(#textval, PATINDEX('%[0-9.-]%', #textval), 8000) + 'X') -1)
Here is an elegant solution if your server supports the TRANSLATE function (on sql server it's available on sql server 2017+ and also sql azure).
First, it replaces any non numeric characters with a # character.
Then, it removes all # characters.
You may need to add additional characters that you know may be present in the second parameter of the TRANSLATE call.
select REPLACE(TRANSLATE([Col], 'abcdefghijklmnopqrstuvwxyz+()- ,#+', '##################################'), '#', '')
You can use stuff and patindex.
stuff(Col, 1, patindex('%[0-9]%', Col)-1, '')
SQL Fiddle
This works well for me:
CREATE FUNCTION [dbo].[StripNonNumerics]
(
#Temp varchar(255)
)
RETURNS varchar(255)
AS
Begin
Declare #KeepValues as varchar(50)
Set #KeepValues = '%[^0-9]%'
While PatIndex(#KeepValues, #Temp) > 0
Set #Temp = Stuff(#Temp, PatIndex(#KeepValues, #Temp), 1, '')
Return #Temp
End
Then call the function like so to see the original something next to the sanitized something:
SELECT Something, dbo.StripNonNumerics(Something) FROM TableA
In case if there are some characters possible between digits (e.g. thousands separators), you may try following:
declare #table table (DirtyCol varchar(100))
insert into #table values
('AB ABCDE # 123')
,('ABCDE# 123')
,('AB: ABC# 123')
,('AB#')
,('AB # 1 000 000')
,('AB # 1`234`567')
,('AB # (9)(876)(543)')
;with tally as (select top (100) N=row_number() over (order by ##spid) from sys.all_columns),
data as (
select DirtyCol, Col
from #table
cross apply (
select (select C + ''
from (select N, substring(DirtyCol, N, 1) C from tally where N<=datalength(DirtyCol)) [1]
where C between '0' and '9'
order by N
for xml path(''))
) p (Col)
where p.Col is not NULL
)
select DirtyCol, cast(Col as int) IntCol
from data
Output is:
DirtyCol IntCol
--------------------- -------
AB ABCDE # 123 123
ABCDE# 123 123
AB: ABC# 123 123
AB # 1 000 000 1000000
AB # 1`234`567 1234567
AB # (9)(876)(543) 9876543
For update, add ColToUpdate to select list of the data cte:
;with num as (...),
data as (
select ColToUpdate, /*DirtyCol, */Col
from ...
)
update data
set ColToUpdate = cast(Col as int)
CREATE FUNCTION FN_RemoveNonNumeric (#Input NVARCHAR(512))
RETURNS NVARCHAR(512)
AS
BEGIN
DECLARE #Trimmed NVARCHAR(512)
SELECT #Trimmed = #Input
WHILE PATINDEX('%[^0-9]%', #Trimmed) > 0
SELECT #Trimmed = REPLACE(#Trimmed, SUBSTRING(#Trimmed, PATINDEX('%[^0-9]%', #Trimmed), 1), '')
RETURN #Trimmed
END
GO
SELECT dbo.FN_RemoveNonNumeric('ABCDE# 123')
Pretty late to the party, I found the following which I though worked brilliantialy.. if anyone is still looking
SELECT
(SELECT CAST(CAST((
SELECT SUBSTRING(FieldToStrip, Number, 1)
FROM master..spt_values
WHERE Type='p' AND Number <= LEN(FieldToStrip) AND
SUBSTRING(FieldToStrip, Number, 1) LIKE '[0-9]' FOR XML Path(''))
AS xml) AS varchar(MAX)))
FROM
SourceTable
Here's a version which pulls all digits from a string; i.e. given I'm 35 years old; I was born in 1982. The average family has 2.4 children. this would return 35198224. i.e. it's good where you've got numeric data which may have been formatted as a code (e.g. #123,456,789 / 123-00005), but isn't appropriate if you're looking to pull out specific numbers (i.e. as opposed to digits / just the numeric characters) from the text. Also it only handles digits; so won't return negative signs (-) or periods .).
declare #table table (id bigint not null identity (1,1), data nvarchar(max))
insert #table (data)
values ('hello 123 its 45613 then') --outputs: 12345613
,('1 some other string 98 example 4') --outputs: 1984
,('AB ABCDE # 123') --outputs: 123
,('ABCDE# 123') --outputs: 123
,('AB: ABC# 123') --outputs: 123
; with NonNumerics as (
select id
, data original
--the below line replaces all digits with blanks
, replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(data,'0',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9','') nonNumeric
from #table
)
--each iteration of the below CTE removes another non-numeric character from the original string, putting the result into the numerics column
, Numerics as (
select id
, replace(original, substring(nonNumeric,1,1), '') numerics
, replace(nonNumeric, substring(nonNumeric,1,1), '') charsToreplace
, len(replace(nonNumeric, substring(nonNumeric,1,1), '')) charsRemaining
from NonNumerics
union all
select id
, replace(numerics, substring(charsToreplace,1,1), '') numerics
, replace(charsToreplace, substring(charsToreplace,1,1), '') charsToreplace
, len(replace(charsToreplace, substring(charsToreplace,1,1), '')) charsRemaining
from Numerics
where charsRemaining > 0
)
--we select only those strings with `charsRemaining=0`; i.e. the rows for which all non-numeric characters have been removed; there should be 1 row returned for every 1 row in the original data set.
select * from Numerics where charsRemaining = 0
This code works by removing all the digits (i.e. the characters we want) from a the given strings by replacing them with blanks. Then it goes through the original string (which includes the digits) removing all of the characters that were left (i.e. the non-numeric characters), thus leaving only the digits.
The reason we do this in 2 steps, rather than just removing all non-numeric characters in the first place is there are only 10 digits, whilst there are a huge number of possible characters; so replacing that small list is relatively fast; then gives us a list of those non-numeric characters which actually exist in the string, so we can then replace that small set.
The method makes use of recursive SQL, using common table expressions (CTEs).
To add on to Ken's answer, this handles commas and spaces and parentheses
--Handles parentheses, commas, spaces, hyphens..
declare #table table (c varchar(256))
insert into #table
values
('This is a test 111-222-3344'),
('Some Sample Text (111)-222-3344'),
('Hello there 111222 3344 / How are you?'),
('Hello there 111 222 3344 ? How are you?'),
('Hello there 111 222 3344. How are you?')
select
replace(LEFT(SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000) + 'X') -1),'.','')
from #table
Create function fn_GetNumbersOnly(#pn varchar(100))
Returns varchar(max)
AS
BEGIN
Declare #r varchar(max) ='', #len int ,#c char(1), #x int = 0
Select #len = len(#pn)
while #x <= #len
begin
Select #c = SUBSTRING(#pn,#x,1)
if ISNUMERIC(#c) = 1 and #c <> '-'
Select #r = #r + #c
Select #x = #x +1
end
return #r
End
In your case It seems like the # will always be after teh # symbol so using CHARINDEX() with LTRIM() and RTRIM() would probably perform the best. But here is an interesting method of getting rid of ANY non digit. It utilizes a tally table and table of digits to limit which characters are accepted then XML technique to concatenate back to a single string without the non-numeric characters. The neat thing about this technique is it could be expanded to included ANY Allowed characters and strip out anything that is not allowed.
DECLARE #ExampleData AS TABLE (Col VARCHAR(100))
INSERT INTO #ExampleData (Col) VALUES ('AB ABCDE # 123'),('ABCDE# 123'),('AB: ABC# 123')
DECLARE #Digits AS TABLE (D CHAR(1))
INSERT INTO #Digits (D) VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9')
;WITH cteTally AS (
SELECT
I = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM
#Digits d10
CROSS APPLY #Digits d100
--add more cross applies to cover longer fields this handles 100
)
SELECT *
FROM
#ExampleData e
OUTER APPLY (
SELECT CleansedPhone = CAST((
SELECT TOP 100
SUBSTRING(e.Col,t.I,1)
FROM
cteTally t
INNER JOIN #Digits d
ON SUBSTRING(e.Col,t.I,1) = d.D
WHERE
I <= LEN(e.Col)
ORDER BY
t.I
FOR XML PATH('')) AS VARCHAR(100))) o
Declare #MainTable table(id int identity(1,1),TextField varchar(100))
INSERT INTO #MainTable (TextField)
VALUES
('6B32E')
declare #i int=1
Declare #originalWord varchar(100)=''
WHile #i<=(Select count(*) from #MainTable)
BEGIN
Select #originalWord=TextField from #MainTable where id=#i
Declare #r varchar(max) ='', #len int ,#c char(1), #x int = 0
Select #len = len(#originalWord)
declare #pn varchar(100)=#originalWord
while #x <= #len
begin
Select #c = SUBSTRING(#pn,#x,1)
if(#c!='')
BEGIN
if ISNUMERIC(#c) = 0 and #c <> '-'
BEGIN
Select #r = cast(#r as varchar) + cast(replace((SELECT ASCII(#c)-64),'-','') as varchar)
end
ELSE
BEGIN
Select #r = #r + #c
END
END
Select #x = #x +1
END
Select #r
Set #i=#i+1
END
I have created a function for this
Create FUNCTION RemoveCharacters (#text varchar(30))
RETURNS VARCHAR(30)
AS
BEGIN
declare #index as int
declare #newtexval as varchar(30)
set #index = (select PATINDEX('%[A-Z.-/?]%', #text))
if (#index =0)
begin
return #text
end
else
begin
set #newtexval = (select STUFF ( #text , #index , 1 , '' ))
return dbo.RemoveCharacters(#newtexval)
end
return 0
END
GO
Here is the answer:
DECLARE #t TABLE (tVal VARCHAR(100))
INSERT INTO #t VALUES('123')
INSERT INTO #t VALUES('123S')
INSERT INTO #t VALUES('A123,123')
INSERT INTO #t VALUES('a123..A123')
;WITH cte (original, tVal, n)
AS
(
SELECT t.tVal AS original,
LOWER(t.tVal) AS tVal,
65 AS n
FROM #t AS t
UNION ALL
SELECT tVal AS original,
CAST(REPLACE(LOWER(tVal), LOWER(CHAR(n)), '') AS VARCHAR(100)),
n + 1
FROM cte
WHERE n <= 90
)
SELECT t1.tVal AS OldVal,
t.tval AS NewVal
FROM (
SELECT original,
tVal,
ROW_NUMBER() OVER(PARTITION BY tVal + original ORDER BY original) AS Sl
FROM cte
WHERE PATINDEX('%[a-z]%', tVal) = 0
) t
INNER JOIN #t t1
ON t.original = t1.tVal
WHERE t.sl = 1
You can create SQL CLR scalar function in order to be able to use regular expressions like replace patterns.
Here you can find example of how to create such function.
Having such function will solve the issue with just the following lines:
SELECT [dbo].[fn_Utils_RegexReplace] ('AB ABCDE # 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('ABCDE# 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('AB: ABC# 123', '[^0-9]', '');
More important, you will be able to solve more complex issues as the regular expressions will bring a whole new world of options directly in your T-SQL statements.
Use this:
REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
Demo:
DROP TABLE IF EXISTS #MyTempTable;
CREATE TABLE #MyTempTable (SomeString VARCHAR(255));
INSERT INTO #MyTempTable
VALUES ('ssss123ssg99d362sdg')
, ('hey 62q&*^(n43')
, (NULL)
, ('')
, ('hi')
, ('123');
SELECT SomeString
, REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
FROM #MyTempTable;
DROP TABLE IF EXISTS #MyTempTable;
Results:
SomeString
(No column name)
ssss123ssg99d362sdg
12399362
hey62q&*^(n43
6243
NULL
NULL
hi
123
123
While the OP wanted to "strip out anything that is not 0-9", the post is also tagged with "substring" and "patindex", and the OP mentioned the concern "not quite getting the syntax correct or something". To address that the requirements note that "all data fields do have the # prior to the number" and to provide an answer that addresses the challenges with substring/patindex, consider the following:
/* A sample select */
;WITH SampleValues AS
( SELECT 'AB ABCDE # 123' [Columnofdirtydata]
UNION ALL SELECT 'AB2: ABC# 123')
SELECT
s.Columnofdirtydata,
f1.pos1,
'['+ f2.substr +']' [InspectOutput]
FROM
SampleValues s
CROSS APPLY (SELECT PATINDEX('%# %',s.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(s.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(s.Columnofdirtydata)) [substr]) f2
/* Using update scenario from OP */
UPDATE t1
SET t1.Columntoupdate = CAST(f2.substr AS INT)
FROM
TableName t1
CROSS APPLY (SELECT PATINDEX('%# %',t1.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(t1.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(t1.Columnofdirtydata)) [substr]) f2
Note that my syntax advice for patindex/substring, is to:
consider using APPLY as a way to temporarily alias results from one function for use as parameters in the next. It's not uncommon to (in ETL, for example) need to parse out parameter/position-based substrings in an updatable column of a staging table. If you need to "debug" and potentially fix some parsing logic, this style will help.
consider using LEN('PatternSample') in your substring logic, to account for reusing this pattern or adjusting it when your source data changes (instead of "+ 1"
SUBSTRING() requires a length parameter, but it can be greater than the length of the string. Therefore, if you are getting "the rest of the string" after the pattern, you can just use "The source length"
DECLARE #STR VARCHAR(400)
DECLARE #specialchars VARCHAR(50) = '%[~,#,#,$,%,&,*,(,),!^?:]%'
SET #STR = '1, 45 4,3 68.00-'
WHILE PATINDEX( #specialchars, #STR ) > 0
---Remove special characters using Replace function
SET #STR = Replace(Replace(REPLACE( #STR, SUBSTRING( #STR, PATINDEX( #specialchars, #STR ), 1 ),''),'-',''), ' ','')
SELECT #STR
SELECT REGEXP_REPLACE( col, '[^[:digit:]]', '' ) AS new_col FROM my_table

How to use regular expression to remove number in MS SQL Server Management Studio?

I have a field in a table containing different IDs for different programmes like this:
ProgrammeID
-----------
Prog201604L
Prog201503L
Pro2015N
Pro2014N
Programme2010
Programme2011
Each programme ID has its meaning. The number in the mid of the string indicates the time or month. It is obvious that Prog201604L and Prog201503L indicate the same programme but in different years (so do the rest). What I want to do is to remove the numbers so after removal the programmeID will be like:
ProgrammeID
-----------
ProgL
ProgL
ProN
ProN
Programme
Programme
Then later I can aggregate this programmes together.
I am currently using SSMS 2012 not sure if there is a sql statement like RegEx. I have been searching for a long time but the solution online are mainly about Oracle and MySQL. What I found is PATINDEX() and it seems to support regular expression. Can anybody tell me how to create a pattern that suits my situation and what kind of statement I should use?
Thanks in advance
If the Number part is always 6 characters below can be used.
DECLARE #ProgrammeID VARCHAR(50) = 'Prog201604L'
SELECT STUFF(#ProgrammeID, PATINDEX( '%[0-9]%', #ProgrammeID), 6, '')
If the numbers are not fixed... to extend above
CREATE TABLE #Programme ( ProgrammeID VARCHAR(50) )
INSERT INTO #Programme
VALUES
('Prog201604L')
,('Pro2015N')
,('Programme2010')
,('Prog2016L')
,('Pro2N')
,('Prog')
,('2010')
SELECT ProgrammeID,
ISNULL(
STUFF(ProgrammeID,
PATINDEX( '%[0-9]%', ProgrammeID), -- get number start index
IIF(PATINDEX( '%[0-9][a-z]%',ProgrammeID)= 0, PATINDEX( '%[0-9]',ProgrammeID), PATINDEX( '%[0-9][a-z]%',ProgrammeID)) + 1 -- get the last number index
- PATINDEX( '%[0-9]%', ProgrammeID), -- get the number character length
'')
,ProgrammeID) -- Where there are no numbers in the string you will get Null, replace it with actual string
AS [Without Numbers]
FROM #Programme
this will handle cases with varying numbers and even string without number.
Hope this helps
You can create a function and pass the value of each row to function
as (just run this query)
Create Function [dbo].[RemoveNonAlphaCharacters](#Temp VarChar(1000))
Returns VarChar(1000)
AS
Begin
Declare #KeepValues as varchar(50)
Set #KeepValues = '%[^a-z]%'
While PatIndex(#KeepValues, #Temp) > 0
Set #Temp = Stuff(#Temp, PatIndex(#KeepValues, #Temp), 1, '')
Return #Temp
End
---Call it like this:
Declare #tbl table (ProgrammeID varchar(20))
insert into #tbl values ('ProgL'),('ProgL'),('ProN'),('ProN'),('Programme'),('Programme')
select * from #tbl
Select dbo.RemoveNonAlphaCharacters(ProgrammeID) from #tbl
How to strip all non-alphabetic characters from string in SQL Server?
Remove numbers from string sql server
One clever option is to take the substring of the ProgrammeID column from the left, until hitting the first number, and concatenate that with the reverse of the substring from the right until hitting the first number:
SELECT
SUBSTRING(ProgrammeID,
1,
PATINDEX('%[0-9]%', ProgrammeID) - 1) +
REVERSE(SUBSTRING(REVERSE(ProgrammeID),
1,
PATINDEX('%[0-9]%', REVERSE(ProgrammeID)) - 1))
FROM yourTable
I have created a user-defined function for SQL Server to remove non-numeric characters in a string expression
We can modify it to remove the opposite, numeric characters from the input string as follows
while patindex('%[0-9]%', #str) > 0
set #str = stuff(#str, patindex('%[0-9]%', #str), 1, '')
return #str
I hope it helps
Alan Burstein wrote an iTVF exactly for this. The function is called PatExclude8K. Here is the function definition (some comments removed):
CREATE FUNCTION dbo.PatExclude8K
(
#String VARCHAR(8000),
#Pattern VARCHAR(50)
)
/*******************************************************************************
Purpose:
Given a string (#String) and a pattern (#Pattern) of characters to remove,
remove the patterned characters from the string.
*******************************************************************************/
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH
E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N)),
itally(N) AS
(
SELECT TOP(CONVERT(INT,LEN(#String),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM E1 T1 CROSS JOIN E1 T2 CROSS JOIN E1 T3 CROSS JOIN E1 T4
)
SELECT NewString =
((
SELECT SUBSTRING(#String,N,1)
FROM iTally
WHERE 0 = PATINDEX(#Pattern,SUBSTRING(#String COLLATE Latin1_General_BIN,N,1))
FOR XML PATH(''),TYPE
).value('.[1]','varchar(8000)'));
GO
And here is how you would use it:
SELECT *
FROM #Programme p
CROSS APPLY dbo.PatExclude8K(p.ProgrammeID, '[0-9]');
Using your sample data, here is the result:
ProgrammeID NewString
-------------------- -----------------
Prog201604L ProgL
Prog201503L ProgL
Pro2015N ProN
Pro2014N ProN
Programme2010 Programme
Programme2011 Programme
I created this solution building on a solution to extracting values from a comma separated list inside a string.
It seems to work find and even be a bit more effective than using while - I will be happy for feedback about that assumption, though.
On on table with 461.358 rows it takes 3 minutes and 27 seconds to do this (0.44 ms per row) (I put it into a function).
select count(*)
from Mytable
where dbo.StripNumeric(inputFromUser) is null
Here's the solutions
For stripping away numeric:
declare #input nvarchar(max) = null
select #input = '1a2 3b4' + char(13) + char(10) + '5(678)*90c'
DECLARE #output nvarchar(max) = '';
WITH cte AS
(
SELECT cast(1 as int) as [index]
UNION ALL
SELECT [index]+ 1 as [index]
from cte
where [index] < len(#input)
)
select #output = iif(PATINDEX('%[0-9]%', substring(#input, [index], 1))= 1, #output, #output + substring(#input, [index], 1))
from cte;
select iif(COALESCE( #output, '') = '', null, ltrim(rtrim(#output)))
For stripping away non-numeric:
declare #input nvarchar(max) = null
select #input = '1a2 3b4' + char(13) + char(10) + '5(678)*90c'
DECLARE #output nvarchar(max) = '';
WITH cte AS
(
SELECT cast(1 as int) as [index]
UNION ALL
SELECT [index]+ 1 as [index]
from cte
where [index] < len(#input) --len(substring(#input, index, 1)) >
)
select #output = iif(PATINDEX('%[0-9]%', substring(#input, [index], 1))= 1, #output + substring(#input, [index], 1), #output)
from cte;
select iif(COALESCE( #output, '') = '', null, ltrim(rtrim(#output)))

SQL Server: UPDATE a table by using ORDER BY

I would like to know if there is a way to use an order by clause when updating a table. I am updating a table and setting a consecutive number, that's why the order of the update is important. Using the following sql statement, I was able to solve it without using a cursor:
DECLARE #Number INT = 0
UPDATE Test
SET #Number = Number = #Number +1
now what I'd like to to do is an order by clause like so:
DECLARE #Number INT = 0
UPDATE Test
SET #Number = Number = #Number +1
ORDER BY Test.Id DESC
I've read: How to update and order by using ms sql The solutions to this question do not solve the ordering problem - they just filter the items on which the update is applied.
Take care,
Martin
No.
Not a documented 100% supported way. There is an approach sometimes used for calculating running totals called "quirky update" that suggests that it might update in order of clustered index if certain conditions are met but as far as I know this relies completely on empirical observation rather than any guarantee.
But what version of SQL Server are you on? If SQL2005+ you might be able to do something with row_number and a CTE (You can update the CTE)
With cte As
(
SELECT id,Number,
ROW_NUMBER() OVER (ORDER BY id DESC) AS RN
FROM Test
)
UPDATE cte SET Number=RN
You can not use ORDER BY as part of the UPDATE statement (you can use in sub-selects that are part of the update).
UPDATE Test
SET Number = rowNumber
FROM Test
INNER JOIN
(SELECT ID, row_number() OVER (ORDER BY ID DESC) as rowNumber
FROM Test) drRowNumbers ON drRowNumbers.ID = Test.ID
Edit
Following solution could have problems with clustered indexes involved as mentioned here. Thanks to Martin for pointing this out.
The answer is kept to educate those (like me) who don't know all side-effects or ins and outs of SQL Server.
Expanding on the answer gaven by Quassnoi in your link, following works
DECLARE #Test TABLE (Number INTEGER, AText VARCHAR(2), ID INTEGER)
DECLARE #Number INT
INSERT INTO #Test VALUES (1, 'A', 1)
INSERT INTO #Test VALUES (2, 'B', 2)
INSERT INTO #Test VALUES (1, 'E', 5)
INSERT INTO #Test VALUES (3, 'C', 3)
INSERT INTO #Test VALUES (2, 'D', 4)
SET #Number = 0
;WITH q AS (
SELECT TOP 1000000 *
FROM #Test
ORDER BY
ID
)
UPDATE q
SET #Number = Number = #Number + 1
The row_number() function would be the best approach to this problem.
UPDATE T
SET T.Number = R.rowNum
FROM Test T
JOIN (
SELECT T2.id,row_number() over (order by T2.Id desc) rowNum from Test T2
) R on T.id=R.id
update based on Ordering by the order of values in a SQL IN() clause
Solution:
DECLARE #counter int
SET #counter = 0
;WITH q AS
(
select * from Products WHERE ID in (SELECT TOP (10) ID FROM Products WHERE ID IN( 3,2,1)
ORDER BY ID DESC)
)
update q set Display= #counter, #counter = #counter + 1
This updates based on descending 3,2,1
Hope helps someone.
I had a similar problem and solved it using ROW_NUMBER() in combination with the OVER keyword. The task was to retrospectively populate a new TicketNo (integer) field in a simple table based on the original CreatedDate, and grouped by ModuleId - so that ticket numbers started at 1 within each Module group and incremented by date. The table already had a TicketID primary key (a GUID).
Here's the SQL:
UPDATE Tickets SET TicketNo=T2.RowNo
FROM Tickets
INNER JOIN
(select TicketID, TicketNo,
ROW_NUMBER() OVER (PARTITION BY ModuleId ORDER BY DateCreated) AS RowNo from Tickets)
AS T2 ON T2.TicketID = Tickets.TicketID
Worked a treat!
I ran into the same problem and was able to resolve it in very powerful way that allows unlimited sorting possibilities.
I created a View using (saving) 2 sort orders (*explanation on how to do so below).
After that I simply applied the update queries to the View created and it worked great.
Here are the 2 queries I used on the view:
1st Query:
Update MyView
Set SortID=0
2nd Query:
DECLARE #sortID int
SET #sortID = 0
UPDATE MyView
SET #sortID = sortID = #sortID + 1
*To be able to save the sorting on the View I put TOP into the SELECT statement. This very useful workaround allows the View results to be returned sorted as set when the View was created when the View is opened. In my case it looked like:
(NOTE: Using this workaround will place an big load on the server if using a large table and it is therefore recommended to include as few fields as possible in the view if working with large tables)
SELECT TOP (600000)
dbo.Items.ID, dbo.Items.Code, dbo.Items.SortID, dbo.Supplier.Date,
dbo.Supplier.Code AS Expr1
FROM dbo.Items INNER JOIN
dbo.Supplier ON dbo.Items.SupplierCode = dbo.Supplier.Code
ORDER BY dbo.Supplier.Date, dbo.Items.ID DESC
Running: SQL Server 2005 on a Windows Server 2003
Additional Keywords: How to Update a SQL column with Ascending or Descending Numbers - Numeric Values / how to set order in SQL update statement / how to save order by in sql view / increment sql update / auto autoincrement sql update / create sql field with ascending numbers
SET #pos := 0;
UPDATE TABLE_NAME SET Roll_No = ( SELECT #pos := #pos + 1 ) ORDER BY First_Name ASC;
In the above example query simply update the student Roll_No column depending on the student Frist_Name column. From 1 to No_of_records in the table. I hope it's clear now.
IF OBJECT_ID('tempdb..#TAB') IS NOT NULL
BEGIN
DROP TABLE #TAB
END
CREATE TABLE #TAB(CH1 INT,CH2 INT,CH3 INT)
DECLARE #CH2 INT = NULL , #CH3 INT=NULL,#SPID INT=NULL,#SQL NVARCHAR(4000)='', #ParmDefinition NVARCHAR(50)= '',
#RET_MESSAGE AS VARCHAR(8000)='',#RET_ERROR INT=0
SET #ParmDefinition='#SPID INT,#CH2 INT OUTPUT,#CH3 INT OUTPUT'
SET #SQL='UPDATE T
SET CH1=#SPID,#CH2= T.CH2,#CH3= T.CH3
FROM #TAB T WITH(ROWLOCK)
INNER JOIN (
SELECT TOP(1) CH1,CH2,CH3
FROM
#TAB WITH(NOLOCK)
WHERE CH1 IS NULL
ORDER BY CH2 DESC) V ON T.CH2= V.CH2 AND T.CH3= V.CH3'
INSERT INTO #TAB
(CH2 ,CH3 )
SELECT 1,2 UNION ALL
SELECT 2,3 UNION ALL
SELECT 3,4
BEGIN TRY
WHILE EXISTS(SELECT TOP 1 1 FROM #TAB WHERE CH1 IS NULL)
BEGIN
EXECUTE #RET_ERROR = sp_executesql #SQL, #ParmDefinition,#SPID =##SPID, #CH2=#CH2 OUTPUT,#CH3=#CH3 OUTPUT;
SELECT * FROM #TAB
SELECT #CH2,#CH3
END
END TRY
BEGIN CATCH
SET #RET_ERROR=ERROR_NUMBER()
SET #RET_MESSAGE = '#ERROR_NUMBER : ' + CAST(ERROR_NUMBER() AS VARCHAR(255)) + '#ERROR_SEVERITY :' + CAST( ERROR_SEVERITY() AS VARCHAR(255))
+ '#ERROR_STATE :' + CAST(ERROR_STATE() AS VARCHAR(255)) + '#ERROR_LINE :' + CAST( ERROR_LINE() AS VARCHAR(255))
+ '#ERROR_MESSAGE :' + ERROR_MESSAGE() ;
SELECT #RET_ERROR,#RET_MESSAGE;
END CATCH

Resources