Remove all Non-Numeric Chars from String - sql-server

I would like to truncate all characters in a column, no matter where they are.
Example:
"+49123/4567890(testnumber)"
Should be changed to
"491234567890"
Is there a way without doing a replace for each char?
I have tried to replace it with several, but it is very time-consuming.

As you mentioned, if you are expecting only [a-zA-z()/+], you can use the translate function which is available from 2017+
declare #table TABLE (str varchar(max))
insert into #table
select '+49123/4567890(estnumber)'
select replace(translate(str, '/+()abcdefghijklmnopqrstuvwxyz', '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'), '~', '') digits
from #table
For more complex scenarios where the characters are not known, you can try using recursive CTE on a string column to extract only digits like following query.
;with cte
as (
select v.txt originalstring
,v.txt
,convert(varchar(max), '') as digits
,1 as lev
from (
values ('+49123/4567890(testnumber)')
,('+234&*#$%!##')
) v(txt)
union all
select originalstring
,stuff(txt, 1, 1, '')
,(
case
when left(txt, 1) LIKE '[0-9]'
then digits + left(txt, 1)
else digits
end
)
,lev + 1
from cte
where txt > ''
)
select originalstring
,digits
from (
select c.originalstring
,c.digits
,row_number() over (partition by c.originalstring order by lev desc
) rn
from cte c
) t
where rn = 1
Output
originalstring digits
--------------- --------
+234&*#$%!## 234
+49123/4567890(testnumber) 491234567890

A set-based option that exists in SQL Server 2017+ is to utilise translate.
You can hopefully adapt the following to your specific use-case:
select col, Replace(Translate(col, r, Replicate('*', Len(r))), '*', '') Newcol
from t
cross apply(values(' ABCDEFGHIJKLMNOPQRSTUVWXYZ/\+()'))r(r);
Example DB<>Fiddle

Instead of hardcoding the list of "bad" characters you can use a double TRANSLATE to first get the unwanted characters and then plug that back into TRANSLATE.
DECLARE #table TABLE
(
str VARCHAR(max)
)
INSERT INTO #table
SELECT '+49123/4567890(testnumber) '
DECLARE #CharactersToKeep VARCHAR(30) = '0123456789'
SELECT REPLACE(TRANSLATE(str, bad_chars, REPLICATE('X', LEN(bad_chars + 'X') - 1)), 'X', '')
FROM #table
CROSS APPLY (SELECT REPLACE(TRANSLATE(str, #CharactersToKeep, REPLICATE(LEFT(#CharactersToKeep, 1), LEN(#CharactersToKeep))), LEFT(#CharactersToKeep, 1), '')) ca(bad_chars)

Related

How to get "," instead of "and" in the rows in SQL Server

I have a table Test with 1 column
Module_name
Table
Computer
Laptop
Chair
My expected output:
Table,Computer,Laptop and Chair
My Query:
declare #module_name varchar(50)
SELECT #Module_Name = COALESCE(#Module_Name + ' and ', '') + module_name FROM
(SELECT DISTINCT module_name FROM Test) T
select #module_name
I am getting the output as:
Table and Computer and Laptop and Chair
My concern is how to get the "," instead of "and".
Have you tried xml method with stuff() function ?
declare #Module_names varchar(max)
set #Module_names = stuff((select distinct ',' +Module_name
from table t
for xml path('')),1,1, '')
select REVERSE(STUFF(REVERSE(#Module_names),
CHARINDEX(',', REVERSE(#Module_names)), 1,' dna ')) as Module_names
I don't endorse this solution, like I said in the comments, "grammarisation" should be done in your presentation layer.. You can, however, achieve this in SQL like so:
Edit: Slight update to cater for a single value return.
CREATE TABLE #Sample (Module varchar(10));
INSERT INTO #Sample
VALUES ('Table'),
('Computer'),
('Laptop'),
('Chair');
GO
WITH RNs AS (
SELECT Module,
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS RN --SELECT NULL as there is no ID field to work with here, thus the order will be random
FROM #Sample)
SELECT STUFF((SELECT CASE WHEN RN = MAX(RN) OVER () AND RN != 1 THEN ' and ' ELSE ', ' END + Module
FROM RNs
ORDER BY RN
FOR XML PATH('')),1,2,'');
GO
DROP TABLE #Sample;
Use the following. First gather all records together with comma, then replace just the last one with "and". Will have to make sure that your column values don't contain comma or it will be misplaced with an "and" if on last occurence.
DECLARE #result VARCHAR(MAX) = STUFF(
(
SELECT DISTINCT
', ' + T.module_name
FROM
Test AS T
FOR XML
PATH('')
),
1, 2, '')
SET #result =
REVERSE(
STUFF( -- Replace
REVERSE(#result), -- ... in the reversed string
CHARINDEX(',', REVERSE(#result)), -- ... at the first position of the comma (the last one on the original string)
1, -- just 1 character (the comma)
'dna ') -- for the reversed " and"
)
SELECT #result
Used Row_number to capture last row,
CREATE TABLE test
([Module_name] varchar(8))
;
INSERT INTO test
([Module_name])
VALUES
('Table'),
('Computer'),
('Laptop'),
('Chair')
;
SELECT STUFF((SELECT CASE WHEN RN = MAX(RN) OVER () THEN ' and ' ELSE ', ' END + Module_name
from
(
SELECT Module_name,
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS RN
FROM test
) rns
ORDER BY RN
FOR XML PATH('')),1,2,'');

Replacing Part of a String without Replace/Stuff

This is my string: 11.0000.0101.000.000.0101.000.000
I need to replace ONLY the first "0101" with "101." I can't use replace, as it replaces the 2nd instance of 0101 as well.
I tried
stuff(string, 9, 3, '101')
but since the replacement string is shorter than the existing string, I end up with
11.0000.1011.000.000.0101.000.000
What can I use besides REPLACE or STUFF? Thanks!
declare #t table(s varchar(100))
insert into #t values
( '11.0000.0101.000.000.0101.000.000'), ('abc');
select case charindex('0101', s)
when 0 then s
else stuff(s, charindex('0101', s), 4, '101')
end as new_s
from #t;
Your expression was almost right.
Just tell it to replace 4 characters of the original string instead of 3 :
stuff(string, 9, 4, '101')
But this will only work if your string has always the same positions.
you can do something like:
replace (stuff(string,9,3,'#101'),'#','')
If you need replace all 0101 to 101 go with below code
DECLARE #TempData TABLE(Data VARCHAR(1000));
INSERT INTO #TempData VALUES
('11.0000.0101.000.000.0101.000.000');
;WITH Cte
AS (
SELECT CASE
WHEN DATA = '0101'
THEN '101'
ELSE CAST(DATA AS VARCHAR(10))
END AS DATA
FROM (
SELECT Split.a.value('.', 'VARCHAR(1000)') AS Data
FROM (
SELECT CAST('<S>' + REPLACE(Data, '.', '</S><S>') + '</S>' AS XML) AS Data
FROM #TempData
) AS A
CROSS APPLY Data.nodes('/S') AS Split(a)
) Dt
)
SELECT STUFF((
SELECT '.' + DATA
FROM cte
FOR XML PATH('')
), 1, 1, '') AS ExpectedResult
OutPut
ExpectedResult
11.0000.101.000.000.101.000.000
Yet another option if you don't know the 1st observation
Declare #S varchar(50) = '11.0000.0101.000.000.0101.000.000'
Declare #Find varchar(25) = '0101'
Declare #Repl varchar(25) = '101'
Select isnull(stuff(#S, charindex(#Find,#S), len(#Find), #Repl),#S)
Returns
11.0000.101.000.000.0101.000.000

Remove some characters from string sql [duplicate]

I've got dirty data in a column with variable alpha length. I just want to strip out anything that is not 0-9.
I do not want to run a function or proc. I have a script that is similar that just grabs the numeric value after text, it looks like this:
Update TableName
set ColumntoUpdate=cast(replace(Columnofdirtydata,'Alpha #','') as int)
where Columnofdirtydata like 'Alpha #%'
And ColumntoUpdate is Null
I thought it would work pretty good until I found that some of the data fields I thought would just be in the format Alpha # 12345789 are not.
Examples of data that needs to be stripped
AB ABCDE # 123
ABCDE# 123
AB: ABC# 123
I just want the 123. It is true that all data fields do have the # prior to the number.
I tried substring and PatIndex, but I'm not quite getting the syntax correct or something. Anyone have any advice on the best way to address this?
See this blog post on extracting numbers from strings in SQL Server. Below is a sample using a string in your example:
DECLARE #textval NVARCHAR(30)
SET #textval = 'AB ABCDE # 123'
SELECT LEFT(SUBSTRING(#textval, PATINDEX('%[0-9.-]%', #textval), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(#textval, PATINDEX('%[0-9.-]%', #textval), 8000) + 'X') -1)
Here is an elegant solution if your server supports the TRANSLATE function (on sql server it's available on sql server 2017+ and also sql azure).
First, it replaces any non numeric characters with a # character.
Then, it removes all # characters.
You may need to add additional characters that you know may be present in the second parameter of the TRANSLATE call.
select REPLACE(TRANSLATE([Col], 'abcdefghijklmnopqrstuvwxyz+()- ,#+', '##################################'), '#', '')
You can use stuff and patindex.
stuff(Col, 1, patindex('%[0-9]%', Col)-1, '')
SQL Fiddle
This works well for me:
CREATE FUNCTION [dbo].[StripNonNumerics]
(
#Temp varchar(255)
)
RETURNS varchar(255)
AS
Begin
Declare #KeepValues as varchar(50)
Set #KeepValues = '%[^0-9]%'
While PatIndex(#KeepValues, #Temp) > 0
Set #Temp = Stuff(#Temp, PatIndex(#KeepValues, #Temp), 1, '')
Return #Temp
End
Then call the function like so to see the original something next to the sanitized something:
SELECT Something, dbo.StripNonNumerics(Something) FROM TableA
In case if there are some characters possible between digits (e.g. thousands separators), you may try following:
declare #table table (DirtyCol varchar(100))
insert into #table values
('AB ABCDE # 123')
,('ABCDE# 123')
,('AB: ABC# 123')
,('AB#')
,('AB # 1 000 000')
,('AB # 1`234`567')
,('AB # (9)(876)(543)')
;with tally as (select top (100) N=row_number() over (order by ##spid) from sys.all_columns),
data as (
select DirtyCol, Col
from #table
cross apply (
select (select C + ''
from (select N, substring(DirtyCol, N, 1) C from tally where N<=datalength(DirtyCol)) [1]
where C between '0' and '9'
order by N
for xml path(''))
) p (Col)
where p.Col is not NULL
)
select DirtyCol, cast(Col as int) IntCol
from data
Output is:
DirtyCol IntCol
--------------------- -------
AB ABCDE # 123 123
ABCDE# 123 123
AB: ABC# 123 123
AB # 1 000 000 1000000
AB # 1`234`567 1234567
AB # (9)(876)(543) 9876543
For update, add ColToUpdate to select list of the data cte:
;with num as (...),
data as (
select ColToUpdate, /*DirtyCol, */Col
from ...
)
update data
set ColToUpdate = cast(Col as int)
CREATE FUNCTION FN_RemoveNonNumeric (#Input NVARCHAR(512))
RETURNS NVARCHAR(512)
AS
BEGIN
DECLARE #Trimmed NVARCHAR(512)
SELECT #Trimmed = #Input
WHILE PATINDEX('%[^0-9]%', #Trimmed) > 0
SELECT #Trimmed = REPLACE(#Trimmed, SUBSTRING(#Trimmed, PATINDEX('%[^0-9]%', #Trimmed), 1), '')
RETURN #Trimmed
END
GO
SELECT dbo.FN_RemoveNonNumeric('ABCDE# 123')
Pretty late to the party, I found the following which I though worked brilliantialy.. if anyone is still looking
SELECT
(SELECT CAST(CAST((
SELECT SUBSTRING(FieldToStrip, Number, 1)
FROM master..spt_values
WHERE Type='p' AND Number <= LEN(FieldToStrip) AND
SUBSTRING(FieldToStrip, Number, 1) LIKE '[0-9]' FOR XML Path(''))
AS xml) AS varchar(MAX)))
FROM
SourceTable
Here's a version which pulls all digits from a string; i.e. given I'm 35 years old; I was born in 1982. The average family has 2.4 children. this would return 35198224. i.e. it's good where you've got numeric data which may have been formatted as a code (e.g. #123,456,789 / 123-00005), but isn't appropriate if you're looking to pull out specific numbers (i.e. as opposed to digits / just the numeric characters) from the text. Also it only handles digits; so won't return negative signs (-) or periods .).
declare #table table (id bigint not null identity (1,1), data nvarchar(max))
insert #table (data)
values ('hello 123 its 45613 then') --outputs: 12345613
,('1 some other string 98 example 4') --outputs: 1984
,('AB ABCDE # 123') --outputs: 123
,('ABCDE# 123') --outputs: 123
,('AB: ABC# 123') --outputs: 123
; with NonNumerics as (
select id
, data original
--the below line replaces all digits with blanks
, replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(data,'0',''),'1',''),'2',''),'3',''),'4',''),'5',''),'6',''),'7',''),'8',''),'9','') nonNumeric
from #table
)
--each iteration of the below CTE removes another non-numeric character from the original string, putting the result into the numerics column
, Numerics as (
select id
, replace(original, substring(nonNumeric,1,1), '') numerics
, replace(nonNumeric, substring(nonNumeric,1,1), '') charsToreplace
, len(replace(nonNumeric, substring(nonNumeric,1,1), '')) charsRemaining
from NonNumerics
union all
select id
, replace(numerics, substring(charsToreplace,1,1), '') numerics
, replace(charsToreplace, substring(charsToreplace,1,1), '') charsToreplace
, len(replace(charsToreplace, substring(charsToreplace,1,1), '')) charsRemaining
from Numerics
where charsRemaining > 0
)
--we select only those strings with `charsRemaining=0`; i.e. the rows for which all non-numeric characters have been removed; there should be 1 row returned for every 1 row in the original data set.
select * from Numerics where charsRemaining = 0
This code works by removing all the digits (i.e. the characters we want) from a the given strings by replacing them with blanks. Then it goes through the original string (which includes the digits) removing all of the characters that were left (i.e. the non-numeric characters), thus leaving only the digits.
The reason we do this in 2 steps, rather than just removing all non-numeric characters in the first place is there are only 10 digits, whilst there are a huge number of possible characters; so replacing that small list is relatively fast; then gives us a list of those non-numeric characters which actually exist in the string, so we can then replace that small set.
The method makes use of recursive SQL, using common table expressions (CTEs).
To add on to Ken's answer, this handles commas and spaces and parentheses
--Handles parentheses, commas, spaces, hyphens..
declare #table table (c varchar(256))
insert into #table
values
('This is a test 111-222-3344'),
('Some Sample Text (111)-222-3344'),
('Hello there 111222 3344 / How are you?'),
('Hello there 111 222 3344 ? How are you?'),
('Hello there 111 222 3344. How are you?')
select
replace(LEFT(SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000),
PATINDEX('%[^0-9.-]%', SUBSTRING(replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',',''), PATINDEX('%[0-9.-]%', replace(replace(replace(replace(replace(c,'(',''),')',''),'-',''),' ',''),',','')), 8000) + 'X') -1),'.','')
from #table
Create function fn_GetNumbersOnly(#pn varchar(100))
Returns varchar(max)
AS
BEGIN
Declare #r varchar(max) ='', #len int ,#c char(1), #x int = 0
Select #len = len(#pn)
while #x <= #len
begin
Select #c = SUBSTRING(#pn,#x,1)
if ISNUMERIC(#c) = 1 and #c <> '-'
Select #r = #r + #c
Select #x = #x +1
end
return #r
End
In your case It seems like the # will always be after teh # symbol so using CHARINDEX() with LTRIM() and RTRIM() would probably perform the best. But here is an interesting method of getting rid of ANY non digit. It utilizes a tally table and table of digits to limit which characters are accepted then XML technique to concatenate back to a single string without the non-numeric characters. The neat thing about this technique is it could be expanded to included ANY Allowed characters and strip out anything that is not allowed.
DECLARE #ExampleData AS TABLE (Col VARCHAR(100))
INSERT INTO #ExampleData (Col) VALUES ('AB ABCDE # 123'),('ABCDE# 123'),('AB: ABC# 123')
DECLARE #Digits AS TABLE (D CHAR(1))
INSERT INTO #Digits (D) VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7'),('8'),('9')
;WITH cteTally AS (
SELECT
I = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM
#Digits d10
CROSS APPLY #Digits d100
--add more cross applies to cover longer fields this handles 100
)
SELECT *
FROM
#ExampleData e
OUTER APPLY (
SELECT CleansedPhone = CAST((
SELECT TOP 100
SUBSTRING(e.Col,t.I,1)
FROM
cteTally t
INNER JOIN #Digits d
ON SUBSTRING(e.Col,t.I,1) = d.D
WHERE
I <= LEN(e.Col)
ORDER BY
t.I
FOR XML PATH('')) AS VARCHAR(100))) o
Declare #MainTable table(id int identity(1,1),TextField varchar(100))
INSERT INTO #MainTable (TextField)
VALUES
('6B32E')
declare #i int=1
Declare #originalWord varchar(100)=''
WHile #i<=(Select count(*) from #MainTable)
BEGIN
Select #originalWord=TextField from #MainTable where id=#i
Declare #r varchar(max) ='', #len int ,#c char(1), #x int = 0
Select #len = len(#originalWord)
declare #pn varchar(100)=#originalWord
while #x <= #len
begin
Select #c = SUBSTRING(#pn,#x,1)
if(#c!='')
BEGIN
if ISNUMERIC(#c) = 0 and #c <> '-'
BEGIN
Select #r = cast(#r as varchar) + cast(replace((SELECT ASCII(#c)-64),'-','') as varchar)
end
ELSE
BEGIN
Select #r = #r + #c
END
END
Select #x = #x +1
END
Select #r
Set #i=#i+1
END
I have created a function for this
Create FUNCTION RemoveCharacters (#text varchar(30))
RETURNS VARCHAR(30)
AS
BEGIN
declare #index as int
declare #newtexval as varchar(30)
set #index = (select PATINDEX('%[A-Z.-/?]%', #text))
if (#index =0)
begin
return #text
end
else
begin
set #newtexval = (select STUFF ( #text , #index , 1 , '' ))
return dbo.RemoveCharacters(#newtexval)
end
return 0
END
GO
Here is the answer:
DECLARE #t TABLE (tVal VARCHAR(100))
INSERT INTO #t VALUES('123')
INSERT INTO #t VALUES('123S')
INSERT INTO #t VALUES('A123,123')
INSERT INTO #t VALUES('a123..A123')
;WITH cte (original, tVal, n)
AS
(
SELECT t.tVal AS original,
LOWER(t.tVal) AS tVal,
65 AS n
FROM #t AS t
UNION ALL
SELECT tVal AS original,
CAST(REPLACE(LOWER(tVal), LOWER(CHAR(n)), '') AS VARCHAR(100)),
n + 1
FROM cte
WHERE n <= 90
)
SELECT t1.tVal AS OldVal,
t.tval AS NewVal
FROM (
SELECT original,
tVal,
ROW_NUMBER() OVER(PARTITION BY tVal + original ORDER BY original) AS Sl
FROM cte
WHERE PATINDEX('%[a-z]%', tVal) = 0
) t
INNER JOIN #t t1
ON t.original = t1.tVal
WHERE t.sl = 1
You can create SQL CLR scalar function in order to be able to use regular expressions like replace patterns.
Here you can find example of how to create such function.
Having such function will solve the issue with just the following lines:
SELECT [dbo].[fn_Utils_RegexReplace] ('AB ABCDE # 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('ABCDE# 123', '[^0-9]', '');
SELECT [dbo].[fn_Utils_RegexReplace] ('AB: ABC# 123', '[^0-9]', '');
More important, you will be able to solve more complex issues as the regular expressions will bring a whole new world of options directly in your T-SQL statements.
Use this:
REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
Demo:
DROP TABLE IF EXISTS #MyTempTable;
CREATE TABLE #MyTempTable (SomeString VARCHAR(255));
INSERT INTO #MyTempTable
VALUES ('ssss123ssg99d362sdg')
, ('hey 62q&*^(n43')
, (NULL)
, ('')
, ('hi')
, ('123');
SELECT SomeString
, REPLACE(TRANSLATE(SomeString, REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', ''), REPLICATE('#', LEN(REPLACE(TRANSLATE(SomeString, '0123456789', '##########'), '#', '') + 'x') - 1)), '#', '')
FROM #MyTempTable;
DROP TABLE IF EXISTS #MyTempTable;
Results:
SomeString
(No column name)
ssss123ssg99d362sdg
12399362
hey62q&*^(n43
6243
NULL
NULL
hi
123
123
While the OP wanted to "strip out anything that is not 0-9", the post is also tagged with "substring" and "patindex", and the OP mentioned the concern "not quite getting the syntax correct or something". To address that the requirements note that "all data fields do have the # prior to the number" and to provide an answer that addresses the challenges with substring/patindex, consider the following:
/* A sample select */
;WITH SampleValues AS
( SELECT 'AB ABCDE # 123' [Columnofdirtydata]
UNION ALL SELECT 'AB2: ABC# 123')
SELECT
s.Columnofdirtydata,
f1.pos1,
'['+ f2.substr +']' [InspectOutput]
FROM
SampleValues s
CROSS APPLY (SELECT PATINDEX('%# %',s.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(s.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(s.Columnofdirtydata)) [substr]) f2
/* Using update scenario from OP */
UPDATE t1
SET t1.Columntoupdate = CAST(f2.substr AS INT)
FROM
TableName t1
CROSS APPLY (SELECT PATINDEX('%# %',t1.Columnofdirtydata) [pos1]) f1
CROSS APPLY (SELECT SUBSTRING(t1.Columnofdirtydata, f1.pos1 + LEN('#-'),LEN(t1.Columnofdirtydata)) [substr]) f2
Note that my syntax advice for patindex/substring, is to:
consider using APPLY as a way to temporarily alias results from one function for use as parameters in the next. It's not uncommon to (in ETL, for example) need to parse out parameter/position-based substrings in an updatable column of a staging table. If you need to "debug" and potentially fix some parsing logic, this style will help.
consider using LEN('PatternSample') in your substring logic, to account for reusing this pattern or adjusting it when your source data changes (instead of "+ 1"
SUBSTRING() requires a length parameter, but it can be greater than the length of the string. Therefore, if you are getting "the rest of the string" after the pattern, you can just use "The source length"
DECLARE #STR VARCHAR(400)
DECLARE #specialchars VARCHAR(50) = '%[~,#,#,$,%,&,*,(,),!^?:]%'
SET #STR = '1, 45 4,3 68.00-'
WHILE PATINDEX( #specialchars, #STR ) > 0
---Remove special characters using Replace function
SET #STR = Replace(Replace(REPLACE( #STR, SUBSTRING( #STR, PATINDEX( #specialchars, #STR ), 1 ),''),'-',''), ' ','')
SELECT #STR
SELECT REGEXP_REPLACE( col, '[^[:digit:]]', '' ) AS new_col FROM my_table

How to use regular expression to remove number in MS SQL Server Management Studio?

I have a field in a table containing different IDs for different programmes like this:
ProgrammeID
-----------
Prog201604L
Prog201503L
Pro2015N
Pro2014N
Programme2010
Programme2011
Each programme ID has its meaning. The number in the mid of the string indicates the time or month. It is obvious that Prog201604L and Prog201503L indicate the same programme but in different years (so do the rest). What I want to do is to remove the numbers so after removal the programmeID will be like:
ProgrammeID
-----------
ProgL
ProgL
ProN
ProN
Programme
Programme
Then later I can aggregate this programmes together.
I am currently using SSMS 2012 not sure if there is a sql statement like RegEx. I have been searching for a long time but the solution online are mainly about Oracle and MySQL. What I found is PATINDEX() and it seems to support regular expression. Can anybody tell me how to create a pattern that suits my situation and what kind of statement I should use?
Thanks in advance
If the Number part is always 6 characters below can be used.
DECLARE #ProgrammeID VARCHAR(50) = 'Prog201604L'
SELECT STUFF(#ProgrammeID, PATINDEX( '%[0-9]%', #ProgrammeID), 6, '')
If the numbers are not fixed... to extend above
CREATE TABLE #Programme ( ProgrammeID VARCHAR(50) )
INSERT INTO #Programme
VALUES
('Prog201604L')
,('Pro2015N')
,('Programme2010')
,('Prog2016L')
,('Pro2N')
,('Prog')
,('2010')
SELECT ProgrammeID,
ISNULL(
STUFF(ProgrammeID,
PATINDEX( '%[0-9]%', ProgrammeID), -- get number start index
IIF(PATINDEX( '%[0-9][a-z]%',ProgrammeID)= 0, PATINDEX( '%[0-9]',ProgrammeID), PATINDEX( '%[0-9][a-z]%',ProgrammeID)) + 1 -- get the last number index
- PATINDEX( '%[0-9]%', ProgrammeID), -- get the number character length
'')
,ProgrammeID) -- Where there are no numbers in the string you will get Null, replace it with actual string
AS [Without Numbers]
FROM #Programme
this will handle cases with varying numbers and even string without number.
Hope this helps
You can create a function and pass the value of each row to function
as (just run this query)
Create Function [dbo].[RemoveNonAlphaCharacters](#Temp VarChar(1000))
Returns VarChar(1000)
AS
Begin
Declare #KeepValues as varchar(50)
Set #KeepValues = '%[^a-z]%'
While PatIndex(#KeepValues, #Temp) > 0
Set #Temp = Stuff(#Temp, PatIndex(#KeepValues, #Temp), 1, '')
Return #Temp
End
---Call it like this:
Declare #tbl table (ProgrammeID varchar(20))
insert into #tbl values ('ProgL'),('ProgL'),('ProN'),('ProN'),('Programme'),('Programme')
select * from #tbl
Select dbo.RemoveNonAlphaCharacters(ProgrammeID) from #tbl
How to strip all non-alphabetic characters from string in SQL Server?
Remove numbers from string sql server
One clever option is to take the substring of the ProgrammeID column from the left, until hitting the first number, and concatenate that with the reverse of the substring from the right until hitting the first number:
SELECT
SUBSTRING(ProgrammeID,
1,
PATINDEX('%[0-9]%', ProgrammeID) - 1) +
REVERSE(SUBSTRING(REVERSE(ProgrammeID),
1,
PATINDEX('%[0-9]%', REVERSE(ProgrammeID)) - 1))
FROM yourTable
I have created a user-defined function for SQL Server to remove non-numeric characters in a string expression
We can modify it to remove the opposite, numeric characters from the input string as follows
while patindex('%[0-9]%', #str) > 0
set #str = stuff(#str, patindex('%[0-9]%', #str), 1, '')
return #str
I hope it helps
Alan Burstein wrote an iTVF exactly for this. The function is called PatExclude8K. Here is the function definition (some comments removed):
CREATE FUNCTION dbo.PatExclude8K
(
#String VARCHAR(8000),
#Pattern VARCHAR(50)
)
/*******************************************************************************
Purpose:
Given a string (#String) and a pattern (#Pattern) of characters to remove,
remove the patterned characters from the string.
*******************************************************************************/
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH
E1(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N)),
itally(N) AS
(
SELECT TOP(CONVERT(INT,LEN(#String),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM E1 T1 CROSS JOIN E1 T2 CROSS JOIN E1 T3 CROSS JOIN E1 T4
)
SELECT NewString =
((
SELECT SUBSTRING(#String,N,1)
FROM iTally
WHERE 0 = PATINDEX(#Pattern,SUBSTRING(#String COLLATE Latin1_General_BIN,N,1))
FOR XML PATH(''),TYPE
).value('.[1]','varchar(8000)'));
GO
And here is how you would use it:
SELECT *
FROM #Programme p
CROSS APPLY dbo.PatExclude8K(p.ProgrammeID, '[0-9]');
Using your sample data, here is the result:
ProgrammeID NewString
-------------------- -----------------
Prog201604L ProgL
Prog201503L ProgL
Pro2015N ProN
Pro2014N ProN
Programme2010 Programme
Programme2011 Programme
I created this solution building on a solution to extracting values from a comma separated list inside a string.
It seems to work find and even be a bit more effective than using while - I will be happy for feedback about that assumption, though.
On on table with 461.358 rows it takes 3 minutes and 27 seconds to do this (0.44 ms per row) (I put it into a function).
select count(*)
from Mytable
where dbo.StripNumeric(inputFromUser) is null
Here's the solutions
For stripping away numeric:
declare #input nvarchar(max) = null
select #input = '1a2 3b4' + char(13) + char(10) + '5(678)*90c'
DECLARE #output nvarchar(max) = '';
WITH cte AS
(
SELECT cast(1 as int) as [index]
UNION ALL
SELECT [index]+ 1 as [index]
from cte
where [index] < len(#input)
)
select #output = iif(PATINDEX('%[0-9]%', substring(#input, [index], 1))= 1, #output, #output + substring(#input, [index], 1))
from cte;
select iif(COALESCE( #output, '') = '', null, ltrim(rtrim(#output)))
For stripping away non-numeric:
declare #input nvarchar(max) = null
select #input = '1a2 3b4' + char(13) + char(10) + '5(678)*90c'
DECLARE #output nvarchar(max) = '';
WITH cte AS
(
SELECT cast(1 as int) as [index]
UNION ALL
SELECT [index]+ 1 as [index]
from cte
where [index] < len(#input) --len(substring(#input, index, 1)) >
)
select #output = iif(PATINDEX('%[0-9]%', substring(#input, [index], 1))= 1, #output + substring(#input, [index], 1), #output)
from cte;
select iif(COALESCE( #output, '') = '', null, ltrim(rtrim(#output)))

How to parse a string and create several columns from it?

I have a varchar(max) field containing Name Value pairs, in every line I have Name UnderScore Value.
I need to do a query against it so that it returns the Name, Value pairs in two columns (so by parsing the text, removing the underscore and the "new line" char.
So from this
select NameValue from Table
where I get this text:
Name1_Value1
Name2_Value2
Name3_Value3
I would like to have this output
Names Values
===== ======
Name1 Value1
Name2 Value2
Name3 Value3
SELECT substring(NameValue, 1, charindex('_', NameValue)-1) AS Names,
substring(NameValue, charindex('_', NameValue)+1, LEN(NameValue)) AS Values
FROM Table
EDIT:
Something like this put in a function or stored procedure combined with a temp table should work for more than one line, depending on the line delimiter you should also remove CHAR(13) before you start:
DECLARE #helper varchar(512)
DECLARE #current varchar(512)
SET #helper = NAMEVALUE
WHILE CHARINDEX(CHAR(10), #helper) > 0 BEGIN
SET #current = SUBSTRING(#helper, 1, CHARINDEX(CHAR(10), #helper)-1)
SELECT SUBSTRING(#current, 1, CHARINDEX('_', #current)-1) AS Names,
SUBSTRING(#current, CHARINDEX('_', #current)+1, LEN(#current)) AS Names
SET #helper = SUBSTRING(#helper, CHARINDEX(CHAR(10), #helper)+1, LEN(#helper))
END
SELECT SUBSTRING(#helper, 1, CHARINDEX('_', #helper)-1) AS Names,
SUBSTRING(#helper, CHARINDEX('_', #helper)+1, LEN(#helper)) AS Names
DECLARE #TExt NVARCHAR(MAX)= '***[ddd]***
dfdf
fdfdfdfdfdf
***[fff]***
4545445
45454
***[ahaASSDAD]***
DFDFDF
***[SOME TEXT]***
'
DECLARE #Delimiter VARCHAR(1000)= CHAR(13) + CHAR(10) ;
WITH numbers
AS ( SELECT ROW_NUMBER() OVER ( ORDER BY o.object_id, o2.object_id ) Number
FROM sys.objects o
CROSS JOIN sys.objects o2
),
c AS ( SELECT Number CHARBegin ,
ROW_NUMBER() OVER ( ORDER BY number ) RN
FROM numbers
WHERE SUBSTRING(#text, Number, LEN(#Delimiter)) = #Delimiter
),
res
AS ( SELECT CHARBegin ,
CAST(LEFT(#text, charbegin) AS NVARCHAR(MAX)) Res ,
RN
FROM c
WHERE rn = 1
UNION ALL
SELECT c.CHARBegin ,
CAST(SUBSTRING(#text, res.CHARBegin,
c.CHARBegin - res.CHARBegin) AS NVARCHAR(MAX)) ,
c.RN
FROM c
JOIN res ON c.RN = res.RN + 1
)
SELECT *
FROM res
He is an example that you can use:
-- Creating table:
create table demo (dID int, dRec varchar(100));
-- Inserting records:
insert into demo (dID, dRec) values (1, 'BCQP1 Sam');
insert into demo (dID, dRec) values (2, 'BCQP2 LD');
-- Selecting fields to retrive records:
select * from demo;
Then I want to show in one single row both rows combined and display only the values from the left removing the name on the right side up to the space character.
/*
The STUFF() function puts a string in another string, from an initial position.
The LEFT() function returns the left part of a character string with the specified number of characters.
The CHARINDEX() string function returns the starting position of the specified expression in a character string.
*/
SELECT
DISTINCT
STUFF((SELECT ' ' + LEFT(dt1.dRec, charindex(' ', dt1.dRec) - 1)
FROM demo dt1
ORDER BY dRec
FOR XML PATH('')), 1, 1, '') [Convined values]
FROM demo dt2
--
GROUP BY dt2.dID, dt2.dRec
ORDER BY 1
As you can see here when you run the function the output will be:
BCQP1 BCQP2
On the top of the script I explained what each function is used for (STUFF(), LEFT(), CHARINDEX() functions) I also used DISTINCT in order to eliminate duplicate values.
NOTE: dt stands for "demo table", I used the same table and use two alias dt1 and dt2, and dRec stands for "demo Record"
If you want to learn more about STUFF() Function here is a link:
https://www.mssqltips.com/sqlservertip/2914/rolling-up-multiple-rows-into-a-single-row-and-column-for-sql-server-data/
With a CTE you will have a problem with Recursion if more that 100 items
Msg 530, Level 16, State 1, Line 20 The statement terminated. The
maximum recursion 100 has been exhausted before statement completion.
DECLARE #TExt NVARCHAR(MAX)
SET #TExt = '100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203'
DECLARE #Delimiter VARCHAR(1000)= ',';
WITH numbers
AS ( SELECT ROW_NUMBER() OVER ( ORDER BY o.object_id, o2.object_id ) Number
FROM sys.objects o
CROSS JOIN sys.objects o2
),
c AS ( SELECT Number CHARBegin ,
ROW_NUMBER() OVER ( ORDER BY number ) RN
FROM numbers
WHERE SUBSTRING(#text, Number, LEN(#Delimiter)) = #Delimiter
),
res
AS ( SELECT CHARBegin ,
CAST(LEFT(#text, charbegin) AS NVARCHAR(MAX)) Res ,
RN
FROM c
WHERE rn = 1
UNION ALL
SELECT c.CHARBegin ,
CAST(SUBSTRING(#text, res.CHARBegin,
c.CHARBegin - res.CHARBegin) AS NVARCHAR(MAX)) ,
c.RN
FROM c
JOIN res ON c.RN = res.RN + 1
)
SELECT *
FROM res

Resources