Related
My problem is this; I have a field in a table that contains values like this:
NP
NP;MC;PE
MC;AB;AT;MI;TC;WM
OS
OG
I want to convert these abbreviations to their full name. i.e. NP becomes Nuclear Power, OG becomes Oil and Gas, MI becomes Military etc.
My desired output would be:
Nuclear Power
Nuclear Power;Military;Pesticides
and so on.
I'm creating this as a function. I got it working for just the one abbreviation and then the same for two. However my issue is that I may have 5 abbreviations or 7. I know my current approach is dreadful but cannot figure out how to loop it in the right way.
Please note: I've shortened the list of abbreviations for StackOverflow but there's 25 in total.
Please further note: I did the function bottom up (I don't know why) and got the two value and single value working. I've removed anything I did for values over 3 as nothing I did worked.
ALTER FUNCTION [dbo].[get_str_full]
(
-- Add the parameters for the function here
#str_input VARCHAR(250)
)
RETURNS VARCHAR(250)
AS
BEGIN
-- Declare the return variable here
DECLARE #Result VARCHAR(250)
DECLARE #TEMPSTRING VARCHAR(250)
DECLARE #TEMPSTRING_RIGHT AS VARCHAR(250)
-- DECLARE #PI_COUNT BIGINT
DECLARE #COUNTER INT
DECLARE #TOTAL_VALS BIGINT
DECLARE #STRING_ST VARCHAR(250)
DECLARE #POS_STR BIGINT
DECLARE #REMAINING_STR VARCHAR(250)
-- Used for easy loop skips
DECLARE #LEFTSKIP AS BIGINT
SET #LEFTSKIP = 1
SET #Result = #str_input
SET #STRING_ST = #Result
SET #COUNTER = (LEN(#Result) - LEN(REPLACE(#Result,';',''))) + 1
SET #TOTAL_VALS = (LEN(#Result) - LEN(REPLACE(#Result,';',''))) + 1
-- If the string has a semicolon then there's more than one PI value
IF CHARINDEX(';', #Result) > 0
BEGIN
WHILE #COUNTER > 0
BEGIN
IF #TOTAL_VALS >= 3 -- If counter is more than 2 then there's three or more
BEGIN
DECLARE #TEMP_VAL BIGINT
SET #TEMP_VAL = 5
END
ELSE IF #TOTAL_VALS = 2-- Theres 2
BEGIN
-- Do left two chars first
IF #LEFTSKIP = 1
BEGIN
SET #TEMPSTRING = LEFT(#Result, 2)
SELECT #TEMPSTRING = CASE #TEMPSTRING
WHEN 'MC' THEN 'Military Contracting'
WHEN 'NP' THEN 'Nuclear'
WHEN 'OG' THEN 'Oil & Gas'
WHEN 'OS' THEN 'Oil Sands'
WHEN 'PM' THEN 'Palm Oil'
WHEN 'PE' THEN 'Pesticides'
ELSE #TEMPSTRING
END
SET #LEFTSKIP = 2
END
ELSE IF #LEFTSKIP = 2
BEGIN
SET #TEMPSTRING_RIGHT = RIGHT(#Result, 2)
SELECT #TEMPSTRING_RIGHT = CASE #TEMPSTRING_RIGHT
WHEN 'MC' THEN 'Military Contracting'
WHEN 'NP' THEN 'Nuclear'
WHEN 'OG' THEN 'Oil & Gas'
WHEN 'OS' THEN 'Oil Sands'
WHEN 'PM' THEN 'Palm Oil'
WHEN 'PE' THEN 'Pesticides'
ELSE #TEMPSTRING_RIGHT
END
END
END
SET #COUNTER = #COUNTER - 1
END
SET #Result = CONCAT(#TEMPSTRING,';', #TEMPSTRING_RIGHT)
END
ELSE
BEGIN
SET #Result = REPLACE(#Result, 'MC', 'Military Contracting')
SET #Result = REPLACE(#RESULT, 'NP', 'Nuclear Power')
SET #Result = REPLACE(#Result, 'OG', 'Oil & Gas')
SET #Result = REPLACE(#Result, 'OS', 'Oil Sands')
SET #Result = REPLACE(#Result, 'PM', 'Palm Oil')
SET #Result = REPLACE(#Result, 'PE', 'Pesticides')
END
-- Return the result of the function
RETURN #Result
END
First for some easily consumable sample data:
DECLARE #tranlation TABLE(tCode VARCHAR(10), tString VARCHAR(40));
DECLARE #t TABLE(String VARCHAR(1000));
INSERT #t VALUES('PE;N'),('NP'),('NP;MC;PE;XX')
INSERT #tranlation VALUES ('N','Nukes'),('NP','Nuclear Power'),('MC','Military'),
('PE','Pesticides');
Note my updated sample data which includes "XX", which has no match , and an "N" for "Nukes" which would wreck any solution which leverages REPLACE. If you are on SQL 2016+ you can use STRING_SPLIT and STRING_AGG.
SELECT
OldString = t.String,
NewString = STRING_AGG(ISNULL(tx.tString,items.[value]),';')
FROM #t AS t
OUTER APPLY STRING_SPLIT(t.String,';') AS items
LEFT JOIN #tranlation AS tx
ON items.[value] = tx.tCode
GROUP BY t.String ;
Returns:
OldString NewString
----------------- -------------------------------------------
NP Nuclear Power
NP;MC;PE;XX Nuclear Power;Military;Pesticides;XX
PE;N Pesticides;Nukes
You should really fix your table design so that you do not store multiple pieces of info in one column.
If you would like it as a function, I would strongly recommend an inline Table-Valued function rather than a scalar function.
If you have SQL Server version 2017+ you can use STRING_SPLIT and STRING_AGG for this.
CREATE OR ALTER FUNCTION GetFullStr
( #str varchar(250) )
RETURNS TABLE
AS RETURN
(
SELECT STRING_AGG(ISNULL(v.FullStr, s.value), ';') result
FROM STRING_SPLIT(#str, ';') s
LEFT JOIN (VALUES
('MC', 'Military Contracting'),
('NP', 'Nuclear'),
('OG', 'Oil & Gas'),
('OS', 'Oil Sands'),
('PM', 'Palm Oil'),
('PE', 'Pesticides')
) v(Abbr, FullStr) ON v.Abbr = s.value
);
GO
You can, and should, replace the VALUES with a real table.
On 2016 you would need FOR XML PATH instead of STRING_AGG:
CREATE OR ALTER FUNCTION GetFullStr
( #str varchar(250) )
RETURNS TABLE
AS RETURN
(
SELECT STUFF(
(SELECT ';' + ISNULL(v.FullStr, s.value)
FROM STRING_SPLIT(#str, ';') s
LEFT JOIN (VALUES
('MC', 'Military Contracting'),
('NP', 'Nuclear'),
('OG', 'Oil & Gas'),
('OS', 'Oil Sands'),
('PM', 'Palm Oil'),
('PE', 'Pesticides')
) v(Abbr, FullStr) ON v.Abbr = s.value
FOR XML PATH(''), TYPE
).value('text()[1]','varchar(2500)'),
, 1, 1, '')
);
GO
You use it like this:
SELECT s.result AS FullStr
FROM table
OUTER APPLY GetFullStr(value) AS s;
-- alternatively
SELECT (SELECT * FROM GetFullStr(value)) AS FullStr
FROM table;
You could assign your abbreviation mappings to a TABLE variable and then use that for your REPLACE. You could build this into a function, then pass your string values in.
The test below returns Military:Nuclear Power:XX.
declare #mapping table (abbrev varchar(50), fullname varchar(100))
insert into #mapping(abbrev, fullname)
values ('NP','Nuclear Power'),
('MC','Military')
declare #testString varchar(100), #newString varchar(100)
set #teststring = 'MC:NP:XX'
set #newString = #testString
SELECT #newString = REPLACE(#newString, abbrev, fullname) FROM #mapping
select #newString
UPDATE SampleTable
SET Schemaname = #SchemaName,
SchemaCode = #SchemaCode,
ForeignKeyColumn = #ForeignKeyColumn,
IsChildSchema = #IsChildSchema,
ModifiedBy = #ModifiedBy,
ModifiedDate = #ModifiedDate
WHERE
DataSchemaID = #DataSchemaId
My #ForeignKeyColumn parameter is
2233^SITE_CLM_NUMBER,2236^SITE_ID_N,
Can anyone help me in updating ForeignKeyColumn='SITE_CLM_NUMBER' where DataSchemaID=2233 and ForeignKeyColumn='SITE_ID_N' where DataSchemaID=2236
It's easy to pass multiple parameter values to a query, using a Table Valued Parameter. These are available in all versions of SQL Server since 2008.
First, you need to create a Table type with the fields you want:
CREATE TYPE dbo.KeyValueType AS TABLE
( Key int, Value nvarchar(50) )
This allows you to specify a parameter of type KeyValueType with the Key/Value combinations you want, eg #updatedColumns.
You can join the target table with the TVP to update rows with matching DataSchemaID values:
Create Procedure UpdateSchemas(...., #updatedColumns dbo.KeyValueType)
UPDATE SampleTable
SET
Schemaname=#SchemaName
,SchemaCode=#SchemaCode
,ForeignKeyColumn=t.Value
,IsChildSchema=#IsChildSchema
,ModifiedBy=#ModifiedBy
,ModifiedDate=#ModifiedDate
FROM SampleTable
INNER JOIN #updatedColumns t
ON t.ID=DataSchemaID
You can add an SplitString function, like this one :
How to Split String by Character into Separate Columns in SQL Server
CREATE FUNCTION [dbo].[Split]
(
#String varchar(max)
,#Delimiter char
)
RETURNS #Results table
(
Ordinal int
,StringValue varchar(max)
)
as
begin
set #String = isnull(#String,'')
set #Delimiter = isnull(#Delimiter,'')
declare
#TempString varchar(max) = #String
,#Ordinal int = 0
,#CharIndex int = 0
set #CharIndex = charindex(#Delimiter, #TempString)
while #CharIndex != 0 begin
set #Ordinal += 1
insert #Results values
(
#Ordinal
,substring(#TempString, 0, #CharIndex)
)
set #TempString = substring(#TempString, #CharIndex + 1, len(#TempString) - #CharIndex)
set #CharIndex = charindex(#Delimiter, #TempString)
end
if #TempString != '' begin
set #Ordinal += 1
insert #Results values
(
#Ordinal
,#TempString
)
end
return
end
Now you can easily extract each part of your input parameter.
declare #I int;
declare #TMP nvarchar(255);
set #I = 1;
set #TMP = null;
set #TMP = (select StringValue from Split(#ForeignKeyCoumn, ',') where Ordinal = 1);
while #TMP <> null
begin
set #ForeignKeyColumn = (select StringValue from Split(#TMP, '^') where Ordinal = 1);
set #DataSchemaID = (select StringValue from Split(#TMP, '^') where Ordinal = 2);
-- Update here your table with #ForeignKeyColumn and #DataSchemaID values
set #I = #I + 1;
set #TMP = null;
set #TMP = (select StringValue from Split(#ForeignKeyCoumn, ',') where Ordinal = #I);
end
PS: If your are using SQL Server 2016 it already includes an SplitString function, so you won't need to add your own. https://learn.microsoft.com/en-us/sql/t-sql/functions/string-split-transact-sql
SELECT REPLACE('<strong>100</strong><b>.00 GB', '%^(^-?\d*\.{0,1}\d+$)%', '');
I want to replace any markup between two parts of the number with above regex, but it does not seem to work. I'm not sure if it is regex syntax that's wrong because I tried simpler one such as '%[^0-9]%' just to test but it didn't work either. Does anyone know how can I achieve this?
You can use PATINDEX
to find the first index of the pattern (string's) occurrence. Then use STUFF to stuff another string into the pattern(string) matched.
Loop through each row. Replace each illegal characters with what you want. In your case replace non numeric with blank. The inner loop is if you have more than one illegal character in a current cell that of the loop.
DECLARE #counter int
SET #counter = 0
WHILE(#counter < (SELECT MAX(ID_COLUMN) FROM Table))
BEGIN
WHILE 1 = 1
BEGIN
DECLARE #RetVal varchar(50)
SET #RetVal = (SELECT Column = STUFF(Column, PATINDEX('%[^0-9.]%', Column),1, '')
FROM Table
WHERE ID_COLUMN = #counter)
IF(#RetVal IS NOT NULL)
UPDATE Table SET
Column = #RetVal
WHERE ID_COLUMN = #counter
ELSE
break
END
SET #counter = #counter + 1
END
Caution: This is slow though! Having a varchar column may impact. So using LTRIM RTRIM may help a bit. Regardless, it is slow.
Credit goes to this StackOverFlow answer.
EDIT
Credit also goes to #srutzky
Edit (by #Tmdean)
Instead of doing one row at a time, this answer can be adapted to a more set-based solution. It still iterates the max of the number of non-numeric characters in a single row, so it's not ideal, but I think it should be acceptable in most situations.
WHILE 1 = 1 BEGIN
WITH q AS
(SELECT ID_Column, PATINDEX('%[^0-9.]%', Column) AS n
FROM Table)
UPDATE Table
SET Column = STUFF(Column, q.n, 1, '')
FROM q
WHERE Table.ID_Column = q.ID_Column AND q.n != 0;
IF ##ROWCOUNT = 0 BREAK;
END;
You can also improve efficiency quite a lot if you maintain a bit column in the table that indicates whether the field has been scrubbed yet. (NULL represents "Unknown" in my example and should be the column default.)
DECLARE #done bit = 0;
WHILE #done = 0 BEGIN
WITH q AS
(SELECT ID_Column, PATINDEX('%[^0-9.]%', Column) AS n
FROM Table
WHERE COALESCE(Scrubbed_Column, 0) = 0)
UPDATE Table
SET Column = STUFF(Column, q.n, 1, ''),
Scrubbed_Column = 0
FROM q
WHERE Table.ID_Column = q.ID_Column AND q.n != 0;
IF ##ROWCOUNT = 0 SET #done = 1;
-- if Scrubbed_Column is still NULL, then the PATINDEX
-- must have given 0
UPDATE table
SET Scrubbed_Column = CASE
WHEN Scrubbed_Column IS NULL THEN 1
ELSE NULLIF(Scrubbed_Column, 0)
END;
END;
If you don't want to change your schema, this is easy to adapt to store intermediate results in a table valued variable which gets applied to the actual table at the end.
Instead of stripping out the found character by its sole position, using Replace(Column, BadFoundCharacter, '') could be substantially faster. Additionally, instead of just replacing the one bad character found next in each column, this replaces all those found.
WHILE 1 = 1 BEGIN
UPDATE dbo.YourTable
SET Column = Replace(Column, Substring(Column, PatIndex('%[^0-9.-]%', Column), 1), '')
WHERE Column LIKE '%[^0-9.-]%'
If ##RowCount = 0 BREAK;
END;
I am convinced this will work better than the accepted answer, if only because it does fewer operations. There are other ways that might also be faster, but I don't have time to explore those right now.
In a general sense, SQL Server does not support regular expressions and you cannot use them in the native T-SQL code.
You could write a CLR function to do that. See here, for example.
For those looking for a performant and easy solution and are willing to enable CLR:
CREATE database TestSQLFunctions
go
use TestSQLFunctions
go
ALTER database TestSQLFunctions set trustworthy on
EXEC sp_configure 'clr enabled', 1
RECONFIGURE WITH OVERRIDE
go
CREATE ASSEMBLY [SQLFunctions]
AUTHORIZATION [dbo]
FROM 0x4D5A90000300000004000000FFFF0000B800000000000000400000000000000000000000000000000000000000000000000000000000000000000000800000000E1FBA0E00B409CD21B8014CCD21546869732070726F6772616D2063616E6E6F742062652072756E20696E20444F53206D6F64652E0D0D0A2400000000000000504500004C0103004BE8B85F0000000000000000E00022200B013000000800000006000000000000C2270000002000000040000000000010002000000002000004000000000000000600000000000000008000000002000000000000030060850000100000100000000010000010000000000000100000000000000000000000702700004F000000004000009803000000000000000000000000000000000000006000000C00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000080000000000000000000000082000004800000000000000000000002E74657874000000C8070000002000000008000000020000000000000000000000000000200000602E72737263000000980300000040000000040000000A0000000000000000000000000000400000402E72656C6F6300000C0000000060000000020000000E00000000000000000000000000004000004200000000000000000000000000000000A4270000000000004800000002000500682000000807000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003A022D02022A020304281000000A2A1E02281100000A2A0042534A4201000100000000000C00000076342E302E33303331390000000005006C00000018020000237E000084020000CC02000023537472696E6773000000005005000004000000235553005405000010000000234755494400000064050000A401000023426C6F620000000000000002000001471500000900000000FA0133001600000100000013000000020000000200000003000000110000000F00000001000000030000000000CA01010000000000060025014F02060092014F02060044001D020F006F02000006006C00E20106000801E2010600D400E20106007901E20106004501E20106005E01E20106008300E2010600580030020600360030020600B700E20106009E00B0010600AA02DB010A00F300FC010A001F00FC010E00C3027E02000000000100000000000100010001001000C3029D0241000100010050200000000096002E001A0001005F2000000000861817020600040000000100BD0200000200F40100000300B102090017020100110017020600190017020A0029001702100031001702100039001702100041001702100049001702100051001702100059001702100061001702150069001702100071001702100079001702100089001702060099002E001A0081001702060020007B0010012E000B002A002E00130033002E001B0052002E0023005B002E002B006D002E0033006D002E003B006D002E0043005B002E004B0073002E0053006D002E005B006D002E0063008B002E006B00B5002E007300C2000480000001000000000000000000000000009D020000040000000000000000000000210016000000000004000000000000000000000021000A00000000000400000000000000000000002100DB010000000000000000003C4D6F64756C653E0053797374656D2E44617461006D73636F726C696200446174614163636573734B696E64005265706C61636500477569644174747269627574650044656275676761626C6541747472696275746500436F6D56697369626C6541747472696275746500417373656D626C795469746C6541747472696275746500417373656D626C7954726164656D61726B417474726962757465005461726765744672616D65776F726B41747472696275746500417373656D626C7946696C6556657273696F6E41747472696275746500417373656D626C79436F6E66696775726174696F6E4174747269627574650053716C46756E6374696F6E41747472696275746500417373656D626C794465736372697074696F6E41747472696275746500436F6D70696C6174696F6E52656C61786174696F6E7341747472696275746500417373656D626C7950726F6475637441747472696275746500417373656D626C79436F7079726967687441747472696275746500417373656D626C79436F6D70616E794174747269627574650052756E74696D65436F6D7061746962696C6974794174747269627574650053797374656D2E52756E74696D652E56657273696F6E696E670053514C46756E6374696F6E732E646C6C0053797374656D0053797374656D2E5265666C656374696F6E007061747465726E004D6963726F736F66742E53716C5365727665722E536572766572002E63746F720053797374656D2E446961676E6F73746963730053797374656D2E52756E74696D652E496E7465726F7053657276696365730053797374656D2E52756E74696D652E436F6D70696C6572536572766963657300446562756767696E674D6F6465730053797374656D2E546578742E526567756C617245787072657373696F6E730053514C46756E6374696F6E73004F626A656374007265706C6163656D656E7400696E70757400526567657800000000000000003A1617E607071B47B964858BCD87458B00042001010803200001052001011111042001010E04200101020600030E0E0E0E08B77A5C561934E0890801000800000000001E01000100540216577261704E6F6E457863657074696F6E5468726F7773010801000200000000001101000C53514C46756E6374696F6E73000005010000000017010012436F7079726967687420C2A920203230323000002901002434346436386231632D393735312D343938612D396665352D32316666333934303738303900000C010007312E302E302E3000004D01001C2E4E45544672616D65776F726B2C56657273696F6E3D76342E352E320100540E144672616D65776F726B446973706C61794E616D65142E4E4554204672616D65776F726B20342E352E32808F010001005455794D6963726F736F66742E53716C5365727665722E5365727665722E446174614163636573734B696E642C2053797374656D2E446174612C2056657273696F6E3D342E302E302E302C2043756C747572653D6E65757472616C2C205075626C69634B6579546F6B656E3D623737613563353631393334653038390A4461746141636365737301000000000000982700000000000000000000B2270000002000000000000000000000000000000000000000000000A4270000000000000000000000005F436F72446C6C4D61696E006D73636F7265652E646C6C0000000000FF25002000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001000000018000080000000000000000000000000000001000100000030000080000000000000000000000000000001000000000048000000584000003C03000000000000000000003C0334000000560053005F00560045005200530049004F004E005F0049004E0046004F0000000000BD04EFFE00000100000001000000000000000100000000003F000000000000000400000002000000000000000000000000000000440000000100560061007200460069006C00650049006E0066006F00000000002400040000005400720061006E0073006C006100740069006F006E00000000000000B0049C020000010053007400720069006E006700460069006C00650049006E0066006F0000007802000001003000300030003000300034006200300000001A000100010043006F006D006D0065006E007400730000000000000022000100010043006F006D00700061006E0079004E0061006D006500000000000000000042000D000100460069006C0065004400650073006300720069007000740069006F006E0000000000530051004C00460075006E006300740069006F006E00730000000000300008000100460069006C006500560065007200730069006F006E000000000031002E0030002E0030002E003000000042001100010049006E007400650072006E0061006C004E0061006D0065000000530051004C00460075006E006300740069006F006E0073002E0064006C006C00000000004800120001004C006500670061006C0043006F007000790072006900670068007400000043006F0070007900720069006700680074002000A90020002000320030003200300000002A00010001004C006500670061006C00540072006100640065006D00610072006B00730000000000000000004A00110001004F0072006900670069006E0061006C00460069006C0065006E0061006D0065000000530051004C00460075006E006300740069006F006E0073002E0064006C006C00000000003A000D000100500072006F0064007500630074004E0061006D00650000000000530051004C00460075006E006300740069006F006E00730000000000340008000100500072006F006400750063007400560065007200730069006F006E00000031002E0030002E0030002E003000000038000800010041007300730065006D0062006C0079002000560065007200730069006F006E00000031002E0030002E0030002E0030000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002000000C000000C43700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
WITH PERMISSION_SET = SAFE
go
CREATE FUNCTION RegexReplace(
#input nvarchar(max),
#pattern nvarchar(max),
#replacement nvarchar(max)
) RETURNS nvarchar (max)
AS EXTERNAL NAME SQLFunctions.[SQLFunctions.Regex].Replace;
go
-- outputs This is a test
SELECT dbo.RegexReplace('This is a test 12345','[0-9]','')
Content of the DLL:
I stumbled across this post looking for something else but thought I'd mention a solution I use which is far more efficient - and really should be the default implementation of any function when used with a set based query - which is to use a cross applied table function. Seems the topic is still active so hopefully this is useful to someone.
Example runtime on a few of the answers so far based on running recursive set based queries or scalar function, based on 1m rows test set removing the chars from a random newid, ranges from 34s to 2m05s for the WHILE loop examples and from 1m3s to {forever} for the function examples.
Using a table function with cross apply achieves the same goal in 10s. You may need to adjust it to suit your needs such as the max length it handles.
Function:
CREATE FUNCTION [dbo].[RemoveChars](#InputUnit VARCHAR(40))
RETURNS TABLE
AS
RETURN
(
WITH Numbers_prep(Number) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
,Numbers(Number) AS
(
SELECT TOP (ISNULL(LEN(#InputUnit),0))
row_number() OVER (ORDER BY (SELECT NULL))
FROM Numbers_prep a
CROSS JOIN Numbers_prep b
)
SELECT
OutputUnit
FROM
(
SELECT
substring(#InputUnit,Number,1)
FROM Numbers
WHERE substring(#InputUnit,Number,1) like '%[0-9]%'
ORDER BY Number
FOR XML PATH('')
) Sub(OutputUnit)
)
Usage:
UPDATE t
SET column = o.OutputUnit
FROM ##t t
CROSS APPLY [dbo].[RemoveChars](t.column) o
Here is a function I wrote to accomplish this based off of the previous answers.
CREATE FUNCTION dbo.RepetitiveReplace
(
#P_String VARCHAR(MAX),
#P_Pattern VARCHAR(MAX),
#P_ReplaceString VARCHAR(MAX),
#P_ReplaceLength INT = 1
)
RETURNS VARCHAR(MAX)
BEGIN
DECLARE #Index INT;
-- Get starting point of pattern
SET #Index = PATINDEX(#P_Pattern, #P_String);
while #Index > 0
begin
--replace matching charactger at index
SET #P_String = STUFF(#P_String, PATINDEX(#P_Pattern, #P_String), #P_ReplaceLength, #P_ReplaceString);
SET #Index = PATINDEX(#P_Pattern, #P_String);
end
RETURN #P_String;
END;
[Gist][1]
[1]: https://gist.github.com/jkdba/ca13fe8f2a9855c4bdbfd0a5d3dfcda2
Edit:
Originally I had a recursive function here which does not play well with sql server as it has a 32 nesting level limit which would result in an error like the below any time you attempt to make 32+ replacements with the function. Instead of trying to make a server level change to allow more nesting (which could be dangerous like allow never ending loops) switching to a while loop makes a lot more sense.
Maximum stored procedure, function, trigger, or view nesting level exceeded (limit 32).
Wrapping the solution inside a SQL function could be useful if you want to reuse it.
I'm even doing it at the cell level, that's why I'm putting this as a different answer:
CREATE FUNCTION [dbo].[fnReplaceInvalidChars] (#string VARCHAR(300))
RETURNS VARCHAR(300)
BEGIN
DECLARE #str VARCHAR(300) = #string;
DECLARE #Pattern VARCHAR (20) = '%[^a-zA-Z0-9]%';
DECLARE #Len INT;
SELECT #Len = LEN(#String);
WHILE #Len > 0
BEGIN
SET #Len = #Len - 1;
IF (PATINDEX(#Pattern,#str) > 0)
BEGIN
SELECT #str = STUFF(#str, PATINDEX(#Pattern,#str),1,'');
END
ELSE
BEGIN
BREAK;
END
END
RETURN #str
END
A more speedy approach for large strings would look something like this:
CREATE FUNCTION [dbo].[fnReplaceInvalidChars] (#string VARCHAR(MAX))
RETURNS VARCHAR(MAX)
BEGIN
DECLARE #str VARCHAR(MAX) = #string;
DECLARE #Pattern VARCHAR (MAX) = '%[^a-zA-Z0-9]%';
WHILE PATINDEX(#Pattern,#str) > 0
BEGIN
SELECT #str = STUFF(#str, PATINDEX(#Pattern,#str),1,'');
END
RETURN #str
END
I've created this function to clean up a string that contained non numeric characters in a time field. The time contained question marks when they did not added the minutes, something like this 20:??. Function loops through each character and replaces the ? with a 0 :
CREATE FUNCTION [dbo].[CleanTime]
(
-- Add the parameters for the function here
#intime nvarchar(10)
)
RETURNS nvarchar(5)
AS
BEGIN
-- Declare the return variable here
DECLARE #ResultVar nvarchar(5)
DECLARE #char char(1)
-- Add the T-SQL statements to compute the return value here
DECLARE #i int = 1
WHILE #i <= LEN(#intime)
BEGIN
SELECT #char = CASE WHEN substring(#intime,#i,1) like '%[0-9:]%' THEN substring(#intime,#i,1) ELSE '0' END
SELECT #ResultVar = concat(#ResultVar,#char)
set #i = #i + 1
END;
-- Return the result of the function
RETURN #ResultVar
END
I think this solution is faster and simple. I use always CTE/recursive because WHILE is so slow on SQL Server.
I use it in projects I work with and large databases.
/*
Function: dbo.kSql_ReplaceRegExp
Create Date: 20.02.2021
Author: Karcan Ozbal
Description: The given string value will be replaced according to the given regexp/pattern.
Parameter(s): #Value : Value/Text to REPLACE.
#RegExp : The regexp/pattern to be used for REPLACE operation.
Usage: select dbo.kSql_ReplaceRegExp('2T3EST5','%[0-9]%')
Output: 'TEST'
*/
ALTER FUNCTION [dbo].[kSql_ReplaceRegExp](
#Value nvarchar(max),
#RegExp nvarchar(50)
)
RETURNS nvarchar(max)
AS
BEGIN
DECLARE #Result nvarchar(max)
;WITH CTE AS (
SELECT NUM = 1, VALUE = #Value, IDX = PATINDEX(#RegExp, #Value)
UNION ALL
SELECT NUM + 1, VALUE = REPLACE(VALUE, SUBSTRING(VALUE,IDX,1),''), IDX = PATINDEX(#RegExp, REPLACE(VALUE, SUBSTRING(VALUE,IDX,1),''))
FROM CTE
WHERE IDX > 0
)
SELECT TOP(1) #Result = VALUE
FROM CTE
ORDER BY NUM DESC
OPTION (maxrecursion 0)
RETURN #Result
END
If you are doing this just for a parameter coming into a Stored Procedure, you can use the following:
declare #badIndex int
set #badIndex = PatIndex('%[^0-9]%', #Param)
while #badIndex > 0
set #Param = Replace(#Param, Substring(#Param, #badIndex, 1), '')
set #badIndex = PatIndex('%[^0-9]%', #Param)
I thought this was clearer:
ALTER FUNCTION [dbo].[func_ReplaceChars](
#Value nvarchar(max),
#Chars nvarchar(50)
)
RETURNS nvarchar(max)
AS
BEGIN
DECLARE #cLen int = len(#Chars);
DECLARE #curChar int = 0;
WHILE #curChar<#cLen
BEGIN
set #Value = replace(#Value,substring(#Chars,#curChar,1),'');
set #curChar = #curChar + 1;
END;
RETURN #Value
END
I'm using this code similar to several codes above:
DROP FUNCTION [dbo].[fnCleanString]
GO
CREATE FUNCTION [dbo].[fnCleanString] (#input VARCHAR(max), #Pattern
VARCHAR (20))
RETURNS VARCHAR(max)
BEGIN
DECLARE #str VARCHAR(max) = #input;
DECLARE #Len INT;
DECLARE #INDEX INT;
SELECT #Len = LEN(#input);
WHILE #Len > 0
BEGIN
SET #INDEX = PATINDEX(#Pattern,#str);
IF (#INDEX > 0)
BEGIN
SET #str=REPLACE(#str,SUBSTRING(#str,#INDEX, 1), '');
END
ELSE
BEGIN
BREAK;
END
END
RETURN #str
END
You can use it like this:
SELECT CleanName = dbo.[fnCleanString](Name, '%[0-9]%') from YourTable
I think a simpler and faster approach is iterate by each character of the alphabet:
DECLARE #i int
SET #i = 0
WHILE(#i < 256)
BEGIN
IF char(#i) NOT IN ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.')
UPDATE Table SET Column = replace(Column, char(#i), '')
SET #i = #i + 1
END
To begin - I am new at SQL; be gentle.
I work with a school district and have recently been given "the keys" to access the database. I am interested in getting a list of students, and then generating a list of passwords for them. I have found some code which allows me to generate random passwords that I would like to incorporate into a SQL query which is gathering information from our Student database. (Thank you if this code is yours!)
My issue is that I have not been able to use the variables to create a different password for each record. I get the same randomly generated password for each student. (On a good note; at least the password changes each time I execute the query.)
I should mention that I have two accounts set up for the database access; one to simply read the information, and another with full editing rights. (I have only used this once and closed my eyes as I pushed the big red button to update some trivial information).
Results from the first run:
Name Password
JACEK mtwsz2ybu
CARL mtwsz2ybu
LARS mtwsz2ybu
Results from the second run:
Name Password
JACEK je4tm5ptw
CARL je4tm5ptw
LARS je4tm5ptw
This is the query I am running:
USE XXTableXX
DECLARE #position int, #string char(100), #length int,
#rand int, #newstring char(15), #newchar char(15);
SET #position = 1;
SET #string = 'abcdefghijkmnopqrstuvwxyz23456789';
SET #length = 9;
SET #newstring = ''
SET #newchar = ''
WHILE #position <= #length
BEGIN
SET #rand = FLOOR(RAND()*(33-1)+1);
SET #newchar = SUBSTRING(#string,#rand,1);
SET #newstring = STUFF(#newstring,len(#newstring)+1,1,#newchar)
SET #position = #position +1;
END;
SELECT DISTINCT s.firstname, #newstring AS [Password]
FROM XXStudentTableXX s
Put all the stuff above the query in a function and call the function in your query. A side note, it will execute for each row, which means a bit of a performance hit for large sets.
Here is a way to do this.
First, you will need to make the RAND() into it's own view since you can't call it from a function
CREATE VIEW [dbo].[NewRandom]
AS
SELECT RAND() AS [RandSeed]
GO
Now you must create your function that uses the view. This function accepts an integer so you can do variable password lengths. You can hardcode it in your query or take out the parameter and hard code it in the function.
CREATE FUNCTION [dbo].[ufn_GeneratePassword] ( #PasswordLength INT )
RETURNS VARCHAR(20)
AS
BEGIN
DECLARE #position int, #string char(100), #length int,
#rand int, #newstring char(15), #newchar char(15);
SET #position = 1;
SET #string = 'abcdefghijkmnopqrstuvwxyz23456789';
SET #length = #PasswordLength;
SET #newstring = ''
SET #newchar = ''
WHILE #position <= #length
BEGIN
SET #rand = FLOOR((SELECT RandSeed FROM dbo.[NewRandom])*(33-1)+1);
SET #newchar = SUBSTRING(#string,#rand,1);
SET #newstring = STUFF(#newstring,len(#newstring)+1,1,#newchar)
SET #position = #position +1;
END
RETURN #newstring
END
Now you can call the function on every row of your table
SELECT DISTINCT s.firstname, [dbo].[ufn_GeneratePassword](9) AS [Password]
FROM XXStudentTableXX s
I think what you will need to do for this is create a table, or table variable, then instead of selecting make it insert into the table. Also Add a loop that loops 1x for each student ID. This will then insert 1 row for each student and run the randomizer 1 time.
First covert your procedure into a scalar SQL function.
CREATE VIEW dbo.RandomNumberView
AS
SELECT RandomNumber = RAND();
GO
CREATE FUNCTION dbo.GeneratePassword()
RETURNS char(15)
AS
-- Generates and Returns a random password
BEGIN
DECLARE #position int, #string char(100), #length int,
#rand int, #newstring char(15), #newchar char(15);
SET #position = 1;
SET #string = 'abcdefghijkmnopqrstuvwxyz23456789';
SET #length = 9;
SET #newstring = ''
SET #newchar = ''
WHILE #position <= #length
BEGIN
SELECT #rand = FLOOR(RandomNumber *(33-1)+1) FROM RandomNumberView;
SET #newchar = SUBSTRING(#string,#rand,1);
SET #newstring = STUFF(#newstring,len(#newstring)+1,1,#newchar)
SET #position = #position +1;
END;
RETURN #newstring;
END;
GO
And then you can easily use it whatever way you want.
SELECT DISTINCT s.firstname, dbo.GeneratePassword() AS [Password]
FROM XXStudentTableXX s
Note that each time you run this query you would get different password against the same record. I would prefer to use this function to only generate and insert password in the table and not as a direct select statement.
To get the data from a random table, SQL Server provides the NEWID () function. As its name says every execution is generated a new GUID (Global Unique Identifier), these GUIDs are unique, so the value of the order will never be the same.
Therefore you do not need to create a random function that does it for you. So you may have your answer using a simple SQL.
CREATE TABLE STUDENT (
[ID] INT IDENTITY (1, 1) NOT NULL,
[NAME] NVARCHAR (100) NULL
);
INSERT INTO STUDENT (NAME) VALUES ('John');
INSERT INTO STUDENT (NAME) VALUES ('Carl');
INSERT INTO STUDENT (NAME) VALUES ('Mary');
INSERT INTO STUDENT (NAME) VALUES ('Joan');
select
ID
,NAME
,CONCAT(
CAST((ABS(CHECKSUM(NEWID()))%10) as varchar(1))
, CHAR(ASCII('a') + (ABS(CHECKSUM(NEWID())) % 25))
, CHAR(ASCII('A') + (ABS(CHECKSUM(NEWID())) % 25))
, LEFT(LOWER(NEWID()),2)
, LEFT(NEWID(),2)
, LEFT(LOWER(NEWID()),2)
) AS PASSWORD
from STUDENT
SQL Fiddle
However, you may wish to Make your own generator taking into account a high level of complexity generated password.
https://www.simple-talk.com/blogs/2009/09/30/strong-password-generator/
SELECT REPLACE('<strong>100</strong><b>.00 GB', '%^(^-?\d*\.{0,1}\d+$)%', '');
I want to replace any markup between two parts of the number with above regex, but it does not seem to work. I'm not sure if it is regex syntax that's wrong because I tried simpler one such as '%[^0-9]%' just to test but it didn't work either. Does anyone know how can I achieve this?
You can use PATINDEX
to find the first index of the pattern (string's) occurrence. Then use STUFF to stuff another string into the pattern(string) matched.
Loop through each row. Replace each illegal characters with what you want. In your case replace non numeric with blank. The inner loop is if you have more than one illegal character in a current cell that of the loop.
DECLARE #counter int
SET #counter = 0
WHILE(#counter < (SELECT MAX(ID_COLUMN) FROM Table))
BEGIN
WHILE 1 = 1
BEGIN
DECLARE #RetVal varchar(50)
SET #RetVal = (SELECT Column = STUFF(Column, PATINDEX('%[^0-9.]%', Column),1, '')
FROM Table
WHERE ID_COLUMN = #counter)
IF(#RetVal IS NOT NULL)
UPDATE Table SET
Column = #RetVal
WHERE ID_COLUMN = #counter
ELSE
break
END
SET #counter = #counter + 1
END
Caution: This is slow though! Having a varchar column may impact. So using LTRIM RTRIM may help a bit. Regardless, it is slow.
Credit goes to this StackOverFlow answer.
EDIT
Credit also goes to #srutzky
Edit (by #Tmdean)
Instead of doing one row at a time, this answer can be adapted to a more set-based solution. It still iterates the max of the number of non-numeric characters in a single row, so it's not ideal, but I think it should be acceptable in most situations.
WHILE 1 = 1 BEGIN
WITH q AS
(SELECT ID_Column, PATINDEX('%[^0-9.]%', Column) AS n
FROM Table)
UPDATE Table
SET Column = STUFF(Column, q.n, 1, '')
FROM q
WHERE Table.ID_Column = q.ID_Column AND q.n != 0;
IF ##ROWCOUNT = 0 BREAK;
END;
You can also improve efficiency quite a lot if you maintain a bit column in the table that indicates whether the field has been scrubbed yet. (NULL represents "Unknown" in my example and should be the column default.)
DECLARE #done bit = 0;
WHILE #done = 0 BEGIN
WITH q AS
(SELECT ID_Column, PATINDEX('%[^0-9.]%', Column) AS n
FROM Table
WHERE COALESCE(Scrubbed_Column, 0) = 0)
UPDATE Table
SET Column = STUFF(Column, q.n, 1, ''),
Scrubbed_Column = 0
FROM q
WHERE Table.ID_Column = q.ID_Column AND q.n != 0;
IF ##ROWCOUNT = 0 SET #done = 1;
-- if Scrubbed_Column is still NULL, then the PATINDEX
-- must have given 0
UPDATE table
SET Scrubbed_Column = CASE
WHEN Scrubbed_Column IS NULL THEN 1
ELSE NULLIF(Scrubbed_Column, 0)
END;
END;
If you don't want to change your schema, this is easy to adapt to store intermediate results in a table valued variable which gets applied to the actual table at the end.
Instead of stripping out the found character by its sole position, using Replace(Column, BadFoundCharacter, '') could be substantially faster. Additionally, instead of just replacing the one bad character found next in each column, this replaces all those found.
WHILE 1 = 1 BEGIN
UPDATE dbo.YourTable
SET Column = Replace(Column, Substring(Column, PatIndex('%[^0-9.-]%', Column), 1), '')
WHERE Column LIKE '%[^0-9.-]%'
If ##RowCount = 0 BREAK;
END;
I am convinced this will work better than the accepted answer, if only because it does fewer operations. There are other ways that might also be faster, but I don't have time to explore those right now.
In a general sense, SQL Server does not support regular expressions and you cannot use them in the native T-SQL code.
You could write a CLR function to do that. See here, for example.
For those looking for a performant and easy solution and are willing to enable CLR:
CREATE database TestSQLFunctions
go
use TestSQLFunctions
go
ALTER database TestSQLFunctions set trustworthy on
EXEC sp_configure 'clr enabled', 1
RECONFIGURE WITH OVERRIDE
go
CREATE ASSEMBLY [SQLFunctions]
AUTHORIZATION [dbo]
FROM 0x4D5A90000300000004000000FFFF0000B800000000000000400000000000000000000000000000000000000000000000000000000000000000000000800000000E1FBA0E00B409CD21B8014CCD21546869732070726F6772616D2063616E6E6F742062652072756E20696E20444F53206D6F64652E0D0D0A2400000000000000504500004C0103004BE8B85F0000000000000000E00022200B013000000800000006000000000000C2270000002000000040000000000010002000000002000004000000000000000600000000000000008000000002000000000000030060850000100000100000000010000010000000000000100000000000000000000000702700004F000000004000009803000000000000000000000000000000000000006000000C00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000080000000000000000000000082000004800000000000000000000002E74657874000000C8070000002000000008000000020000000000000000000000000000200000602E72737263000000980300000040000000040000000A0000000000000000000000000000400000402E72656C6F6300000C0000000060000000020000000E00000000000000000000000000004000004200000000000000000000000000000000A4270000000000004800000002000500682000000807000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003A022D02022A020304281000000A2A1E02281100000A2A0042534A4201000100000000000C00000076342E302E33303331390000000005006C00000018020000237E000084020000CC02000023537472696E6773000000005005000004000000235553005405000010000000234755494400000064050000A401000023426C6F620000000000000002000001471500000900000000FA0133001600000100000013000000020000000200000003000000110000000F00000001000000030000000000CA01010000000000060025014F02060092014F02060044001D020F006F02000006006C00E20106000801E2010600D400E20106007901E20106004501E20106005E01E20106008300E2010600580030020600360030020600B700E20106009E00B0010600AA02DB010A00F300FC010A001F00FC010E00C3027E02000000000100000000000100010001001000C3029D0241000100010050200000000096002E001A0001005F2000000000861817020600040000000100BD0200000200F40100000300B102090017020100110017020600190017020A0029001702100031001702100039001702100041001702100049001702100051001702100059001702100061001702150069001702100071001702100079001702100089001702060099002E001A0081001702060020007B0010012E000B002A002E00130033002E001B0052002E0023005B002E002B006D002E0033006D002E003B006D002E0043005B002E004B0073002E0053006D002E005B006D002E0063008B002E006B00B5002E007300C2000480000001000000000000000000000000009D020000040000000000000000000000210016000000000004000000000000000000000021000A00000000000400000000000000000000002100DB010000000000000000003C4D6F64756C653E0053797374656D2E44617461006D73636F726C696200446174614163636573734B696E64005265706C61636500477569644174747269627574650044656275676761626C6541747472696275746500436F6D56697369626C6541747472696275746500417373656D626C795469746C6541747472696275746500417373656D626C7954726164656D61726B417474726962757465005461726765744672616D65776F726B41747472696275746500417373656D626C7946696C6556657273696F6E41747472696275746500417373656D626C79436F6E66696775726174696F6E4174747269627574650053716C46756E6374696F6E41747472696275746500417373656D626C794465736372697074696F6E41747472696275746500436F6D70696C6174696F6E52656C61786174696F6E7341747472696275746500417373656D626C7950726F6475637441747472696275746500417373656D626C79436F7079726967687441747472696275746500417373656D626C79436F6D70616E794174747269627574650052756E74696D65436F6D7061746962696C6974794174747269627574650053797374656D2E52756E74696D652E56657273696F6E696E670053514C46756E6374696F6E732E646C6C0053797374656D0053797374656D2E5265666C656374696F6E007061747465726E004D6963726F736F66742E53716C5365727665722E536572766572002E63746F720053797374656D2E446961676E6F73746963730053797374656D2E52756E74696D652E496E7465726F7053657276696365730053797374656D2E52756E74696D652E436F6D70696C6572536572766963657300446562756767696E674D6F6465730053797374656D2E546578742E526567756C617245787072657373696F6E730053514C46756E6374696F6E73004F626A656374007265706C6163656D656E7400696E70757400526567657800000000000000003A1617E607071B47B964858BCD87458B00042001010803200001052001011111042001010E04200101020600030E0E0E0E08B77A5C561934E0890801000800000000001E01000100540216577261704E6F6E457863657074696F6E5468726F7773010801000200000000001101000C53514C46756E6374696F6E73000005010000000017010012436F7079726967687420C2A920203230323000002901002434346436386231632D393735312D343938612D396665352D32316666333934303738303900000C010007312E302E302E3000004D01001C2E4E45544672616D65776F726B2C56657273696F6E3D76342E352E320100540E144672616D65776F726B446973706C61794E616D65142E4E4554204672616D65776F726B20342E352E32808F010001005455794D6963726F736F66742E53716C5365727665722E5365727665722E446174614163636573734B696E642C2053797374656D2E446174612C2056657273696F6E3D342E302E302E302C2043756C747572653D6E65757472616C2C205075626C69634B6579546F6B656E3D623737613563353631393334653038390A4461746141636365737301000000000000982700000000000000000000B2270000002000000000000000000000000000000000000000000000A4270000000000000000000000005F436F72446C6C4D61696E006D73636F7265652E646C6C0000000000FF25002000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001000000018000080000000000000000000000000000001000100000030000080000000000000000000000000000001000000000048000000584000003C03000000000000000000003C0334000000560053005F00560045005200530049004F004E005F0049004E0046004F0000000000BD04EFFE00000100000001000000000000000100000000003F000000000000000400000002000000000000000000000000000000440000000100560061007200460069006C00650049006E0066006F00000000002400040000005400720061006E0073006C006100740069006F006E00000000000000B0049C020000010053007400720069006E006700460069006C00650049006E0066006F0000007802000001003000300030003000300034006200300000001A000100010043006F006D006D0065006E007400730000000000000022000100010043006F006D00700061006E0079004E0061006D006500000000000000000042000D000100460069006C0065004400650073006300720069007000740069006F006E0000000000530051004C00460075006E006300740069006F006E00730000000000300008000100460069006C006500560065007200730069006F006E000000000031002E0030002E0030002E003000000042001100010049006E007400650072006E0061006C004E0061006D0065000000530051004C00460075006E006300740069006F006E0073002E0064006C006C00000000004800120001004C006500670061006C0043006F007000790072006900670068007400000043006F0070007900720069006700680074002000A90020002000320030003200300000002A00010001004C006500670061006C00540072006100640065006D00610072006B00730000000000000000004A00110001004F0072006900670069006E0061006C00460069006C0065006E0061006D0065000000530051004C00460075006E006300740069006F006E0073002E0064006C006C00000000003A000D000100500072006F0064007500630074004E0061006D00650000000000530051004C00460075006E006300740069006F006E00730000000000340008000100500072006F006400750063007400560065007200730069006F006E00000031002E0030002E0030002E003000000038000800010041007300730065006D0062006C0079002000560065007200730069006F006E00000031002E0030002E0030002E0030000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002000000C000000C43700000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
WITH PERMISSION_SET = SAFE
go
CREATE FUNCTION RegexReplace(
#input nvarchar(max),
#pattern nvarchar(max),
#replacement nvarchar(max)
) RETURNS nvarchar (max)
AS EXTERNAL NAME SQLFunctions.[SQLFunctions.Regex].Replace;
go
-- outputs This is a test
SELECT dbo.RegexReplace('This is a test 12345','[0-9]','')
Content of the DLL:
I stumbled across this post looking for something else but thought I'd mention a solution I use which is far more efficient - and really should be the default implementation of any function when used with a set based query - which is to use a cross applied table function. Seems the topic is still active so hopefully this is useful to someone.
Example runtime on a few of the answers so far based on running recursive set based queries or scalar function, based on 1m rows test set removing the chars from a random newid, ranges from 34s to 2m05s for the WHILE loop examples and from 1m3s to {forever} for the function examples.
Using a table function with cross apply achieves the same goal in 10s. You may need to adjust it to suit your needs such as the max length it handles.
Function:
CREATE FUNCTION [dbo].[RemoveChars](#InputUnit VARCHAR(40))
RETURNS TABLE
AS
RETURN
(
WITH Numbers_prep(Number) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
,Numbers(Number) AS
(
SELECT TOP (ISNULL(LEN(#InputUnit),0))
row_number() OVER (ORDER BY (SELECT NULL))
FROM Numbers_prep a
CROSS JOIN Numbers_prep b
)
SELECT
OutputUnit
FROM
(
SELECT
substring(#InputUnit,Number,1)
FROM Numbers
WHERE substring(#InputUnit,Number,1) like '%[0-9]%'
ORDER BY Number
FOR XML PATH('')
) Sub(OutputUnit)
)
Usage:
UPDATE t
SET column = o.OutputUnit
FROM ##t t
CROSS APPLY [dbo].[RemoveChars](t.column) o
Here is a function I wrote to accomplish this based off of the previous answers.
CREATE FUNCTION dbo.RepetitiveReplace
(
#P_String VARCHAR(MAX),
#P_Pattern VARCHAR(MAX),
#P_ReplaceString VARCHAR(MAX),
#P_ReplaceLength INT = 1
)
RETURNS VARCHAR(MAX)
BEGIN
DECLARE #Index INT;
-- Get starting point of pattern
SET #Index = PATINDEX(#P_Pattern, #P_String);
while #Index > 0
begin
--replace matching charactger at index
SET #P_String = STUFF(#P_String, PATINDEX(#P_Pattern, #P_String), #P_ReplaceLength, #P_ReplaceString);
SET #Index = PATINDEX(#P_Pattern, #P_String);
end
RETURN #P_String;
END;
[Gist][1]
[1]: https://gist.github.com/jkdba/ca13fe8f2a9855c4bdbfd0a5d3dfcda2
Edit:
Originally I had a recursive function here which does not play well with sql server as it has a 32 nesting level limit which would result in an error like the below any time you attempt to make 32+ replacements with the function. Instead of trying to make a server level change to allow more nesting (which could be dangerous like allow never ending loops) switching to a while loop makes a lot more sense.
Maximum stored procedure, function, trigger, or view nesting level exceeded (limit 32).
Wrapping the solution inside a SQL function could be useful if you want to reuse it.
I'm even doing it at the cell level, that's why I'm putting this as a different answer:
CREATE FUNCTION [dbo].[fnReplaceInvalidChars] (#string VARCHAR(300))
RETURNS VARCHAR(300)
BEGIN
DECLARE #str VARCHAR(300) = #string;
DECLARE #Pattern VARCHAR (20) = '%[^a-zA-Z0-9]%';
DECLARE #Len INT;
SELECT #Len = LEN(#String);
WHILE #Len > 0
BEGIN
SET #Len = #Len - 1;
IF (PATINDEX(#Pattern,#str) > 0)
BEGIN
SELECT #str = STUFF(#str, PATINDEX(#Pattern,#str),1,'');
END
ELSE
BEGIN
BREAK;
END
END
RETURN #str
END
A more speedy approach for large strings would look something like this:
CREATE FUNCTION [dbo].[fnReplaceInvalidChars] (#string VARCHAR(MAX))
RETURNS VARCHAR(MAX)
BEGIN
DECLARE #str VARCHAR(MAX) = #string;
DECLARE #Pattern VARCHAR (MAX) = '%[^a-zA-Z0-9]%';
WHILE PATINDEX(#Pattern,#str) > 0
BEGIN
SELECT #str = STUFF(#str, PATINDEX(#Pattern,#str),1,'');
END
RETURN #str
END
I've created this function to clean up a string that contained non numeric characters in a time field. The time contained question marks when they did not added the minutes, something like this 20:??. Function loops through each character and replaces the ? with a 0 :
CREATE FUNCTION [dbo].[CleanTime]
(
-- Add the parameters for the function here
#intime nvarchar(10)
)
RETURNS nvarchar(5)
AS
BEGIN
-- Declare the return variable here
DECLARE #ResultVar nvarchar(5)
DECLARE #char char(1)
-- Add the T-SQL statements to compute the return value here
DECLARE #i int = 1
WHILE #i <= LEN(#intime)
BEGIN
SELECT #char = CASE WHEN substring(#intime,#i,1) like '%[0-9:]%' THEN substring(#intime,#i,1) ELSE '0' END
SELECT #ResultVar = concat(#ResultVar,#char)
set #i = #i + 1
END;
-- Return the result of the function
RETURN #ResultVar
END
I think this solution is faster and simple. I use always CTE/recursive because WHILE is so slow on SQL Server.
I use it in projects I work with and large databases.
/*
Function: dbo.kSql_ReplaceRegExp
Create Date: 20.02.2021
Author: Karcan Ozbal
Description: The given string value will be replaced according to the given regexp/pattern.
Parameter(s): #Value : Value/Text to REPLACE.
#RegExp : The regexp/pattern to be used for REPLACE operation.
Usage: select dbo.kSql_ReplaceRegExp('2T3EST5','%[0-9]%')
Output: 'TEST'
*/
ALTER FUNCTION [dbo].[kSql_ReplaceRegExp](
#Value nvarchar(max),
#RegExp nvarchar(50)
)
RETURNS nvarchar(max)
AS
BEGIN
DECLARE #Result nvarchar(max)
;WITH CTE AS (
SELECT NUM = 1, VALUE = #Value, IDX = PATINDEX(#RegExp, #Value)
UNION ALL
SELECT NUM + 1, VALUE = REPLACE(VALUE, SUBSTRING(VALUE,IDX,1),''), IDX = PATINDEX(#RegExp, REPLACE(VALUE, SUBSTRING(VALUE,IDX,1),''))
FROM CTE
WHERE IDX > 0
)
SELECT TOP(1) #Result = VALUE
FROM CTE
ORDER BY NUM DESC
OPTION (maxrecursion 0)
RETURN #Result
END
If you are doing this just for a parameter coming into a Stored Procedure, you can use the following:
declare #badIndex int
set #badIndex = PatIndex('%[^0-9]%', #Param)
while #badIndex > 0
set #Param = Replace(#Param, Substring(#Param, #badIndex, 1), '')
set #badIndex = PatIndex('%[^0-9]%', #Param)
I thought this was clearer:
ALTER FUNCTION [dbo].[func_ReplaceChars](
#Value nvarchar(max),
#Chars nvarchar(50)
)
RETURNS nvarchar(max)
AS
BEGIN
DECLARE #cLen int = len(#Chars);
DECLARE #curChar int = 0;
WHILE #curChar<#cLen
BEGIN
set #Value = replace(#Value,substring(#Chars,#curChar,1),'');
set #curChar = #curChar + 1;
END;
RETURN #Value
END
I'm using this code similar to several codes above:
DROP FUNCTION [dbo].[fnCleanString]
GO
CREATE FUNCTION [dbo].[fnCleanString] (#input VARCHAR(max), #Pattern
VARCHAR (20))
RETURNS VARCHAR(max)
BEGIN
DECLARE #str VARCHAR(max) = #input;
DECLARE #Len INT;
DECLARE #INDEX INT;
SELECT #Len = LEN(#input);
WHILE #Len > 0
BEGIN
SET #INDEX = PATINDEX(#Pattern,#str);
IF (#INDEX > 0)
BEGIN
SET #str=REPLACE(#str,SUBSTRING(#str,#INDEX, 1), '');
END
ELSE
BEGIN
BREAK;
END
END
RETURN #str
END
You can use it like this:
SELECT CleanName = dbo.[fnCleanString](Name, '%[0-9]%') from YourTable
I think a simpler and faster approach is iterate by each character of the alphabet:
DECLARE #i int
SET #i = 0
WHILE(#i < 256)
BEGIN
IF char(#i) NOT IN ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.')
UPDATE Table SET Column = replace(Column, char(#i), '')
SET #i = #i + 1
END