Search for a common prefix between two strings - sql-server

I have a Stored Procedure:
ALTER PROCEDURE [dbo].[traxs_Paybook_Data_Validate]
#session_id varchar(30)
#paybook_start_number varchar(30)
#paybook_end_number varchar(30)
AS
UPDATE traxs_temp..__PaybookImport SET
/* BEGIN CHANGE */
prefix = null,
start_number = CAST(#paybook_start_number AS int),
end_number = CAST(#paybook_end_number AS int)
/* END CHANGE */
WHERE
session_id = #session_id
Values were like:
#paybook_start_number = 100
#paybook_end_number = 200
Now paybook numbers can have a prefix, i.e:
#paybook_start_number = ABC100
#paybook_end_number = ABC200
Prefix is not always the same, neither is its length. I need to find the prefix if one, store it into prefix and remove it from paybook numbers before casting them.
Thanks

Try this:
DECLARE #z VARCHAR(32) = 'ukasd10';
SELECT LEFT(#z, PATINDEX('%[0-9]%', #z) - 1) AS Prefix,REPLACE(SUBSTRING(#z, PATINDEX('%[0-9]%', #z), LEN(#z)), ',', '') AS Digits
and likewise use this logic to update the column Prefix....
Thanks

You need a Function to extract Number/Numeric value from your input string and a function to extract Alphabets from the Input string.
Function To Extract Numbers
CREATE FUNCTION dbo.fn_Extract_Numbers
(
#string NVARCHAR(100)
)
RETURNS INT
AS
BEGIN
DECLARE #int_Value INT;
SELECT #int_Value = LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)
FROM (
SELECT subsrt = SUBSTRING(#string, pos, LEN(#string))
FROM (
SELECT #string AS string , PATINDEX('%[0-9]%', #string) AS Pos
) d
) t
RETURN #int_Value;
END
Function To Extract Alphabets
CREATE FUNCTION dbo.fn_Extract_Alphabets
(
#string NVARCHAR(100)
)
RETURNS NVARCHAR(100)
AS
BEGIN
DECLARE #Alpha_Value NVARCHAR(100);
SELECT #Alpha_Value = LEFT(subsrt, PATINDEX('%[^a-z]%', subsrt + 't') - 1)
FROM (
SELECT subsrt = SUBSTRING(#string, pos, LEN(#string))
FROM (
SELECT #string AS string , PATINDEX('%[a-z]%', #string) AS Pos
) d
) t
RETURN #Alpha_Value;
END
Now use these functions inside your stored procedure to extract the Alphabet/Prefix bit and the Number bit and store them in the target columns.
Something like....
ALTER PROCEDURE [dbo].[traxs_Paybook_Data_Validate]
#session_id varchar(30)
#paybook_start_number varchar(30)
#paybook_end_number varchar(30)
AS
DECLARE #Start_Num_Prefix VARCHAR(100);
DECLARE #End_Num_Prefix VARCHAR(100);
DECLARE #Start_Num_Numbers INT;
DECLARE #End_Num_Numbers INT;
SELECT #Start_Num_Prefix = dbo.fn_Extract_Alphabets(#paybook_start_number)
SELECT #End_Num_Prefix = dbo.fn_Extract_Alphabets(#paybook_end_number)
SELECT #Start_Num_Numbers = dbo.fn_Extract_Numbers(#paybook_start_number)
SELECT #End_Num_Numbers = dbo.fn_Extract_Numbers(#paybook_end_number)
..... rest of your procedure and so on....

Related

How to iterate over a string of varying length, replacing different abbreviations with their full text. All abbreviations separated by a semicolon

My problem is this; I have a field in a table that contains values like this:
NP
NP;MC;PE
MC;AB;AT;MI;TC;WM
OS
OG
I want to convert these abbreviations to their full name. i.e. NP becomes Nuclear Power, OG becomes Oil and Gas, MI becomes Military etc.
My desired output would be:
Nuclear Power
Nuclear Power;Military;Pesticides
and so on.
I'm creating this as a function. I got it working for just the one abbreviation and then the same for two. However my issue is that I may have 5 abbreviations or 7. I know my current approach is dreadful but cannot figure out how to loop it in the right way.
Please note: I've shortened the list of abbreviations for StackOverflow but there's 25 in total.
Please further note: I did the function bottom up (I don't know why) and got the two value and single value working. I've removed anything I did for values over 3 as nothing I did worked.
ALTER FUNCTION [dbo].[get_str_full]
(
-- Add the parameters for the function here
#str_input VARCHAR(250)
)
RETURNS VARCHAR(250)
AS
BEGIN
-- Declare the return variable here
DECLARE #Result VARCHAR(250)
DECLARE #TEMPSTRING VARCHAR(250)
DECLARE #TEMPSTRING_RIGHT AS VARCHAR(250)
-- DECLARE #PI_COUNT BIGINT
DECLARE #COUNTER INT
DECLARE #TOTAL_VALS BIGINT
DECLARE #STRING_ST VARCHAR(250)
DECLARE #POS_STR BIGINT
DECLARE #REMAINING_STR VARCHAR(250)
-- Used for easy loop skips
DECLARE #LEFTSKIP AS BIGINT
SET #LEFTSKIP = 1
SET #Result = #str_input
SET #STRING_ST = #Result
SET #COUNTER = (LEN(#Result) - LEN(REPLACE(#Result,';',''))) + 1
SET #TOTAL_VALS = (LEN(#Result) - LEN(REPLACE(#Result,';',''))) + 1
-- If the string has a semicolon then there's more than one PI value
IF CHARINDEX(';', #Result) > 0
BEGIN
WHILE #COUNTER > 0
BEGIN
IF #TOTAL_VALS >= 3 -- If counter is more than 2 then there's three or more
BEGIN
DECLARE #TEMP_VAL BIGINT
SET #TEMP_VAL = 5
END
ELSE IF #TOTAL_VALS = 2-- Theres 2
BEGIN
-- Do left two chars first
IF #LEFTSKIP = 1
BEGIN
SET #TEMPSTRING = LEFT(#Result, 2)
SELECT #TEMPSTRING = CASE #TEMPSTRING
WHEN 'MC' THEN 'Military Contracting'
WHEN 'NP' THEN 'Nuclear'
WHEN 'OG' THEN 'Oil & Gas'
WHEN 'OS' THEN 'Oil Sands'
WHEN 'PM' THEN 'Palm Oil'
WHEN 'PE' THEN 'Pesticides'
ELSE #TEMPSTRING
END
SET #LEFTSKIP = 2
END
ELSE IF #LEFTSKIP = 2
BEGIN
SET #TEMPSTRING_RIGHT = RIGHT(#Result, 2)
SELECT #TEMPSTRING_RIGHT = CASE #TEMPSTRING_RIGHT
WHEN 'MC' THEN 'Military Contracting'
WHEN 'NP' THEN 'Nuclear'
WHEN 'OG' THEN 'Oil & Gas'
WHEN 'OS' THEN 'Oil Sands'
WHEN 'PM' THEN 'Palm Oil'
WHEN 'PE' THEN 'Pesticides'
ELSE #TEMPSTRING_RIGHT
END
END
END
SET #COUNTER = #COUNTER - 1
END
SET #Result = CONCAT(#TEMPSTRING,';', #TEMPSTRING_RIGHT)
END
ELSE
BEGIN
SET #Result = REPLACE(#Result, 'MC', 'Military Contracting')
SET #Result = REPLACE(#RESULT, 'NP', 'Nuclear Power')
SET #Result = REPLACE(#Result, 'OG', 'Oil & Gas')
SET #Result = REPLACE(#Result, 'OS', 'Oil Sands')
SET #Result = REPLACE(#Result, 'PM', 'Palm Oil')
SET #Result = REPLACE(#Result, 'PE', 'Pesticides')
END
-- Return the result of the function
RETURN #Result
END
First for some easily consumable sample data:
DECLARE #tranlation TABLE(tCode VARCHAR(10), tString VARCHAR(40));
DECLARE #t TABLE(String VARCHAR(1000));
INSERT #t VALUES('PE;N'),('NP'),('NP;MC;PE;XX')
INSERT #tranlation VALUES ('N','Nukes'),('NP','Nuclear Power'),('MC','Military'),
('PE','Pesticides');
Note my updated sample data which includes "XX", which has no match , and an "N" for "Nukes" which would wreck any solution which leverages REPLACE. If you are on SQL 2016+ you can use STRING_SPLIT and STRING_AGG.
SELECT
OldString = t.String,
NewString = STRING_AGG(ISNULL(tx.tString,items.[value]),';')
FROM #t AS t
OUTER APPLY STRING_SPLIT(t.String,';') AS items
LEFT JOIN #tranlation AS tx
ON items.[value] = tx.tCode
GROUP BY t.String ;
Returns:
OldString NewString
----------------- -------------------------------------------
NP Nuclear Power
NP;MC;PE;XX Nuclear Power;Military;Pesticides;XX
PE;N Pesticides;Nukes
You should really fix your table design so that you do not store multiple pieces of info in one column.
If you would like it as a function, I would strongly recommend an inline Table-Valued function rather than a scalar function.
If you have SQL Server version 2017+ you can use STRING_SPLIT and STRING_AGG for this.
CREATE OR ALTER FUNCTION GetFullStr
( #str varchar(250) )
RETURNS TABLE
AS RETURN
(
SELECT STRING_AGG(ISNULL(v.FullStr, s.value), ';') result
FROM STRING_SPLIT(#str, ';') s
LEFT JOIN (VALUES
('MC', 'Military Contracting'),
('NP', 'Nuclear'),
('OG', 'Oil & Gas'),
('OS', 'Oil Sands'),
('PM', 'Palm Oil'),
('PE', 'Pesticides')
) v(Abbr, FullStr) ON v.Abbr = s.value
);
GO
You can, and should, replace the VALUES with a real table.
On 2016 you would need FOR XML PATH instead of STRING_AGG:
CREATE OR ALTER FUNCTION GetFullStr
( #str varchar(250) )
RETURNS TABLE
AS RETURN
(
SELECT STUFF(
(SELECT ';' + ISNULL(v.FullStr, s.value)
FROM STRING_SPLIT(#str, ';') s
LEFT JOIN (VALUES
('MC', 'Military Contracting'),
('NP', 'Nuclear'),
('OG', 'Oil & Gas'),
('OS', 'Oil Sands'),
('PM', 'Palm Oil'),
('PE', 'Pesticides')
) v(Abbr, FullStr) ON v.Abbr = s.value
FOR XML PATH(''), TYPE
).value('text()[1]','varchar(2500)'),
, 1, 1, '')
);
GO
You use it like this:
SELECT s.result AS FullStr
FROM table
OUTER APPLY GetFullStr(value) AS s;
-- alternatively
SELECT (SELECT * FROM GetFullStr(value)) AS FullStr
FROM table;
You could assign your abbreviation mappings to a TABLE variable and then use that for your REPLACE. You could build this into a function, then pass your string values in.
The test below returns Military:Nuclear Power:XX.
declare #mapping table (abbrev varchar(50), fullname varchar(100))
insert into #mapping(abbrev, fullname)
values ('NP','Nuclear Power'),
('MC','Military')
declare #testString varchar(100), #newString varchar(100)
set #teststring = 'MC:NP:XX'
set #newString = #testString
SELECT #newString = REPLACE(#newString, abbrev, fullname) FROM #mapping
select #newString

SELECT between semi colons [duplicate]

I have a need to create a function the will return nth element of a delimited string.
For a data migration project, I am converting JSON audit records stored in a SQL Server database into a structured report using SQL script. Goal is to deliver a sql script and a sql function used by the script without any code.
(This is a short-term fix will be used while a new auditing feature is added the ASP.NET/MVC application)
There is no shortage of delimited string to table examples available.
I've chosen a Common Table Expression example http://www.sqlperformance.com/2012/07/t-sql-queries/split-strings
Example: I want to return 67 from '1,222,2,67,888,1111'
This is the easiest answer to rerieve the 67 (type-safe!!):
SELECT CAST('<x>' + REPLACE('1,222,2,67,888,1111',',','</x><x>') + '</x>' AS XML).value('/x[4]','int')
In the following you will find examples how to use this with variables for the string, the delimiter and the position (even for edge-cases with XML-forbidden characters)
The easy one
This question is not about a string split approach, but about how to get the nth element. The easiest, fully inlineable way would be this IMO:
This is a real one-liner to get part 2 delimited by a space:
DECLARE #input NVARCHAR(100)=N'part1 part2 part3';
SELECT CAST(N'<x>' + REPLACE(#input,N' ',N'</x><x>') + N'</x>' AS XML).value('/x[2]','nvarchar(max)')
Variables can be used with sql:variable() or sql:column()
Of course you can use variables for delimiter and position (use sql:column to retrieve the position directly from a query's value):
DECLARE #dlmt NVARCHAR(10)=N' ';
DECLARE #pos INT = 2;
SELECT CAST(N'<x>' + REPLACE(#input,#dlmt,N'</x><x>') + N'</x>' AS XML).value('/x[sql:variable("#pos")][1]','nvarchar(max)')
Edge-Case with XML-forbidden characters
If your string might include forbidden characters, you still can do it this way. Just use FOR XML PATH on your string first to replace all forbidden characters with the fitting escape sequence implicitly.
It's a very special case if - additionally - your delimiter is the semicolon. In this case I replace the delimiter first to '#DLMT#', and replace this to the XML tags finally:
SET #input=N'Some <, > and &;Other äöü#€;One more';
SET #dlmt=N';';
SELECT CAST(N'<x>' + REPLACE((SELECT REPLACE(#input,#dlmt,'#DLMT#') AS [*] FOR XML PATH('')),N'#DLMT#',N'</x><x>') + N'</x>' AS XML).value('/x[sql:variable("#pos")][1]','nvarchar(max)');
UPDATE for SQL-Server 2016+
Regretfully the developers forgot to return the part's index with STRING_SPLIT. But, using SQL-Server 2016+, there is JSON_VALUE and OPENJSON.
With JSON_VALUE we can pass in the position as the index' array.
For OPENJSON the documentation states clearly:
When OPENJSON parses a JSON array, the function returns the indexes of the elements in the JSON text as keys.
A string like 1,2,3 needs nothing more than brackets: [1,2,3].
A string of words like this is an example needs to be ["this","is","an"," example"].
These are very easy string operations. Just try it out:
DECLARE #str VARCHAR(100)='Hello John Smith';
DECLARE #position INT = 2;
--We can build the json-path '$[1]' using CONCAT
SELECT JSON_VALUE('["' + REPLACE(#str,' ','","') + '"]',CONCAT('$[',#position-1,']'));
--See this for a position safe string-splitter (zero-based):
SELECT JsonArray.[key] AS [Position]
,JsonArray.[value] AS [Part]
FROM OPENJSON('["' + REPLACE(#str,' ','","') + '"]') JsonArray
In this post I tested various approaches and found, that OPENJSON is really fast. Even much faster than the famous "delimitedSplit8k()" method...
UPDATE 2 - Get the values type-safe
We can use an array within an array simply by using doubled [[]]. This allows for a typed WITH-clause:
DECLARE #SomeDelimitedString VARCHAR(100)='part1|1|20190920';
DECLARE #JsonArray NVARCHAR(MAX)=CONCAT('[["',REPLACE(#SomeDelimitedString,'|','","'),'"]]');
SELECT #SomeDelimitedString AS TheOriginal
,#JsonArray AS TransformedToJSON
,ValuesFromTheArray.*
FROM OPENJSON(#JsonArray)
WITH(TheFirstFragment VARCHAR(100) '$[0]'
,TheSecondFragment INT '$[1]'
,TheThirdFragment DATE '$[2]') ValuesFromTheArray
Here is my initial solution...
It is based on work by Aaron Bertrand http://www.sqlperformance.com/2012/07/t-sql-queries/split-strings
I simply changed the return type to make it a scalar function.
Example:
SELECT dbo.GetSplitString_CTE('1,222,2,67,888,1111',',',4)
CREATE FUNCTION dbo.GetSplitString_CTE
(
#List VARCHAR(MAX),
#Delimiter VARCHAR(255),
#ElementNumber int
)
RETURNS VARCHAR(4000)
AS
BEGIN
DECLARE #result varchar(4000)
DECLARE #Items TABLE ( position int IDENTITY PRIMARY KEY,
Item VARCHAR(4000)
)
DECLARE #ll INT = LEN(#List) + 1, #ld INT = LEN(#Delimiter);
WITH a AS
(
SELECT
[start] = 1,
[end] = COALESCE(NULLIF(CHARINDEX(#Delimiter,
#List, #ld), 0), #ll),
[value] = SUBSTRING(#List, 1,
COALESCE(NULLIF(CHARINDEX(#Delimiter,
#List, #ld), 0), #ll) - 1)
UNION ALL
SELECT
[start] = CONVERT(INT, [end]) + #ld,
[end] = COALESCE(NULLIF(CHARINDEX(#Delimiter,
#List, [end] + #ld), 0), #ll),
[value] = SUBSTRING(#List, [end] + #ld,
COALESCE(NULLIF(CHARINDEX(#Delimiter,
#List, [end] + #ld), 0), #ll)-[end]-#ld)
FROM a
WHERE [end] < #ll
)
INSERT #Items SELECT [value]
FROM a
WHERE LEN([value]) > 0
OPTION (MAXRECURSION 0);
SELECT #result=Item
FROM #Items
WHERE position=#ElementNumber
RETURN #result;
END
GO
How about:
CREATE FUNCTION dbo.NTH_ELEMENT (#Input NVARCHAR(MAX), #Delim CHAR = '-', #N INT = 0)
RETURNS NVARCHAR(MAX)
AS
BEGIN
RETURN (SELECT VALUE FROM STRING_SPLIT(#Input, #Delim) ORDER BY (SELECT NULL) OFFSET #N ROWS FETCH NEXT 1 ROW ONLY)
END
On Azure SQL Database, and on SQL Server 2022, STRING_SPLIT now has an optional ordinal parameter. If the parameter is omitted, or 0 is passed, then the function acts as it did before, and just returns a value column and the order is not guaranteed. If you pass the parameter with the value 1 then the function returns 2 columns, value, and ordinal which (unsurprisingly) provides the ordinal position of the value within the string.
So, if you wanted the 4th delimited value from the string '1,222,2,67,888,1111' you could do the following:
SELECT [value]
FROM STRING_SPLIT('1,222,2,67,888,1111',',',1)
WHERE ordinal = 4;
If the value was in a column, it would look like this:
SELECT SS.[value]
FROM dbo.YourTable YT
CROSS APPLY STRING_SPLIT(YT.YourColumn,',',1) SS
WHERE SS.ordinal = 4;
#a - the value (f.e. 'a/bb/ccc/dddd/ee/ff/....')
#p - the desired position (1,2,3...)
#d - the delimeter ( '/' )
trim(substring(replace(#a,#d,replicate(' ',len(#a))),(#p-1)*len(#a)+1,len(#a)))
only problem is - if desired part has trailing or leading blanks they get trimmed.
Completely Based on article from https://exceljet.net/formula/split-text-with-delimiter
In a rare moment of lunacy I just thought that split is far easier if we use XML to parse it out for us:
(Using the variables from #Gary Kindel's answer)
declare #xml xml
set #xml = '<split><el>' + replace(#list,#Delimiter,'</el><el>') + '</el></split>'
select
el = split.el.value('.','varchar(max)')
from #xml.nodes('/split/el') split(el))
This lists all elements of the string, split by the specified character.
We can use an xpath test to filter out empty values, and a further xpath test to restrict this to the element we're interested in. In full Gary's function becomes:
alter FUNCTION dbo.GetSplitString_CTE
(
#List VARCHAR(MAX),
#Delimiter VARCHAR(255),
#ElementNumber int
)
RETURNS VARCHAR(max)
AS
BEGIN
-- escape any XML https://dba.stackexchange.com/a/143140/65992
set #list = convert(VARCHAR(MAX),(select #list for xml path(''), type));
declare #xml xml
set #xml = '<split><el>' + replace(#list,#Delimiter,'</el><el>') + '</el></split>'
declare #ret varchar(max)
set #ret = (select
el = split.el.value('.','varchar(max)')
from #xml.nodes('/split/el[string-length(.)>0][position() = sql:variable("#elementnumber")]') split(el))
return #ret
END
you can put this select into UFN. if you need you can customize it for specifying delimiter as well. in that case your ufn will have two input. number Nth and delimiter to use.
DECLARE #tlist varchar(max)='10,20,30,40,50,60,70,80,90,100'
DECLARE #i INT=1, #nth INT=3
While len(#tlist) <> 0
BEGIN
IF #i=#nth
BEGIN
select Case when charindex(',',#tlist) <> 0 Then LEFT(#tlist,charindex(',',#tlist)-1)
Else #tlist
END
END
Select #tlist = Case when charindex(',',#tlist) <> 0 Then substring(#tlist,charindex(',',#tlist)+1,len(#tlist))
Else ''
END
SELECT #i=#i+1
END
Alternatively, one can use xml, nodes() and ROW_NUMBER. We can order the elements based on their document order. For example:
DECLARE #Input VARCHAR(100) = '1a,2b,3c,4d,5e,6f,7g,8h'
,#Number TINYINT = 3
DECLARE #XML XML;
DECLARE #value VARCHAR(100);
SET #XML = CAST('<x>' + REPLACE(#Input,',','</x><x>') + '</x>' AS XML);
WITH DataSource ([rowID], [rowValue]) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY T.c ASC)
,T.c.value('.', 'VARCHAR(100)')
FROM #XML.nodes('./x') T(c)
)
SELECT #value = [rowValue]
FROM DataSource
WHERE [rowID] = #Number;
SELECT #value;
I would rather create a temp table with an identity column and fill it up with output from the SPLIT function.
CREATE TABLE #tblVals(Id INT IDENTITY(1,1), Val NVARCHAR(100))
INSERT INTO #tblVals (Val)
SELECT [value] FROM STRING_SPLIT('Val1-Val3-Val2-Val5', '-')
SELECT * FROM #tblVals
Now you can easily do something like below.
DECLARE #val2 NVARCHAR(100) = (SELECT TOP 1 Val FROM #tblVals WHERE Id = 2)
See the snapshot below:
You can use STRING_SPLIT with ROW_NUMBER:
SELECT value, idx FROM
(
SELECT
value,
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) idx
FROM STRING_SPLIT('Lorem ipsum dolor sit amet.', ' ')
) t
WHERE idx=2
returns second element (idx=2): 'ipsum'
We have the answer over below url.
DECLARE # AS VARCHAR(MAX) = 'Pawan1,Pawan2,Pawan4,Pawan3'
SELECT VALUE FROM
(
SELECT VALUE , ROW_NUMBER() OVER (ORDER BY (SELECT null)) rnk FROM STRING_SPLIT(#, ',')
)x where rnk = 3
GO
https://msbiskills.com/2018/06/15/sql-puzzle-multiple-ways-to-split-a-string-and-get-nth-row-xml-advanced-sql/
I don't have enough reputation to comment, so I am adding an answer. Please adjust as appropriate.
I have a problem with Gary Kindel's answer for cases where there is nothing between the two delimiters
If you do
select * from dbo.GetSplitString_CTE('abc^def^^ghi','^',3)
you get
ghi
instead of an empty string
If you comment out the
WHERE LEN([value]) > 0
line, you get the desired result
I cannot comment on Gary's solution because of my low reputation
I know Gary was referencing another link.
I have struggled to understand why we need this variable
#ld INT = LEN(#Delimiter)
I also don't understand why charindex has to start at the position of length of delimiter, #ld
I tested with many examples with a single character delimiter, and they work. Most of the time, delimiter character is a single character. However, since the developer included the ld as length of delimiter, the code has to work for delimiters that have more than one character
In this case, the following case will fail
11,,,22,,,33,,,44,,,55,,,
I cloned from the codes from this link. http://codebetter.com/raymondlewallen/2005/10/26/quick-t-sql-to-parse-a-delimited-string/
I have tested various scenarios including the delimiters that have more than one character
alter FUNCTION [dbo].[split1]
(
#string1 VARCHAR(8000) -- List of delimited items
, #Delimiter VARCHAR(40) = ',' -- delimiter that separates items
, #ElementNumber int
)
RETURNS varchar(8000)
AS
BEGIN
declare #position int
declare #piece varchar(8000)=''
declare #returnVal varchar(8000)=''
declare #Pattern varchar(50) = '%' + #Delimiter + '%'
declare #counter int =0
declare #ld int = len(#Delimiter)
declare #ls1 int = len (#string1)
declare #foundit int = 0
if patindex(#Pattern , #string1) = 0
return ''
if right(rtrim(#string1),1) <> #Delimiter
set #string1 = #string1 + #Delimiter
set #position = patindex(#Pattern , #string1) + #ld -1
while #position > 0
begin
set #counter = #counter +1
set #ls1 = len (#string1)
if (#ls1 >= #ld)
set #piece = left(#string1, #position - #ld)
else
break
if (#counter = #ElementNumber)
begin
set #foundit = 1
break
end
if len(#string1) > 0
begin
set #string1 = stuff(#string1, 1, #position, '')
set #position = patindex(#Pattern , #string1) + #ld -1
end
else
set #position = -1
end
if #foundit =1
set #returnVal = #piece
else
set #returnVal = ''
return #returnVal
you can create simple table variable and use it as below
Declare #tbl_split Table (Id INT IDENTITY(1,1), VAL VARCHAR(50))
INSERT #tbl_split SELECT VALUE
FROM string_split('999999:01', ':')
Select val from #tbl_split
WHERE Id=2

Using T-SQL, return nth delimited element from a string

I have a need to create a function the will return nth element of a delimited string.
For a data migration project, I am converting JSON audit records stored in a SQL Server database into a structured report using SQL script. Goal is to deliver a sql script and a sql function used by the script without any code.
(This is a short-term fix will be used while a new auditing feature is added the ASP.NET/MVC application)
There is no shortage of delimited string to table examples available.
I've chosen a Common Table Expression example http://www.sqlperformance.com/2012/07/t-sql-queries/split-strings
Example: I want to return 67 from '1,222,2,67,888,1111'
This is the easiest answer to rerieve the 67 (type-safe!!):
SELECT CAST('<x>' + REPLACE('1,222,2,67,888,1111',',','</x><x>') + '</x>' AS XML).value('/x[4]','int')
In the following you will find examples how to use this with variables for the string, the delimiter and the position (even for edge-cases with XML-forbidden characters)
The easy one
This question is not about a string split approach, but about how to get the nth element. The easiest, fully inlineable way would be this IMO:
This is a real one-liner to get part 2 delimited by a space:
DECLARE #input NVARCHAR(100)=N'part1 part2 part3';
SELECT CAST(N'<x>' + REPLACE(#input,N' ',N'</x><x>') + N'</x>' AS XML).value('/x[2]','nvarchar(max)')
Variables can be used with sql:variable() or sql:column()
Of course you can use variables for delimiter and position (use sql:column to retrieve the position directly from a query's value):
DECLARE #dlmt NVARCHAR(10)=N' ';
DECLARE #pos INT = 2;
SELECT CAST(N'<x>' + REPLACE(#input,#dlmt,N'</x><x>') + N'</x>' AS XML).value('/x[sql:variable("#pos")][1]','nvarchar(max)')
Edge-Case with XML-forbidden characters
If your string might include forbidden characters, you still can do it this way. Just use FOR XML PATH on your string first to replace all forbidden characters with the fitting escape sequence implicitly.
It's a very special case if - additionally - your delimiter is the semicolon. In this case I replace the delimiter first to '#DLMT#', and replace this to the XML tags finally:
SET #input=N'Some <, > and &;Other äöü#€;One more';
SET #dlmt=N';';
SELECT CAST(N'<x>' + REPLACE((SELECT REPLACE(#input,#dlmt,'#DLMT#') AS [*] FOR XML PATH('')),N'#DLMT#',N'</x><x>') + N'</x>' AS XML).value('/x[sql:variable("#pos")][1]','nvarchar(max)');
UPDATE for SQL-Server 2016+
Regretfully the developers forgot to return the part's index with STRING_SPLIT. But, using SQL-Server 2016+, there is JSON_VALUE and OPENJSON.
With JSON_VALUE we can pass in the position as the index' array.
For OPENJSON the documentation states clearly:
When OPENJSON parses a JSON array, the function returns the indexes of the elements in the JSON text as keys.
A string like 1,2,3 needs nothing more than brackets: [1,2,3].
A string of words like this is an example needs to be ["this","is","an"," example"].
These are very easy string operations. Just try it out:
DECLARE #str VARCHAR(100)='Hello John Smith';
DECLARE #position INT = 2;
--We can build the json-path '$[1]' using CONCAT
SELECT JSON_VALUE('["' + REPLACE(#str,' ','","') + '"]',CONCAT('$[',#position-1,']'));
--See this for a position safe string-splitter (zero-based):
SELECT JsonArray.[key] AS [Position]
,JsonArray.[value] AS [Part]
FROM OPENJSON('["' + REPLACE(#str,' ','","') + '"]') JsonArray
In this post I tested various approaches and found, that OPENJSON is really fast. Even much faster than the famous "delimitedSplit8k()" method...
UPDATE 2 - Get the values type-safe
We can use an array within an array simply by using doubled [[]]. This allows for a typed WITH-clause:
DECLARE #SomeDelimitedString VARCHAR(100)='part1|1|20190920';
DECLARE #JsonArray NVARCHAR(MAX)=CONCAT('[["',REPLACE(#SomeDelimitedString,'|','","'),'"]]');
SELECT #SomeDelimitedString AS TheOriginal
,#JsonArray AS TransformedToJSON
,ValuesFromTheArray.*
FROM OPENJSON(#JsonArray)
WITH(TheFirstFragment VARCHAR(100) '$[0]'
,TheSecondFragment INT '$[1]'
,TheThirdFragment DATE '$[2]') ValuesFromTheArray
Here is my initial solution...
It is based on work by Aaron Bertrand http://www.sqlperformance.com/2012/07/t-sql-queries/split-strings
I simply changed the return type to make it a scalar function.
Example:
SELECT dbo.GetSplitString_CTE('1,222,2,67,888,1111',',',4)
CREATE FUNCTION dbo.GetSplitString_CTE
(
#List VARCHAR(MAX),
#Delimiter VARCHAR(255),
#ElementNumber int
)
RETURNS VARCHAR(4000)
AS
BEGIN
DECLARE #result varchar(4000)
DECLARE #Items TABLE ( position int IDENTITY PRIMARY KEY,
Item VARCHAR(4000)
)
DECLARE #ll INT = LEN(#List) + 1, #ld INT = LEN(#Delimiter);
WITH a AS
(
SELECT
[start] = 1,
[end] = COALESCE(NULLIF(CHARINDEX(#Delimiter,
#List, #ld), 0), #ll),
[value] = SUBSTRING(#List, 1,
COALESCE(NULLIF(CHARINDEX(#Delimiter,
#List, #ld), 0), #ll) - 1)
UNION ALL
SELECT
[start] = CONVERT(INT, [end]) + #ld,
[end] = COALESCE(NULLIF(CHARINDEX(#Delimiter,
#List, [end] + #ld), 0), #ll),
[value] = SUBSTRING(#List, [end] + #ld,
COALESCE(NULLIF(CHARINDEX(#Delimiter,
#List, [end] + #ld), 0), #ll)-[end]-#ld)
FROM a
WHERE [end] < #ll
)
INSERT #Items SELECT [value]
FROM a
WHERE LEN([value]) > 0
OPTION (MAXRECURSION 0);
SELECT #result=Item
FROM #Items
WHERE position=#ElementNumber
RETURN #result;
END
GO
How about:
CREATE FUNCTION dbo.NTH_ELEMENT (#Input NVARCHAR(MAX), #Delim CHAR = '-', #N INT = 0)
RETURNS NVARCHAR(MAX)
AS
BEGIN
RETURN (SELECT VALUE FROM STRING_SPLIT(#Input, #Delim) ORDER BY (SELECT NULL) OFFSET #N ROWS FETCH NEXT 1 ROW ONLY)
END
On Azure SQL Database, and on SQL Server 2022, STRING_SPLIT now has an optional ordinal parameter. If the parameter is omitted, or 0 is passed, then the function acts as it did before, and just returns a value column and the order is not guaranteed. If you pass the parameter with the value 1 then the function returns 2 columns, value, and ordinal which (unsurprisingly) provides the ordinal position of the value within the string.
So, if you wanted the 4th delimited value from the string '1,222,2,67,888,1111' you could do the following:
SELECT [value]
FROM STRING_SPLIT('1,222,2,67,888,1111',',',1)
WHERE ordinal = 4;
If the value was in a column, it would look like this:
SELECT SS.[value]
FROM dbo.YourTable YT
CROSS APPLY STRING_SPLIT(YT.YourColumn,',',1) SS
WHERE SS.ordinal = 4;
#a - the value (f.e. 'a/bb/ccc/dddd/ee/ff/....')
#p - the desired position (1,2,3...)
#d - the delimeter ( '/' )
trim(substring(replace(#a,#d,replicate(' ',len(#a))),(#p-1)*len(#a)+1,len(#a)))
only problem is - if desired part has trailing or leading blanks they get trimmed.
Completely Based on article from https://exceljet.net/formula/split-text-with-delimiter
In a rare moment of lunacy I just thought that split is far easier if we use XML to parse it out for us:
(Using the variables from #Gary Kindel's answer)
declare #xml xml
set #xml = '<split><el>' + replace(#list,#Delimiter,'</el><el>') + '</el></split>'
select
el = split.el.value('.','varchar(max)')
from #xml.nodes('/split/el') split(el))
This lists all elements of the string, split by the specified character.
We can use an xpath test to filter out empty values, and a further xpath test to restrict this to the element we're interested in. In full Gary's function becomes:
alter FUNCTION dbo.GetSplitString_CTE
(
#List VARCHAR(MAX),
#Delimiter VARCHAR(255),
#ElementNumber int
)
RETURNS VARCHAR(max)
AS
BEGIN
-- escape any XML https://dba.stackexchange.com/a/143140/65992
set #list = convert(VARCHAR(MAX),(select #list for xml path(''), type));
declare #xml xml
set #xml = '<split><el>' + replace(#list,#Delimiter,'</el><el>') + '</el></split>'
declare #ret varchar(max)
set #ret = (select
el = split.el.value('.','varchar(max)')
from #xml.nodes('/split/el[string-length(.)>0][position() = sql:variable("#elementnumber")]') split(el))
return #ret
END
you can put this select into UFN. if you need you can customize it for specifying delimiter as well. in that case your ufn will have two input. number Nth and delimiter to use.
DECLARE #tlist varchar(max)='10,20,30,40,50,60,70,80,90,100'
DECLARE #i INT=1, #nth INT=3
While len(#tlist) <> 0
BEGIN
IF #i=#nth
BEGIN
select Case when charindex(',',#tlist) <> 0 Then LEFT(#tlist,charindex(',',#tlist)-1)
Else #tlist
END
END
Select #tlist = Case when charindex(',',#tlist) <> 0 Then substring(#tlist,charindex(',',#tlist)+1,len(#tlist))
Else ''
END
SELECT #i=#i+1
END
Alternatively, one can use xml, nodes() and ROW_NUMBER. We can order the elements based on their document order. For example:
DECLARE #Input VARCHAR(100) = '1a,2b,3c,4d,5e,6f,7g,8h'
,#Number TINYINT = 3
DECLARE #XML XML;
DECLARE #value VARCHAR(100);
SET #XML = CAST('<x>' + REPLACE(#Input,',','</x><x>') + '</x>' AS XML);
WITH DataSource ([rowID], [rowValue]) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY T.c ASC)
,T.c.value('.', 'VARCHAR(100)')
FROM #XML.nodes('./x') T(c)
)
SELECT #value = [rowValue]
FROM DataSource
WHERE [rowID] = #Number;
SELECT #value;
I would rather create a temp table with an identity column and fill it up with output from the SPLIT function.
CREATE TABLE #tblVals(Id INT IDENTITY(1,1), Val NVARCHAR(100))
INSERT INTO #tblVals (Val)
SELECT [value] FROM STRING_SPLIT('Val1-Val3-Val2-Val5', '-')
SELECT * FROM #tblVals
Now you can easily do something like below.
DECLARE #val2 NVARCHAR(100) = (SELECT TOP 1 Val FROM #tblVals WHERE Id = 2)
See the snapshot below:
You can use STRING_SPLIT with ROW_NUMBER:
SELECT value, idx FROM
(
SELECT
value,
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) idx
FROM STRING_SPLIT('Lorem ipsum dolor sit amet.', ' ')
) t
WHERE idx=2
returns second element (idx=2): 'ipsum'
We have the answer over below url.
DECLARE # AS VARCHAR(MAX) = 'Pawan1,Pawan2,Pawan4,Pawan3'
SELECT VALUE FROM
(
SELECT VALUE , ROW_NUMBER() OVER (ORDER BY (SELECT null)) rnk FROM STRING_SPLIT(#, ',')
)x where rnk = 3
GO
https://msbiskills.com/2018/06/15/sql-puzzle-multiple-ways-to-split-a-string-and-get-nth-row-xml-advanced-sql/
I don't have enough reputation to comment, so I am adding an answer. Please adjust as appropriate.
I have a problem with Gary Kindel's answer for cases where there is nothing between the two delimiters
If you do
select * from dbo.GetSplitString_CTE('abc^def^^ghi','^',3)
you get
ghi
instead of an empty string
If you comment out the
WHERE LEN([value]) > 0
line, you get the desired result
I cannot comment on Gary's solution because of my low reputation
I know Gary was referencing another link.
I have struggled to understand why we need this variable
#ld INT = LEN(#Delimiter)
I also don't understand why charindex has to start at the position of length of delimiter, #ld
I tested with many examples with a single character delimiter, and they work. Most of the time, delimiter character is a single character. However, since the developer included the ld as length of delimiter, the code has to work for delimiters that have more than one character
In this case, the following case will fail
11,,,22,,,33,,,44,,,55,,,
I cloned from the codes from this link. http://codebetter.com/raymondlewallen/2005/10/26/quick-t-sql-to-parse-a-delimited-string/
I have tested various scenarios including the delimiters that have more than one character
alter FUNCTION [dbo].[split1]
(
#string1 VARCHAR(8000) -- List of delimited items
, #Delimiter VARCHAR(40) = ',' -- delimiter that separates items
, #ElementNumber int
)
RETURNS varchar(8000)
AS
BEGIN
declare #position int
declare #piece varchar(8000)=''
declare #returnVal varchar(8000)=''
declare #Pattern varchar(50) = '%' + #Delimiter + '%'
declare #counter int =0
declare #ld int = len(#Delimiter)
declare #ls1 int = len (#string1)
declare #foundit int = 0
if patindex(#Pattern , #string1) = 0
return ''
if right(rtrim(#string1),1) <> #Delimiter
set #string1 = #string1 + #Delimiter
set #position = patindex(#Pattern , #string1) + #ld -1
while #position > 0
begin
set #counter = #counter +1
set #ls1 = len (#string1)
if (#ls1 >= #ld)
set #piece = left(#string1, #position - #ld)
else
break
if (#counter = #ElementNumber)
begin
set #foundit = 1
break
end
if len(#string1) > 0
begin
set #string1 = stuff(#string1, 1, #position, '')
set #position = patindex(#Pattern , #string1) + #ld -1
end
else
set #position = -1
end
if #foundit =1
set #returnVal = #piece
else
set #returnVal = ''
return #returnVal
you can create simple table variable and use it as below
Declare #tbl_split Table (Id INT IDENTITY(1,1), VAL VARCHAR(50))
INSERT #tbl_split SELECT VALUE
FROM string_split('999999:01', ':')
Select val from #tbl_split
WHERE Id=2

Number of times a particular character appears in a string

Is there MS SQL Server function that counts the number of times a particular character appears in a string?
There's no direct function for this, but you can do it with a replace:
declare #myvar varchar(20)
set #myvar = 'Hello World'
select len(#myvar) - len(replace(#myvar,'o',''))
Basically this tells you how many chars were removed, and therefore how many instances of it there were.
Extra:
The above can be extended to count the occurences of a multi-char string by dividing by the length of the string being searched for. For example:
declare #myvar varchar(max), #tocount varchar(20)
set #myvar = 'Hello World, Hello World'
set #tocount = 'lo'
select (len(#myvar) - len(replace(#myvar,#tocount,''))) / LEN(#tocount)
Look at the length of the string after replacing the sequence
declare #s varchar(10) = 'aabaacaa'
select len(#s) - len(replace(#s, 'a', ''))
>>6
You can do that using replace and len.
Count number of x characters in str:
len(str) - len(replace(str, 'x', ''))
Use this function begining from SQL SERVER 2016
Select Count(value) From STRING_SPLIT('AAA AAA AAA',' ');
-- Output : 3
When This function used with count function it gives you how many
character exists in string
try that :
declare #t nvarchar(max)
set #t='aaaa'
select len(#t)-len(replace(#t,'a',''))
You can do it inline, but you have to be careful with spaces in the column data. Better to use datalength()
SELECT
ColName,
DATALENGTH(ColName) -
DATALENGTH(REPLACE(Col, 'A', '')) AS NumberOfLetterA
FROM ColName;
-OR-
Do the replace with 2 characters
SELECT
ColName,
-LEN(ColName)
+LEN(REPLACE(Col, 'A', '><')) AS NumberOfLetterA
FROM ColName;
function for sql server:
CREATE function NTSGetCinC(#Cadena nvarchar(4000), #UnChar nvarchar(100))
Returns int
as
begin
declare #t1 int
declare #t2 int
declare #t3 int
set #t1 = len(#Cadena)
set #t2 = len(replace(#Cadena,#UnChar,''))
set #t3 = len(#UnChar)
return (#t1 - #t2) / #t3
end
Code for visual basic and others:
Public Function NTSCuentaChars(Texto As String, CharAContar As String) As Long
NTSCuentaChars = (Len(Texto) - Len(Replace(Texto, CharAContar, ""))) / Len(CharAContar)
End Function
It will count occurrences of a specific character
DECLARE #char NVARCHAR(50);
DECLARE #counter INT = 0;
DECLARE #i INT = 1;
DECLARE #search NVARCHAR(10) = 'o'
SET #char = N'Hello World';
WHILE #i <= LEN(#char)
BEGIN
IF SUBSTRING(#char, #i, 1) = #search
SET #counter += 1;
SET #i += 1;
END;
SELECT #counter;
Use this code, it is working perfectly.
I have create a sql function that accept two parameters, the first param is the long string that we want to search into it,and it can accept string length up to 1500 character(of course you can extend it or even change it to text datatype).
And the second parameter is the substring that we want to calculate the number of its occurance(its length is up to 200 character, of course you can change it to what your need). and the output is an integer, represent the number of frequency.....enjoy it.
CREATE FUNCTION [dbo].[GetSubstringCount]
(
#InputString nvarchar(1500),
#SubString NVARCHAR(200)
)
RETURNS int
AS
BEGIN
declare #K int , #StrLen int , #Count int , #SubStrLen int
set #SubStrLen = (select len(#SubString))
set #Count = 0
Set #k = 1
set #StrLen =(select len(#InputString))
While #K <= #StrLen
Begin
if ((select substring(#InputString, #K, #SubStrLen)) = #SubString)
begin
if ((select CHARINDEX(#SubString ,#InputString)) > 0)
begin
set #Count = #Count +1
end
end
Set #K=#k+1
end
return #Count
end

How do you count the number of occurrences of a certain substring in a SQL varchar?

I have a column that has values formatted like a,b,c,d. Is there a way to count the number of commas in that value in T-SQL?
The first way that comes to mind is to do it indirectly by replacing the comma with an empty string and comparing the lengths
Declare #string varchar(1000)
Set #string = 'a,b,c,d'
select len(#string) - len(replace(#string, ',', ''))
Quick extension of cmsjr's answer that works for strings with more than one character.
CREATE FUNCTION dbo.CountOccurrencesOfString
(
#searchString nvarchar(max),
#searchTerm nvarchar(max)
)
RETURNS INT
AS
BEGIN
return (LEN(#searchString)-LEN(REPLACE(#searchString,#searchTerm,'')))/LEN(#searchTerm)
END
Usage:
SELECT * FROM MyTable
where dbo.CountOccurrencesOfString(MyColumn, 'MyString') = 1
You can compare the length of the string with one where the commas are removed:
len(value) - len(replace(value,',',''))
The answer by #csmjr has a problem in some instances.
His answer was to do this:
Declare #string varchar(1000)
Set #string = 'a,b,c,d'
select len(#string) - len(replace(#string, ',', ''))
This works in most scenarios, however, try running this:
DECLARE #string VARCHAR(1000)
SET #string = 'a,b,c,d ,'
SELECT LEN(#string) - LEN(REPLACE(#string, ',', ''))
For some reason, REPLACE gets rid of the final comma but ALSO the space just before it (not sure why). This results in a returned value of 5 when you'd expect 4. Here is another way to do this which will work even in this special scenario:
DECLARE #string VARCHAR(1000)
SET #string = 'a,b,c,d ,'
SELECT LEN(REPLACE(#string, ',', '**')) - LEN(#string)
Note that you don't need to use asterisks. Any two-character replacement will do. The idea is that you lengthen the string by one character for each instance of the character you're counting, then subtract the length of the original. It's basically the opposite method of the original answer which doesn't come with the strange trimming side-effect.
Building on #Andrew's solution, you'll get much better performance using a non-procedural table-valued-function and CROSS APPLY:
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
/* Usage:
SELECT t.[YourColumn], c.StringCount
FROM YourDatabase.dbo.YourTable t
CROSS APPLY dbo.CountOccurrencesOfString('your search string', t.[YourColumn]) c
*/
CREATE FUNCTION [dbo].[CountOccurrencesOfString]
(
#searchTerm nvarchar(max),
#searchString nvarchar(max)
)
RETURNS TABLE
AS
RETURN
SELECT (DATALENGTH(#searchString)-DATALENGTH(REPLACE(#searchString,#searchTerm,'')))/NULLIF(DATALENGTH(#searchTerm), 0) AS StringCount
Declare #string varchar(1000)
DECLARE #SearchString varchar(100)
Set #string = 'as as df df as as as'
SET #SearchString = 'as'
select ((len(#string) - len(replace(#string, #SearchString, ''))) -(len(#string) -
len(replace(#string, #SearchString, ''))) % 2) / len(#SearchString)
Accepted answer is correct ,
extending it to use 2 or more character in substring:
Declare #string varchar(1000)
Set #string = 'aa,bb,cc,dd'
Set #substring = 'aa'
select (len(#string) - len(replace(#string, #substring, '')))/len(#substring)
Darrel Lee I think has a pretty good answer. Replace CHARINDEX() with PATINDEX(), and you can do some weak regex searching along a string, too...
Like, say you use this for #pattern:
set #pattern='%[-.|!,'+char(9)+']%'
Why would you maybe want to do something crazy like this?
Say you're loading delimited text strings into a staging table, where the field holding the data is something like a varchar(8000) or nvarchar(max)...
Sometimes it's easier/faster to do ELT (Extract-Load-Transform) with data rather than ETL (Extract-Transform-Load), and one way to do this is to load the delimited records as-is into a staging table, especially if you may want an simpler way to see the exceptional records rather than deal with them as part of an SSIS package...but that's a holy war for a different thread.
If we know there is a limitation on LEN and space, why cant we replace the space first?
Then we know there is no space to confuse LEN.
len(replace(#string, ' ', '-')) - len(replace(replace(#string, ' ', '-'), ',', ''))
Use this code, it is working perfectly.
I have create a sql function that accept two parameters, the first param is the long string that we want to search into it,and it can accept string length up to 1500 character(of course you can extend it or even change it to text datatype).
And the second parameter is the substring that we want to calculate the number of its occurance(its length is up to 200 character, of course you can change it to what your need). and the output is an integer, represent the number of frequency.....enjoy it.
CREATE FUNCTION [dbo].[GetSubstringCount]
(
#InputString nvarchar(1500),
#SubString NVARCHAR(200)
)
RETURNS int
AS
BEGIN
declare #K int , #StrLen int , #Count int , #SubStrLen int
set #SubStrLen = (select len(#SubString))
set #Count = 0
Set #k = 1
set #StrLen =(select len(#InputString))
While #K <= #StrLen
Begin
if ((select substring(#InputString, #K, #SubStrLen)) = #SubString)
begin
if ((select CHARINDEX(#SubString ,#InputString)) > 0)
begin
set #Count = #Count +1
end
end
Set #K=#k+1
end
return #Count
end
In SQL 2017 or higher, you can use this:
declare #hits int = 0
set #hits = (select value from STRING_SPLIT('F609,4DFA,8499',','));
select count(#hits)
Improved version based on top answer and other answers:
Wrapping the string with delimiters ensures that LEN works properly. Making the replace character string one character longer than the match string removes the need for division.
CREATE FUNCTION dbo.MatchCount(#value nvarchar(max), #match nvarchar(max))
RETURNS int
BEGIN
RETURN LEN('[' + REPLACE(#value,#match,REPLICATE('*', LEN('[' + #match + ']') - 1)) + ']') - LEN('['+#value+']')
END
DECLARE #records varchar(400)
SELECT #records = 'a,b,c,d'
select LEN(#records) as 'Before removing Commas' , LEN(#records) - LEN(REPLACE(#records, ',', '')) 'After Removing Commans'
The following should do the trick for both single character and multiple character searches:
CREATE FUNCTION dbo.CountOccurrences
(
#SearchString VARCHAR(1000),
#SearchFor VARCHAR(1000)
)
RETURNS TABLE
AS
RETURN (
SELECT COUNT(*) AS Occurrences
FROM (
SELECT ROW_NUMBER() OVER (ORDER BY O.object_id) AS n
FROM sys.objects AS O
) AS N
JOIN (
VALUES (#SearchString)
) AS S (SearchString)
ON
SUBSTRING(S.SearchString, N.n, LEN(#SearchFor)) = #SearchFor
);
GO
---------------------------------------------------------------------------------------
-- Test the function for single and multiple character searches
---------------------------------------------------------------------------------------
DECLARE #SearchForComma VARCHAR(10) = ',',
#SearchForCharacters VARCHAR(10) = 'de';
DECLARE #TestTable TABLE
(
TestData VARCHAR(30) NOT NULL
);
INSERT INTO #TestTable
(
TestData
)
VALUES
('a,b,c,de,de ,d e'),
('abc,de,hijk,,'),
(',,a,b,cde,,');
SELECT TT.TestData,
CO.Occurrences AS CommaOccurrences,
CO2.Occurrences AS CharacterOccurrences
FROM #TestTable AS TT
OUTER APPLY dbo.CountOccurrences(TT.TestData, #SearchForComma) AS CO
OUTER APPLY dbo.CountOccurrences(TT.TestData, #SearchForCharacters) AS CO2;
The function can be simplified a bit using a table of numbers (dbo.Nums):
RETURN (
SELECT COUNT(*) AS Occurrences
FROM dbo.Nums AS N
JOIN (
VALUES (#SearchString)
) AS S (SearchString)
ON
SUBSTRING(S.SearchString, N.n, LEN(#SearchFor)) = #SearchFor
);
I finally write this function that should cover all the possible situations, adding a char prefix and suffix to the input. this char is evaluated to be different to any of the char conteined in the search parameter, so it can't affect the result.
CREATE FUNCTION [dbo].[CountOccurrency]
(
#Input nvarchar(max),
#Search nvarchar(max)
)
RETURNS int AS
BEGIN
declare #SearhLength as int = len('-' + #Search + '-') -2;
declare #conteinerIndex as int = 255;
declare #conteiner as char(1) = char(#conteinerIndex);
WHILE ((CHARINDEX(#conteiner, #Search)>0) and (#conteinerIndex>0))
BEGIN
set #conteinerIndex = #conteinerIndex-1;
set #conteiner = char(#conteinerIndex);
END;
set #Input = #conteiner + #Input + #conteiner
RETURN (len(#Input) - len(replace(#Input, #Search, ''))) / #SearhLength
END
usage
select dbo.CountOccurrency('a,b,c,d ,', ',')
Declare #MainStr nvarchar(200)
Declare #SubStr nvarchar(10)
Set #MainStr = 'nikhildfdfdfuzxsznikhilweszxnikhil'
Set #SubStr = 'nikhil'
Select (Len(#MainStr) - Len(REPLACE(#MainStr,#SubStr,'')))/Len(#SubStr)
this T-SQL code finds and prints all occurrences of pattern #p in sentence #s. you can do any processing on the sentence afterward.
declare #old_hit int = 0
declare #hit int = 0
declare #i int = 0
declare #s varchar(max)='alibcalirezaalivisualization'
declare #p varchar(max)='ali'
while #i<len(#s)
begin
set #hit=charindex(#p,#s,#i)
if #hit>#old_hit
begin
set #old_hit =#hit
set #i=#hit+1
print #hit
end
else
break
end
the result is:
1
6
13
20
I ended up using a CTE table for this,
CREATE TABLE #test (
[id] int,
[field] nvarchar(500)
)
INSERT INTO #test ([id], [field])
VALUES (1, 'this is a test string http://url, and https://google.com'),
(2, 'another string, hello world http://example.com'),
(3, 'a string with no url')
SELECT *
FROM #test
;WITH URL_count_cte ([id], [url_index], [field])
AS
(
SELECT [id], CHARINDEX('http', [field], 0)+1 AS [url_index], [field]
FROM #test AS [t]
WHERE CHARINDEX('http', [field], 0) != 0
UNION ALL
SELECT [id], CHARINDEX('http', [field], [url_index])+1 AS [url_index], [field]
FROM URL_count_cte
WHERE CHARINDEX('http', [field], [url_index]) > 0
)
-- total urls
SELECT COUNT(1)
FROM URL_count_cte
-- urls per row
SELECT [id], COUNT(1) AS [url_count]
FROM URL_count_cte
GROUP BY [id]
Using this function, you can get the number of repetitions of words in a text.
/****** Object: UserDefinedFunction [dbo].[fn_getCountKeywords] Script Date: 22/11/2021 17:52:00 ******/
DROP FUNCTION IF EXISTS [dbo].[fn_getCountKeywords]
GO
/****** Object: UserDefinedFunction [dbo].[fn_getCountKeywords] Script Date: 2211/2021 17:52:00 ******/
SET ANSI_NULLS OFF
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================
-- Author: m_Khezrian
-- Create date: 2021/11/22-17:52
-- Description: Return Count Keywords In Input Text
-- =============================================
Create OR Alter Function [dbo].[fn_getCountKeywords]
(#Text nvarchar(max)
,#Keywords nvarchar(max)
)
RETURNS #Result TABLE
(
[ID] int Not Null IDENTITY PRIMARY KEY
,[Keyword] nvarchar(max) Not Null
,[Cnt] int Not Null Default(0)
)
/*With ENCRYPTION*/ As
Begin
Declare #Key nvarchar(max);
Declare #Cnt int;
Declare #I int;
Set #I = 0 ;
--Set #Text = QUOTENAME(#Text);
Insert Into #Result
([Keyword])
Select Trim([value])
From String_Split(#Keywords,N',')
Group By [value]
Order By Len([value]) Desc;
Declare CntKey_Cursor Insensitive Cursor For
Select [Keyword]
From #Result
Order By [ID];
Open CntKey_Cursor;
Fetch Next From CntKey_Cursor Into #Key;
While (##Fetch_STATUS = 0) Begin
Set #Cnt = 0;
While (PatIndex(N'%'+#Key+'%',#Text) > 0) Begin
Set #Cnt += 1;
Set #I += 1 ;
Set #Text = Stuff(#Text,PatIndex(N'%'+#Key+'%',#Text),len(#Key),N'{'+Convert(nvarchar,#I)+'}');
--Set #Text = Replace(#Text,#Key,N'{'+Convert(nvarchar,#I)+'}');
End--While
Update #Result
Set [Cnt] = #Cnt
Where ([Keyword] = #Key);
Fetch Next From CntKey_Cursor Into #Key;
End--While
Close CntKey_Cursor;
Deallocate CntKey_Cursor;
Return
End
GO
--Test
Select *
From dbo.fn_getCountKeywords(
N'<U+0001F4E3> MARKET IMPACT Euro area Euro CPIarea annual inflation up to 3.0% MaCPIRKET forex'
,N'CPI ,core,MaRKET , Euro area'
)
Go
Reference https://learn.microsoft.com/en-us/sql/t-sql/functions/string-split-transact-sql?view=sql-server-ver15
Example:
SELECT s.*
,s.[Number1] - (SELECT COUNT(Value)
FROM string_split(s.[StringColumn],',')
WHERE RTRIM(VALUE) <> '')
FROM TableName AS s
Applies to: SQL Server 2016 (13.x) and later
You can use the following stored procedure to fetch , values.
IF EXISTS (SELECT * FROM sys.objects
WHERE object_id = OBJECT_ID(N'[dbo].[sp_parsedata]') AND type in (N'P', N'PC'))
DROP PROCEDURE [dbo].[sp_parsedata]
GO
create procedure sp_parsedata
(#cid integer,#st varchar(1000))
as
declare #coid integer
declare #c integer
declare #c1 integer
select #c1=len(#st) - len(replace(#st, ',', ''))
set #c=0
delete from table1 where complainid=#cid;
while (#c<=#c1)
begin
if (#c<#c1)
begin
select #coid=cast(replace(left(#st,CHARINDEX(',',#st,1)),',','') as integer)
select #st=SUBSTRING(#st,CHARINDEX(',',#st,1)+1,LEN(#st))
end
else
begin
select #coid=cast(#st as integer)
end
insert into table1(complainid,courtid) values(#cid,#coid)
set #c=#c+1
end
The Replace/Len test is cute, but probably very inefficient (especially in terms of memory).
A simple function with a loop will do the job.
CREATE FUNCTION [dbo].[fn_Occurences]
(
#pattern varchar(255),
#expression varchar(max)
)
RETURNS int
AS
BEGIN
DECLARE #Result int = 0;
DECLARE #index BigInt = 0
DECLARE #patLen int = len(#pattern)
SET #index = CHARINDEX(#pattern, #expression, #index)
While #index > 0
BEGIN
SET #Result = #Result + 1;
SET #index = CHARINDEX(#pattern, #expression, #index + #patLen)
END
RETURN #Result
END
Perhaps you should not store data that way. It is a bad practice to ever store a comma delimited list in a field. IT is very inefficient for querying. This should be a related table.

Resources