How can I populate a user defined table quickly? - sql-server

I'm trying to create a generic histogram function to obtain histogram data for an arbitrary table in my db.
My db has many tables, each one is roughly a few gigs in size. Only some of the columns are numeric.
I started off by trying to pass in a user defined table valued parameter. The signature for my user defined function looked something like this:
CREATE TYPE dbo.numArray AS TABLE (
number real NOT NULL
);
CREATE FUNCTION dbo.fn_numericHistogram (
#values dbo.numArray READONLY,
#numOfBreaks int = 10,
#rangeMin float = NULL,
#rangeMax float = NULL
)
That worked, but it did not meet my performance requirements because I had to insert the existing numeric column into my user defined table first. That takes a long time. This long-running insertion happened inside the calling stored procedure, and looks something like this:
-- Caller-side usage: copy one numeric column into the table-valued parameter
-- and query the histogram function.
DECLARE @TVP AS dbo.numArray;

-- Takes far too long on multi-gigabyte tables: every row is copied into tempdb.
-- NULLs are filtered because dbo.numArray.number is declared NOT NULL.
INSERT INTO @TVP (number)
SELECT myNumericColumn
FROM dbo.SomeLargeTable
WHERE myNumericColumn IS NOT NULL;

-- A table-valued function is queried with SELECT ... FROM, not EXEC, and
-- READONLY belongs only in the parameter declaration, not at the call site.
SELECT lowerBound, upperBound, [count]
FROM dbo.fn_numericHistogram(@TVP, DEFAULT, DEFAULT, DEFAULT);
To get around this, my next approach was to simply pass in the table name as an nvarchar but it involves a lot of string manipulation and is quite ugly.
Does that seem like a reasonable workaround? I'd much rather go with the first approach but I don't know if it's possible to populate a UDT "by reference".
Thanks
*Edit:
Assuming I can quickly populate the #values UDT with ~2gigs worth of numeric data. My function would look like this:
CREATE TYPE dbo.numArray AS TABLE (
    number real NOT NULL
);
GO
-- Builds an equal-width histogram over the values in @values.
--
-- Parameters:
--   @values      table-valued parameter holding the numbers to bucket
--   @numOfBreaks number of buckets (default 10)
--   @rangeMin    lower edge of the histogram; NULL = MIN(number)
--   @rangeMax    upper edge of the histogram; NULL = MAX(number)
--
-- Returns one row per non-empty bucket: (lowerBound, upperBound, [count]).
-- Returns no rows when @values is empty, @numOfBreaks is 0, or the range has
-- zero or negative width (no meaningful bucket size exists).
CREATE FUNCTION dbo.fn_numericHistogram (
    @values dbo.numArray READONLY,
    @numOfBreaks int = 10,
    @rangeMin float = NULL,
    @rangeMax float = NULL
)
RETURNS @output TABLE (
    lowerBound float NOT NULL,
    upperBound float NOT NULL,
    [count] int NOT NULL
)
AS
BEGIN
    DECLARE @intervalSize float;

    -- Fill in whichever bound the caller left unspecified.
    IF (@rangeMin IS NULL OR @rangeMax IS NULL)
    BEGIN
        SELECT
            @rangeMin = COALESCE(@rangeMin, MIN(number)),
            @rangeMax = COALESCE(@rangeMax, MAX(number))
        FROM @values;
    END

    -- NULLIF avoids a divide-by-zero error when @numOfBreaks = 0.
    SET @intervalSize = (@rangeMax - @rangeMin) / NULLIF(@numOfBreaks, 0);

    IF (@intervalSize IS NULL OR @intervalSize <= 0)
        RETURN;

    INSERT INTO @output (lowerBound, upperBound, [count])
    SELECT
        @rangeMin + @intervalSize * FLOOR(B.number / @intervalSize) AS lowerBound,
        @rangeMin + @intervalSize * FLOOR(B.number / @intervalSize) + @intervalSize AS upperBound,
        COUNT(*) AS [count]
    FROM (
        -- Special-case the max value: nudge it half a bucket down so it lands
        -- in the last bucket instead of opening a new one. Values are shifted
        -- so that @rangeMin maps to 0.
        SELECT ISNULL(NULLIF(number, @rangeMax), @rangeMax - 0.5 * @intervalSize) - @rangeMin AS number
        FROM @values
    ) AS B
    GROUP BY FLOOR(B.number / @intervalSize);

    RETURN;
END;
GO
Otherwise, I'll have to pass in a table name, and the function bloats to something like this: (By the way, I'm not even sure if this would work as a function...perhaps I would need a stored procedure instead).
-- Builds an equal-width histogram over one numeric column of an arbitrary table.
--
-- This has to be a stored procedure: SQL Server does not allow dynamic SQL
-- (EXEC / sp_executesql) inside a user-defined function. The object name is
-- kept from the original draft; call it with EXEC, or capture the rows with
-- INSERT ... EXEC.
--
-- Parameters:
--   @tableName          table (optionally schema-qualified) to read from
--   @numericColumnName  numeric column of that table
--   @numOfBreaks        number of buckets (default 10)
--   @rangeMin/@rangeMax histogram edges; NULL = MIN / MAX of the column
--
-- Result set: (lowerBound float, upperBound float, [count] int), one row per
-- non-empty bucket. NULL column values are ignored.
CREATE PROCEDURE dbo.fn_numericHistogram
    @tableName nvarchar(200),
    @numericColumnName nvarchar(200),
    @numOfBreaks int = 10,
    @rangeMin float = NULL,
    @rangeMax float = NULL
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @intervalSize float;
    DECLARE @SQLQuery nvarchar(MAX);
    DECLARE @objectId int = OBJECT_ID(@tableName);
    DECLARE @quotedTable nvarchar(600);
    DECLARE @quotedColumn nvarchar(260);

    -- Identifiers cannot be bound as parameters, so validate them against the
    -- catalog and quote them before splicing them into the statement.
    IF (@objectId IS NULL OR COLUMNPROPERTY(@objectId, @numericColumnName, 'ColumnId') IS NULL)
    BEGIN
        RAISERROR(N'Unknown table or column.', 16, 1);
        RETURN;
    END

    SET @quotedTable = QUOTENAME(OBJECT_SCHEMA_NAME(@objectId)) + N'.' + QUOTENAME(OBJECT_NAME(@objectId));
    SET @quotedColumn = QUOTENAME(@numericColumnName);

    IF (@rangeMin IS NULL OR @rangeMax IS NULL)
    BEGIN
        -- OUTPUT parameters also carry the caller's value in, so a bound the
        -- caller did supply is preserved by COALESCE.
        SET @SQLQuery = N'
            SELECT
                @rangeMinOUT = COALESCE(@rangeMinOUT, CONVERT(float, MIN(' + @quotedColumn + N'))),
                @rangeMaxOUT = COALESCE(@rangeMaxOUT, CONVERT(float, MAX(' + @quotedColumn + N')))
            FROM ' + @quotedTable + N';';

        EXEC sys.sp_executesql
            @SQLQuery,
            N'@rangeMinOUT float OUTPUT, @rangeMaxOUT float OUTPUT',
            @rangeMinOUT = @rangeMin OUTPUT,
            @rangeMaxOUT = @rangeMax OUTPUT;
    END

    SET @intervalSize = (@rangeMax - @rangeMin) / NULLIF(@numOfBreaks, 0);

    -- Empty table, zero breaks or zero-width range: return an empty result
    -- set of the documented shape instead of dividing by zero.
    IF (@intervalSize IS NULL OR @intervalSize <= 0)
    BEGIN
        SELECT
            CAST(NULL AS float) AS lowerBound,
            CAST(NULL AS float) AS upperBound,
            CAST(NULL AS int) AS [count]
        WHERE 1 = 0;
        RETURN;
    END

    -- Numeric values are passed as real parameters rather than being
    -- converted to text, which would lose float precision.
    SET @SQLQuery = N'
        SELECT
            @rangeMin + @intervalSize * FLOOR(B.number / @intervalSize) AS lowerBound,
            @rangeMin + @intervalSize * FLOOR(B.number / @intervalSize) + @intervalSize AS upperBound,
            COUNT(*) AS [count]
        FROM (
            -- Special-case the max value so it lands in the last bucket.
            SELECT ISNULL(NULLIF(CONVERT(float, ' + @quotedColumn + N'), @rangeMax), @rangeMax - 0.5 * @intervalSize) - @rangeMin AS number
            FROM ' + @quotedTable + N'
            WHERE ' + @quotedColumn + N' IS NOT NULL
        ) AS B
        GROUP BY FLOOR(B.number / @intervalSize);';

    EXEC sys.sp_executesql
        @SQLQuery,
        N'@rangeMin float, @rangeMax float, @intervalSize float',
        @rangeMin = @rangeMin,
        @rangeMax = @rangeMax,
        @intervalSize = @intervalSize;
END;
GO

Related

How to iterate over a string of varying length, replacing different abbreviations with their full text. All abbreviations separated by a semicolon

My problem is this; I have a field in a table that contains values like this:
NP
NP;MC;PE
MC;AB;AT;MI;TC;WM
OS
OG
I want to convert these abbreviations to their full name. i.e. NP becomes Nuclear Power, OG becomes Oil and Gas, MI becomes Military etc.
My desired output would be:
Nuclear Power
Nuclear Power;Military;Pesticides
and so on.
I'm creating this as a function. I got it working for just the one abbreviation and then the same for two. However my issue is that I may have 5 abbreviations or 7. I know my current approach is dreadful but cannot figure out how to loop it in the right way.
Please note: I've shortened the list of abbreviations for StackOverflow but there's 25 in total.
Please further note: I did the function bottom up (I don't know why) and got the two value and single value working. I've removed anything I did for values over 3 as nothing I did worked.
-- Expands a semicolon-separated list of abbreviations into their full names.
--
-- Parameters:
--   @str_input  e.g. 'NP;MC;PE'
-- Returns:
--   the same list with every known abbreviation replaced by its full text,
--   e.g. 'Nuclear Power;Military Contracting;Pesticides'. Unknown tokens are
--   passed through unchanged. NULL in gives NULL out. The result is limited
--   to 250 characters by the declared return type.
ALTER FUNCTION [dbo].[get_str_full]
(
    @str_input VARCHAR(250)
)
RETURNS VARCHAR(250)
AS
BEGIN
    DECLARE @Result VARCHAR(250) = '';
    DECLARE @Remaining VARCHAR(251);
    DECLARE @Token VARCHAR(250);
    DECLARE @SepPos INT;
    DECLARE @IsFirst BIT = 1;

    IF @str_input IS NULL
        RETURN NULL;

    -- A trailing sentinel separator lets the loop treat the last token like
    -- every other one, for any number of tokens.
    SET @Remaining = @str_input + ';';
    SET @SepPos = CHARINDEX(';', @Remaining);

    WHILE @SepPos > 0
    BEGIN
        SET @Token = LEFT(@Remaining, @SepPos - 1);

        -- Whole-token match: unlike REPLACE, this cannot rewrite an
        -- abbreviation that merely appears inside a longer token.
        SET @Result = @Result
            + CASE WHEN @IsFirst = 1 THEN '' ELSE ';' END
            + CASE @Token
                  WHEN 'MC' THEN 'Military Contracting'
                  WHEN 'NP' THEN 'Nuclear Power'
                  WHEN 'OG' THEN 'Oil & Gas'
                  WHEN 'OS' THEN 'Oil Sands'
                  WHEN 'PM' THEN 'Palm Oil'
                  WHEN 'PE' THEN 'Pesticides'
                  ELSE @Token
              END;

        SET @IsFirst = 0;
        SET @Remaining = SUBSTRING(@Remaining, @SepPos + 1, 251);
        SET @SepPos = CHARINDEX(';', @Remaining);
    END

    -- Return the result of the function
    RETURN @Result;
END
First for some easily consumable sample data:
DECLARE #tranlation TABLE(tCode VARCHAR(10), tString VARCHAR(40));
DECLARE #t TABLE(String VARCHAR(1000));
INSERT #t VALUES('PE;N'),('NP'),('NP;MC;PE;XX')
INSERT #tranlation VALUES ('N','Nukes'),('NP','Nuclear Power'),('MC','Military'),
('PE','Pesticides');
Note my updated sample data which includes "XX", which has no match , and an "N" for "Nukes" which would wreck any solution which leverages REPLACE. If you are on SQL 2016+ you can use STRING_SPLIT and STRING_AGG.
SELECT
OldString = t.String,
NewString = STRING_AGG(ISNULL(tx.tString,items.[value]),';')
FROM #t AS t
OUTER APPLY STRING_SPLIT(t.String,';') AS items
LEFT JOIN #tranlation AS tx
ON items.[value] = tx.tCode
GROUP BY t.String ;
Returns:
OldString NewString
----------------- -------------------------------------------
NP Nuclear Power
NP;MC;PE;XX Nuclear Power;Military;Pesticides;XX
PE;N Pesticides;Nukes
You should really fix your table design so that you do not store multiple pieces of info in one column.
If you would like it as a function, I would strongly recommend an inline Table-Valued function rather than a scalar function.
If you have SQL Server version 2017+ you can use STRING_SPLIT and STRING_AGG for this.
-- Inline table-valued function: expands a semicolon-separated list of
-- abbreviations into full names (SQL Server 2017+ for STRING_AGG).
--
-- Returns a single row with one column, result. Unknown tokens pass through
-- unchanged. STRING_SPLIT does not guarantee output order, so the order of
-- the expanded items is not guaranteed to match the input.
CREATE OR ALTER FUNCTION GetFullStr
    ( @str varchar(250) )
RETURNS TABLE
AS RETURN
(
    -- COALESCE rather than ISNULL: ISNULL takes the type of its first
    -- argument (the 20-character FullStr literal column) and would truncate
    -- longer unmapped tokens.
    SELECT STRING_AGG(COALESCE(v.FullStr, s.value), ';') AS result
    FROM STRING_SPLIT(@str, ';') AS s
    LEFT JOIN (VALUES
        ('MC', 'Military Contracting'),
        ('NP', 'Nuclear'),
        ('OG', 'Oil & Gas'),
        ('OS', 'Oil Sands'),
        ('PM', 'Palm Oil'),
        ('PE', 'Pesticides')
    ) AS v (Abbr, FullStr)
        ON v.Abbr = s.value
);
GO
You can, and should, replace the VALUES with a real table.
On 2016 you would need FOR XML PATH instead of STRING_AGG:
-- Inline table-valued function for SQL Server 2016, where STRING_AGG is not
-- available: expands a semicolon-separated list of abbreviations into full
-- names using FOR XML PATH concatenation.
--
-- Returns a single row with one column, result. Unknown tokens pass through
-- unchanged. No ORDER BY is possible here (STRING_SPLIT exposes no position),
-- so item order is not guaranteed.
CREATE OR ALTER FUNCTION GetFullStr
    ( @str varchar(250) )
RETURNS TABLE
AS RETURN
(
    SELECT STUFF(
        (SELECT ';' + COALESCE(v.FullStr, s.value)
         FROM STRING_SPLIT(@str, ';') AS s
         LEFT JOIN (VALUES
             ('MC', 'Military Contracting'),
             ('NP', 'Nuclear'),
             ('OG', 'Oil & Gas'),
             ('OS', 'Oil Sands'),
             ('PM', 'Palm Oil'),
             ('PE', 'Pesticides')
         ) AS v (Abbr, FullStr)
             ON v.Abbr = s.value
         -- TYPE + .value() un-escapes XML entities such as &amp;
         FOR XML PATH(''), TYPE
        ).value('text()[1]', 'varchar(2500)')
        -- strip the leading separator
        , 1, 1, '') AS result
);
GO
You use it like this:
SELECT s.result AS FullStr
FROM table
OUTER APPLY GetFullStr(value) AS s;
-- alternatively
SELECT (SELECT * FROM GetFullStr(value)) AS FullStr
FROM table;
You could assign your abbreviation mappings to a TABLE variable and then use that for your REPLACE. You could build this into a function, then pass your string values in.
The test below returns Military:Nuclear Power:XX.
declare #mapping table (abbrev varchar(50), fullname varchar(100))
insert into #mapping(abbrev, fullname)
values ('NP','Nuclear Power'),
('MC','Military')
declare #testString varchar(100), #newString varchar(100)
set #teststring = 'MC:NP:XX'
set #newString = #testString
SELECT #newString = REPLACE(#newString, abbrev, fullname) FROM #mapping
select #newString

Replacing individual digits in a CustomerID field SQL Server

I have some customer data that needs to be anonymised. I have customerIds which consists of numbers.
for example:
CustomerID
3937487
I need to swap each digit with an alternative, which should be enough for my requirement. Based on the following lookup table
Only issue I'm having is when I use the REPLACE function on the field:
REPLACE(REPLACE(CustomerID,2,9),9,6)
which gives me
CustomerID
3637487
It's swapping the digit 2 to a 9, then that same 9 to a 6. It needs to only replace the digits ONCE.
As I'm going to be changing millions of records in one go, using temp tables isn't possible from a performance perspective. Can this be done in one query, recursively?
I can't think of any way of accomplishing this in a single query. If I wanted to do this I'd create a function something along the lines of
-- Anonymises an integer id by substituting every decimal digit exactly once
-- via a fixed lookup (0->7, 1->2, 2->9, 3->5, 4->3, 5->8, 6->0, 7->1, 8->4, 9->6).
--
-- Parameters:
--   @Id  the id to anonymise
-- Returns:
--   the substituted id; NULL for NULL input; the sign of negative ids is kept.
--
-- Limitations imposed by the int return type:
--   * ids whose first digit is 6 map to a leading 0, which is lost on
--     conversion, so the mapping is not collision-free for those ids;
--   * 10-digit ids can map to a value above the int range and raise an
--     arithmetic overflow error.
CREATE FUNCTION [dbo].[AnonymiseId]
(
    @Id [int]
)
RETURNS [int]
AS
BEGIN
    -- Position n+1 holds the replacement for digit n.
    DECLARE @substitutions nchar(10) = N'7295380146';
    DECLARE @digits nvarchar(10);
    DECLARE @substituted nvarchar(11) = N'';
    DECLARE @i int = 1;

    IF @Id IS NULL
        RETURN NULL;

    -- Work on the magnitude only; widen to bigint first because
    -- ABS(-2147483648) does not fit in an int.
    SET @digits = CONVERT(nvarchar(10), ABS(CONVERT(bigint, @Id)));

    WHILE @i <= LEN(@digits)
    BEGIN
        SET @substituted = @substituted
            + SUBSTRING(@substitutions, CONVERT(int, SUBSTRING(@digits, @i, 1)) + 1, 1);
        SET @i = @i + 1;
    END

    IF @Id < 0
        SET @substituted = N'-' + @substituted;

    -- Return the result of the function
    RETURN CONVERT(int, @substituted);
END
GO
and then just use it in the query
SELECT dbo.AnonymiseId(CustomerID) FROM ???

How to separate variables and values, then insert in a table?

Problem
A stored procedure is receiving list of variables and values, and the delimiter. This stored procedure needs to insert those in a table.
--Example table
create table #tempo
(
Variable1 int,
Variable2 int,
Variable3 int
)
These are the parameters to the stored procedure:
declare #variableList varchar(100)
declare #valueList varchar(100)
declare #separator char(1)
set #variableList = 'Variable1#Variable2#Variable3'
set #valueList = '1111#2222#3333'
set #separator = '#'
Result
What I want to achieve is this:
select * from #tempo
+---------+---------+---------+
|Variable1|Variable2|Variable3|
+---------+---------+---------+
|1111 |2222 |3333 |
+---------+---------+---------+
One way to do it
I can use a loop and build dynamic SQL but I want to avoid it. Other than the obvious reasons for not using dynamic SQL, the loop structure is hard to maintain, explain and testing can become an issue too.
Ideal way
I am thinking about a more elegant way to do this, for example with string_split or coalesce etc. But cannot figure out a way without using dynamic SQL or loops.
If you always have same set of column names then it is very easy to do with pivoting, but if columns are changing then you can use the same script with dynamically adjusted list of variables, provided as a parameter or from direct reading from temp table:
INSERT INTO #tempo SELECT *
FROM (
SELECT [value], rv = 'Variable' + CAST(Row_Number() OVER ( ORDER BY (SELECT 1)) as VARCHAR)
FROM STRING_SPLIT(#valueList,#separator)
) AS src
PIVOT (MAX([value]) FOR rv IN (Variable1,Variable2,Variable3)) AS pvt;
You can always try pivoting out the data. This is just the select, but could easily have an insert wrapped into it.
We use a split string with a row ID to allow matching of two split data sets. Function is :
-- Splits @RowData on the delimiter @SplitOn and returns the trimmed pieces in
-- input order.
--
-- Parameters:
--   @RowData  the delimited string
--   @SplitOn  the delimiter, 1 to 5 characters
-- Returns:
--   (Id, Data): Id is the 1-based position of the piece, Data the piece with
--   leading/trailing spaces removed. A string without the delimiter (or an
--   empty/NULL delimiter) yields a single row holding the whole string.
CREATE FUNCTION [dbo].[Split] (@RowData NVARCHAR(MAX), @SplitOn NVARCHAR(5))
RETURNS @RtnValue TABLE (Id INT IDENTITY(1, 1), Data NVARCHAR(100))
AS
BEGIN
    -- LEN ignores trailing spaces, so measure with a sentinel character to
    -- get the true length of delimiters such as ', '.
    DECLARE @SepLen INT = LEN(@SplitOn + N'x') - 1;
    DECLARE @SepPos INT = CHARINDEX(@SplitOn, @RowData);

    WHILE (@SepPos > 0)
    BEGIN
        INSERT INTO @RtnValue (Data)
        SELECT LTRIM(RTRIM(SUBSTRING(@RowData, 1, @SepPos - 1)));

        -- Skip the whole delimiter, not just its first character.
        SET @RowData = SUBSTRING(@RowData, @SepPos + @SepLen, LEN(@RowData + N'x'));
        SET @SepPos = CHARINDEX(@SplitOn, @RowData);
    END;

    -- Whatever is left after the last delimiter is the final piece.
    INSERT INTO @RtnValue (Data)
    SELECT LTRIM(RTRIM(@RowData));

    RETURN;
END;
You can then join the two sets together to give some key value pairs, and from there pivot out the data to give the format you requested. If you replace the last select with a select from any of the previous cte's then you can see how the logic unfolds.
DECLARE #variableList VARCHAR(100);
DECLARE #valueList VARCHAR(100);
DECLARE #separator CHAR(1);
SET #variableList = 'Variable1,Variable2,Variable3';
SET #valueList = '1111, 2222, 3333';
SET #separator = ',';
WITH cteVar AS (SELECT Id, Data FROM dbo.Split(#variableList, #separator) )
, cteVal AS (SELECT Id, Data FROM dbo.Split(#valueList, #separator) )
, cteData AS
(SELECT cteVar.Data VariableData
, cteVal.Data ValueData
FROM cteVar
JOIN cteVal ON cteVal.Id = cteVar.Id)
, ctePivot AS
(SELECT *
FROM cteData
PIVOT ( MAX(ValueData)
FOR VariableData IN ([Variable1], [Variable2], [Variable3])) AS PivotTable)
SELECT *
FROM ctePivot;
This is quite a long approach, but hopefully it will help you understand the steps involved. It's worth looking at the PIVOT operator in general anyway; it's well documented.

Stored procedure parameter contains multiple values

My stored procedure parameter contains more than 2 values (eg: create stored procedure recentAssetList #recentAssetList = (id1,id2,..)) then with these parameter how can I get data from a table?
SQL Server doesn't support that logic. Here are some options:
1. Split them amongst many parameters
create procedure yourProc
#FirstParam varchar(10),
#SecondParam varchar(10)
as
-- etc.
go
If some of these parameters may be null you can do this:
create procedure yourProc
#FirstParam varchar(10) = null,
#SecondParam varchar(10) = null
as
select *
from yourTable
where
((#FirstParam is null) or (SomeCol1 = #FirstParam)) and
((#SecondParam is null) or (SomeCol2 = #SecondParam))
go
2. Pass a read only table
create type yourTableData
as table
(
id int not null
)
go
create procedure yourProc
#yourInput yourTableData readonly
as
select *
from yourTable
where id in
(
select id
from #yourInput
)
go
You can use a split function like the one below and pass the values in as a concatenated string:
-- Splits a delimited list into rows using a recursive CTE.
-- Returns ([index], col): [index] is the 0-based position of the element,
-- col the element text.
-- NOTE(review): the `#` prefixes on list/delimiter/res look like `@` that was
-- altered in transcription; as written these are not variable names — confirm
-- before running.
-- NOTE(review): the remainder is computed as len - charindex, i.e. it skips
-- exactly one character after each match, so this presumably expects a
-- single-character delimiter even though the parameter is nvarchar(10) — confirm.
CREATE function [dbo].[csv2tbl](#list nvarchar(max), #delimiter nvarchar(10))
returns #res table ([index] int PRIMARY KEY, col nvarchar(max))
AS BEGIN
with tbl_for_csv as
(
-- Anchor: the list with two delimiters appended is cut at its first
-- delimiter; col is the text before it, Str the text after it.
select 0 as [index] ,left(#list + #delimiter+#delimiter,charindex(#delimiter,#list + #delimiter+#delimiter) -1)as col,
right(#list + #delimiter+#delimiter,len(#list + #delimiter+#delimiter) - charindex(#delimiter,#list + #delimiter+#delimiter)) as Str
union all
-- Recursive step: cut the next element off the front of the remainder.
select [index]+1, left(Str,charindex(#delimiter,Str) - 1)as Col,
right(Str,len(Str) - charindex(#delimiter,Str)) from tbl_for_csv
-- Stop once nothing would be left after the next delimiter.
where len(right(Str,len(Str) - charindex(#delimiter,Str))) > 0
)
INSERT #res
-- MAXRECURSION 0 lifts the default limit of 100 recursion levels.
select [index], col from tbl_for_csv option (MAXRECURSION 0);
return;
END
In SQL Server 2008 and later you can pass the values to the stored procedure through a variable of a user-defined table type (see @Shark's answer).

How do you count the number of occurrences of a certain substring in a SQL varchar?

I have a column that has values formatted like a,b,c,d. Is there a way to count the number of commas in that value in T-SQL?
The first way that comes to mind is to do it indirectly by replacing the comma with an empty string and comparing the lengths
Declare #string varchar(1000)
Set #string = 'a,b,c,d'
select len(#string) - len(replace(#string, ',', ''))
Quick extension of cmsjr's answer that works for strings with more than one character.
-- Counts the non-overlapping occurrences of @searchTerm in @searchString.
--
-- Parameters:
--   @searchString  the text to search in
--   @searchTerm    the text to count; may be longer than one character
-- Returns:
--   the number of occurrences; NULL if either argument is NULL or the term
--   is empty.
CREATE FUNCTION dbo.CountOccurrencesOfString
(
    @searchString nvarchar(max),
    @searchTerm nvarchar(max)
)
RETURNS INT
AS
BEGIN
    -- DATALENGTH instead of LEN: LEN ignores trailing spaces, which miscounts
    -- when the string ends in a space after the REPLACE and divides by zero
    -- when the term itself is a space. Both operands are nvarchar, so the
    -- byte-per-character factor cancels out in the division.
    RETURN (DATALENGTH(@searchString) - DATALENGTH(REPLACE(@searchString, @searchTerm, N'')))
           / NULLIF(DATALENGTH(@searchTerm), 0);
END
Usage:
SELECT * FROM MyTable
where dbo.CountOccurrencesOfString(MyColumn, 'MyString') = 1
You can compare the length of the string with one where the commas are removed:
len(value) - len(replace(value,',',''))
The answer by @cmsjr has a problem in some instances.
His answer was to do this:
Declare #string varchar(1000)
Set #string = 'a,b,c,d'
select len(#string) - len(replace(#string, ',', ''))
This works in most scenarios, however, try running this:
DECLARE #string VARCHAR(1000)
SET #string = 'a,b,c,d ,'
SELECT LEN(#string) - LEN(REPLACE(#string, ',', ''))
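It looks as if REPLACE removes the final comma and ALSO the space just before it. What actually happens is that REPLACE removes only the commas, but the resulting string then ends in a space, and LEN does not count trailing spaces. This results in a returned value of 5 when you'd expect 4. Here is another way to do this which will work even in this special scenario: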
DECLARE #string VARCHAR(1000)
SET #string = 'a,b,c,d ,'
SELECT LEN(REPLACE(#string, ',', '**')) - LEN(#string)
Note that you don't need to use asterisks. Any two-character replacement will do. The idea is that you lengthen the string by one character for each instance of the character you're counting, then subtract the length of the original. It's basically the opposite method of the original answer which doesn't come with the strange trimming side-effect.
Building on @Andrew's solution, you'll get much better performance using a non-procedural (inline) table-valued function and CROSS APPLY:
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
/* Usage:
SELECT t.[YourColumn], c.StringCount
FROM YourDatabase.dbo.YourTable t
CROSS APPLY dbo.CountOccurrencesOfString('your search string', t.[YourColumn]) c
*/
-- Inline table-valued function: counts the non-overlapping occurrences of
-- @searchTerm in @searchString.
--
-- Returns one row with one column, StringCount. StringCount is NULL when
-- either argument is NULL or the term is empty (NULLIF guards the division).
-- DATALENGTH is used rather than LEN so trailing spaces are counted.
CREATE FUNCTION [dbo].[CountOccurrencesOfString]
(
    @searchTerm nvarchar(max),
    @searchString nvarchar(max)
)
RETURNS TABLE
AS
RETURN
    SELECT
        (DATALENGTH(@searchString) - DATALENGTH(REPLACE(@searchString, @searchTerm, '')))
            / NULLIF(DATALENGTH(@searchTerm), 0) AS StringCount
Declare #string varchar(1000)
DECLARE #SearchString varchar(100)
Set #string = 'as as df df as as as'
SET #SearchString = 'as'
select ((len(#string) - len(replace(#string, #SearchString, ''))) -(len(#string) -
len(replace(#string, #SearchString, ''))) % 2) / len(#SearchString)
Accepted answer is correct ,
extending it to use 2 or more character in substring:
-- Counts occurrences of a multi-character substring by comparing lengths
-- before and after removing it.
Declare @string varchar(1000)
Declare @substring varchar(100)  -- was used without being declared
Set @string = 'aa,bb,cc,dd'
Set @substring = 'aa'
-- NULLIF turns an empty substring into a NULL result instead of a
-- divide-by-zero error.
select (len(@string) - len(replace(@string, @substring, ''))) / nullif(len(@substring), 0)
Darrel Lee I think has a pretty good answer. Replace CHARINDEX() with PATINDEX(), and you can do some weak regex searching along a string, too...
Like, say you use this for #pattern:
set #pattern='%[-.|!,'+char(9)+']%'
Why would you maybe want to do something crazy like this?
Say you're loading delimited text strings into a staging table, where the field holding the data is something like a varchar(8000) or nvarchar(max)...
Sometimes it's easier/faster to do ELT (Extract-Load-Transform) with data rather than ETL (Extract-Transform-Load), and one way to do this is to load the delimited records as-is into a staging table, especially if you may want an simpler way to see the exceptional records rather than deal with them as part of an SSIS package...but that's a holy war for a different thread.
If we know there is a limitation on LEN and space, why cant we replace the space first?
Then we know there is no space to confuse LEN.
len(replace(#string, ' ', '-')) - len(replace(replace(#string, ' ', '-'), ',', ''))
Use this code, it is working perfectly.
I have created a SQL function that accepts two parameters. The first parameter is the long string that we want to search in; it accepts strings up to 1500 characters long (of course you can extend it, or even change it to the text data type).
The second parameter is the substring whose number of occurrences we want to count (its length is up to 200 characters; of course you can change that to suit your needs). The output is an integer representing the number of occurrences. Enjoy it.
-- Counts how many times @SubString occurs in @InputString. Overlapping
-- occurrences are counted, e.g. 'aa' occurs twice in 'aaa'.
--
-- Parameters:
--   @InputString  the text to search in (up to 1500 characters)
--   @SubString    the text to count (up to 200 characters)
-- Returns:
--   the number of occurrences; 0 when either argument is NULL or the
--   substring is empty.
CREATE FUNCTION [dbo].[GetSubstringCount]
(
    @InputString nvarchar(1500),
    @SubString NVARCHAR(200)
)
RETURNS int
AS
BEGIN
    DECLARE @Count int = 0;
    DECLARE @Pos int;

    IF @InputString IS NULL OR @SubString IS NULL OR DATALENGTH(@SubString) = 0
        RETURN 0;

    -- Let CHARINDEX jump from match to match instead of testing every
    -- position; restarting one character after each hit keeps overlapping
    -- matches in the count.
    SET @Pos = CHARINDEX(@SubString, @InputString);
    WHILE @Pos > 0
    BEGIN
        SET @Count = @Count + 1;
        SET @Pos = CHARINDEX(@SubString, @InputString, @Pos + 1);
    END

    RETURN @Count;
END
In SQL 2017 or higher, you can use this:
-- Counts the items of a comma-separated list with STRING_SPLIT.
declare @hits int = 0
-- Aggregate over the split rows: assigning the multi-row subquery itself to a
-- scalar variable raises "Subquery returned more than 1 value", and COUNT of
-- a single variable is always 1.
select @hits = count(*) from STRING_SPLIT('F609,4DFA,8499', ',');
select @hits
Improved version based on top answer and other answers:
Wrapping the string with delimiters ensures that LEN works properly. Making the replace character string one character longer than the match string removes the need for division.
-- Counts the non-overlapping occurrences of @match in @value.
--
-- Every match is replaced by a run of '*' one character longer than the
-- match, so the growth in length equals the number of matches and no division
-- is needed. Both strings are wrapped in [ ] so LEN cannot drop trailing
-- spaces. Returns NULL if either argument is NULL, 0 for an empty @match.
CREATE FUNCTION dbo.MatchCount(@value nvarchar(max), @match nvarchar(max))
RETURNS int
AS
BEGIN
    -- The CONVERT to nvarchar(max) keeps REPLICATE from truncating its result
    -- at 8000 bytes when @match is very long.
    RETURN LEN(N'[' + REPLACE(@value, @match, REPLICATE(CONVERT(nvarchar(max), N'*'), LEN(N'[' + @match + N']') - 1)) + N']')
         - LEN(N'[' + @value + N']');
END
DECLARE #records varchar(400)
SELECT #records = 'a,b,c,d'
select LEN(#records) as 'Before removing Commas' , LEN(#records) - LEN(REPLACE(#records, ',', '')) 'After Removing Commans'
The following should do the trick for both single character and multiple character searches:
-- Inline table-valued function: counts the occurrences of @SearchFor in
-- @SearchString by testing every start position. Overlapping occurrences are
-- counted.
--
-- Returns one row with one column, Occurrences (0 when either argument is
-- NULL or @SearchFor is empty).
CREATE FUNCTION dbo.CountOccurrences
(
    @SearchString VARCHAR(1000),
    @SearchFor VARCHAR(1000)
)
RETURNS TABLE
AS
RETURN (
    SELECT COUNT(*) AS Occurrences
    FROM (
        -- Positions 1..1000, enough for every start position in a
        -- VARCHAR(1000). Generated inline so the result does not depend on
        -- how many rows sys.objects happens to contain.
        SELECT ones.d + 10 * tens.d + 100 * hundreds.d + 1 AS n
        FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS ones (d)
        CROSS JOIN (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS tens (d)
        CROSS JOIN (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS hundreds (d)
    ) AS N
    CROSS JOIN (
        -- True character lengths: LEN alone ignores trailing spaces.
        VALUES (LEN(@SearchString + 'x') - 1, LEN(@SearchFor + 'x') - 1)
    ) AS L (StringLen, TermLen)
    WHERE L.TermLen > 0
      -- The whole term must fit; otherwise the space-padded comparison below
      -- could match a shorter fragment at the very end of the string.
      AND N.n + L.TermLen - 1 <= L.StringLen
      AND SUBSTRING(@SearchString, N.n, L.TermLen) = @SearchFor
);
GO
---------------------------------------------------------------------------------------
-- Test the function for single and multiple character searches
---------------------------------------------------------------------------------------
DECLARE #SearchForComma VARCHAR(10) = ',',
#SearchForCharacters VARCHAR(10) = 'de';
DECLARE #TestTable TABLE
(
TestData VARCHAR(30) NOT NULL
);
INSERT INTO #TestTable
(
TestData
)
VALUES
('a,b,c,de,de ,d e'),
('abc,de,hijk,,'),
(',,a,b,cde,,');
SELECT TT.TestData,
CO.Occurrences AS CommaOccurrences,
CO2.Occurrences AS CharacterOccurrences
FROM #TestTable AS TT
OUTER APPLY dbo.CountOccurrences(TT.TestData, #SearchForComma) AS CO
OUTER APPLY dbo.CountOccurrences(TT.TestData, #SearchForCharacters) AS CO2;
The function can be simplified a bit using a table of numbers (dbo.Nums):
RETURN (
SELECT COUNT(*) AS Occurrences
FROM dbo.Nums AS N
JOIN (
VALUES (#SearchString)
) AS S (SearchString)
ON
SUBSTRING(S.SearchString, N.n, LEN(#SearchFor)) = #SearchFor
);
I finally wrote this function, which should cover all possible situations by adding a one-character prefix and suffix to the input. This character is chosen so that it differs from every character contained in the search parameter, so it cannot affect the result.
-- Counts the non-overlapping occurrences of @Search in @Input.
--
-- The input is wrapped in a guard character that does not occur in @Search,
-- so LEN cannot drop trailing spaces and the guard cannot create or destroy a
-- match. Returns NULL when either argument is NULL or @Search is empty.
CREATE FUNCTION [dbo].[CountOccurrency]
(
    @Input nvarchar(max),
    @Search nvarchar(max)
)
RETURNS int AS
BEGIN
    -- Measure @Search between two dashes so trailing spaces are counted.
    DECLARE @searchLength int = LEN(N'-' + @Search + N'-') - 2;
    DECLARE @guardCode int = 255;
    DECLARE @guard nchar(1) = NCHAR(@guardCode);

    -- Walk down the code points until one is found that @Search lacks.
    WHILE ((CHARINDEX(@guard, @Search) > 0) AND (@guardCode > 1))
    BEGIN
        SET @guardCode = @guardCode - 1;
        SET @guard = NCHAR(@guardCode);
    END;

    SET @Input = @guard + @Input + @guard;

    -- NULLIF: an empty search term yields NULL instead of a divide-by-zero error.
    RETURN (LEN(@Input) - LEN(REPLACE(@Input, @Search, N''))) / NULLIF(@searchLength, 0);
END
usage
select dbo.CountOccurrency('a,b,c,d ,', ',')
Declare #MainStr nvarchar(200)
Declare #SubStr nvarchar(10)
Set #MainStr = 'nikhildfdfdfuzxsznikhilweszxnikhil'
Set #SubStr = 'nikhil'
Select (Len(#MainStr) - Len(REPLACE(#MainStr,#SubStr,'')))/Len(#SubStr)
this T-SQL code finds and prints all occurrences of pattern #p in sentence #s. you can do any processing on the sentence afterward.
declare #old_hit int = 0
declare #hit int = 0
declare #i int = 0
declare #s varchar(max)='alibcalirezaalivisualization'
declare #p varchar(max)='ali'
while #i<len(#s)
begin
set #hit=charindex(#p,#s,#i)
if #hit>#old_hit
begin
set #old_hit =#hit
set #i=#hit+1
print #hit
end
else
break
end
the result is:
1
6
13
20
I ended up using a CTE table for this,
-- Counts occurrences of 'http' per row with a recursive CTE.
CREATE TABLE #test (
    [id] int,
    [field] nvarchar(500)
)

INSERT INTO #test ([id], [field])
VALUES (1, 'this is a test string http://url, and https://google.com'),
       (2, 'another string, hello world http://example.com'),
       (3, 'a string with no url')

SELECT [id], [field]
FROM #test;

-- A CTE is visible only to the single statement that follows it, so the hits
-- are materialised once and both reports read from the temp table.
WITH URL_count_cte ([id], [url_index], [field])
AS
(
    -- Anchor: first hit in each row; url_index is the position to resume from.
    SELECT [id], CHARINDEX('http', [field], 0) + 1 AS [url_index], [field]
    FROM #test AS [t]
    WHERE CHARINDEX('http', [field], 0) != 0
    UNION ALL
    -- Recursive step: next hit after the previous one.
    SELECT [id], CHARINDEX('http', [field], [url_index]) + 1 AS [url_index], [field]
    FROM URL_count_cte
    WHERE CHARINDEX('http', [field], [url_index]) > 0
)
SELECT [id], [url_index]
INTO #url_hits
FROM URL_count_cte
-- A 500-character field can hold more than the default limit of 100 hits.
OPTION (MAXRECURSION 0);

-- total urls
SELECT COUNT(1) AS [url_count]
FROM #url_hits;

-- urls per row
SELECT [id], COUNT(1) AS [url_count]
FROM #url_hits
GROUP BY [id];
Using this function, you can get the number of repetitions of words in a text.
/****** Object: UserDefinedFunction [dbo].[fn_getCountKeywords] Script Date: 22/11/2021 17:52:00 ******/
DROP FUNCTION IF EXISTS [dbo].[fn_getCountKeywords]
GO
/****** Object: UserDefinedFunction [dbo].[fn_getCountKeywords] Script Date: 2211/2021 17:52:00 ******/
SET ANSI_NULLS OFF
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================
-- Author: m_Khezrian
-- Create date: 2021/11/22-17:52
-- Description: Return Count Keywords In Input Text
-- =============================================
-- Counts, for each keyword of a comma-separated keyword list, how often it
-- occurs in the given text. Returns one row per keyword: (ID, Keyword, Cnt).
-- NOTE(review): the `#` prefixes (and `##Fetch_STATUS`) look like `@` / `@@`
-- that were altered in transcription — confirm before running.
Create OR Alter Function [dbo].[fn_getCountKeywords]
(#Text nvarchar(max)
,#Keywords nvarchar(max)
)
RETURNS #Result TABLE
(
[ID] int Not Null IDENTITY PRIMARY KEY
,[Keyword] nvarchar(max) Not Null
,[Cnt] int Not Null Default(0)
)
/*With ENCRYPTION*/ As
Begin
Declare #Key nvarchar(max);
Declare #Cnt int;
Declare #I int;
-- Running number used to build a distinct {n} placeholder per replaced match.
Set #I = 0 ;
--Set #Text = QUOTENAME(#Text);
-- One row per distinct raw list item, stored trimmed, inserted longest first.
-- NOTE(review): grouping is on the untrimmed value, so items that differ only
-- in surrounding spaces would presumably produce duplicate keywords — verify.
Insert Into #Result
([Keyword])
Select Trim([value])
From String_Split(#Keywords,N',')
Group By [value]
Order By Len([value]) Desc;
-- Visit the keywords in ID order.
Declare CntKey_Cursor Insensitive Cursor For
Select [Keyword]
From #Result
Order By [ID];
Open CntKey_Cursor;
Fetch Next From CntKey_Cursor Into #Key;
While (##Fetch_STATUS = 0) Begin
Set #Cnt = 0;
-- Replace the first remaining occurrence with a {n} placeholder and count it;
-- repeat until the keyword no longer occurs in the (modified) text.
-- NOTE(review): the keyword is used as a PATINDEX pattern, so wildcard
-- characters inside a keyword would be interpreted as wildcards — confirm
-- that keywords never contain them.
While (PatIndex(N'%'+#Key+'%',#Text) > 0) Begin
Set #Cnt += 1;
Set #I += 1 ;
Set #Text = Stuff(#Text,PatIndex(N'%'+#Key+'%',#Text),len(#Key),N'{'+Convert(nvarchar,#I)+'}');
--Set #Text = Replace(#Text,#Key,N'{'+Convert(nvarchar,#I)+'}');
End--While
-- Store the count on the keyword's row.
Update #Result
Set [Cnt] = #Cnt
Where ([Keyword] = #Key);
Fetch Next From CntKey_Cursor Into #Key;
End--While
Close CntKey_Cursor;
Deallocate CntKey_Cursor;
Return
End
GO
--Test
Select *
From dbo.fn_getCountKeywords(
N'<U+0001F4E3> MARKET IMPACT Euro area Euro CPIarea annual inflation up to 3.0% MaCPIRKET forex'
,N'CPI ,core,MaRKET , Euro area'
)
Go
Reference https://learn.microsoft.com/en-us/sql/t-sql/functions/string-split-transact-sql?view=sql-server-ver15
Example:
SELECT s.*
,s.[Number1] - (SELECT COUNT(Value)
FROM string_split(s.[StringColumn],',')
WHERE RTRIM(VALUE) <> '')
FROM TableName AS s
Applies to: SQL Server 2016 (13.x) and later
You can use the following stored procedure to fetch , values.
IF EXISTS (SELECT * FROM sys.objects
           WHERE object_id = OBJECT_ID(N'[dbo].[sp_parsedata]') AND type in (N'P', N'PC'))
    DROP PROCEDURE [dbo].[sp_parsedata]
GO
-- Replaces the court ids stored for one complaint with the ids given as a
-- comma-separated list.
--
-- Parameters:
--   @cid  complaint id whose rows in table1 are replaced
--   @st   comma-separated list of integer court ids, e.g. '3,7,12'
--
-- The delete and all inserts run in one transaction, so a malformed list
-- cannot leave the complaint with only part of its rows. Blank items (for
-- example from a trailing comma) are skipped instead of being stored as 0.
create procedure sp_parsedata
(@cid integer, @st varchar(1000))
as
begin
    -- Roll the whole transaction back if any statement (e.g. a failed CAST) errors.
    set xact_abort on;

    declare @token varchar(1000)
    declare @sepPos integer
    -- A trailing sentinel comma lets the loop handle the last item like the others.
    declare @rest varchar(1001) = @st + ','

    begin transaction;

    delete from table1 where complainid = @cid;

    set @sepPos = CHARINDEX(',', @rest)
    while (@sepPos > 0)
    begin
        set @token = LTRIM(RTRIM(LEFT(@rest, @sepPos - 1)))
        if (@token <> '')
        begin
            insert into table1 (complainid, courtid)
            values (@cid, cast(@token as integer))
        end
        set @rest = SUBSTRING(@rest, @sepPos + 1, 1001)
        set @sepPos = CHARINDEX(',', @rest)
    end

    commit transaction;
end
The Replace/Len test is cute, but probably very inefficient (especially in terms of memory).
A simple function with a loop will do the job.
-- Counts the non-overlapping occurrences of @pattern in @expression by
-- jumping from match to match with CHARINDEX.
--
-- Parameters:
--   @pattern     the text to count
--   @expression  the text to search in
-- Returns:
--   the number of occurrences; 0 when either argument is NULL or the pattern
--   is empty.
CREATE FUNCTION [dbo].[fn_Occurences]
(
    @pattern varchar(255),
    @expression varchar(max)
)
RETURNS int
AS
BEGIN
    DECLARE @Result int = 0;
    DECLARE @index BigInt = 0
    -- LEN ignores trailing spaces: for a pattern such as ' ' it returns 0, and
    -- the search would then restart at the same position forever. Measuring
    -- with a sentinel character gives the true length.
    DECLARE @patLen int = LEN(@pattern + 'x') - 1

    IF @patLen IS NULL OR @patLen = 0
        RETURN 0

    SET @index = CHARINDEX(@pattern, @expression, @index)
    While @index > 0
    BEGIN
        SET @Result = @Result + 1;
        -- Resume just past the current match.
        SET @index = CHARINDEX(@pattern, @expression, @index + @patLen)
    END
    RETURN @Result
END
Perhaps you should not store data that way. It is bad practice to store a comma-delimited list in a single field, and it is very inefficient to query. This should be a related table.

Resources