Executing a query on csv data stored in an ntext column - sql-server

Say that the raw text of CSV exports and an associated timestamps are stored in a database, where one record is equivalent to one export.
Does anyone have a way to execute a query on the CSV file stored in that field without creating a second connection to the database or exporting the data to a file and then reopening it using the csv text driver?
Assume that:
1) you can't write out a physical file onto the server in the solution
2) you can't a second connection to the server w/ OPENROWSET (servers, usernames & passwords change)
3) that it must be a 100% SQL solution - must be able to be run as an SP
4) that you only need to work with one record at time - the solution doesn't need to account for selecting from multiple csv files stored in the DB.

My solution would be to create a UDF that will parse the CSV data into a table variable. Then, in the SP, retrieve the CSV, pass it to the UDF, then run the query against the table variable.
First, create a UDF to return a table from the CSV value (uses CHAR(13) to determine new lines, may need to be altered to work with your data):
CREATE FUNCTION [dbo].[fnParseCSV] (#InputString NVARCHAR(MAX), #Delimiter NCHAR(1) = ',')
RETURNS #tbl TABLE (ID int, Val NVARCHAR(64)) AS
BEGIN
declare #singleLine nvarchar(max)
declare #id int
declare #val varchar(64)
WHILE LEN(#InputString) > 0 BEGIN
IF CHARINDEX(char(13), #InputString) > 0 BEGIN
SELECT #singleLine = SUBSTRING(#InputString, 1, CHARINDEX(char(13), #InputString) - 1)
IF CHARINDEX(#Delimiter, #singleline) > 0 BEGIN
SELECT #id = convert(int, SUBSTRING(#singleline, 1, CHARINDEX(#Delimiter, #singleline) - 1))
SELECT #val = RIGHT(#singleline, LEN(#singleline) - CHARINDEX(#Delimiter, #singleline) )
INSERT INTO #tbl (id, val) values (#id, #val)
END
SELECT #InputString = RIGHT(#InputString, LEN(#InputString) - CHARINDEX(char(13), #InputString) )
END
ELSE
BEGIN
IF CHARINDEX(#Delimiter, #inputString) > 0
BEGIN
SELECT #id = convert(int, SUBSTRING(#inputString, 1, CHARINDEX(#Delimiter, #inputString) - 1))
SELECT #val = RIGHT(#inputString, LEN(#inputString) - CHARINDEX(#Delimiter, #inputString) )
INSERT INTO #tbl (id, val) values (#id, #val)
END
set #inputString = ''
END
END
RETURN
END
Then run the query against that output:
select * from dbo.fnParseCsv('123,val1' + char(13) + '456,val2' + CHAR(13) + '789,val3', ',')

You could set up a series of user-defined functions which could parse through the column. It would likely be slow and wouldn't be robust at all.
As an example though (with no real error checking, etc. and only minimally tested):
IF OBJECT_ID('dbo.Test_CSV_Search') IS NOT NULL
DROP TABLE dbo.Test_CSV_Search
GO
CREATE TABLE dbo.Test_CSV_Search
(
my_id INT IDENTITY NOT NULL,
txt VARCHAR(MAX) NOT NULL,
CONSTRAINT PK_Test_CSV_Search PRIMARY KEY CLUSTERED (my_id)
)
GO
INSERT INTO dbo.Test_CSV_Search (txt) VALUES ('11, 12, 13, 14,15,16
21,22, 23,24, 25,26
31,22,33,34,35,36')
GO
IF OBJECT_ID('dbo.Get_CSV_Row') IS NOT NULL
DROP FUNCTION dbo.Get_CSV_Row
GO
CREATE FUNCTION dbo.Get_CSV_Row
(#my_id INT, #col_num SMALLINT, #search_value VARCHAR(100))
RETURNS #results TABLE (row_num INT, row_txt VARCHAR(MAX))
AS
BEGIN
DECLARE
#csv_txt VARCHAR(MAX),
#full_row VARCHAR(MAX),
#start_pos INT,
#end_pos INT,
#col_txt VARCHAR(100),
#cur_col SMALLINT,
#line_start INT,
#line_end INT,
#row_num INT
SELECT #csv_txt = txt + CHAR(10) FROM dbo.Test_CSV_Search WHERE my_id = #my_id
SELECT
#line_start = 1,
#cur_col = 1,
#start_pos = 1,
#row_num = 1
WHILE (CHARINDEX(CHAR(10), #csv_txt, #line_start) > 0)
BEGIN
SELECT
#line_end = CHARINDEX(CHAR(10), #csv_txt, #line_start),
#end_pos = CHARINDEX(',', #csv_txt, #start_pos)
WHILE (#cur_col < #col_num)
BEGIN
SET #start_pos = #end_pos + 1
SET #end_pos = CHARINDEX(',', #csv_txt, #start_pos)
SET #cur_col = #cur_col + 1
END
IF (RTRIM(LTRIM(SUBSTRING(#csv_txt, #start_pos, #end_pos - #start_pos))) = #search_value)
BEGIN
INSERT INTO #results (row_num, row_txt) VALUES (#row_num, RTRIM(LTRIM(SUBSTRING(#csv_txt, #line_start, #line_end - #line_start))))
END
SELECT
#line_start = #line_end + 1,
#start_pos = #line_end + 1,
#cur_col = 1,
#row_num = #row_num + 1
END
RETURN
END
GO
SELECT * FROM dbo.Get_CSV_Row(1, 1, '11')

Related

How I can replace all values in table dynamically in SQL Server 2012?

My current table is like
where UserId has 14 columns from ys_ar_item1 to ys_ar_item14 for multiple clients. I have to change or update the values in this table such that 0 should be replaced by 1, 1 should be replaced by 2, 2 should be replaced by 3 up to 6 should be replaced by 7, and nil should be replaced by 0.
For example: ys_ar_item1 should be 31245 instead of 20134.
Here is Sample table for 3 columns, it will be for all columns that is 14.
Note - It would be fine to create a new table with UserId and 14 columns.
Like already mentioned, this is a bad database design.
But you can try the following function to get the desired result:
CREATE FUNCTION ReplaceNumbers
(
#str varchar(max)
)
RETURNS varchar(max)
AS
BEGIN
DECLARE #result varchar(max)
DECLARE #cnt int=1
WHILE #cnt <= LEN(#str)
BEGIN
SET #result = CONCAT(#result,
CASE WHEN SUBSTRING(#str,#cnt,1) IN ('0','1','2','3','4','5','6')
THEN CAST(PARSE(SUBSTRING(#str,#cnt,1) AS int)+1 AS varchar)
ELSE SUBSTRING(#str,#cnt,1) END)
SET #cnt = #cnt + 1
END;
RETURN REPLACE(#result, 'nil', '0')
END
Use it like this:
UPDATE myTable SET ys_ar_item1=dbo.ReplaceNumbers(ys_ar_item1)
It's really bad design and you store numbers as strings which makes things hard, but I can think just of using those functions IIF(), TRY_CAST() and REPLICATE():
CREATE TABLE Sample (
UserID INT,
Ys_Ar_Item1 VARCHAR(10),
Ys_Ar_Item2 VARCHAR(10),
Ys_Ar_Item3 VARCHAR(10)
-- Other columns
);
INSERT INTO Sample VALUES
(1, '20134', 'nil NULL', '02341');
SELECT UserID,
IIF(TRY_CAST(Ys_Ar_Item1 AS INT) IS NULL, '0', CAST( REPLICATE('1', LEN(Ys_Ar_Item1) ) AS INT) + CAST(Ys_Ar_Item1 AS INT)) AS Ys_Ar_Item1,
IIF(TRY_CAST(Ys_Ar_Item2 AS INT) IS NULL, '0', CAST( REPLICATE('1', LEN(Ys_Ar_Item2) ) AS INT) + CAST(Ys_Ar_Item2 AS INT)) AS Ys_Ar_Item2,
IIF(TRY_CAST(Ys_Ar_Item3 AS INT) IS NULL, '0', CAST( REPLICATE('1', LEN(Ys_Ar_Item3) ) AS INT) + CAST(Ys_Ar_Item3 AS INT)) AS Ys_Ar_Item3
FROM Sample;
Note:
In the sample data you include in your question as an image, there is no number >6, this solution will fail if you have such numbers.

Can anyone identify why my loop isn't working correctly?

I am trying to do an insert, so that when a clientID value is NULL then insert the client data into the client table.
I have a loop that cycles through data entered into a temp table, when the clientID is null it should do an insert and choose the next sequential client reference for that client then delete the row from the temp table and move onto the next.
The problem is when the loop does the second insert or more, it is using the SAME client reference even though I have specified +1. The below is an extract of the loop, can anyone figure out why after the first insert the client reference stays the same? If I run the insert by itself with the loop and select #result it shows the vaues sequentially so I don't understand why when the script runs it doesn't insert the reference sequentially.
Declare #Id int
WHILE EXISTS(SELECT * FROM #Temp)
begin
select top 1 #id = ID from #temp
IF (select clientID from #m1 where id = #id) is null AND (select renewalinsuredid from #m1 where id=#id) is not null and (select renewalmasterID from #m1 where id=#id) is not null
BEGIN
declare #result varchar(10)
SELECT #Result = (MAX(CAST(SUBSTRING(ClientReference1,3,6) AS INTEGER)) + 1) FROM Client
set #result = 'CR0' + #result
INSERT INTO Client (clientid,InsuredName,InsuredId,MasterInsuredId,ClientReference1)
SELECT newid(),insuredname,RenewalInsuredID,RenewalMasterID,#result from #M1 where id = #id
PRINT 'Client ref '+ cast(#result as varchar(64)) +' inserted for policy ' + #result2
END
DELETE from #temp where ID = #Id
END
you do...
SELECT #Result = (MAX(CAST(SUBSTRING(ClientReference1,3,6) AS INTEGER)) + 1) FROM Client
...to get the maximum client reference CAST to an integer, then add 1 to it - this doesn't guarantee that #Result is 'greater' than all the other ClientReference1, because ClientReference1 appears to be a text field - consider a field '9', cast it to integer, and add 1 - what have you got? A 10 - if I change back to text, then '10' < '9' in terms of alphanumerics
interchanging from numeric to string, and sorting numerics that have changed to strings, can have unwanted effects. Also you add'CR0' to the start of the string, that could confuse things possibly.
So I managed to get it to work after converting the value. It seems to be due to the datatype being int as when converted to varchar the insert works correctly. The main thing that I don't understand is how if i just did select #result to see what the output was - it was correct every time, just didn't seem to insert the value correctly. Thanks for the help people.
declare #result varchar(10)
declare #Length int
declare #Refresult varchar(10)
SELECT #Result = (MAX(CAST(SUBSTRING(ClientReference1,3,6) AS INTEGER)) + 1) FROM Client
SET #Length = CONVERT(INT,#Result)
SET #Result = CONVERT(VARCHAR(10),#Length)
SET #Length = LEN(#Result)
IF(#Length =5)
SET #Result = '0' + #Result
IF #Result IS NULL
BEGIN
SET #Result = '00000' + '1'
END
SET #Refresult = 'CR' + #Result

Search for a common prefix between two strings

I have a Stored Procedure:
ALTER PROCEDURE [dbo].[traxs_Paybook_Data_Validate]
#session_id varchar(30)
#paybook_start_number varchar(30)
#paybook_end_number varchar(30)
AS
UPDATE traxs_temp..__PaybookImport SET
/* BEGIN CHANGE */
prefix = null,
start_number = CAST(#paybook_start_number AS int),
end_number = CAST(#paybook_end_number AS int)
/* END CHANGE */
WHERE
session_id = #session_id
Values were like:
#paybook_start_number = 100
#paybook_end_number = 200
Now paybook numbers can have a prefix, i.e:
#paybook_start_number = ABC100
#paybook_end_number = ABC200
Prefix is not always the same, neither is its length. I need to find the prefix if one, store it into prefix and remove it from paybook numbers before casting them.
Thanks
Try this:
DECLARE #z VARCHAR(32) = 'ukasd10';
SELECT LEFT(#z, PATINDEX('%[0-9]%', #z) - 1) AS Prefix,REPLACE(SUBSTRING(#z, PATINDEX('%[0-9]%', #z), LEN(#z)), ',', '') AS Digits
and likewise use this logic to update the column Prefix....
Thanks
You need a Function to extract Number/Numeric value from your input string and a function to extract Alphabets from the Input string.
Function To Extract Numbers
CREATE FUNCTION dbo.fn_Extract_Numbers
(
#string NVARCHAR(100)
)
RETURNS INT
AS
BEGIN
DECLARE #int_Value INT;
SELECT #int_Value = LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)
FROM (
SELECT subsrt = SUBSTRING(#string, pos, LEN(#string))
FROM (
SELECT #string AS string , PATINDEX('%[0-9]%', #string) AS Pos
) d
) t
RETURN #int_Value;
END
Function To Extract Alphabets
CREATE FUNCTION dbo.fn_Extract_Alphabets
(
#string NVARCHAR(100)
)
RETURNS NVARCHAR(100)
AS
BEGIN
DECLARE #Alpha_Value NVARCHAR(100);
SELECT #Alpha_Value = LEFT(subsrt, PATINDEX('%[^a-z]%', subsrt + 't') - 1)
FROM (
SELECT subsrt = SUBSTRING(#string, pos, LEN(#string))
FROM (
SELECT #string AS string , PATINDEX('%[a-z]%', #string) AS Pos
) d
) t
RETURN #Alpha_Value;
END
Now use these functions inside your stored procedure to extract the Alphabet/Prefix bit and the Number bit and store them in the target columns.
Something like....
ALTER PROCEDURE [dbo].[traxs_Paybook_Data_Validate]
#session_id varchar(30)
#paybook_start_number varchar(30)
#paybook_end_number varchar(30)
AS
DECLARE #Start_Num_Prefix VARCHAR(100);
DECLARE #End_Num_Prefix VARCHAR(100);
DECLARE #Start_Num_Numbers INT;
DECLARE #End_Num_Numbers INT;
SELECT #Start_Num_Prefix = dbo.fn_Extract_Alphabets(#paybook_start_number)
SELECT #End_Num_Prefix = dbo.fn_Extract_Alphabets(#paybook_end_number)
SELECT #Start_Num_Numbers = dbo.fn_Extract_Numbers(#paybook_start_number)
SELECT #End_Num_Numbers = dbo.fn_Extract_Numbers(#paybook_end_number)
..... rest of your procedure and so on....

Search for value of split field

I have a field with multiple foreign keys which I need to search for a given value.
Say admin_ids :: 24,56,78
How do I search that field and get the following results:
24 = true; 6 = false; 7 = false
I cannot change the schema.
Any ideas?
What I've done in the past is create a table valued function to split the string into the distinct values, provided they're delimited by the same...delimiter.
There are examples out there on the web, for instance:
http://www.sqlservercentral.com/blogs/querying-microsoft-sql-server/2013/09/19/how-to-split-a-string-by-delimited-char-in-sql-server/
Here is an example, converted into a script:
DECLARE
#Data NVARCHAR(MAX) = '24,56,78',
#Delimeter NVARCHAR(MAX) = ','
DECLARE #Values TABLE (ID INT IDENTITY(1,1), Data NVARCHAR(MAX))
DECLARE #Users TABLE (ID INT)
INSERT INTO #Users (ID)
VALUES (1),(6),(7),(24),(30),(56)
DECLARE #Iterator INT
SET #Iterator = 1
DECLARE #FoundIndex INT
SET #FoundIndex = CHARINDEX(#Delimeter,#Data)
WHILE (#FoundIndex>0)
BEGIN
INSERT INTO #Values (data)
SELECT
Data = LTRIM(RTRIM(SUBSTRING(#Data, 1, #FoundIndex - 1)))
SET #Data = SUBSTRING(#Data,
#FoundIndex + DATALENGTH(#Delimeter) / 2,
LEN(#Data))
SET #Iterator = #Iterator + 1
SET #FoundIndex = CHARINDEX(#Delimeter, #Data)
END
INSERT INTO #Values (Data)
SELECT Data = LTRIM(RTRIM(#Data))
SELECT
usr.ID,
CASE ISNULL(val.Data, -1)
WHEN -1 THEN 'False'
ELSE 'True'
END
FROM #Users usr
LEFT JOIN #Values val ON val.Data = usr.ID
Hopefully this gives you enough to get started, if a TVF is the way you want to go :)
P.S. There are lots of ways to do this and I'm sure some are better than the one presented..this works and if the field is only varchar(255), then the efficiency should be fine.

How do you count the number of occurrences of a certain substring in a SQL varchar?

I have a column that has values formatted like a,b,c,d. Is there a way to count the number of commas in that value in T-SQL?
The first way that comes to mind is to do it indirectly by replacing the comma with an empty string and comparing the lengths
Declare #string varchar(1000)
Set #string = 'a,b,c,d'
select len(#string) - len(replace(#string, ',', ''))
Quick extension of cmsjr's answer that works for strings with more than one character.
CREATE FUNCTION dbo.CountOccurrencesOfString
(
#searchString nvarchar(max),
#searchTerm nvarchar(max)
)
RETURNS INT
AS
BEGIN
return (LEN(#searchString)-LEN(REPLACE(#searchString,#searchTerm,'')))/LEN(#searchTerm)
END
Usage:
SELECT * FROM MyTable
where dbo.CountOccurrencesOfString(MyColumn, 'MyString') = 1
You can compare the length of the string with one where the commas are removed:
len(value) - len(replace(value,',',''))
The answer by #csmjr has a problem in some instances.
His answer was to do this:
Declare #string varchar(1000)
Set #string = 'a,b,c,d'
select len(#string) - len(replace(#string, ',', ''))
This works in most scenarios, however, try running this:
DECLARE #string VARCHAR(1000)
SET #string = 'a,b,c,d ,'
SELECT LEN(#string) - LEN(REPLACE(#string, ',', ''))
For some reason, REPLACE gets rid of the final comma but ALSO the space just before it (not sure why). This results in a returned value of 5 when you'd expect 4. Here is another way to do this which will work even in this special scenario:
DECLARE #string VARCHAR(1000)
SET #string = 'a,b,c,d ,'
SELECT LEN(REPLACE(#string, ',', '**')) - LEN(#string)
Note that you don't need to use asterisks. Any two-character replacement will do. The idea is that you lengthen the string by one character for each instance of the character you're counting, then subtract the length of the original. It's basically the opposite method of the original answer which doesn't come with the strange trimming side-effect.
Building on #Andrew's solution, you'll get much better performance using a non-procedural table-valued-function and CROSS APPLY:
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
/* Usage:
SELECT t.[YourColumn], c.StringCount
FROM YourDatabase.dbo.YourTable t
CROSS APPLY dbo.CountOccurrencesOfString('your search string', t.[YourColumn]) c
*/
CREATE FUNCTION [dbo].[CountOccurrencesOfString]
(
#searchTerm nvarchar(max),
#searchString nvarchar(max)
)
RETURNS TABLE
AS
RETURN
SELECT (DATALENGTH(#searchString)-DATALENGTH(REPLACE(#searchString,#searchTerm,'')))/NULLIF(DATALENGTH(#searchTerm), 0) AS StringCount
Declare #string varchar(1000)
DECLARE #SearchString varchar(100)
Set #string = 'as as df df as as as'
SET #SearchString = 'as'
select ((len(#string) - len(replace(#string, #SearchString, ''))) -(len(#string) -
len(replace(#string, #SearchString, ''))) % 2) / len(#SearchString)
Accepted answer is correct ,
extending it to use 2 or more character in substring:
Declare #string varchar(1000)
Set #string = 'aa,bb,cc,dd'
Set #substring = 'aa'
select (len(#string) - len(replace(#string, #substring, '')))/len(#substring)
Darrel Lee I think has a pretty good answer. Replace CHARINDEX() with PATINDEX(), and you can do some weak regex searching along a string, too...
Like, say you use this for #pattern:
set #pattern='%[-.|!,'+char(9)+']%'
Why would you maybe want to do something crazy like this?
Say you're loading delimited text strings into a staging table, where the field holding the data is something like a varchar(8000) or nvarchar(max)...
Sometimes it's easier/faster to do ELT (Extract-Load-Transform) with data rather than ETL (Extract-Transform-Load), and one way to do this is to load the delimited records as-is into a staging table, especially if you may want an simpler way to see the exceptional records rather than deal with them as part of an SSIS package...but that's a holy war for a different thread.
If we know there is a limitation on LEN and space, why cant we replace the space first?
Then we know there is no space to confuse LEN.
len(replace(#string, ' ', '-')) - len(replace(replace(#string, ' ', '-'), ',', ''))
Use this code, it is working perfectly.
I have create a sql function that accept two parameters, the first param is the long string that we want to search into it,and it can accept string length up to 1500 character(of course you can extend it or even change it to text datatype).
And the second parameter is the substring that we want to calculate the number of its occurance(its length is up to 200 character, of course you can change it to what your need). and the output is an integer, represent the number of frequency.....enjoy it.
CREATE FUNCTION [dbo].[GetSubstringCount]
(
#InputString nvarchar(1500),
#SubString NVARCHAR(200)
)
RETURNS int
AS
BEGIN
declare #K int , #StrLen int , #Count int , #SubStrLen int
set #SubStrLen = (select len(#SubString))
set #Count = 0
Set #k = 1
set #StrLen =(select len(#InputString))
While #K <= #StrLen
Begin
if ((select substring(#InputString, #K, #SubStrLen)) = #SubString)
begin
if ((select CHARINDEX(#SubString ,#InputString)) > 0)
begin
set #Count = #Count +1
end
end
Set #K=#k+1
end
return #Count
end
In SQL 2017 or higher, you can use this:
declare #hits int = 0
set #hits = (select value from STRING_SPLIT('F609,4DFA,8499',','));
select count(#hits)
Improved version based on top answer and other answers:
Wrapping the string with delimiters ensures that LEN works properly. Making the replace character string one character longer than the match string removes the need for division.
CREATE FUNCTION dbo.MatchCount(#value nvarchar(max), #match nvarchar(max))
RETURNS int
BEGIN
RETURN LEN('[' + REPLACE(#value,#match,REPLICATE('*', LEN('[' + #match + ']') - 1)) + ']') - LEN('['+#value+']')
END
DECLARE #records varchar(400)
SELECT #records = 'a,b,c,d'
select LEN(#records) as 'Before removing Commas' , LEN(#records) - LEN(REPLACE(#records, ',', '')) 'After Removing Commans'
The following should do the trick for both single character and multiple character searches:
CREATE FUNCTION dbo.CountOccurrences
(
#SearchString VARCHAR(1000),
#SearchFor VARCHAR(1000)
)
RETURNS TABLE
AS
RETURN (
SELECT COUNT(*) AS Occurrences
FROM (
SELECT ROW_NUMBER() OVER (ORDER BY O.object_id) AS n
FROM sys.objects AS O
) AS N
JOIN (
VALUES (#SearchString)
) AS S (SearchString)
ON
SUBSTRING(S.SearchString, N.n, LEN(#SearchFor)) = #SearchFor
);
GO
---------------------------------------------------------------------------------------
-- Test the function for single and multiple character searches
---------------------------------------------------------------------------------------
DECLARE #SearchForComma VARCHAR(10) = ',',
#SearchForCharacters VARCHAR(10) = 'de';
DECLARE #TestTable TABLE
(
TestData VARCHAR(30) NOT NULL
);
INSERT INTO #TestTable
(
TestData
)
VALUES
('a,b,c,de,de ,d e'),
('abc,de,hijk,,'),
(',,a,b,cde,,');
SELECT TT.TestData,
CO.Occurrences AS CommaOccurrences,
CO2.Occurrences AS CharacterOccurrences
FROM #TestTable AS TT
OUTER APPLY dbo.CountOccurrences(TT.TestData, #SearchForComma) AS CO
OUTER APPLY dbo.CountOccurrences(TT.TestData, #SearchForCharacters) AS CO2;
The function can be simplified a bit using a table of numbers (dbo.Nums):
RETURN (
SELECT COUNT(*) AS Occurrences
FROM dbo.Nums AS N
JOIN (
VALUES (#SearchString)
) AS S (SearchString)
ON
SUBSTRING(S.SearchString, N.n, LEN(#SearchFor)) = #SearchFor
);
I finally write this function that should cover all the possible situations, adding a char prefix and suffix to the input. this char is evaluated to be different to any of the char conteined in the search parameter, so it can't affect the result.
CREATE FUNCTION [dbo].[CountOccurrency]
(
#Input nvarchar(max),
#Search nvarchar(max)
)
RETURNS int AS
BEGIN
declare #SearhLength as int = len('-' + #Search + '-') -2;
declare #conteinerIndex as int = 255;
declare #conteiner as char(1) = char(#conteinerIndex);
WHILE ((CHARINDEX(#conteiner, #Search)>0) and (#conteinerIndex>0))
BEGIN
set #conteinerIndex = #conteinerIndex-1;
set #conteiner = char(#conteinerIndex);
END;
set #Input = #conteiner + #Input + #conteiner
RETURN (len(#Input) - len(replace(#Input, #Search, ''))) / #SearhLength
END
usage
select dbo.CountOccurrency('a,b,c,d ,', ',')
Declare #MainStr nvarchar(200)
Declare #SubStr nvarchar(10)
Set #MainStr = 'nikhildfdfdfuzxsznikhilweszxnikhil'
Set #SubStr = 'nikhil'
Select (Len(#MainStr) - Len(REPLACE(#MainStr,#SubStr,'')))/Len(#SubStr)
this T-SQL code finds and prints all occurrences of pattern #p in sentence #s. you can do any processing on the sentence afterward.
declare #old_hit int = 0
declare #hit int = 0
declare #i int = 0
declare #s varchar(max)='alibcalirezaalivisualization'
declare #p varchar(max)='ali'
while #i<len(#s)
begin
set #hit=charindex(#p,#s,#i)
if #hit>#old_hit
begin
set #old_hit =#hit
set #i=#hit+1
print #hit
end
else
break
end
the result is:
1
6
13
20
I ended up using a CTE table for this,
CREATE TABLE #test (
[id] int,
[field] nvarchar(500)
)
INSERT INTO #test ([id], [field])
VALUES (1, 'this is a test string http://url, and https://google.com'),
(2, 'another string, hello world http://example.com'),
(3, 'a string with no url')
SELECT *
FROM #test
;WITH URL_count_cte ([id], [url_index], [field])
AS
(
SELECT [id], CHARINDEX('http', [field], 0)+1 AS [url_index], [field]
FROM #test AS [t]
WHERE CHARINDEX('http', [field], 0) != 0
UNION ALL
SELECT [id], CHARINDEX('http', [field], [url_index])+1 AS [url_index], [field]
FROM URL_count_cte
WHERE CHARINDEX('http', [field], [url_index]) > 0
)
-- total urls
SELECT COUNT(1)
FROM URL_count_cte
-- urls per row
SELECT [id], COUNT(1) AS [url_count]
FROM URL_count_cte
GROUP BY [id]
Using this function, you can get the number of repetitions of words in a text.
/****** Object: UserDefinedFunction [dbo].[fn_getCountKeywords] Script Date: 22/11/2021 17:52:00 ******/
DROP FUNCTION IF EXISTS [dbo].[fn_getCountKeywords]
GO
/****** Object: UserDefinedFunction [dbo].[fn_getCountKeywords] Script Date: 2211/2021 17:52:00 ******/
SET ANSI_NULLS OFF
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================
-- Author: m_Khezrian
-- Create date: 2021/11/22-17:52
-- Description: Return Count Keywords In Input Text
-- =============================================
Create OR Alter Function [dbo].[fn_getCountKeywords]
(#Text nvarchar(max)
,#Keywords nvarchar(max)
)
RETURNS #Result TABLE
(
[ID] int Not Null IDENTITY PRIMARY KEY
,[Keyword] nvarchar(max) Not Null
,[Cnt] int Not Null Default(0)
)
/*With ENCRYPTION*/ As
Begin
Declare #Key nvarchar(max);
Declare #Cnt int;
Declare #I int;
Set #I = 0 ;
--Set #Text = QUOTENAME(#Text);
Insert Into #Result
([Keyword])
Select Trim([value])
From String_Split(#Keywords,N',')
Group By [value]
Order By Len([value]) Desc;
Declare CntKey_Cursor Insensitive Cursor For
Select [Keyword]
From #Result
Order By [ID];
Open CntKey_Cursor;
Fetch Next From CntKey_Cursor Into #Key;
While (##Fetch_STATUS = 0) Begin
Set #Cnt = 0;
While (PatIndex(N'%'+#Key+'%',#Text) > 0) Begin
Set #Cnt += 1;
Set #I += 1 ;
Set #Text = Stuff(#Text,PatIndex(N'%'+#Key+'%',#Text),len(#Key),N'{'+Convert(nvarchar,#I)+'}');
--Set #Text = Replace(#Text,#Key,N'{'+Convert(nvarchar,#I)+'}');
End--While
Update #Result
Set [Cnt] = #Cnt
Where ([Keyword] = #Key);
Fetch Next From CntKey_Cursor Into #Key;
End--While
Close CntKey_Cursor;
Deallocate CntKey_Cursor;
Return
End
GO
--Test
Select *
From dbo.fn_getCountKeywords(
N'<U+0001F4E3> MARKET IMPACT Euro area Euro CPIarea annual inflation up to 3.0% MaCPIRKET forex'
,N'CPI ,core,MaRKET , Euro area'
)
Go
Reference https://learn.microsoft.com/en-us/sql/t-sql/functions/string-split-transact-sql?view=sql-server-ver15
Example:
SELECT s.*
,s.[Number1] - (SELECT COUNT(Value)
FROM string_split(s.[StringColumn],',')
WHERE RTRIM(VALUE) <> '')
FROM TableName AS s
Applies to: SQL Server 2016 (13.x) and later
You can use the following stored procedure to fetch , values.
IF EXISTS (SELECT * FROM sys.objects
WHERE object_id = OBJECT_ID(N'[dbo].[sp_parsedata]') AND type in (N'P', N'PC'))
DROP PROCEDURE [dbo].[sp_parsedata]
GO
create procedure sp_parsedata
(#cid integer,#st varchar(1000))
as
declare #coid integer
declare #c integer
declare #c1 integer
select #c1=len(#st) - len(replace(#st, ',', ''))
set #c=0
delete from table1 where complainid=#cid;
while (#c<=#c1)
begin
if (#c<#c1)
begin
select #coid=cast(replace(left(#st,CHARINDEX(',',#st,1)),',','') as integer)
select #st=SUBSTRING(#st,CHARINDEX(',',#st,1)+1,LEN(#st))
end
else
begin
select #coid=cast(#st as integer)
end
insert into table1(complainid,courtid) values(#cid,#coid)
set #c=#c+1
end
The Replace/Len test is cute, but probably very inefficient (especially in terms of memory).
A simple function with a loop will do the job.
CREATE FUNCTION [dbo].[fn_Occurences]
(
#pattern varchar(255),
#expression varchar(max)
)
RETURNS int
AS
BEGIN
DECLARE #Result int = 0;
DECLARE #index BigInt = 0
DECLARE #patLen int = len(#pattern)
SET #index = CHARINDEX(#pattern, #expression, #index)
While #index > 0
BEGIN
SET #Result = #Result + 1;
SET #index = CHARINDEX(#pattern, #expression, #index + #patLen)
END
RETURN #Result
END
Perhaps you should not store data that way. It is a bad practice to ever store a comma delimited list in a field. IT is very inefficient for querying. This should be a related table.

Resources