Convert T-SQL function to PL/SQL [duplicate] - sql-server

Possible Duplicate:
How to best split csv strings in oracle 9i
Can you help me convert the following T-SQL function into Oracle? The function converts a string like
service|nvretail;language|de;yyyy|2011;
to a table.
The main problem I have is the usage of the temp table. I could not find any equivalent to it in Oracle.
CREATE FUNCTION [dbo].[TF_ConvertPara]
(
@paraString NVARCHAR(max)
)
RETURNS @para TABLE
(
[Key] varchar(max),
[Value] varchar(max)
)
begin
DECLARE @NextString NVARCHAR(40)
DECLARE @Pos1 INT
DECLARE @Pos2 INT
DECLARE @NextPos INT
DECLARE @Delimiter1 NCHAR=';'
DECLARE @Delimiter2 NCHAR='|'
if substring(@paraString, len(@paraString) - 1, 1) <> @Delimiter1
SET @paraString = @paraString + @Delimiter1
SET @Pos1 = charindex(@Delimiter1, @paraString)
WHILE (@Pos1 <> 0)
BEGIN
SET @NextString = substring(@paraString, 1, @Pos1 - 1)
SET @paraString = substring(@paraString, @Pos1 + 1, len(@paraString))
SET @Pos1 = charindex(@Delimiter1, @paraString)
SET @Pos2 = charindex(@Delimiter2, @NextString)
if (@Pos2 > 0)
begin
insert into @para
values
(substring(@NextString, 1, @Pos2 - 1),
substring(@NextString, @Pos2 + 1, len(@NextString)))
end
END
return;
end
Thank you in advance.

If you are looking to return the tokens of your string as rows then you could do it all in a single SQL query:
WITH t AS (SELECT 'service|nvretail;language|de;yyyy|2011;' as val FROM dual)
SELECT row_output
FROM t,
XMLTABLE( '/root/e/text()'
PASSING xmltype( '<root><e>' ||
REGEXP_REPLACE (t.val, '(;|\|)', '</e><e>' ) ||
'</e></root>' )
COLUMNS row_output VARCHAR2( 4000 ) PATH '/');
Returns:
service
nvretail
language
de
yyyy
2011
If you want these rows in a table then wrap the SQL in a simple function to either return the in-memory table or insert the records into a DB table, whichever you need.
Here is an example of a function returning a table (collection) of VARCHAR2 records:
CREATE OR REPLACE
FUNCTION str_to_table (
p_input_str IN VARCHAR2
)
RETURN DBMS_SQL.VARCHAR2_TABLE
IS
CURSOR str_to_rows (
cp_str IN VARCHAR2
)
IS
SELECT row_output
FROM (SELECT cp_str as val FROM dual) t,
XMLTABLE( '/root/e/text()'
PASSING xmltype( '<root><e>' ||
REGEXP_REPLACE(t.val, '(;|\|)', '</e><e>') ||
'</e></root>' )
COLUMNS row_output VARCHAR2( 4000 ) PATH '/');
varchar_tab DBMS_SQL.VARCHAR2_TABLE;
BEGIN
OPEN str_to_rows(p_input_str);
FETCH str_to_rows BULK COLLECT INTO varchar_tab;
CLOSE str_to_rows;
--
RETURN varchar_tab;
EXCEPTION
WHEN others
THEN
IF str_to_rows%ISOPEN
THEN
CLOSE str_to_rows;
END IF;
RAISE;
END str_to_table;
It's untested, so there may be a couple of syntax errors, but it should be pretty close.
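For completeness, here is a quick way to try it from an anonymous PL/SQL block (a sketch; since DBMS_SQL.VARCHAR2_TABLE is an index-by table, it can only be consumed from PL/SQL, not from plain SQL):
DECLARE
  l_tab DBMS_SQL.VARCHAR2_TABLE;
BEGIN
  l_tab := str_to_table('service|nvretail;language|de;yyyy|2011;');
  FOR i IN 1 .. l_tab.COUNT LOOP
    DBMS_OUTPUT.PUT_LINE(l_tab(i));
  END LOOP;
END;
/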
To return a ref cursor:
CREATE OR REPLACE
FUNCTION str_to_table (
p_input_str IN VARCHAR2
)
RETURN sys_refcursor
IS
cur SYS_REFCURSOR;
BEGIN
OPEN cur
FOR SELECT row_output
FROM (SELECT p_input_str as val FROM dual) t,
XMLTABLE( '/root/e/text()'
PASSING xmltype( '<root><e>' ||
REGEXP_REPLACE (t.val, '(;|\|)', '</e><e>' ) ||
'</e></root>' )
COLUMNS row_output VARCHAR2( 4000 ) PATH '/');
--
RETURN cur;
END str_to_table;
Hope it helps...

To return the key/value pairs, how about this (for Oracle 10g and above):
CREATE OR REPLACE FUNCTION csv_to_rows(mycsv IN VARCHAR2)
RETURN sys_refcursor
AS
c SYS_REFCURSOR;
BEGIN
OPEN c FOR
SELECT SUBSTR(element,1,INSTR(element,'|')-1) as key ,
SUBSTR(element,INSTR(element,'|')+1,99999) as val
FROM (
SELECT REGEXP_SUBSTR(mycsv,'[^;]+',1,LEVEL) element
FROM dual
CONNECT BY LEVEL < LENGTH(REGEXP_REPLACE(mycsv,'[^;]+')) + 1
);
RETURN c;
END;
and to see your results:
DECLARE
c SYS_REFCURSOR;
akey VARCHAR2(100);
aval VARCHAR2(100);
BEGIN
c := csv_to_rows('service|nvretail;language|de;yyyy|2011');
LOOP
FETCH c into akey,aval;
EXIT WHEN c%NOTFOUND;
dbms_output.put_line('Key : '||akey || ' Value : '||aval);
END LOOP;
END;
This should give you:
Key : service Value : nvretail
Key : language Value : de
Key : yyyy Value : 2011
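If you want to query the key/value pairs like a table (the closest analogue to the T-SQL table-valued function in the question), a pipelined function is another option. This is an untested sketch; the type and function names are only illustrative:
CREATE OR REPLACE TYPE key_value_t AS OBJECT (key_str VARCHAR2(4000), val_str VARCHAR2(4000));
/
CREATE OR REPLACE TYPE key_value_tab AS TABLE OF key_value_t;
/
CREATE OR REPLACE FUNCTION para_to_table (p_str IN VARCHAR2)
RETURN key_value_tab PIPELINED
IS
BEGIN
  -- walk the ';'-separated tokens and split each one on '|'
  FOR r IN (SELECT REGEXP_SUBSTR(p_str, '[^;]+', 1, LEVEL) AS element
              FROM dual
           CONNECT BY REGEXP_SUBSTR(p_str, '[^;]+', 1, LEVEL) IS NOT NULL)
  LOOP
    PIPE ROW (key_value_t(SUBSTR(r.element, 1, INSTR(r.element, '|') - 1),
                          SUBSTR(r.element, INSTR(r.element, '|') + 1)));
  END LOOP;
  RETURN;
END;
/
SELECT * FROM TABLE(para_to_table('service|nvretail;language|de;yyyy|2011;'));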

Related

Foreach update with parameter separated with a delimiter in SQL Server stored procedure

UPDATE SampleTable
SET Schemaname = @SchemaName,
SchemaCode = @SchemaCode,
ForeignKeyColumn = @ForeignKeyColumn,
IsChildSchema = @IsChildSchema,
ModifiedBy = @ModifiedBy,
ModifiedDate = @ModifiedDate
WHERE
DataSchemaID = @DataSchemaId
My @ForeignKeyColumn parameter is
2233^SITE_CLM_NUMBER,2236^SITE_ID_N,
Can anyone help me in updating ForeignKeyColumn='SITE_CLM_NUMBER' where DataSchemaID=2233 and ForeignKeyColumn='SITE_ID_N' where DataSchemaID=2236
It's easy to pass multiple parameter values to a query, using a Table Valued Parameter. These are available in all versions of SQL Server since 2008.
First, you need to create a Table type with the fields you want:
CREATE TYPE dbo.KeyValueType AS TABLE
( Key int, Value nvarchar(50) )
This allows you to specify a parameter of type KeyValueType with the Key/Value combinations you want, e.g. @updatedColumns.
You can join the target table with the TVP to update rows with matching DataSchemaID values:
Create Procedure UpdateSchemas(...., @updatedColumns dbo.KeyValueType READONLY)
AS
UPDATE SampleTable
SET
Schemaname=@SchemaName
,SchemaCode=@SchemaCode
,ForeignKeyColumn=t.Value
,IsChildSchema=@IsChildSchema
,ModifiedBy=@ModifiedBy
,ModifiedDate=@ModifiedDate
FROM SampleTable
INNER JOIN @updatedColumns t
ON t.[Key]=DataSchemaID
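A sketch of how the procedure might then be called; the other scalar arguments are left out here for brevity:
DECLARE @updatedColumns dbo.KeyValueType;

INSERT INTO @updatedColumns ([Key], [Value])
VALUES (2233, 'SITE_CLM_NUMBER'),
       (2236, 'SITE_ID_N');

EXEC UpdateSchemas /* ...other parameters..., */ @updatedColumns = @updatedColumns;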
You can add a SplitString function, like this one:
How to Split String by Character into Separate Columns in SQL Server
CREATE FUNCTION [dbo].[Split]
(
@String varchar(max)
,@Delimiter char
)
RETURNS @Results table
(
Ordinal int
,StringValue varchar(max)
)
as
begin
set @String = isnull(@String,'')
set @Delimiter = isnull(@Delimiter,'')
declare
@TempString varchar(max) = @String
,@Ordinal int = 0
,@CharIndex int = 0
set @CharIndex = charindex(@Delimiter, @TempString)
while @CharIndex != 0 begin
set @Ordinal += 1
insert @Results values
(
@Ordinal
,substring(@TempString, 0, @CharIndex)
)
set @TempString = substring(@TempString, @CharIndex + 1, len(@TempString) - @CharIndex)
set @CharIndex = charindex(@Delimiter, @TempString)
end
if @TempString != '' begin
set @Ordinal += 1
insert @Results values
(
@Ordinal
,@TempString
)
end
return
end
Now you can easily extract each part of your input parameter.
declare @I int;
declare @TMP nvarchar(255);
declare @List nvarchar(max) = @ForeignKeyColumn; -- keep a copy of the list, since @ForeignKeyColumn is reused below
set @I = 1;
set @TMP = (select StringValue from Split(@List, ',') where Ordinal = @I);
while @TMP is not null
begin
set @DataSchemaID = (select StringValue from Split(@TMP, '^') where Ordinal = 1);
set @ForeignKeyColumn = (select StringValue from Split(@TMP, '^') where Ordinal = 2);
-- Update here your table with @ForeignKeyColumn and @DataSchemaID values
set @I = @I + 1;
set @TMP = (select StringValue from Split(@List, ',') where Ordinal = @I);
end
PS: If you are using SQL Server 2016, it already includes a STRING_SPLIT function, so you won't need to add your own. https://learn.microsoft.com/en-us/sql/t-sql/functions/string-split-transact-sql
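For example, on SQL Server 2016+ the whole parameter can be broken into id/column pairs in one query (a sketch based on the sample value above):
DECLARE @ForeignKeyColumn nvarchar(max) = N'2233^SITE_CLM_NUMBER,2236^SITE_ID_N,';

SELECT CAST(LEFT(value, CHARINDEX('^', value) - 1) AS int) AS DataSchemaID,
       SUBSTRING(value, CHARINDEX('^', value) + 1, LEN(value)) AS ForeignKeyColumn
FROM STRING_SPLIT(@ForeignKeyColumn, ',')
WHERE value <> ''; -- skip the empty token left by the trailing comma
The result can then be joined against SampleTable in a single UPDATE instead of looping.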

Search for a common prefix between two strings

I have a Stored Procedure:
ALTER PROCEDURE [dbo].[traxs_Paybook_Data_Validate]
@session_id varchar(30),
@paybook_start_number varchar(30),
@paybook_end_number varchar(30)
AS
UPDATE traxs_temp..__PaybookImport SET
/* BEGIN CHANGE */
prefix = null,
start_number = CAST(@paybook_start_number AS int),
end_number = CAST(@paybook_end_number AS int)
/* END CHANGE */
WHERE
session_id = @session_id
Values were like:
@paybook_start_number = 100
@paybook_end_number = 200
Now paybook numbers can have a prefix, i.e.:
@paybook_start_number = ABC100
@paybook_end_number = ABC200
The prefix is not always the same, and neither is its length. I need to find the prefix, if there is one, store it in the prefix column, and remove it from the paybook numbers before casting them.
Thanks
Try this:
DECLARE @z VARCHAR(32) = 'ukasd10';
SELECT LEFT(@z, PATINDEX('%[0-9]%', @z) - 1) AS Prefix, REPLACE(SUBSTRING(@z, PATINDEX('%[0-9]%', @z), LEN(@z)), ',', '') AS Digits
and likewise use this logic to update the column Prefix....
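Applied to the UPDATE in the question, that could look roughly like this (a sketch; it assumes every paybook number contains at least one digit):
UPDATE traxs_temp..__PaybookImport SET
    prefix       = NULLIF(LEFT(@paybook_start_number, PATINDEX('%[0-9]%', @paybook_start_number) - 1), ''),
    start_number = CAST(SUBSTRING(@paybook_start_number, PATINDEX('%[0-9]%', @paybook_start_number), LEN(@paybook_start_number)) AS int),
    end_number   = CAST(SUBSTRING(@paybook_end_number, PATINDEX('%[0-9]%', @paybook_end_number), LEN(@paybook_end_number)) AS int)
WHERE
    session_id = @session_id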
Thanks
You need a function to extract the numeric value from your input string and a function to extract the alphabetic prefix from it.
Function To Extract Numbers
CREATE FUNCTION dbo.fn_Extract_Numbers
(
@string NVARCHAR(100)
)
RETURNS INT
AS
BEGIN
DECLARE @int_Value INT;
SELECT @int_Value = LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)
FROM (
SELECT subsrt = SUBSTRING(@string, pos, LEN(@string))
FROM (
SELECT @string AS string , PATINDEX('%[0-9]%', @string) AS Pos
) d
) t
RETURN @int_Value;
END
Function To Extract Alphabets
CREATE FUNCTION dbo.fn_Extract_Alphabets
(
@string NVARCHAR(100)
)
RETURNS NVARCHAR(100)
AS
BEGIN
DECLARE @Alpha_Value NVARCHAR(100);
SELECT @Alpha_Value = LEFT(subsrt, PATINDEX('%[^a-z]%', subsrt + 't') - 1)
FROM (
SELECT subsrt = SUBSTRING(@string, pos, LEN(@string))
FROM (
SELECT @string AS string , PATINDEX('%[a-z]%', @string) AS Pos
) d
) t
RETURN @Alpha_Value;
END
Now use these functions inside your stored procedure to extract the Alphabet/Prefix bit and the Number bit and store them in the target columns.
Something like....
ALTER PROCEDURE [dbo].[traxs_Paybook_Data_Validate]
@session_id varchar(30),
@paybook_start_number varchar(30),
@paybook_end_number varchar(30)
AS
DECLARE @Start_Num_Prefix VARCHAR(100);
DECLARE @End_Num_Prefix VARCHAR(100);
DECLARE @Start_Num_Numbers INT;
DECLARE @End_Num_Numbers INT;
SELECT @Start_Num_Prefix = dbo.fn_Extract_Alphabets(@paybook_start_number)
SELECT @End_Num_Prefix = dbo.fn_Extract_Alphabets(@paybook_end_number)
SELECT @Start_Num_Numbers = dbo.fn_Extract_Numbers(@paybook_start_number)
SELECT @End_Num_Numbers = dbo.fn_Extract_Numbers(@paybook_end_number)
..... rest of your procedure and so on....
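For instance, the changed UPDATE could then use the extracted values (a sketch; only the start prefix is stored here):
UPDATE traxs_temp..__PaybookImport SET
    prefix       = NULLIF(@Start_Num_Prefix, ''),
    start_number = @Start_Num_Numbers,
    end_number   = @End_Num_Numbers
WHERE
    session_id = @session_id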

T-SQL: How can I compare two variables of type XML when length > VarChar(MAX)?

Using only SQL Server 2008 R2 (this is going to be in a stored proc), how can I determine if two variables of type XML are equivalent?
Here is what I want to do:
DECLARE @XmlA XML
DECLARE @XmlB XML
SET @XmlA = '[Really long Xml value]'
SET @XmlB = '[Really long Xml value]'
IF @XmlA = @XmlB
SELECT 'Matching Xml!'
But as you probably know, it returns:
Msg 305, Level 16, State 1, Line 7 The XML data type cannot be
compared or sorted, except when using the IS NULL operator.
I can convert to VarChar(MAX) and compare, but that only compares the first 2MB. Is there another way?
Check this SQL function:
CREATE FUNCTION [dbo].[CompareXml]
(
@xml1 XML,
@xml2 XML
)
RETURNS INT
AS
BEGIN
DECLARE @ret INT
SELECT @ret = 0
-- -------------------------------------------------------------
-- If one of the arguments is NULL then we assume that they are
-- not equal.
-- -------------------------------------------------------------
IF @xml1 IS NULL OR @xml2 IS NULL
BEGIN
RETURN 1
END
-- -------------------------------------------------------------
-- Match the name of the elements
-- -------------------------------------------------------------
IF (SELECT @xml1.value('(local-name((/*)[1]))','VARCHAR(MAX)'))
<>
(SELECT @xml2.value('(local-name((/*)[1]))','VARCHAR(MAX)'))
BEGIN
RETURN 1
END
---------------------------------------------------------------
--Match the value of the elements
---------------------------------------------------------------
IF((@xml1.query('count(/*)').value('.','INT') = 1) AND (@xml2.query('count(/*)').value('.','INT') = 1))
BEGIN
DECLARE @elValue1 VARCHAR(MAX), @elValue2 VARCHAR(MAX)
SELECT
@elValue1 = @xml1.value('((/*)[1])','VARCHAR(MAX)'),
@elValue2 = @xml2.value('((/*)[1])','VARCHAR(MAX)')
IF @elValue1 <> @elValue2
BEGIN
RETURN 1
END
END
-- -------------------------------------------------------------
-- Match the number of attributes
-- -------------------------------------------------------------
DECLARE @attCnt1 INT, @attCnt2 INT
SELECT
@attCnt1 = @xml1.query('count(/*/@*)').value('.','INT'),
@attCnt2 = @xml2.query('count(/*/@*)').value('.','INT')
IF @attCnt1 <> @attCnt2 BEGIN
RETURN 1
END
-- -------------------------------------------------------------
-- Match the attributes of attributes
-- Here we need to run a loop over each attribute in the
-- first XML element and see if the same attribute exists
-- in the second element. If the attribute exists, we
-- need to check if the value is the same.
-- -------------------------------------------------------------
DECLARE @cnt INT, @cnt2 INT
DECLARE @attName VARCHAR(MAX)
DECLARE @attValue VARCHAR(MAX)
SELECT @cnt = 1
WHILE @cnt <= @attCnt1
BEGIN
SELECT @attName = NULL, @attValue = NULL
SELECT
@attName = @xml1.value(
'local-name((/*/@*[sql:variable("@cnt")])[1])',
'varchar(MAX)'),
@attValue = @xml1.value(
'(/*/@*[sql:variable("@cnt")])[1]',
'varchar(MAX)')
-- check if the attribute exists in the other XML document
IF @xml2.exist(
'(/*/@*[local-name()=sql:variable("@attName")])[1]'
) = 0
BEGIN
RETURN 1
END
IF @xml2.value(
'(/*/@*[local-name()=sql:variable("@attName")])[1]',
'varchar(MAX)')
<>
@attValue
BEGIN
RETURN 1
END
SELECT @cnt = @cnt + 1
END
-- -------------------------------------------------------------
-- Match the number of child elements
-- -------------------------------------------------------------
DECLARE @elCnt1 INT, @elCnt2 INT
SELECT
@elCnt1 = @xml1.query('count(/*/*)').value('.','INT'),
@elCnt2 = @xml2.query('count(/*/*)').value('.','INT')
IF @elCnt1 <> @elCnt2
BEGIN
RETURN 1
END
-- -------------------------------------------------------------
-- Start recursion for each child element
-- -------------------------------------------------------------
SELECT @cnt = 1
SELECT @cnt2 = 1
DECLARE @x1 XML, @x2 XML
DECLARE @noMatch INT
WHILE @cnt <= @elCnt1
BEGIN
SELECT @x1 = @xml1.query('/*/*[sql:variable("@cnt")]')
--RETURN CONVERT(VARCHAR(MAX),@x1)
WHILE @cnt2 <= @elCnt2
BEGIN
SELECT @x2 = @xml2.query('/*/*[sql:variable("@cnt2")]')
SELECT @noMatch = dbo.CompareXml( @x1, @x2 )
IF @noMatch = 0 BREAK
SELECT @cnt2 = @cnt2 + 1
END
SELECT @cnt2 = 1
IF @noMatch = 1
BEGIN
RETURN 1
END
SELECT @cnt = @cnt + 1
END
RETURN @ret
END
Here is the Source
The function fails to compare XML fragments e.g. when there is not a single root element, like:
SELECT dbo.CompareXml('<data/>', '<data/><data234/>')
In order to fix this, you must wrap your XMLs in root elements, when they are passed to the function or edit the function to do this. For, example:
SELECT dbo.CompareXml('<r><data/></r>', '<r><data/><data234/></r>')
There are many different ways of comparing two XML documents, and a lot depends on what kind of differences you want to tolerate: you definitely need to tolerate differences in encoding, attribute order, insignificant whitespace, numeric character references, and use of attribute delimiters, and you should probably also tolerate differences in use of comments, namespace prefixes, and CDATA. So comparing two XML documents as strings is definitely not a good idea - unless you invoke XML canonicalization first.
For many purposes the XQuery deep-equals() function does the right thing (and is more-or-less equivalent to comparing the canonical forms of the two XML documents). I don't know enough about Microsoft's SQL Server implementation of XQuery to tell you how to invoke this from the SQL level.
You may cast the fields to varbinary(max), hash them, and compare the hashes. But you will definitely get false negatives when the XMLs are equivalent but not byte-identical.
To calculate the hash you may use either a CLR function:
using System;
using System.Data.SqlTypes;
using System.IO;
namespace ClrHelpers
{
public partial class UserDefinedFunctions {
[Microsoft.SqlServer.Server.SqlFunction]
public static Guid HashMD5(SqlBytes data) {
System.Security.Cryptography.MD5CryptoServiceProvider md5 = new System.Security.Cryptography.MD5CryptoServiceProvider();
md5.Initialize();
int len = 0;
byte[] b = new byte[8192];
Stream s = data.Stream;
do {
len = s.Read(b, 0, 8192);
md5.TransformBlock(b, 0, len, b, 0);
} while(len > 0);
md5.TransformFinalBlock(b, 0, 0);
Guid g = new Guid(md5.Hash);
return g;
}
};
}
Or a SQL function:
CREATE FUNCTION dbo.GetMyLongHash(@data VARBINARY(MAX))
RETURNS VARBINARY(MAX)
WITH RETURNS NULL ON NULL INPUT
AS
BEGIN
DECLARE @res VARBINARY(MAX) = 0x
DECLARE @position INT = 1, @len INT = DATALENGTH(@data)
WHILE 1 = 1
BEGIN
SET @res = @res + HASHBYTES('MD5', SUBSTRING(@data, @position, 8000))
SET @position = @position+8000
IF @position > @len
BREAK
END
WHILE DATALENGTH(@res) > 16 SET @res = dbo.GetMyLongHash(@res)
RETURN @res
END
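A usage sketch: cast both XML values to VARBINARY(MAX), hash them, and compare. As noted above, this only detects byte-identical serializations, not logically equivalent XML:
DECLARE @XmlA XML = '<root><a>1</a></root>';
DECLARE @XmlB XML = '<root><a>1</a></root>';

IF dbo.GetMyLongHash(CAST(@XmlA AS VARBINARY(MAX))) =
   dbo.GetMyLongHash(CAST(@XmlB AS VARBINARY(MAX)))
    SELECT 'Matching Xml!';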
If you can use SQL CLR, I suggest writing a function using the XNode.DeepEquals method:
var xmlTree1 = new XElement("Root",
new XAttribute("Att1", 1),
new XAttribute("Att2", 2),
new XElement("Child1", 1),
new XElement("Child2", "some content")
);
var xmlTree2 = new XElement("Root",
new XAttribute("Att1", 1),
new XAttribute("Att2", 2),
new XElement("Child1", 1),
new XElement("Child2", "some content")
);
Console.WriteLine(XNode.DeepEquals(xmlTree1, xmlTree2));
If you cannot, you can write your own function (see SQL FIDDLE EXAMPLE):
CREATE function [dbo].[udf_XML_Is_Equal]
(
@Data1 xml,
@Data2 xml
)
returns bit
as
begin
declare
@i bigint, @cnt1 bigint, @cnt2 bigint,
@Sub_Data1 xml, @Sub_Data2 xml,
@Name varchar(max), @Value1 nvarchar(max), @Value2 nvarchar(max)
if @Data1 is null or @Data2 is null
return 1
--=========================================================================================================
-- If more than one root - recurse for each element
--=========================================================================================================
select
@cnt1 = @Data1.query('count(/*)').value('.','int'),
@cnt2 = @Data2.query('count(/*)').value('.','int')
if @cnt1 <> @cnt2
return 0
if @cnt1 > 1
begin
select @i = 1
while @i <= @cnt1
begin
select
@Sub_Data1 = @Data1.query('/*[sql:variable("@i")]'),
@Sub_Data2 = @Data2.query('/*[sql:variable("@i")]')
if dbo.udf_XML_Is_Equal(@Sub_Data1, @Sub_Data2) = 0
return 0
select @i = @i + 1
end
return 1
end
--=========================================================================================================
-- Comparing root data
--=========================================================================================================
if @Data1.value('local-name(/*[1])','nvarchar(max)') <> @Data2.value('local-name(/*[1])','nvarchar(max)')
return 0
if @Data1.value('/*[1]', 'nvarchar(max)') <> @Data2.value('/*[1]', 'nvarchar(max)')
return 0
--=========================================================================================================
-- Comparing attributes
--=========================================================================================================
select
@cnt1 = @Data1.query('count(/*[1]/@*)').value('.','int'),
@cnt2 = @Data2.query('count(/*[1]/@*)').value('.','int')
if @cnt1 <> @cnt2
return 0
if exists (
select *
from
(
select
T.C.value('local-name(.)', 'nvarchar(max)') as Name,
T.C.value('.', 'nvarchar(max)') as Value
from @Data1.nodes('/*[1]/@*') as T(C)
) as D1
full outer join
(
select
T.C.value('local-name(.)', 'nvarchar(max)') as Name,
T.C.value('.', 'nvarchar(max)') as Value
from @Data2.nodes('/*[1]/@*') as T(C)
) as D2
on D1.Name = D2.Name
where
not
(
D1.Value is null and D2.Value is null or
D1.Value is not null and D2.Value is not null and D1.Value = D2.Value
)
)
return 0
--=========================================================================================================
-- Recursively running for each child
--=========================================================================================================
select
@cnt1 = @Data1.query('count(/*[1]/*)').value('.','int'),
@cnt2 = @Data2.query('count(/*[1]/*)').value('.','int')
if @cnt1 <> @cnt2
return 0
select @i = 1
while @i <= @cnt1
begin
select
@Sub_Data1 = @Data1.query('/*/*[sql:variable("@i")]'),
@Sub_Data2 = @Data2.query('/*/*[sql:variable("@i")]')
if dbo.udf_XML_Is_Equal(@Sub_Data1, @Sub_Data2) = 0
return 0
select @i = @i + 1
end
return 1
END
I stumbled upon this fairly comprehensive article which goes into more detail about actually comparing the CONTENT of two XML entries to determine whether they are the same. It makes sense, as the ordering of attributes in nodes CAN differ, even though their values are exactly the same. I'd recommend you read through it and even implement the function to see if it works for you. I tried it out quickly and it seemed to work for me.

Executing a query on csv data stored in an ntext column

Say that the raw text of CSV exports and an associated timestamps are stored in a database, where one record is equivalent to one export.
Does anyone have a way to execute a query on the CSV file stored in that field without creating a second connection to the database or exporting the data to a file and then reopening it using the csv text driver?
Assume that:
1) you can't write out a physical file onto the server in the solution
2) you can't open a second connection to the server w/ OPENROWSET (servers, usernames & passwords change)
3) that it must be a 100% SQL solution - must be able to be run as an SP
4) that you only need to work with one record at a time - the solution doesn't need to account for selecting from multiple csv files stored in the DB.
My solution would be to create a UDF that will parse the CSV data into a table variable. Then, in the SP, retrieve the CSV, pass it to the UDF, then run the query against the table variable.
First, create a UDF to return a table from the CSV value (uses CHAR(13) to determine new lines, may need to be altered to work with your data):
CREATE FUNCTION [dbo].[fnParseCSV] (@InputString NVARCHAR(MAX), @Delimiter NCHAR(1) = ',')
RETURNS @tbl TABLE (ID int, Val NVARCHAR(64)) AS
BEGIN
declare @singleLine nvarchar(max)
declare @id int
declare @val varchar(64)
WHILE LEN(@InputString) > 0 BEGIN
IF CHARINDEX(char(13), @InputString) > 0 BEGIN
SELECT @singleLine = SUBSTRING(@InputString, 1, CHARINDEX(char(13), @InputString) - 1)
IF CHARINDEX(@Delimiter, @singleLine) > 0 BEGIN
SELECT @id = convert(int, SUBSTRING(@singleLine, 1, CHARINDEX(@Delimiter, @singleLine) - 1))
SELECT @val = RIGHT(@singleLine, LEN(@singleLine) - CHARINDEX(@Delimiter, @singleLine) )
INSERT INTO @tbl (id, val) values (@id, @val)
END
SELECT @InputString = RIGHT(@InputString, LEN(@InputString) - CHARINDEX(char(13), @InputString) )
END
ELSE
BEGIN
IF CHARINDEX(@Delimiter, @InputString) > 0
BEGIN
SELECT @id = convert(int, SUBSTRING(@InputString, 1, CHARINDEX(@Delimiter, @InputString) - 1))
SELECT @val = RIGHT(@InputString, LEN(@InputString) - CHARINDEX(@Delimiter, @InputString) )
INSERT INTO @tbl (id, val) values (@id, @val)
END
set @InputString = ''
END
END
RETURN
END
Then run the query against that output:
select * from dbo.fnParseCsv('123,val1' + char(13) + '456,val2' + CHAR(13) + '789,val3', ',')
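And a sketch of the stored procedure wrapper; the table and column names (CsvExports, ExportId, CsvText) are placeholders for your own schema:
CREATE PROCEDURE dbo.QueryStoredCsv
    @ExportId int
AS
BEGIN
    DECLARE @csv NVARCHAR(MAX);

    -- ntext cannot be used directly, so cast it on the way out
    SELECT @csv = CAST(CsvText AS NVARCHAR(MAX))
    FROM dbo.CsvExports
    WHERE ExportId = @ExportId;

    SELECT ID, Val
    FROM dbo.fnParseCSV(@csv, ',');
END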
You could set up a series of user-defined functions which could parse through the column. It would likely be slow and wouldn't be robust at all.
As an example though (with no real error checking, etc. and only minimally tested):
IF OBJECT_ID('dbo.Test_CSV_Search') IS NOT NULL
DROP TABLE dbo.Test_CSV_Search
GO
CREATE TABLE dbo.Test_CSV_Search
(
my_id INT IDENTITY NOT NULL,
txt VARCHAR(MAX) NOT NULL,
CONSTRAINT PK_Test_CSV_Search PRIMARY KEY CLUSTERED (my_id)
)
GO
INSERT INTO dbo.Test_CSV_Search (txt) VALUES ('11, 12, 13, 14,15,16
21,22, 23,24, 25,26
31,22,33,34,35,36')
GO
IF OBJECT_ID('dbo.Get_CSV_Row') IS NOT NULL
DROP FUNCTION dbo.Get_CSV_Row
GO
CREATE FUNCTION dbo.Get_CSV_Row
(@my_id INT, @col_num SMALLINT, @search_value VARCHAR(100))
RETURNS @results TABLE (row_num INT, row_txt VARCHAR(MAX))
AS
BEGIN
DECLARE
@csv_txt VARCHAR(MAX),
@full_row VARCHAR(MAX),
@start_pos INT,
@end_pos INT,
@col_txt VARCHAR(100),
@cur_col SMALLINT,
@line_start INT,
@line_end INT,
@row_num INT
SELECT @csv_txt = txt + CHAR(10) FROM dbo.Test_CSV_Search WHERE my_id = @my_id
SELECT
@line_start = 1,
@cur_col = 1,
@start_pos = 1,
@row_num = 1
WHILE (CHARINDEX(CHAR(10), @csv_txt, @line_start) > 0)
BEGIN
SELECT
@line_end = CHARINDEX(CHAR(10), @csv_txt, @line_start),
@end_pos = CHARINDEX(',', @csv_txt, @start_pos)
WHILE (@cur_col < @col_num)
BEGIN
SET @start_pos = @end_pos + 1
SET @end_pos = CHARINDEX(',', @csv_txt, @start_pos)
SET @cur_col = @cur_col + 1
END
IF (RTRIM(LTRIM(SUBSTRING(@csv_txt, @start_pos, @end_pos - @start_pos))) = @search_value)
BEGIN
INSERT INTO @results (row_num, row_txt) VALUES (@row_num, RTRIM(LTRIM(SUBSTRING(@csv_txt, @line_start, @line_end - @line_start))))
END
SELECT
@line_start = @line_end + 1,
@start_pos = @line_end + 1,
@cur_col = 1,
@row_num = @row_num + 1
END
RETURN
END
GO
SELECT * FROM dbo.Get_CSV_Row(1, 1, '11')

How do you count the number of occurrences of a certain substring in a SQL varchar?

I have a column that has values formatted like a,b,c,d. Is there a way to count the number of commas in that value in T-SQL?
The first way that comes to mind is to do it indirectly by replacing the comma with an empty string and comparing the lengths
Declare @string varchar(1000)
Set @string = 'a,b,c,d'
select len(@string) - len(replace(@string, ',', ''))
Quick extension of cmsjr's answer that works for strings with more than one character.
CREATE FUNCTION dbo.CountOccurrencesOfString
(
@searchString nvarchar(max),
@searchTerm nvarchar(max)
)
RETURNS INT
AS
BEGIN
return (LEN(@searchString)-LEN(REPLACE(@searchString,@searchTerm,'')))/LEN(@searchTerm)
END
Usage:
SELECT * FROM MyTable
where dbo.CountOccurrencesOfString(MyColumn, 'MyString') = 1
You can compare the length of the string with one where the commas are removed:
len(value) - len(replace(value,',',''))
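For example, against a table column (the table and column names here are illustrative):
SELECT MyColumn,
       LEN(MyColumn) - LEN(REPLACE(MyColumn, ',', '')) AS CommaCount
FROM dbo.MyTable;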
The answer by @csmjr has a problem in some instances.
His answer was to do this:
Declare @string varchar(1000)
Set @string = 'a,b,c,d'
select len(@string) - len(replace(@string, ',', ''))
This works in most scenarios, however, try running this:
DECLARE @string VARCHAR(1000)
SET @string = 'a,b,c,d ,'
SELECT LEN(@string) - LEN(REPLACE(@string, ',', ''))
REPLACE gets rid of the final comma, and LEN then ignores the trailing space that is left behind. This results in a returned value of 5 when you'd expect 4. Here is another way to do this which will work even in this special scenario:
DECLARE @string VARCHAR(1000)
SET @string = 'a,b,c,d ,'
SELECT LEN(REPLACE(@string, ',', '**')) - LEN(@string)
Note that you don't need to use asterisks. Any two-character replacement will do. The idea is that you lengthen the string by one character for each instance of the character you're counting, then subtract the length of the original. It's basically the opposite method of the original answer which doesn't come with the strange trimming side-effect.
Building on @Andrew's solution, you'll get much better performance using a non-procedural table-valued-function and CROSS APPLY:
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
/* Usage:
SELECT t.[YourColumn], c.StringCount
FROM YourDatabase.dbo.YourTable t
CROSS APPLY dbo.CountOccurrencesOfString('your search string', t.[YourColumn]) c
*/
CREATE FUNCTION [dbo].[CountOccurrencesOfString]
(
@searchTerm nvarchar(max),
@searchString nvarchar(max)
)
RETURNS TABLE
AS
RETURN
SELECT (DATALENGTH(@searchString)-DATALENGTH(REPLACE(@searchString,@searchTerm,'')))/NULLIF(DATALENGTH(@searchTerm), 0) AS StringCount
Declare @string varchar(1000)
DECLARE @SearchString varchar(100)
Set @string = 'as as df df as as as'
SET @SearchString = 'as'
select ((len(@string) - len(replace(@string, @SearchString, ''))) -(len(@string) -
len(replace(@string, @SearchString, ''))) % 2) / len(@SearchString)
The accepted answer is correct; extending it to use two or more characters in the substring:
Declare @string varchar(1000)
Declare @substring varchar(10)
Set @string = 'aa,bb,cc,dd'
Set @substring = 'aa'
select (len(@string) - len(replace(@string, @substring, '')))/len(@substring)
Darrel Lee I think has a pretty good answer. Replace CHARINDEX() with PATINDEX(), and you can do some weak regex searching along a string, too...
Like, say you use this for @pattern:
set @pattern='%[-.|!,'+char(9)+']%'
Why would you maybe want to do something crazy like this?
Say you're loading delimited text strings into a staging table, where the field holding the data is something like a varchar(8000) or nvarchar(max)...
Sometimes it's easier/faster to do ELT (Extract-Load-Transform) with data rather than ETL (Extract-Transform-Load), and one way to do this is to load the delimited records as-is into a staging table, especially if you may want an simpler way to see the exceptional records rather than deal with them as part of an SSIS package...but that's a holy war for a different thread.
If we know there is a limitation on LEN and trailing spaces, why can't we replace the spaces first?
Then we know there is no space to confuse LEN.
len(replace(@string, ' ', '-')) - len(replace(replace(@string, ' ', '-'), ',', ''))
Use this code, it works perfectly.
I have created a SQL function that accepts two parameters. The first parameter is the long string we want to search in; it can accept a string length of up to 1500 characters (of course you can extend it, or even change it to a text datatype).
The second parameter is the substring whose occurrences we want to count (its length is up to 200 characters; of course you can change it to suit your needs). The output is an integer representing the number of occurrences.....enjoy it.
CREATE FUNCTION [dbo].[GetSubstringCount]
(
@InputString nvarchar(1500),
@SubString NVARCHAR(200)
)
RETURNS int
AS
BEGIN
declare @K int , @StrLen int , @Count int , @SubStrLen int
set @SubStrLen = (select len(@SubString))
set @Count = 0
Set @K = 1
set @StrLen =(select len(@InputString))
While @K <= @StrLen
Begin
if ((select substring(@InputString, @K, @SubStrLen)) = @SubString)
begin
if ((select CHARINDEX(@SubString ,@InputString)) > 0)
begin
set @Count = @Count +1
end
end
Set @K=@K+1
end
return @Count
end
In SQL 2017 or higher, you can use this:
declare @hits int = 0
set @hits = (select count(*) from STRING_SPLIT('F609,4DFA,8499',','));
select @hits
Improved version based on top answer and other answers:
Wrapping the string with delimiters ensures that LEN works properly. Making the replace character string one character longer than the match string removes the need for division.
CREATE FUNCTION dbo.MatchCount(@value nvarchar(max), @match nvarchar(max))
RETURNS int
AS
BEGIN
RETURN LEN('[' + REPLACE(@value,@match,REPLICATE('*', LEN('[' + @match + ']') - 1)) + ']') - LEN('['+@value+']')
END
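A quick usage sketch:
SELECT dbo.MatchCount('a,b,c,d ,', ','); -- returns 4, even with the trailing space and comma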
DECLARE @records varchar(400)
SELECT @records = 'a,b,c,d'
select LEN(@records) as 'Before removing Commas' , LEN(@records) - LEN(REPLACE(@records, ',', '')) 'After Removing Commas'
The following should do the trick for both single character and multiple character searches:
CREATE FUNCTION dbo.CountOccurrences
(
@SearchString VARCHAR(1000),
@SearchFor VARCHAR(1000)
)
RETURNS TABLE
AS
RETURN (
SELECT COUNT(*) AS Occurrences
FROM (
SELECT ROW_NUMBER() OVER (ORDER BY O.object_id) AS n
FROM sys.objects AS O
) AS N
JOIN (
VALUES (@SearchString)
) AS S (SearchString)
ON
SUBSTRING(S.SearchString, N.n, LEN(@SearchFor)) = @SearchFor
);
GO
---------------------------------------------------------------------------------------
-- Test the function for single and multiple character searches
---------------------------------------------------------------------------------------
DECLARE @SearchForComma VARCHAR(10) = ',',
@SearchForCharacters VARCHAR(10) = 'de';
DECLARE @TestTable TABLE
(
TestData VARCHAR(30) NOT NULL
);
INSERT INTO @TestTable
(
TestData
)
VALUES
('a,b,c,de,de ,d e'),
('abc,de,hijk,,'),
(',,a,b,cde,,');
SELECT TT.TestData,
CO.Occurrences AS CommaOccurrences,
CO2.Occurrences AS CharacterOccurrences
FROM @TestTable AS TT
OUTER APPLY dbo.CountOccurrences(TT.TestData, @SearchForComma) AS CO
OUTER APPLY dbo.CountOccurrences(TT.TestData, @SearchForCharacters) AS CO2;
The function can be simplified a bit using a table of numbers (dbo.Nums):
RETURN (
SELECT COUNT(*) AS Occurrences
FROM dbo.Nums AS N
JOIN (
VALUES (@SearchString)
) AS S (SearchString)
ON
SUBSTRING(S.SearchString, N.n, LEN(@SearchFor)) = @SearchFor
);
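If you don't already have a numbers table, one common way to build dbo.Nums is a sketch like this (10,000 rows here; size it to the longest string you expect):
CREATE TABLE dbo.Nums (n int NOT NULL PRIMARY KEY);

INSERT INTO dbo.Nums (n)
SELECT TOP (10000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM sys.all_objects AS a
CROSS JOIN sys.all_objects AS b;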
I finally wrote this function, which should cover all the possible situations, by adding a char prefix and suffix to the input. This char is chosen to be different from any of the chars contained in the search parameter, so it can't affect the result.
CREATE FUNCTION [dbo].[CountOccurrency]
(
@Input nvarchar(max),
@Search nvarchar(max)
)
RETURNS int AS
BEGIN
declare @SearhLength as int = len('-' + @Search + '-') -2;
declare @conteinerIndex as int = 255;
declare @conteiner as char(1) = char(@conteinerIndex);
WHILE ((CHARINDEX(@conteiner, @Search)>0) and (@conteinerIndex>0))
BEGIN
set @conteinerIndex = @conteinerIndex-1;
set @conteiner = char(@conteinerIndex);
END;
set @Input = @conteiner + @Input + @conteiner
RETURN (len(@Input) - len(replace(@Input, @Search, ''))) / @SearhLength
END
usage
select dbo.CountOccurrency('a,b,c,d ,', ',')
Declare @MainStr nvarchar(200)
Declare @SubStr nvarchar(10)
Set @MainStr = 'nikhildfdfdfuzxsznikhilweszxnikhil'
Set @SubStr = 'nikhil'
Select (Len(@MainStr) - Len(REPLACE(@MainStr,@SubStr,'')))/Len(@SubStr)
This T-SQL code finds and prints all occurrences of pattern @p in sentence @s. You can do any processing on the sentence afterward.
declare @old_hit int = 0
declare @hit int = 0
declare @i int = 0
declare @s varchar(max)='alibcalirezaalivisualization'
declare @p varchar(max)='ali'
while @i<len(@s)
begin
set @hit=charindex(@p,@s,@i)
if @hit>@old_hit
begin
set @old_hit =@hit
set @i=@hit+1
print @hit
end
else
break
end
The result is:
1
6
13
20
I ended up using a CTE for this:
CREATE TABLE #test (
[id] int,
[field] nvarchar(500)
)
INSERT INTO #test ([id], [field])
VALUES (1, 'this is a test string http://url, and https://google.com'),
(2, 'another string, hello world http://example.com'),
(3, 'a string with no url')
SELECT *
FROM #test
;WITH URL_count_cte ([id], [url_index], [field])
AS
(
SELECT [id], CHARINDEX('http', [field], 0)+1 AS [url_index], [field]
FROM #test AS [t]
WHERE CHARINDEX('http', [field], 0) != 0
UNION ALL
SELECT [id], CHARINDEX('http', [field], [url_index])+1 AS [url_index], [field]
FROM URL_count_cte
WHERE CHARINDEX('http', [field], [url_index]) > 0
)
-- total urls
SELECT COUNT(1)
FROM URL_count_cte
-- urls per row
SELECT [id], COUNT(1) AS [url_count]
FROM URL_count_cte
GROUP BY [id]
Using this function, you can get the number of repetitions of words in a text.
/****** Object: UserDefinedFunction [dbo].[fn_getCountKeywords] Script Date: 22/11/2021 17:52:00 ******/
DROP FUNCTION IF EXISTS [dbo].[fn_getCountKeywords]
GO
/****** Object: UserDefinedFunction [dbo].[fn_getCountKeywords] Script Date: 22/11/2021 17:52:00 ******/
SET ANSI_NULLS OFF
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================
-- Author: m_Khezrian
-- Create date: 2021/11/22-17:52
-- Description: Return Count Keywords In Input Text
-- =============================================
Create OR Alter Function [dbo].[fn_getCountKeywords]
(@Text nvarchar(max)
,@Keywords nvarchar(max)
)
RETURNS @Result TABLE
(
[ID] int Not Null IDENTITY PRIMARY KEY
,[Keyword] nvarchar(max) Not Null
,[Cnt] int Not Null Default(0)
)
/*With ENCRYPTION*/ As
Begin
Declare @Key nvarchar(max);
Declare @Cnt int;
Declare @I int;
Set @I = 0 ;
--Set @Text = QUOTENAME(@Text);
Insert Into @Result
([Keyword])
Select Trim([value])
From String_Split(@Keywords,N',')
Group By [value]
Order By Len([value]) Desc;
Declare CntKey_Cursor Insensitive Cursor For
Select [Keyword]
From @Result
Order By [ID];
Open CntKey_Cursor;
Fetch Next From CntKey_Cursor Into @Key;
While (@@Fetch_STATUS = 0) Begin
Set @Cnt = 0;
While (PatIndex(N'%'+@Key+'%',@Text) > 0) Begin
Set @Cnt += 1;
Set @I += 1 ;
Set @Text = Stuff(@Text,PatIndex(N'%'+@Key+'%',@Text),len(@Key),N'{'+Convert(nvarchar,@I)+'}');
--Set @Text = Replace(@Text,@Key,N'{'+Convert(nvarchar,@I)+'}');
End--While
Update @Result
Set [Cnt] = @Cnt
Where ([Keyword] = @Key);
Fetch Next From CntKey_Cursor Into @Key;
End--While
Close CntKey_Cursor;
Deallocate CntKey_Cursor;
Return
End
GO
--Test
Select *
From dbo.fn_getCountKeywords(
N'📣 MARKET IMPACT Euro area Euro CPIarea annual inflation up to 3.0% MaCPIRKET forex'
,N'CPI ,core,MaRKET , Euro area'
)
Go
Go
Reference https://learn.microsoft.com/en-us/sql/t-sql/functions/string-split-transact-sql?view=sql-server-ver15
Example:
SELECT s.*
,s.[Number1] - (SELECT COUNT(Value)
FROM string_split(s.[StringColumn],',')
WHERE RTRIM(VALUE) <> '')
FROM TableName AS s
Applies to: SQL Server 2016 (13.x) and later
You can use the following stored procedure to fetch comma-separated values.
IF EXISTS (SELECT * FROM sys.objects
WHERE object_id = OBJECT_ID(N'[dbo].[sp_parsedata]') AND type in (N'P', N'PC'))
DROP PROCEDURE [dbo].[sp_parsedata]
GO
create procedure sp_parsedata
(@cid integer,@st varchar(1000))
as
declare @coid integer
declare @c integer
declare @c1 integer
select @c1=len(@st) - len(replace(@st, ',', ''))
set @c=0
delete from table1 where complainid=@cid;
while (@c<=@c1)
begin
if (@c<@c1)
begin
select @coid=cast(replace(left(@st,CHARINDEX(',',@st,1)),',','') as integer)
select @st=SUBSTRING(@st,CHARINDEX(',',@st,1)+1,LEN(@st))
end
else
begin
select @coid=cast(@st as integer)
end
insert into table1(complainid,courtid) values(@cid,@coid)
set @c=@c+1
end
The Replace/Len test is cute, but probably very inefficient (especially in terms of memory).
A simple function with a loop will do the job.
CREATE FUNCTION [dbo].[fn_Occurences]
(
@pattern varchar(255),
@expression varchar(max)
)
RETURNS int
AS
BEGIN
DECLARE @Result int = 0;
DECLARE @index BigInt = 0
DECLARE @patLen int = len(@pattern)
SET @index = CHARINDEX(@pattern, @expression, @index)
While @index > 0
BEGIN
SET @Result = @Result + 1;
SET @index = CHARINDEX(@pattern, @expression, @index + @patLen)
END
RETURN @Result
END
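A quick usage sketch:
SELECT dbo.fn_Occurences(',', 'a,b,c,d'); -- returns 3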
Perhaps you should not store data that way. It is bad practice to store a comma-delimited list in a field; it is very inefficient for querying. This should be a related table.
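A sketch of the normalized design that suggests, with one row per value instead of a delimited list (table and column names are illustrative):
CREATE TABLE dbo.ParentRecord (
    ParentId int NOT NULL PRIMARY KEY
);

CREATE TABLE dbo.ParentRecordValue (
    ParentId int NOT NULL REFERENCES dbo.ParentRecord (ParentId),
    Value varchar(50) NOT NULL,
    PRIMARY KEY (ParentId, Value)
);

-- counting "items" then becomes a plain aggregate
SELECT ParentId, COUNT(*) AS ValueCount
FROM dbo.ParentRecordValue
GROUP BY ParentId;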
