How to get substring in SQL Server? - sql-server

I have the next table, how can I get substring before and after dot(.) special character?
MyTable
------------------------------
Id Description
------------------------------
1 [Hugo].[date].[Subtotal]
2 [Juan].[date].[Subtotal]
3 [7/23/2013].[SubTotal]
4 [7/25/2013].[Total]
I am looking for the following result
MyResultTable
------------------------
MyTableId Description depth
-----------------------
1 [Hugo] 1
1 [date] 2
1 [Subtotal] 3
2 [Juan] 1
2 [date] 2
2 [Subtotal] 3
3 [7/23/2013] 1
3 [SubTotal] 2
4 [7/25/2013] 1
4 [Total] 2
I want to separate the words after a dot(.) and list the words as the following table
How can I solve it?

You will want to split the data on the dot (.) character. You can use a recursive CTE to split the data and return the depth:
-- Splits every MyTable.Description on '.' with a recursive CTE.
-- Each pass peels the leading token off the text that is still unparsed and
-- records how deep in the list that token sits.
;WITH cte (id, DescriptionItem, Description, depth) AS
(
    -- Anchor member: first token of each row. Appending '.' guarantees that
    -- CHARINDEX finds a delimiter even for the last (or only) token.
    SELECT
        id,
        CAST(LEFT(Description, CHARINDEX('.', Description + '.') - 1) AS varchar(50)) AS DescriptionItem,
        STUFF(Description, 1, CHARINDEX('.', Description + '.'), '') AS Description,
        1 AS depth
    FROM MyTable

    UNION ALL

    -- Recursive member: same peel-off applied to the remainder of the previous pass.
    SELECT
        id,
        CAST(LEFT(Description, CHARINDEX('.', Description + '.') - 1) AS varchar(50)) AS DescriptionItem,
        STUFF(Description, 1, CHARINDEX('.', Description + '.'), '') AS Description,
        depth + 1
    FROM cte
    WHERE Description > ''   -- stop once nothing is left to parse
)
SELECT
    id,
    DescriptionItem,
    depth
FROM cte
ORDER BY
    id,
    depth;
See SQL Fiddle with Demo
Or you can use a UDF function that splits the data:
-- Splits @String on the single-character @Delimiter.
-- Returns one row per non-empty token:
--   items = token text
--   depth = 1-based position of the token in the list (empty tokens are not
--           returned but still consume a position)
-- A NULL or empty @String returns no rows.
CREATE FUNCTION [dbo].[Split] (@String varchar(MAX), @Delimiter char(1))
RETURNS @temptable TABLE (items varchar(MAX), depth int)
AS
BEGIN
    DECLARE @idx int = 1;
    DECLARE @slice varchar(8000);
    DECLARE @depth int = 1;

    IF @String IS NULL OR LEN(@String) < 1 RETURN;

    WHILE @idx != 0
    BEGIN
        -- Position of the next delimiter; 0 means the rest is the last token.
        SET @idx = CHARINDEX(@Delimiter, @String);

        IF @idx != 0
            SET @slice = LEFT(@String, @idx - 1);
        ELSE
            SET @slice = @String;

        IF LEN(@slice) > 0
            INSERT INTO @temptable (items, depth) VALUES (@slice, @depth);

        -- Drop the consumed token and its delimiter.
        SET @String = RIGHT(@String, LEN(@String) - @idx);
        SET @depth = @depth + 1;

        IF LEN(@String) = 0 BREAK;
    END

    RETURN;
END;
Then when you call the function, you will use CROSS APPLY similar to this:
-- One output row per token of each mytable.description, numbered by depth.
SELECT
    t.id,
    c.items AS description,
    c.depth
FROM mytable AS t
CROSS APPLY dbo.split(t.description, '.') AS c
ORDER BY
    t.id,
    c.depth;
See SQL Fiddle with Demo

USE tempdb;
GO
-- Rebuild the demo table from scratch.
IF OBJECT_ID('dbo.csv_split', 'U') IS NOT NULL
    DROP TABLE dbo.csv_split;

CREATE TABLE dbo.csv_split
(
    Id          INT NOT NULL PRIMARY KEY,
    Description VARCHAR(100)
)

INSERT INTO dbo.csv_split (Id, Description)
VALUES
    (1, '[Hugo].[date].[Subtotal]'),
    (2, '[Juan].[date].[Subtotal]'),
    (3, '[7/23/2013].[SubTotal]'),
    (4, '[7/25/2013].[Total]');

-- cte_xml: turn each dotted Description into <desc>..</desc><desc>..</desc> XML.
WITH cte_xml AS
(
    SELECT
        csv.Id,
        CONVERT(XML, '<desc>'
                     + REPLACE(csv.Description, '.', '</desc><desc>')
                     + '</desc>') AS xml_desc
    FROM dbo.csv_split AS csv
),
-- cte_shred_xml: one row per <desc> element, numbered within each Id.
-- NOTE(review): ORDER BY t.Id inside PARTITION BY t.Id is a tie for every row,
-- so Depth follows whatever order nodes() happens to emit - confirm that this
-- matches document order on the target server.
cte_shred_xml AS
(
    SELECT
        t.Id,
        xml_desc_nodes.value('(.)', 'varchar(50)') AS Description,
        ROW_NUMBER() OVER (PARTITION BY t.Id ORDER BY t.Id) AS Depth
    FROM cte_xml AS t
    CROSS APPLY t.xml_desc.nodes('/desc') AS t2(xml_desc_nodes)
)
SELECT
    Id,
    Description,
    Depth
FROM cte_shred_xml

Here is a simple example. I created your table as #test and used both a cursor and a loop within the cursor.
-- Demo: split each dotted NAME into one row per token using a cursor over the
-- source rows and an inner loop over the delimiters.
DECLARE @test TABLE (id INT, NAME VARCHAR(MAX));

INSERT @test (id, NAME)
VALUES
    (1, '[Hugo].[date].[Subtotal]'),
    (2, '[Juan].[date].[Subtotal]'),
    (3, '[7/23/2013].[SubTotal]'),
    (4, '[7/25/2013].[Total]');

DECLARE @id INT,
        @name VARCHAR(MAX);

-- Result set: one row per token with its 1-based position (Depth).
DECLARE @values TABLE
(
    MyTableId   INT,
    Description VARCHAR(MAX),
    Depth       INT
);

DECLARE @i INT,
        @depth INT;

DECLARE @MyTableList CURSOR;
SET @MyTableList = CURSOR FOR
    SELECT id, name FROM @test;

OPEN @MyTableList;
FETCH NEXT FROM @MyTableList INTO @id, @name;

WHILE @@FETCH_STATUS = 0
BEGIN
    SET @depth = 1;
    SET @i = PATINDEX('%.%', @name);

    -- Peel tokens off the front while a dot remains.
    WHILE @i > 0
    BEGIN
        INSERT @values (MyTableId, Description, Depth)
        VALUES (@id, SUBSTRING(@name, 1, @i - 1), @depth);

        SET @name = SUBSTRING(@name, @i + 1, LEN(@name) - @i);
        SET @i = PATINDEX('%.%', @name);
        SET @depth = @depth + 1;
    END

    -- Whatever is left after the last dot is the final token.
    INSERT @values (MyTableId, Description, Depth)
    VALUES (@id, @name, @depth);

    FETCH NEXT FROM @MyTableList INTO @id, @name;
END

-- Release the cursor explicitly instead of relying on scope exit.
CLOSE @MyTableList;
DEALLOCATE @MyTableList;

SELECT
    MyTableId,
    Description,
    Depth
FROM @values
ORDER BY
    MyTableId,
    Depth;
Your output should look like this.
MyTableId Description Depth
1 [Hugo] 1
1 [date] 2
1 [Subtotal] 3
2 [Juan] 1
2 [date] 2
2 [Subtotal] 3
3 [7/23/2013] 1
3 [SubTotal] 2
4 [7/25/2013] 1
4 [Total] 2

Related

Add columns dynamically and Later Parse values in the respective columns

I have an SP which accepts the Inputtable as parameter, My Inputtable is as shown in the code
-- Sample input: ItemQty holds a comma-separated list of items per row.
create table inputTable ( id int, ItemQty varchar(100) )
insert into inputTable (id, ItemQty) values (1, 'a,b,c')
insert into inputTable (id, ItemQty) values (2, 'x,y')
insert into inputTable (id, ItemQty) values (3, 'l,m,n,o,p')
insert into inputTable (id, ItemQty) values (4, 'a,b')
insert into inputTable (id, ItemQty) values (5, 'm')
and SP i have written is like below
`ALTER PROCEDURE [dbo].[Column_Dynamics] (#tablename varchar(50))
AS
BEGIN
-----
-- Asker's first attempt, kept verbatim as the exhibit the question is about.
-- Intent (per the surrounding text): find the largest number of comma-separated
-- items in ITEMQTY of the named table and add QTY columns to that table.
-- NOTE(review): '#' appears where T-SQL expects '@' on every variable and
-- parameter name; this looks like a paste artifact - confirm against the original post.
declare #maxcount as int
-- The int variable is assigned the query *text*, not the query result.
set #maxcount='select MAX(len(ITEMQTY) - len(replace(ITEMQTY, '','', '''')) +1) from '+#tablename
-- Runs the same query dynamically; nothing here captures its result.
exec('select MAX(len(ITEMQTY) - len(replace(ITEMQTY, '','', '''')) +1) from '+#tablename)
print #maxcount
exec #maxcount
print #maxcount
declare #var varchar(100)
-- Only the SET below is governed by this IF; the exec that follows always runs.
IF EXISTS(SELECT * FROM sys.columns WHERE object_id = Object_id(#tablename))
-- Builds an ALTER TABLE statement; note that no data type is given for QTY1.
set #var='alter table '+ #tablename +' ADD column QTY1'
exec(#var)
-- Selects from the parameter name itself rather than from the table it names.
select * from #tablename
-- Alternative item-count expression, hard-coded against inputtable.
select max(len(ItemQty))-max(len(replace(ItemQty, ',', ''))-1) from inputtable
END`
My table is :
step 1 ) I want to add the columns dynamically to inputtable like QTY1,QTY2,QTY3,QTY4,QTY5 because maximum count of ItemQty column is 5, by considering comma as delimiter as shown in figure1
**step 2) ** Parse values in the respective columns(by considering the delimiter comma (,).as shown in figure2
Later SP: I got till here, But Not getting the second step, that is update Parse values in the respective columns.
-- Rebuilds dyn_tbl as a copy of inputtable and adds one varchar(20) column
-- (qty_1 .. qty_N) per comma-separated item, where N is the largest item count
-- found in the ITEMQTY column of @tablename.
--
-- @tablename: table that is measured and then updated. It is concatenated into
--             dynamic SQL unescaped, so pass trusted names only.
--
-- Each new column is currently filled with the whole itemQty list; splitting
-- the list into its individual items (e.g. via
-- CAST('<A>'+REPLACE(ITEMQTY, ',', '</A><A>')+'</A>' AS XML)) is not implemented here.
-- Example call: exec dynamic_tbl 'dyn_tbl'
ALTER PROCEDURE dynamic_tbl (@tablename varchar(50))
AS
BEGIN
    DECLARE @ColumnCount int;
    DECLARE @rowcount TABLE (Value int);
    DECLARE @ColumnName nvarchar(10) = 'qty_';
    DECLARE @count int = 0;
    DECLARE @myvar varchar(max);

    -- INSERT ... EXEC captures the single value produced by the dynamic query.
    INSERT INTO @rowcount
    EXEC('select MAX(len(ITEMQTY) - len(replace(ITEMQTY, '','', '''')) +1) from ' + @tablename);

    SELECT @ColumnCount = Value FROM @rowcount;

    IF (@ColumnCount > 0)
    BEGIN
        IF (EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES
                    WHERE TABLE_SCHEMA = 'dbo' AND TABLE_NAME = 'dyn_tbl'))
        BEGIN
            DROP TABLE dyn_tbl
        END

        SELECT * INTO dyn_tbl FROM inputtable

        SET @count = @count + 1;
        WHILE (@ColumnCount >= @count)
        BEGIN
            SET @ColumnName = 'qty_' + CONVERT(varchar(2), @count)
            EXEC ('ALTER TABLE dyn_tbl ADD [' + @ColumnName + '] varchar(20)')

            SET @myvar = 'update ' + @tablename + ' set ' + @ColumnName + ' =itemQty'
            PRINT @myvar
            EXEC (@myvar)

            SET @count = @count + 1;
        END
    END
END
Procedure to alter given table dynamically based on column length as you have asked
-- Adds columns QTY1 .. QTYn (varchar(100)) to the table named by @tablename,
-- where n is the largest number of comma-separated items found in its ITEMQTY column.
--
-- @tablename: target table. It is concatenated into dynamic SQL unescaped, so
--             pass trusted names only.
ALTER PROCEDURE [dbo].[Column_Dynamics] (@tablename varchar(50))
AS
BEGIN
    DECLARE @sql nvarchar(max);

    -- ##temp is a global temp table: it is created inside the dynamic batch
    -- below and read again here afterwards. Drop a leftover copy only if present.
    IF OBJECT_ID('tempdb..##temp') IS NOT NULL
        DROP TABLE ##temp;

    -- Generate the column names QTY1..QTYn into ##temp.
    EXEC ( '
    create table ##temp (id int identity,columnsl varchar(100))
    declare @maxcount as int
    set @maxcount = (select MAX(len(ITEMQTY) - len(replace(ITEMQTY, '','', '''')) +1) from '+@tablename+')
    declare @count int = 1
    while (@count <= @maxcount)
    begin
    declare @colvar nvarchar(100)= ''QTY''
    set @colvar = concat(@colvar,@count)
    set @count = @count + 1
    insert into ##temp select @colvar
    end
    ');

    DECLARE @tempstart int = 1;
    DECLARE @templast int = (SELECT COUNT(*) FROM ##temp);
    DECLARE @updatecol varchar(100) = '';

    -- Add one column per generated name.
    WHILE (@tempstart <= @templast)
    BEGIN
        SET @updatecol = (SELECT columnsl FROM ##temp WHERE id = @tempstart);
        SET @sql = N'alter table ' + @tablename + N' Add ' + QUOTENAME(@updatecol) + N' varchar(100) ';
        EXEC (@sql);
        SET @tempstart = @tempstart + 1;
    END
END
output for inputTable:
id ItemQty QTY1 QTY2 QTY3 QTY4 QTY5
1 a,b,c NULL NULL NULL NULL NULL
2 x,y NULL NULL NULL NULL NULL
3 l,m,n,o,p NULL NULL NULL NULL NULL
4 a,b NULL NULL NULL NULL NULL
5 m NULL NULL NULL NULL NULL
may not be the best way but works.
edit
Altered above procedure to perform both actions, Please use below procedure
-- Step 1: adds columns QTY1 .. QTYn (varchar(100)) to the table named by
--         @tablename, n being the largest number of comma-separated items in
--         its ITEMQTY column.
-- Step 2: for every id from 1 to max(id), splits that row's ItemQty with
--         dbo.fnSplit and writes the k-th item into QTYk.
--
-- @tablename: target table; must have columns id and ItemQty. The name is
--             concatenated into dynamic SQL unescaped - pass trusted names only.
-- Relies on dbo.fnSplit(list, delimiter) returning a column named item
-- (not defined here).
ALTER PROCEDURE [dbo].[Column_Dynamics] (@tablename varchar(50))
AS
BEGIN
    DECLARE @sql nvarchar(max);

    -- #temp lists the generated column names in order (id 1 -> QTY1, ...).
    IF OBJECT_ID('tempdb..#temp') IS NOT NULL
        DROP TABLE #temp;
    CREATE TABLE #temp (id int identity, columnsl varchar(100));

    EXEC ( '
    declare @maxcount as int
    set @maxcount = (select MAX(len(ITEMQTY) - len(replace(ITEMQTY, '','', '''')) +1) from '+@tablename+')
    declare @count int = 1
    while (@count <= @maxcount)
    begin
    declare @colvar nvarchar(100)= ''QTY''
    set @colvar = concat(@colvar,@count)
    set @count = @count + 1
    insert into #temp
    select @colvar
    end
    ');

    DECLARE @tempstart int = 1;
    DECLARE @templast int = (SELECT COUNT(*) FROM #temp);
    DECLARE @updatecol varchar(100) = '';
    DECLARE @itemqty varchar(100);

    -- Step 1: add the columns.
    WHILE (@tempstart <= @templast)
    BEGIN
        SET @updatecol = (SELECT columnsl FROM #temp WHERE id = @tempstart);
        SET @sql = N'alter table ' + @tablename + N' Add ' + QUOTENAME(@updatecol) + N' varchar(100) ';
        EXEC (@sql);
        SET @tempstart = @tempstart + 1;
    END

    -- Step 2: fill the columns row by row.
    DECLARE @cat int = 1;          -- id of the row being processed
    DECLARE @dog int;              -- highest id in the target table
    DECLARE @cn int;               -- 1-based index of the item within the row
    DECLARE @max int;              -- number of items in the current row
    DECLARE @upcol varchar(100);   -- item value
    DECLARE @plscol varchar(100);  -- column that receives it

    SET @sql = N'select @maxid = max(id) from ' + @tablename;
    EXEC sp_executesql @sql, N'@maxid int OUTPUT', @maxid = @dog OUTPUT;

    IF OBJECT_ID('tempdb..#tab2') IS NOT NULL
        DROP TABLE #tab2;
    CREATE TABLE #tab2 (id int identity, fnvalues varchar(100));

    WHILE (@cat <= @dog)
    BEGIN
        -- TRUNCATE also restarts the identity, so items are numbered from 1 per row.
        TRUNCATE TABLE #tab2;

        -- Reset first: an OUTPUT parameter keeps its previous value when no row matches.
        SET @itemqty = NULL;
        SET @sql = N'select @v = ItemQty from ' + @tablename + N' where id = @i';
        EXEC sp_executesql @sql,
             N'@v varchar(100) OUTPUT, @i int',
             @v = @itemqty OUTPUT, @i = @cat;

        INSERT INTO #tab2 (fnvalues)
        SELECT item FROM [dbo].[fnSplit](@itemqty, ',');

        SET @cn = 1;
        SET @max = (SELECT MAX(id) FROM #tab2);

        WHILE (@cn <= @max)
        BEGIN
            SET @upcol = (SELECT fnvalues FROM #tab2 WHERE id = @cn);
            SET @plscol = (SELECT columnsl FROM #temp WHERE id = @cn);

            -- The value and id travel as parameters, so quotes inside an item
            -- cannot break or alter the statement.
            SET @sql = N'update ' + @tablename + N' set ' + QUOTENAME(@plscol)
                     + N' = @val where id = @id';
            EXEC sp_executesql @sql,
                 N'@val varchar(100), @id int',
                 @val = @upcol, @id = @cat;

            SET @cn = @cn + 1;
        END

        SET @cat = @cat + 1;
    END
END
output:
id ItemQty QTY1 QTY2 QTY3 QTY4 QTY5
1 a,b,c a b c NULL NULL
2 x,y x y NULL NULL NULL
3 l,m,n,o,p l m n o p
4 a,b a b NULL NULL NULL
5 m m NULL NULL NULL NULL
did not optimize the query but works fine.
However, if the maximum number of Qty values in the comma-separated list is known, you could use the XML nodes method to separate them into columns QTY1...QTY5:
-- Pivots the comma-separated ITEMQTY list into up to five columns.
-- The list is rewritten as <A>..</A> elements; every XPath below is absolute,
-- so each shredded node yields the same five values and DISTINCT collapses
-- the repeats back to one row per (ID, ITEMQTY).
SELECT DISTINCT
    ID,
    ITEMQTY,
    node.value('/A[1]', 'VARCHAR(MAX)') AS QTY1,
    node.value('/A[2]', 'VARCHAR(MAX)') AS QTY2,
    node.value('/A[3]', 'VARCHAR(MAX)') AS QTY3,
    node.value('/A[4]', 'VARCHAR(MAX)') AS QTY4,
    node.value('/A[5]', 'VARCHAR(MAX)') AS QTY5
FROM
(
    SELECT
        ID,
        ITEMQTY,
        CAST('<A>'+REPLACE(ITEMQTY, ',', '</A><A>')+'</A>' AS XML) AS ITEMQTY1
    FROM inputTable
) AS src
CROSS APPLY src.ITEMQTY1.nodes('/A') AS split(node);
Result :
ID ITEMQTY QTY1 QTY2 QTY3 QTY4 QTY5
1 a,b,c a b c NULL NULL
2 x,y x y NULL NULL NULL
3 l,m,n,o,p l m n o p
4 a,b a b NULL NULL NULL
5 m m NULL NULL NULL NULL
Later, you could replace null by using coalesce() or isnull() function with ''
Use This
First Create a function
-- Splits @str on the single-character @delimiter.
-- Returns one row per token in list order: idx is the 1-based position,
-- item is the token text, or NULL for an empty token.
-- A NULL @str returns no rows.
CREATE FUNCTION [dbo].[fn_split](
    @str VARCHAR(MAX),
    @delimiter CHAR(1)
)
RETURNS @returnTable TABLE (idx INT PRIMARY KEY IDENTITY, item VARCHAR(8000))
AS
BEGIN
    DECLARE @pos INT

    -- A trailing delimiter lets the loop treat the last token like any other.
    SELECT @str = @str + @delimiter

    WHILE LEN(@str) > 0
    BEGIN
        SELECT @pos = CHARINDEX(@delimiter, @str)

        IF @pos = 1
            -- Delimiter at the very front: empty token.
            INSERT @returnTable (item)
            VALUES (NULL)
        ELSE
            INSERT @returnTable (item)
            VALUES (SUBSTRING(@str, 1, @pos - 1))

        -- Drop the consumed token and its delimiter.
        SELECT @str = SUBSTRING(@str, @pos + 1, LEN(@str) - @pos)
    END

    RETURN
END
GO
and use function like this
-- Demo: pivot a comma-separated list into five columns with dbo.fn_split.
DECLARE @test TABLE (
    ID   VARCHAR(200),
    Data VARCHAR(200)
)

INSERT INTO @test (ID, Data) VALUES (1, 'a,b,c')
INSERT INTO @test (ID, Data) VALUES (2, 'x,y')
INSERT INTO @test (ID, Data) VALUES (3, 'l,m,n,o,p')
INSERT INTO @test (ID, Data) VALUES (4, 'a,b')
INSERT INTO @test (ID, Data) VALUES (5, 'm')

-- Each QTYn picks the n-th item of the list; a missing item becomes ''.
SELECT
    ID,
    Data AS ItemQty,
    ISNULL((SELECT item FROM fn_split(Data, ',') WHERE idx = 1), '') AS QTY1,
    ISNULL((SELECT item FROM fn_split(Data, ',') WHERE idx = 2), '') AS QTY2,
    ISNULL((SELECT item FROM fn_split(Data, ',') WHERE idx = 3), '') AS QTY3,
    ISNULL((SELECT item FROM fn_split(Data, ',') WHERE idx = 4), '') AS QTY4,
    ISNULL((SELECT item FROM fn_split(Data, ',') WHERE idx = 5), '') AS QTY5
FROM @test
Output Same as your Image
instead of using
-- Literal-row form of the insert into the @test table variable.
INSERT INTO @test (ID, Data)
VALUES (1, 'a,b,c')
you can also assign it like this
-- Loads @test from an existing table. INSERT ... SELECT is the valid form;
-- a SELECT cannot be wrapped in a VALUES clause to supply whole rows.
INSERT INTO @test (ID, Data)
SELECT Column1, Column2
FROM YourTable

T-SQL XML Reader causing Performance Bottleneck [duplicate]

I’m looking to split '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15...' (comma delimited) into a table or table variable.
Does anyone have a function that returns each one in a row?
Try this
-- Splits @str on @delimiter by rewriting it as <X>..</X> elements and
-- shredding the resulting XML, one row per element.
DECLARE @xml xml, @str varchar(100), @delimiter varchar(10)
SET @str = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15'
SET @delimiter = ','
SET @xml = CAST(('<X>' + REPLACE(@str, @delimiter, '</X><X>') + '</X>') AS xml)

SELECT C.value('.', 'varchar(10)') AS value
FROM @xml.nodes('X') AS X(C)
OR
-- Splits @str on @delimiter with a recursive CTE.
-- Each CTE row holds (a, b): a = start of the current element, b = start of the
-- next one. CHARINDEX returns 0 after the last delimiter, which makes
-- b = LEN(@delimiter) and ends the recursion through the b > a test.
DECLARE @str varchar(100), @delimiter varchar(10)
SET @str = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15'
SET @delimiter = ','

;WITH cte AS
(
    SELECT 0 a, 1 b
    UNION ALL
    SELECT b, CHARINDEX(@delimiter, @str, b) + LEN(@delimiter)
    FROM cte
    WHERE b > a
)
SELECT SUBSTRING(@str, a,
           CASE WHEN b > LEN(@delimiter)
                THEN b - a - LEN(@delimiter)   -- element followed by a delimiter
                ELSE LEN(@str) - a + 1         -- last element: take the rest
           END) value
FROM cte
WHERE a > 0   -- skip the seed row
Many more ways of doing the same is here How to split comma delimited string?
Here is somewhat old-fashioned solution:
/*
Splits a string into parts delimited by the specified character.

@sString    : text to split; NULL returns no rows.
@cDelimiter : single-character delimiter.
Returns one row per part. An empty part (a leading delimiter or two adjacent
delimiters) is returned as NULL.
*/
CREATE FUNCTION [dbo].[SDF_SplitString]
(
    @sString nvarchar(2048),
    @cDelimiter nchar(1)
)
RETURNS @tParts TABLE ( part nvarchar(2048) )
AS
BEGIN
    IF @sString IS NULL RETURN

    DECLARE @iStart int,
            @iPos int

    -- A delimiter in first position means the first part is empty.
    IF SUBSTRING( @sString, 1, 1 ) = @cDelimiter
    BEGIN
        SET @iStart = 2
        INSERT INTO @tParts
        VALUES( NULL )
    END
    ELSE
        SET @iStart = 1

    WHILE 1=1
    BEGIN
        SET @iPos = CHARINDEX( @cDelimiter, @sString, @iStart )

        -- No further delimiter: the part runs to the end of the string.
        IF @iPos = 0
            SET @iPos = LEN( @sString ) + 1

        IF @iPos - @iStart > 0
            INSERT INTO @tParts
            VALUES ( SUBSTRING( @sString, @iStart, @iPos - @iStart ))
        ELSE
            INSERT INTO @tParts
            VALUES( NULL )

        SET @iStart = @iPos + 1
        IF @iStart > LEN( @sString )
            BREAK
    END

    RETURN
END
In SQL Server 2008 you can achieve the same with .NET code. Maybe it would work faster, but definitely this approach is easier to manage.
You've tagged this SQL Server 2008 but future visitors to this question (using SQL Server 2016+) will likely want to know about STRING_SPLIT.
With this new builtin function you can now just use
-- STRING_SPLIT returns one row per element in a column named value;
-- TRY_CAST converts each element to INT and yields NULL when it cannot.
SELECT
    TRY_CAST(s.value AS INT)
FROM STRING_SPLIT('1,2,3,4,5,6,7,8,9,10,11,12,13,14,15', ',') AS s
Some restrictions of this function and some promising results of performance testing are in this blog post by Aaron Bertrand.
This is most like .NET, for those of you who are familiar with that function:
-- Returns the element at zero-based position @Index after splitting @Text on
-- @Delimiter, or NULL when there is no such element.
CREATE FUNCTION dbo.[String.Split]
(
    @Text VARCHAR(MAX),
    @Delimiter VARCHAR(100),
    @Index INT
)
RETURNS VARCHAR(MAX)
AS BEGIN
    DECLARE @A TABLE (ID INT IDENTITY, V VARCHAR(MAX));
    DECLARE @R VARCHAR(MAX);

    -- (A, B) = start of the current element, start of the next one.
    WITH CTE AS
    (
        SELECT 0 A, 1 B
        UNION ALL
        SELECT B, CONVERT(INT, CHARINDEX(@Delimiter, @Text, B) + LEN(@Delimiter))
        FROM CTE
        WHERE B > A
    )
    INSERT @A (V)
    SELECT SUBSTRING(@Text, A,
               CASE WHEN B > LEN(@Delimiter)
                    THEN B - A - LEN(@Delimiter)
                    ELSE LEN(@Text) - A + 1
               END) VALUE
    FROM CTE
    WHERE A > 0
    ORDER BY A;   -- pins the identity values to the elements' order in @Text

    SELECT @R = V
    FROM @A
    WHERE ID = @Index + 1;

    RETURN @R;
END
GO
-- CREATE FUNCTION must be alone in its batch, hence the GO above.
SELECT dbo.[String.Split]('121,2,3,0', ',', 1) -- gives '2'
Here is the split function that you asked for:
-- Splits @delimited on @delimiter by rewriting it as <t>..</t> elements and
-- shredding the XML. Returns id (identity, 1-based) and val, one row per element.
-- Limitation: characters that are special in XML (&, <) inside @delimited make
-- the XML conversion fail.
CREATE FUNCTION [dbo].[split](
    @delimited NVARCHAR(MAX),
    @delimiter NVARCHAR(100)
) RETURNS @t TABLE (id INT IDENTITY(1,1), val NVARCHAR(MAX))
AS
BEGIN
    DECLARE @xml XML
    SET @xml = N'<t>' + REPLACE(@delimited, @delimiter, '</t><t>') + '</t>'

    INSERT INTO @t (val)
    -- Shred as nvarchar so Unicode input is not narrowed before it reaches val.
    SELECT r.value('.', 'nvarchar(MAX)') AS item
    FROM @xml.nodes('/t') AS records(r)

    RETURN
END
execute the function like this
-- One row per number in the list, with its 1-based position in id.
SELECT
    id,
    val
FROM dbo.split('1,2,3,4,5,6,7,8,9,10,11,12,13,14,15', ',')
-- Splits @InputString on @delimiter via an XML rewrite, one row per token.
DECLARE
    @InputString NVARCHAR(MAX) = 'token1,token2,token3,token4,token5'
  , @delimiter varchar(10) = ','

DECLARE @xml AS XML = CAST(('<X>' + REPLACE(@InputString, @delimiter, '</X><X>') + '</X>') AS XML)

SELECT C.value('.', 'varchar(10)') AS value
FROM @xml.nodes('X') AS X(C)
Source of this response:
http://sqlhint.com/sqlserver/how-to/best-split-function-tsql-delimited
I am tempted to squeeze in my favourite solution. The resulting table will consist of 2 columns: PosIdx for position of the found integer; and Value in integer.
-- Splits a comma-separated list of integers.
-- Returns PosIdx (character position where the element starts in @param) and
-- Value (the element as int). An element that is not an optionally signed run
-- of digits yields NULL; an empty element yields 0.
-- The Numbers CTE recurses up to 4000 levels, so the calling statement needs
-- OPTION (MAXRECURSION 4000).
create function FnSplitToTableInt
(
    @param nvarchar(4000)
)
returns table as
return
with Numbers(Number) as
(
    select 1
    union all
    select Number + 1 from Numbers where Number < 4000
),
Found as
(
    -- One row per element start; the element text is kept as a string here so
    -- that nothing is converted before it has been validated.
    select
        Number as PosIdx,
        ltrim(rtrim(convert(nvarchar(4000),
            substring(@param, Number,
                charindex(N',' collate Latin1_General_BIN,
                          @param + N',', Number) - Number)))) as Token
    from
        Numbers
    where
        Number <= len(@param)
        and substring(N',' + @param, Number, 1) = N',' collate Latin1_General_BIN
)
select
    PosIdx,
    case
        -- convert(int, '') is 0; keep that result for empty elements.
        when Token = N'' then convert(int, 0)
        -- Optional sign followed by digits only.
        when Token like N'[0-9+-]%'
             and substring(Token, 2, 4000) not like N'%[^0-9]%'
            then convert(int, Token)
        else convert(int, null)
    end as Value
from
    Found
It works by using a recursive CTE as the list of positions. By default recursion is limited to 100 levels, so to work with strings longer than 100 characters, call the function with 'option (maxrecursion 4000)', as follows:
-- Example call; the MAXRECURSION hint lifts the default limit of 100 levels.
SELECT
    PosIdx,
    Value
FROM FnSplitToTableInt
(
    '9, 8, 7, 6, 5, 4, 3, 2, 1, 0, ' +
    '9, 8, 7, 6, 5, 4, 3, 2, 1, 0, ' +
    '9, 8, 7, 6, 5, 4, 3, 2, 1, 0, ' +
    '9, 8, 7, 6, 5, 4, 3, 2, 1, 0, ' +
    '9, 8, 7, 6, 5, 4, 3, 2, 1, 0'
)
OPTION (MAXRECURSION 4000)
-- Splits @delimited on @delimiter by rewriting it as <root><r>..</r>..</root>
-- and shredding the XML. Returns one row per element.
-- Limitation: characters that are special in XML (&, <) inside @delimited make
-- the XML conversion fail.
CREATE FUNCTION Split
(
    @delimited nvarchar(max),
    @delimiter nvarchar(100)
) RETURNS @t TABLE
(
    -- Id column can be commented out, not required for sql splitting string
    id int identity(1,1), -- I use this column for numbering splitted parts
    val nvarchar(max)
)
AS
BEGIN
    DECLARE @xml xml
    SET @xml = N'<root><r>' + REPLACE(@delimited, @delimiter, '</r><r>') + '</r></root>'

    INSERT INTO @t (val)
    -- Shred as nvarchar so Unicode input is not narrowed before it reaches val.
    SELECT
        r.value('.', 'nvarchar(max)') AS item
    FROM @xml.nodes('//root/r') AS records(r)

    RETURN
END
GO
usage
-- Example call: one row per element, numbered by id.
SELECT
    id,
    val
FROM dbo.Split(N'1,2,3,4,6', ',')
This simple CTE will give what's needed:
-- Splits @csv on commas with a recursive CTE, one row per value.
DECLARE @csv varchar(max) = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15';

--append comma to the list for CTE to work correctly
SET @csv = @csv + ',';

--remove empty entries: repeat until no doubled comma is left, because a single
--REPLACE pass turns ',,,' into ',,'
WHILE CHARINDEX(',,', @csv) > 0
    SET @csv = REPLACE(@csv, ',,', ',');

-- idx = position of the comma that ends the current value.
WITH CteCsv AS (
    SELECT CHARINDEX(',', @csv) idx,
           SUBSTRING(@csv, 1, CHARINDEX(',', @csv) - 1) [Value]
    UNION ALL
    SELECT CHARINDEX(',', @csv, idx + 1),
           SUBSTRING(@csv, idx + 1, CHARINDEX(',', @csv, idx + 1) - idx - 1)
    FROM CteCsv
    WHERE CHARINDEX(',', @csv, idx + 1) > 0
)
SELECT [Value] FROM CteCsv
This is another version which really does not have any restrictions (e.g.: special chars when using xml approach, number of records in CTE approach) and it runs much faster based on a test on 10M+ records with source string average length of 4000. Hope this could help.
-- Splits @ListString on @Delimiter (which may be several characters long).
-- @IncludeEmpty = 1 also returns empty items; otherwise they are skipped.
-- Returns ID (1-based sequence of the returned items) and ListValue (the item
-- with leading and trailing spaces trimmed).
CREATE FUNCTION [dbo].[udf_split] (
    @ListString nvarchar(max),
    @Delimiter nvarchar(1000),
    @IncludeEmpty bit)
RETURNS @ListTable TABLE (ID int, ListValue nvarchar(1000))
AS
BEGIN
    DECLARE @CurrentPosition int, @NextPosition int, @Item nvarchar(max), @ID int, @L int

    SELECT @ID = 1,
           -- LEN ignores trailing spaces; swapping spaces for '^' first gives
           -- the delimiter's true length.
           @L = LEN(REPLACE(@Delimiter, ' ', '^')),
           -- A trailing delimiter lets the loop handle the last item like any other.
           @ListString = @ListString + @Delimiter,
           @CurrentPosition = 1

    SELECT @NextPosition = CHARINDEX(@Delimiter, @ListString, @CurrentPosition)

    WHILE @NextPosition > 0
    BEGIN
        SET @Item = LTRIM(RTRIM(SUBSTRING(@ListString, @CurrentPosition, @NextPosition - @CurrentPosition)))

        IF @IncludeEmpty = 1 OR LEN(@Item) > 0
        BEGIN
            INSERT INTO @ListTable (ID, ListValue) VALUES (@ID, @Item)
            SET @ID = @ID + 1
        END

        SET @CurrentPosition = @NextPosition + @L
        SET @NextPosition = CHARINDEX(@Delimiter, @ListString, @CurrentPosition)
    END

    RETURN
END
/* *Object: UserDefinedFunction [dbo].[Split] Script Date: 10/04/2013 18:18:38* */
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Splits @List on @SplitOn after removing every single quote from @List.
-- Returns Id (identity, 1-based) and Value (item trimmed of spaces). The text
-- after the last delimiter is always returned, even when it is empty.
ALTER FUNCTION [dbo].[Split]
(@List varchar(8000), @SplitOn nvarchar(5))
RETURNS @RtnValue TABLE
(Id int identity(1,1), Value nvarchar(100))
AS
BEGIN
    -- DATALENGTH/2 is the delimiter's real character count. LEN would report 0
    -- for a space delimiter, so the loop below would never advance.
    DECLARE @SplitLen int = DATALENGTH(@SplitOn) / 2
    DECLARE @Pos int

    SET @List = REPLACE(@List, '''', '')
    SET @Pos = CHARINDEX(@SplitOn, @List)

    WHILE (@Pos > 0)
    BEGIN
        INSERT INTO @RtnValue (Value)
        SELECT
            Value = LTRIM(RTRIM(SUBSTRING(@List, 1, @Pos - 1)))

        -- Keep everything after the delimiter (8000 = declared size of @List).
        SET @List = SUBSTRING(@List, @Pos + @SplitLen, 8000)
        SET @Pos = CHARINDEX(@SplitOn, @List)
    END

    INSERT INTO @RtnValue (Value)
    SELECT Value = LTRIM(RTRIM(@List))

    RETURN
END
go
-- Example call, using the schema the function is defined in.
SELECT Id, Value
FROM [dbo].[Split] ('1,2,3,3,3,3,', ',')
GO
Using tally table here is one split string function(best possible approach) by Jeff Moden
-- Splits @pString on the single-character @pDelimiter without loops or recursion.
-- Returns ItemNumber (1-based position of the element) and Item (element text).
CREATE FUNCTION [dbo].[DelimitedSplit8K]
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
    -- for both a performance gain and prevention of accidental "overruns"
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    SELECT 1 UNION ALL
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
    SELECT s.N1,
           ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
       Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
Referred from Tally OH! An Improved SQL 8K “CSV Splitter” Function
This blog came with a pretty good solution using XML in T-SQL.
This is the function I came up with based on that blog (change function name and result type cast per need):
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Splits @List on the single character @Splitter and returns each element as
-- a bigint in column Value. The list is rewritten as <v>..</v> elements and
-- shredded as XML, so every element must convert to bigint.
CREATE FUNCTION [dbo].[SplitIntoBigints]
(@List varchar(MAX), @Splitter char)
RETURNS TABLE
AS
RETURN
(
    WITH SplittedXML AS (
        SELECT CAST('<v>' + REPLACE(@List, @Splitter, '</v><v>') + '</v>' AS XML) AS Splitted
    )
    SELECT x.v.value('.', 'bigint') AS Value
    FROM SplittedXML
    CROSS APPLY Splitted.nodes('//v') x(v)
)
GO
-- Splits a comma-separated list and returns each element as an int in IntValue.
-- Every element must convert to int; an empty element becomes 0.
CREATE FUNCTION [dbo].[CsvToInt] ( @Array varchar(4000))
RETURNS @IntTable TABLE
    (IntValue int)
AS
BEGIN
    DECLARE @separator char(1)
    SET @separator = ','
    DECLARE @separator_position int
    DECLARE @array_value varchar(4000)

    -- A trailing separator lets the loop handle the last element like any other.
    SET @Array = @Array + @separator
    SET @separator_position = CHARINDEX(@separator, @Array)

    WHILE @separator_position <> 0
    BEGIN
        SET @array_value = LEFT(@Array, @separator_position - 1)

        INSERT @IntTable (IntValue)
        VALUES (CAST(@array_value AS int))

        -- Drop the consumed element and its separator.
        SET @Array = STUFF(@Array, 1, @separator_position, '')
        SET @separator_position = CHARINDEX(@separator, @Array)
    END

    RETURN
END
This works great for me https://www.sqlshack.com/the-string-split-function-in-sql-server/
After two hours of researching this topic, this is the simplest solution (without using XML etc.).
You should only remember to use string_split after from.
-- Demo: expand a comma-separated City list into one row per city and attach
-- the continent of each country.
DROP TABLE IF EXISTS #Countries
GO
DROP TABLE IF EXISTS #CityList
GO
CREATE TABLE #Countries
(
    Continent VARCHAR(100),
    Country   VARCHAR(100)
)
GO
CREATE TABLE #CityList
(
    Country VARCHAR(100),
    City    VARCHAR(5000)
)
GO
INSERT INTO #Countries (Continent, Country)
VALUES
    ('Europe', 'France'),
    ('Europe', 'Germany')

INSERT INTO #CityList (Country, City)
VALUES
    ('France', 'Paris,Marsilya,Lyon,Lille,Nice'),
    ('Germany', 'Berlin,Hamburg,Munih,Frankfurt,Koln')

-- STRING_SPLIT is applied per #CityList row and exposes each city as value.
SELECT
    CN.Continent,
    CN.Country,
    value
FROM #CityList AS CL
CROSS APPLY string_split(CL.City, ',')
INNER JOIN #Countries AS CN
    ON CL.Country = CN.Country

-- Clean up the demo tables.
DROP TABLE IF EXISTS #Countries
GO
DROP TABLE IF EXISTS #CityList
Create the following function in SQL Server; after that, the problem will be solved.
http://csharpdotnetsol.blogspot.in/2013/12/csv-function-in-sql-server-for-divide.html

SQL SERVER 2008 R2 string split

I want to split a column of strings say "99 crystal springs road" and get only 2 words (99 and crystal) respectively and update 99 to one column and crystal to another column of another table. How can I do it using charindex and substring?
Here is some sample code on how to do it ...
First, create this function:
-- Returns the first word (@pos = 1) or the second word (any other @pos) of a
-- space-separated string, trimmed of spaces.
-- A NULL or zero @pos is treated as 1. Returns '' when @fullString contains no space.
CREATE FUNCTION [dbo].[GetStringPart]
(@fullString varchar(200), @pos tinyint)
RETURNS VARCHAR(200) -- return_data_type.
AS
BEGIN
    IF @pos IS NULL OR @pos <= 0
        SET @pos = 1

    DECLARE @output varchar(200)
    DECLARE @firstSpace int, @secondSpace int

    SET @firstSpace = CHARINDEX(' ', @fullString)

    IF @firstSpace <= 0
        RETURN ''

    IF @pos = 1
    BEGIN
        SET @output = LTRIM(RTRIM(SUBSTRING(@fullString, 1, @firstSpace)))
    END
    ELSE
    BEGIN
        -- Despite its name, @secondSpace holds the number of characters to take
        -- from the first space up to and including the second one.
        SET @secondSpace = CHARINDEX(' ', @fullString, @firstSpace + 1) - @firstSpace + 1

        -- No second space: take everything after the first one.
        IF @secondSpace <= 0
            SET @secondSpace = LEN(@fullString) - @firstSpace + 1

        SET @output = LTRIM(RTRIM(SUBSTRING(@fullString, @firstSpace, @secondSpace)))
    END

    RETURN @output
END
GO
Then you can use it like this:
-- Demo: copy the first two words of each name into separate columns.
DECLARE @origTable TABLE (name varchar(100))
INSERT INTO @origTable (name) VALUES ('99 crystal springs road')

DECLARE @newTable TABLE (col1 varchar(100), col2 varchar(100))

INSERT INTO @newTable (col1, col2)
SELECT dbo.GetStringPart(name, 1), dbo.GetStringPart(name, 2)
FROM @origTable

SELECT col1, col2 FROM @newTable
Assuming that you are selecting "99" and "crystal" just because they're the first two words...
You can do this in a single step but for ease of reading the solution I've separated it out
-- Demo: extract the first two space-separated words of each address.
DECLARE @sourceAddresses TABLE
(
    address varchar(100)
)
-- Working table: each address with the positions of its first two spaces.
DECLARE @split TABLE
(
    address varchar(100),
    firstDelimiter int,
    secondDelimiter int
)
DECLARE @table TABLE
(
    part1 varchar(20),
    part2 varchar(20)
)

INSERT INTO @sourceAddresses (address) VALUES ('99 crystal springs road')
INSERT INTO @sourceAddresses (address) VALUES ('100 elsewhere road')

INSERT INTO @split (address, firstDelimiter)
SELECT address, CHARINDEX(' ', address)
FROM @sourceAddresses

-- CHARINDEX returns 0 (not -1) when nothing is found, so "found" means > 0.
UPDATE @split
SET secondDelimiter = CHARINDEX(' ', address, (firstDelimiter + 1))
WHERE firstDelimiter > 0

INSERT INTO @table (part1, part2)
SELECT
    LEFT(address, firstDelimiter - 1),
    SUBSTRING(address, firstDelimiter + 1,
              CASE WHEN secondDelimiter > 0
                   THEN secondDelimiter - firstDelimiter - 1   -- stop before the second space
                   ELSE LEN(address)                           -- only two words: take the rest
              END)
FROM @split
WHERE firstDelimiter > 0

SELECT part1, part2 FROM @table
Maybe something like this:
First create a function that gets part of the strings:
-- Returns the @pos-th (1-based) piece of @s when split on @sep, or NULL when
-- there is no such piece.
CREATE FUNCTION dbo.GetStringPart (@sep char(1), @s varchar(512), @pos int)
RETURNS VARCHAR(200)
AS
BEGIN
    DECLARE @output VARCHAR(200)

    -- Pieces: pn = piece number, start = first character of the piece,
    -- stop = position of the separator that ends it (0 for the last piece).
    ;WITH Pieces(pn, start, stop) AS (
        SELECT 1, 1, CHARINDEX(@sep, @s)
        UNION ALL
        SELECT pn + 1, stop + 1, CHARINDEX(@sep, @s, stop + 1)
        FROM Pieces
        WHERE stop > 0
    )
    SELECT
        @output = SUBSTRING(@s, start, CASE WHEN stop > 0 THEN stop - start ELSE 512 END)
    FROM Pieces
    WHERE pn = @pos

    RETURN @output
END
GO
Then you can easy do this:
-- Demo: copy the first two words of each name into separate columns, then
-- remove the helper function again.
DECLARE @origalTable TABLE (name VARCHAR(100))
INSERT INTO @origalTable (name)
VALUES ('99 crystal springs road')

DECLARE @newTable TABLE (col1 VARCHAR(100), col2 VARCHAR(100))

INSERT INTO @newTable (col1, col2)
SELECT dbo.GetStringPart(' ', name, 1), dbo.GetStringPart(' ', name, 2)
FROM @origalTable

SELECT col1, col2 FROM @newTable

DROP FUNCTION dbo.GetStringPart

SQL - Determine the most frequently occuring words within a column

Is there an easy way of determining the most frequently occuring word in a column/field using T-SQL or VBA?
I am working on a fuzzy matching system for two given recordsets and would like to produce a matching string where the most frequently occuring words are removed. As the data is from a customer relations management database terms like "limited", "ltd", "plc" and "CORPORATION" would be removed.
Written for sql-server 2005+
Function to split:
-- Splits @a on @delimiter and returns every non-empty piece in column substr.
create function f_split
(
    @a varchar(max),
    @delimiter varchar(20)
)
RETURNS @t TABLE(substr varchar(200))
as
begin
    -- True delimiter length. LEN ignores trailing spaces and reports 0 for ' ',
    -- which is the only reason the original "len + 1" step worked for spaces
    -- and skipped a character for every other delimiter.
    declare @dlen int = datalength(@delimiter)

    -- A trailing delimiter lets the last piece be handled like any other.
    set @a = @a + @delimiter

    -- f1 = start of the current piece, f2 = position of the delimiter ending it.
    ;with a as
    (
        select cast(1 as bigint) f1, charindex(@delimiter, @a) f2
        where len(@a) > 0
        union all
        select f2 + @dlen, charindex(@delimiter, @a, f2 + @dlen)
        from a
        where f2 > 0
    )
    insert @t
    select substring(@a, f1, f2 - f1) from a
    where f1 < f2
    -- Lift the default limit of 100 recursion levels for long texts.
    option (maxrecursion 0)

    return
end
go
Query:
--testdata
-- Demo: count how often each word occurs across all names, after mapping
-- known company suffixes to one common replacement.
declare @table table(name varchar(50))
insert @table values('bla bla bla ltd')
insert @table values('bla plc ltd')
insert @table values('more text CORPORATION')

-- Words to normalise and what to replace them with.
declare @matchlist table(name varchar(50), replacement varchar(50))
insert @matchlist values('ltd', 'limited')
insert @matchlist values('plc', 'limited')
insert @matchlist values('CORPORATION', 'limited')

--query
select
    coalesce(m.replacement, a.substr) as name,
    count(*) as [count]
from @table p
cross apply
(
    select substr
    from dbo.f_split(p.name, ' ')
) a
-- LEFT JOIN keeps words that have no replacement.
left join @matchlist m
    on a.substr = m.name
group by coalesce(m.replacement, a.substr)
order by count(*) desc
Result:
name count
---- -----
bla 4
limited 4
more 1
text 1
Hope this will be useful to you.
-- Source table: one free-text sentence per row.
CREATE TABLE sometable
(
    id      integer NOT NULL PRIMARY KEY IDENTITY,
    mYWords text    NOT NULL
);

INSERT INTO sometable (mYWords)
VALUES ('a word that appears maximum number of times in a column')

INSERT INTO sometable (mYWords)
VALUES ('Is it possible to get words from text columns in a sql server database')

INSERT INTO sometable (mYWords)
VALUES ('This could solve my problem if reffered column contain only single word')

INSERT INTO sometable (mYWords)
VALUES ('that''s going to require that you split out every word in the column individually')

INSERT INTO sometable (mYWords)
VALUES ('the query will definitely not be easy to write')

INSERT INTO sometable (mYWords)
VALUES ('Please read the sticky at the top of the board')

INSERT INTO sometable (mYWords)
VALUES ('The physical order of data in a database has no meaning')
GO
-- Target table: one row per distinct word, stamped with its insert time.
CREATE TABLE WordList
(
    Word   varchar(256),
    WordId int IDENTITY(1,1),
    Add_Dt datetime DEFAULT (GetDate())
)
GO
-- Enforces one row per word.
CREATE UNIQUE INDEX UnqueWords_PK ON WordList (Word)
GO
-- Walks every row of sometable.mYWords, splits it on spaces and inserts each
-- word that is not yet in WordList.
-- The return value is always 0, because the counters are reset after each row.
CREATE PROC isp_INS_WORD_LIST
AS
BEGIN
    SET NOCOUNT ON

    -- @x     : position of the next space (0 = none left)
    -- @Pos   : position from which the next space is searched
    -- @start : first character of the current word
    -- @end   : length of the current word
    DECLARE @Words INT, @Pos INT, @x INT, @str varchar(256)
          , @word varchar(256), @start int, @end int

    SELECT @Words = 0, @Pos = 1, @x = -1, @word = '', @start = 1

    DECLARE Cur1 CURSOR FOR SELECT mYWords FROM sometable
    OPEN Cur1
    FETCH NEXT FROM Cur1 INTO @str

    WHILE @@FETCH_STATUS = 0
    BEGIN
        WHILE (@x <> 0)
        BEGIN
            SET @x = CHARINDEX(' ', @str, @Pos)

            IF @x <> 0
            BEGIN
                -- Word ends just before the space that was found.
                SET @end = @x - @start
                SET @word = SUBSTRING(@str, @start, @end)

                IF NOT EXISTS (SELECT * FROM WordList WHERE Word = @word)
                    INSERT INTO WordList (Word) SELECT @word

                SET @Pos = @x + 1
                SET @start = @x + 1
                SET @Words = @Words + 1
            END
            ELSE
            BEGIN
                -- No space left: the last word runs from @start to the end.
                -- Using @start also covers rows that contain no space at all.
                SET @word = SUBSTRING(@str, @start, LEN(@str) - @start + 1)

                IF NOT EXISTS (SELECT * FROM WordList WHERE Word = @word)
                    INSERT INTO WordList (Word) SELECT @word
            END
        END

        FETCH NEXT FROM Cur1 INTO @str
        -- Reset the scan state for the next row.
        SELECT @Words = 0, @Pos = 1, @x = -1, @word = '', @start = 1
    END

    CLOSE Cur1
    DEALLOCATE Cur1
    SET NOCOUNT OFF
    RETURN @Words
END
GO
-- Populate WordList, show it alphabetically, then remove the demo objects.
EXEC isp_INS_WORD_LIST
GO
SELECT
    Word,
    WordId,
    Add_Dt
FROM WordList
ORDER BY Word
GO
DROP PROC isp_INS_WORD_LIST
DROP TABLE WordList, sometable
GO

In MS SQL how to split a column into rows with no delimiter

I have data in a table which looks like this (worth noting it's not CSV separated)
It needs to be split in to single chars
Data
abcde
want to convert it to this
Data
a
b
c
d
e
I have looked on the internet but have not found the answer
-- Splits @s into single characters.
-- Returns [order] (1-based position) and [letter], one row per character,
-- trailing spaces included. A NULL @s returns no rows.
CREATE FUNCTION dbo.SplitLetters
(
    @s NVARCHAR(MAX)
)
RETURNS @t TABLE
(
    [order] INT,
    [letter] NCHAR(1)
)
AS
BEGIN
    DECLARE @i INT;
    -- NVARCHAR stores 2 bytes per character; LEN would skip trailing spaces.
    DECLARE @len INT;

    SET @i = 1;
    SET @len = DATALENGTH(@s) / 2;

    WHILE @i <= @len
    BEGIN
        INSERT @t SELECT @i, SUBSTRING(@s, @i, 1);
        SET @i = @i + 1;
    END

    RETURN;
END
GO
SELECT [letter]
FROM dbo.SplitLetters(N'abcdefgh12345 6 7')
ORDER BY [order];
Previous post that solves the problem: TSQL UDF To Split String Every 8 Characters
Pass a value of 1 to #length.
-- Demo: split each Data value into one row per character with a recursive CTE.
declare @T table
(
    ID int identity,
    Data varchar(10)
)
insert into @T
select 'ABCDE' union
select '12345'

-- Each pass takes the first character of what is left (Rest); Pos records the
-- character's position so the final result can be ordered within each ID.
;with cte as
(
    select ID,
           1 as Pos,
           left(Data, 1) as Data,
           stuff(Data, 1, 1, '') as Rest
    from @T
    where len(Data) > 0
    union all
    select ID,
           Pos + 1,
           left(Rest, 1) as Data,
           stuff(Rest, 1, 1, '') as Rest
    from cte
    where len(Rest) > 0
)
select ID,
       Data
from cte
order by ID, Pos
You could join the table to a list of numbers, and use substring to split data column into rows:
-- Demo: split each data value into single characters by joining it to a
-- generated list of positions.
declare @YourTable table (data varchar(50))
insert @YourTable
select 'abcde'
union all select 'fghe'

-- nrs: every position from the longest string's length down to 1.
; with nrs as
(
    select max(len(data)) as i
    from @YourTable
    union all
    select i - 1
    from nrs
    where i > 1
)
select substring(yt.data, nrs.i, 1)
from nrs
join @YourTable yt
    -- <= so that the last character of each string is included as well
    on nrs.i <= len(yt.data)
order by yt.data, nrs.i
option (maxrecursion 0)
-- Demo: split @input into single characters by repeatedly taking the first
-- character and removing it from the string.
declare @input varchar(max);
set @input = 'abcde'

-- pos records insertion order so the characters can be returned in sequence.
declare @table TABLE (pos int identity(1,1), [char] varchar(1));

-- DATALENGTH counts trailing spaces too; LEN would stop early on them.
while (DATALENGTH(@input) > 0)
begin
    insert into @table ([char]) select substring(@input, 1, 1)
    select @input = STUFF(@input, 1, 1, '')
end

select [char] from @table order by pos

Resources