Improve running time in T-SQL code - sql-server

I would like to improve the running time of this T-SQL code.
The INSERT query is taking a long time. Is there a different method I could use?
Here is my code:
Create PROCEDURE [dbo].[CreateTableDemog]
    @Age int,
    @RetiredAge int
as
begin
    declare @t as int = 1;
    declare @tlast as int = @RetiredAge - @Age;
    declare @Period as int = 0;
    while @t <= @tlast
    begin
        while @Period <= @t - 1
        begin
            INSERT INTO dbo.Table (t, Age, Period, Prob)
            VALUES (@t, @Age + @t, @Period, 1);
            set @Period = @Period + 1;
        end
        set @Period = 0;
        set @t = @t + 1;
    end
end

Here is how you could do this using a tally table instead of the nested WHILE loops. This is a strange requirement, but easy enough to follow.
This article by Jeff Moden does a great job of explaining what a tally table is and how you can use it: http://www.sqlservercentral.com/articles/T-SQL/62867/
Create PROCEDURE [dbo].[CreateTableDemog]
(
    @Age int
    , @RetiredAge int
) as
set nocount on;
WITH
E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) dt(n)),
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
INSERT INTO dbo.Table
(
    t
    , Age
    , Period
    , Prob
)
select t1.N as T
    , @Age + t1.N as AgePlusT
    , abs(t2.N - t1.N) as Period
    , 1
from cteTally t1
join cteTally t2 on t2.N <= t1.N
where t1.N <= @RetiredAge - @Age
order by t1.N
    , @Age + t1.N
    , abs(t2.N - t1.N);
GO
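For reference, a quick smoke test of the procedure might look like this (parameter values are made up; note that Table is a reserved word, so the target table would need to be bracketed as dbo.[Table] in a real database):

-- Hypothetical call: generate rows for someone aged 25 retiring at 65.
EXEC dbo.CreateTableDemog @Age = 25, @RetiredAge = 65;

-- Inspect what was inserted.
SELECT t, Age, Period, Prob
FROM dbo.[Table]
ORDER BY t, Period;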


dbo.fn_split function cutting off next character after the split character/delimiter

I ran this query:
select * From dbo.fn_split('Age,15,14,193,188 ',',')
It returns the values but cuts off one character at the front of each value.
I tried adding a space after every comma, like
select * From dbo.fn_split('Age, 15, 14, 193, 188 ',',')
and it worked. But I want to know why it is not working with commas alone:
select * From dbo.fn_split('Age,15,14,193,188 ',',')
You're not posting the code of fn_split, which is where the problem resides. But if your strings are less than 8000 characters long, this function will help you split them in an optimal way. It is a modified version of Jeff Moden's splitter, made by Eirikur Eiriksson.
CREATE FUNCTION [dbo].[DelimitedSplit8K_LEAD]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 0 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "zero base" and limits the number of rows right up front
                -- for both a performance gain and prevention of accidental "overruns"
    SELECT 0 UNION ALL
    SELECT TOP (DATALENGTH(ISNULL(@pString,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    SELECT t.N+1
    FROM cteTally t
    WHERE (SUBSTRING(@pString,t.N,1) = @pDelimiter OR t.N = 0)
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1),
       Item = SUBSTRING(@pString,s.N1,ISNULL(NULLIF((LEAD(s.N1,1,1) OVER (ORDER BY s.N1) - 1),0)-s.N1,8000))
FROM cteStart s
;
GO
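A quick sanity check of the splitter against the problem string from the question (my test call, not part of the original answer):

SELECT ItemNumber, Item
FROM dbo.DelimitedSplit8K_LEAD('Age,15,14,193,188', ',');
-- Expected: five rows containing 'Age', '15', '14', '193', '188'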
ALTER FUNCTION [dbo].[fn_Split](@text varchar(8000), @delimiter varchar(20) = ',')
RETURNS @Strings TABLE
(
    position int IDENTITY PRIMARY KEY,
    value varchar(8000)
)
AS
BEGIN
    DECLARE @index int
    SET @index = -1
    WHILE (LEN(@text) > 0)
    BEGIN
        SET @index = CHARINDEX(@delimiter, @text)
        IF (@index = 0) AND (LEN(@text) > 0)
        BEGIN
            INSERT INTO @Strings VALUES (@text)
            BREAK
        END
        IF (@index > 1)
        BEGIN
            INSERT INTO @Strings VALUES (LEFT(@text, @index - 1))
            SET @text = RIGHT(@text, (LEN(@text) - @index))
        END
        ELSE
            SET @text = RIGHT(@text, (LEN(@text) - @index))
    END
    RETURN
END
I figured out what I was doing wrong. Basically, I was adding an extra space at the end of the string that I want to split. If you change
Select * From dbo.fn_split('Age,15,14,193,188 ',',') to Select * From dbo.fn_split('Age,15,14,193,188',','). In other words, get rid of the extra space after the number 188 and you will get the desired result.
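If you cannot guarantee clean input, trimming at the call site is a simple defensive option (just a sketch, assuming the fn_Split shown above):

-- RTRIM removes the trailing space so it cannot become part of the last element.
SELECT * FROM dbo.fn_split(RTRIM('Age,15,14,193,188 '), ',');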

Convert strings to integers using PatIndex

I want to return integers from rather complex strings which combine unicode characters such as - and . with letters and digits.
I've come a long way in achieving this, but I still have trouble with some strings of a more complex structure. For instance:
DECLARE @Tabl as table
(
    dats nvarchar(15)
)
INSERT INTO @Tabl VALUES
('103-P705hh'),
('115-xxx-44'),
('103-705.13'),
('525-hheef4')
select LEFT(SUBSTRING(REPLACE(REPLACE(dats, '.',''),'-',''), PATINDEX('%[0-9.-]%', REPLACE(REPLACE(dats, '.',''),'-','')), 8000),
       PATINDEX('%[^0-9.-]%', SUBSTRING(REPLACE(REPLACE(dats, '.',''),'-',''), PATINDEX('%[0-9.-]%', REPLACE(REPLACE(dats, '.',''),'-','')), 8000) + 'X')-1)
from @Tabl
Gives:
Raw Input    Actual return   Desired return
103-P705hh   103             103705
115-xxx-44   115             11544
103-705.13   10370513        10370513
525-hheef4   525             5254
I posted a question about this yesterday to cover the case when multiple - or . are present, and as seen in the return, that is actually taken care of now. However, expanding the databases I work with, I have encountered much more complex strings, such as those presented here.
Does anyone have any idea what to do when characters and integers are "mixed up" in the string?
Regards,
Cenderze
I have seen loads of solutions that use a scalar UDF with a loop, but I don't like either of those things, so I'm throwing my hat into the ring with a different approach.
With the help of a numbers table you can deconstruct each value into its individual characters, remove non-numeric characters, then reconstruct it using FOR XML to concatenate rows, e.g.
WITH Numbers (Number) AS
(   SELECT ROW_NUMBER() OVER(ORDER BY N1.N)
    FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N1 (N) -- 10
    CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N2 (N) -- 100
    CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N3 (N) -- 1,000
    --CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N4 (N) -- 10,000
    --CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N5 (N) -- 100,000
    --COMMENT OR UNCOMMENT ROWS AS NECESSARY DEPENDING ON YOUR MAX STRING LENGTH
)
SELECT t.dats,
       Stripped = x.data.value('.', 'INT')
FROM @Tabl AS t
CROSS APPLY
(   SELECT SUBSTRING(t.dats, n.Number, 1)
    FROM Numbers n
    WHERE n.Number <= LEN(t.dats)
    AND SUBSTRING(t.dats, n.Number, 1) LIKE '[0-9]'
    ORDER BY n.Number
    FOR XML PATH(''), TYPE
) x (data);
Gives:
dats Stripped
----------------------
103-P705hh 103705
115-xxx-44 11544
103-705.13 10370513
525-hheef4 5254
I haven't done any testing, so it could be that the overhead of expanding each string into individual characters and reconstructing it is actually a lot more than that of a UDF with a loop.
I decided to benchmark this.
1. Set up functions
CREATE FUNCTION dbo.ExtractNumeric_TVF (@Input VARCHAR(8000))
RETURNS TABLE
AS
RETURN
(   WITH Numbers (Number) AS
    (   SELECT TOP (LEN(@Input)) ROW_NUMBER() OVER(ORDER BY N1.N)
        FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N1 (N) -- 10
        CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N2 (N) -- 100
        CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N3 (N) -- 1,000
        CROSS JOIN (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS N4 (N) -- 10,000
    )
    SELECT Stripped = x.data.value('.', 'VARCHAR(MAX)')
    FROM (  SELECT SUBSTRING(@Input, n.Number, 1)
            FROM Numbers n
            WHERE n.Number <= LEN(@Input)
            AND SUBSTRING(@Input, n.Number, 1) LIKE '[0-9]'
            ORDER BY n.Number
            FOR XML PATH(''), TYPE
         ) x (data)
);
GO
create function dbo.ExtractNumeric_UDF(@s varchar(8000))
returns varchar(8000)
as
begin
    declare @out varchar(max) = ''
    declare @c char(1)
    while len(@s) > 0 begin
        set @c = left(@s,1)
        if @c like '[0123456789]' set @out += @c
        set @s = substring(@s, 2, len(@s) - 1)
    end
    return @out
end
GO
2. Create first set of sample data and log table
CREATE TABLE dbo.T (Value VARCHAR(8000) NOT NULL);
INSERT dbo.T (Value)
SELECT TOP 1000 LEFT(NEWID(), CEILING(RAND(CHECKSUM(NEWID())) * 36))
FROM sys.all_objects a
CROSS JOIN sys.all_objects b;
CREATE TABLE dbo.TestLog (Fx VARCHAR(255), NumberOfRows INT, TimeStart DATETIME2(7), TimeEnd DATETIME2(7))
3. Run Tests
GO
DECLARE @T TABLE (Val VARCHAR(8000));
INSERT dbo.TestLog (fx, NumberOfRows, TimeStart)
VALUES ('dbo.ExtractNumeric_UDF', 1000, SYSDATETIME());
INSERT @T (Val)
SELECT dbo.ExtractNumeric_UDF(Value)
FROM dbo.T;
UPDATE dbo.TestLog
SET TimeEnd = SYSDATETIME()
WHERE TimeEnd IS NULL;
GO 100
DECLARE @T TABLE (Val VARCHAR(8000));
INSERT dbo.TestLog (fx, NumberOfRows, TimeStart)
VALUES ('dbo.ExtractNumeric_TVF', 1000, SYSDATETIME());
INSERT @T (Val)
SELECT f.Stripped
FROM dbo.T
CROSS APPLY dbo.ExtractNumeric_TVF(Value) f;
UPDATE dbo.TestLog
SET TimeEnd = SYSDATETIME()
WHERE TimeEnd IS NULL;
GO 100
4. Get Results
SELECT Fx,
NumberOfRows,
RunTime = AVG(DATEDIFF(MILLISECOND, TimeStart, TimeEnd))
FROM dbo.TestLog
GROUP BY fx, NumberOfRows;
I ran the following (using just NEWID(), so a maximum of only 36 characters) over 1,000 and 10,000 rows; the results were:
Fx NumberOfRows RunTime
--------------------------------------------------------
dbo.ExtractNumeric_TVF 1000 31
dbo.ExtractNumeric_UDF 1000 56
dbo.ExtractNumeric_TVF 10000 280
dbo.ExtractNumeric_UDF 10000 510
So the TVF comes in at just under half the time of the UDF.
I wanted to test edge cases, so I put in 1,000 rows of longer strings (5,400 characters):
TRUNCATE TABLE dbo.T;
INSERT dbo.T (Value)
SELECT TOP 1000
REPLICATE(CONCAT(NEWID(), NEWID(), NEWID(), NEWID(), NEWID()), 30)
FROM sys.all_objects a
CROSS JOIN sys.all_objects b;
And this is where the TVF came into its own, running over 5x faster:
Fx NumberOfRows RunTime
------------------------------------------------
dbo.ExtractNumeric_TVF 1000 2485
dbo.ExtractNumeric_UDF 1000 12955
I also really don't like the looping solutions, so I decided to try my hand at an alternative. This one uses a predefined tally table and is quite similar to others posted here already.
This is my tally table. I keep this as a view on my system.
create View [dbo].[cteTally] as
WITH
E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))dt(n)),
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
select N from cteTally
GO
Because I don't like looping, I decided to use the table-valued function approach, which lets me reuse this functionality in other queries with little to no effort. Here is one way to write such a function.
create function GetOnlyNumbers
(
    @SearchVal varchar(8000)
) returns table as return
with MyValues as
(
    select substring(@SearchVal, N, 1) as number
        , t.N
    from cteTally t
    where N <= len(@SearchVal)
        and substring(@SearchVal, N, 1) like '[0-9]'
)
select distinct NumValue = STUFF((select number + ''
                                  from MyValues mv2
                                  order by mv2.N
                                  for xml path('')), 1, 0, '')
from MyValues mv
That looks good but the proof is in the pudding. Let's take this out with our sample data and kick the tires a few times.
DECLARE @Tabl as table
(
    dats nvarchar(15)
)
INSERT INTO @Tabl VALUES
('103-P705hh'),
('115-xxx-44'),
('103-705.13'),
('525-hheef4')
select *
from @Tabl t
cross apply dbo.GetOnlyNumbers(t.dats) x
Sure looks nice and tidy. I tested against several of the other solutions posted here, and without going into deep testing, this appears to be significantly faster than the other approaches posted so far.
DECLARE @Tabl as table
(
    ID INT,
    dats nvarchar(15)
)
INSERT INTO @Tabl VALUES
(1, '103-P705hh'),
(2, '115-xxx-44'),
(3, '103-705.13'),
(4, '525-hheef4')
SELECT T.ID, t.dats
    ,(
        SELECT SUBSTRING(tt.dats, V.number, 1)
        FROM @Tabl tt
        JOIN master.dbo.spt_values V ON V.type = 'P' AND V.number BETWEEN 1 AND LEN(tt.dats)
        WHERE tt.ID = T.ID AND SUBSTRING(tt.dats, V.number, 1) LIKE '[0-9]'
        ORDER BY V.number
        FOR XML PATH('')
    ) S
FROM @Tabl t
ORDER BY T.ID;
Can you use a UDF? If so, try this:
create function numerals(@s varchar(max))
returns varchar(max)
as
begin
    declare @out varchar(max) = ''
    declare @c char(1)
    while len(@s) > 0 begin
        set @c = left(@s,1)
        if @c like '[0123456789]' set @out += @c
        set @s = substring(@s, 2, len(@s) - 1)
    end
    return @out
end
To use it on your table variable:
select dbo.numerals(dats) from @Tabl
Another solution, which does not use a UDF but works only if your table has a primary key, uses a recursive CTE:
DECLARE @Tabl as table
(   pk int identity not null, -- <=== added a primary key
    dats nvarchar(max) )
INSERT INTO @Tabl VALUES
('103-P705hh'),
('115-xxx-44'),
('103-705.13'),
('525-hheef4');
with newVals(pk, pos, newD) as
    (select pk, 1,
        case when left(Dats,1) like '[0123456789]'
             then left(Dats,1) else '' end
     from @Tabl
     Union All
     Select t.pk, pos + 1, n.newD +
        case when substring(dats, pos+1, 1) like '[0123456789]'
             then substring(dats, pos+1, 1) else '' end
     from @Tabl t join newVals n on n.pk = t.pk
     where pos + 1 <= len(dats) )
Select newD from newVals x
where pos = (Select Max(pos)
             from newVals
             where pk = x.pk)
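One caveat worth adding (my note, not part of the original answer): the recursion depth here equals the longest string's length, and SQL Server's default recursion limit is 100 levels, so longer values need a MAXRECURSION hint on the outer statement:

-- Same final SELECT as above, but allowing strings longer than 100 characters
-- (0 = unlimited recursion).
Select newD from newVals x
where pos = (Select Max(pos)
             from newVals
             where pk = x.pk)
option (maxrecursion 0);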

SQL UDF Optimization

I have written the following function that takes two comma-separated strings, splits them into two different table variables, and then uses those to find what percentage of words match between the two. The problem is that when I use it per row on a data set of about 200k rows, the query times out!
Are there any optimizations you can see that could be made?
ALTER FUNCTION [GetWordSimilarity](@String varchar(8000),
    @String2 varchar(8000), @Delimiter char(1))
returns decimal(16,2)
as
begin
    declare @result as decimal(16,2)
    declare @temptable table (items varchar(8000))
    declare @temptable2 table (items varchar(8000))
    declare @numberOfCommonWords decimal(16,2)
    declare @countTable1 decimal(16,2)
    declare @countTable2 decimal(16,2)
    declare @denominator decimal(16,2)
    set @result = 0.0 --dummy value
    declare @idx int
    declare @slice varchar(8000)
    select @idx = 1
    if len(@String) < 1 or @String is null or len(@String2) = 0 or @String2 is null return 0.0
    --populating @temptable
    while @idx != 0
    begin
        set @idx = charindex(@Delimiter, @String)
        if @idx != 0
            set @slice = left(@String, @idx - 1)
        else
            set @slice = @String
        if (len(@slice) > 0)
            insert into @temptable(Items) values (ltrim(rtrim(@slice)))
        set @String = right(@String, len(@String) - @idx)
        if len(@String) = 0 break
    end
    select @idx = 1
    ----populating @temptable2
    while @idx != 0
    begin
        set @idx = charindex(@Delimiter, @String2)
        if @idx != 0
            set @slice = left(@String2, @idx - 1)
        else
            set @slice = @String2
        if (len(@slice) > 0)
            insert into @temptable2(Items) values (ltrim(rtrim(@slice)))
        set @String2 = right(@String2, len(@String2) - @idx)
        if len(@String2) = 0 break
    end
    --calculating percentage of words matched
    if (((select COUNT(*) from @temptable) = 0) or ((select COUNT(*) from @temptable2) = 0))
        return 0.0
    select @numberOfCommonWords = COUNT(*) from
    (
        select distinct items from @temptable
        intersect
        select distinct items from @temptable2
    ) a
    select @countTable1 = COUNT(*) from @temptable
    select @countTable2 = COUNT(*) from @temptable2
    if (@countTable1 > @countTable2) set @denominator = @countTable1
    else set @denominator = @countTable2
    set @result = @numberOfCommonWords / @denominator
    return @result
end
Thanks a bunch!
There is no way to write a T-SQL UDF with heavy string manipulation inside that will behave well on a large number of rows. You will get some gain if you use a Numbers table, though:
declare
    @col_list varchar(1000),
    @sep char(1)
set @col_list = 'TransactionID, ProductID, ReferenceOrderID, ReferenceOrderLineID, TransactionDate, TransactionType, Quantity, ActualCost, ModifiedDate'
set @sep = ','
select substring(@col_list, n, charindex(@sep, @col_list + @sep, n) - n)
from numbers where substring(@sep + @col_list, n, 1) = @sep
and n < len(@col_list) + 1
Your best course of action would be to write the whole thing in SQLCLR.
The problem, of course, is with the design. You shouldn't be storing comma-separated data in a SQL database in the first place.
But I guess we're stuck with it for now.
One thing to consider is converting the function to SQLCLR; SQL by itself is not very good at string operations. (Well, in fact, no language is good at string operations IMHO, but SQL is really bad at it =)
The splitter you use to fill @temptable1 and @temptable2 can be optimized by using the code from Jeff Moden, who wrote several fantastic articles, the most recent of which can be found here: http://www.sqlservercentral.com/articles/Tally+Table/72993/
Taking his splitter and optimizing the rest of the code a bit, I managed to get from 771 seconds down to 305 seconds on a 200K-row random data sample.
Some things to note: the results aren't quite the same. I checked some manually, and I actually think the new results are more accurate, but I don't really have time to go bug hunting on both versions.
I tried to convert this to a more set-based approach where I first load all the words for all row_ids into one table and then join them back together. Although the joining is quite fast, it simply takes too long to create the initial tables, so it even loses out to the original function.
Maybe I'll try to figure out another way to make it faster, but for now I hope this helps you out a bit.
ALTER FUNCTION [GetWordSimilarity2](@String1 varchar(8000),
    @String2 varchar(8000), @Delimiter char(1))
returns decimal(16,2)
as
begin
    declare @temptable1 table (items varchar(8000), row_id int IDENTITY(1, 1), PRIMARY KEY (items, row_id))
    declare @temptable2 table (items varchar(8000), row_id int IDENTITY(1, 1), PRIMARY KEY (items, row_id))
    declare @numberOfCommonWords decimal(16,2)
    declare @countTable1 decimal(16,2)
    declare @countTable2 decimal(16,2)
    -- based on code from Jeff Moden (http://www.sqlservercentral.com/articles/Tally+Table/72993/)
    --populating @temptable1
    ;WITH E1(N) AS (
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
    ), --10E+1 or 10 rows
    E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
    E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
    cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
                    -- for both a performance gain and prevention of accidental "overruns"
        SELECT TOP (ISNULL(DATALENGTH(@String1),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
    ),
    cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
        SELECT 1 UNION ALL
        SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@String1,t.N,1) = @Delimiter
    ),
    cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
        SELECT s.N1,
            ISNULL(NULLIF(CHARINDEX(@Delimiter,@String1,s.N1),0)-s.N1,8000)
        FROM cteStart s
    )
    --===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
    INSERT @temptable1 (items)
    SELECT Item = SUBSTRING(@String1, l.N1, l.L1)
    FROM cteLen l
    SELECT @countTable1 = @@ROWCOUNT
    ----populating @temptable2
    ;WITH E1(N) AS (
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
    ), --10E+1 or 10 rows
    E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
    E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
    cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
                    -- for both a performance gain and prevention of accidental "overruns"
        SELECT TOP (ISNULL(DATALENGTH(@String2),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
    ),
    cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
        SELECT 1 UNION ALL
        SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@String2,t.N,1) = @Delimiter
    ),
    cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
        SELECT s.N1,
            ISNULL(NULLIF(CHARINDEX(@Delimiter,@String2,s.N1),0)-s.N1,8000)
        FROM cteStart s
    )
    --===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
    INSERT @temptable2 (items)
    SELECT Item = SUBSTRING(@String2, l.N1, l.L1)
    FROM cteLen l
    SELECT @countTable2 = @@ROWCOUNT
    --calculating percentage of words matched
    if @countTable1 = 0 OR @countTable2 = 0
        return 0.0
    select @numberOfCommonWords = COUNT(DISTINCT t1.items)
    from @temptable1 t1
    JOIN @temptable2 t2
        ON t1.items = t2.items
    RETURN @numberOfCommonWords / (CASE WHEN (@countTable1 > @countTable2) THEN @countTable1 ELSE @countTable2 END)
end
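For illustration, a call with made-up word lists (my example, not from the original post): two of the words are shared and the longer list has four entries, so this should return 0.50.

SELECT dbo.GetWordSimilarity2('one,two,three', 'two,three,four,five', ',');
-- 2 common words / 4 words in the longer list = 0.50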

What's with this SQL 'With' clause?

The SQL function below can be used to generate numbers between the low and high values.
I sort of understand how it works, but I can't wrap my head around a couple of things.
First, the WITH clause. This doesn't seem to follow the CTE construct. It's not a CTE, right? What is this syntax?
L0 through L5 and Nums are all tables, but I don't understand the 'L0 AS' syntax. You can't declare a table that way, right? Can I do that outside of a function?
ALTER FUNCTION [dbo].[GetNums](@low AS BIGINT, @high AS BIGINT) RETURNS TABLE
AS
RETURN
    WITH
    L0 AS (SELECT c FROM (SELECT 1 UNION ALL SELECT 1) AS D(c)),
    L1 AS (SELECT 1 AS c FROM L0 AS A CROSS JOIN L0 AS B),
    L2 AS (SELECT 1 AS c FROM L1 AS A CROSS JOIN L1 AS B),
    L3 AS (SELECT 1 AS c FROM L2 AS A CROSS JOIN L2 AS B),
    L4 AS (SELECT 1 AS c FROM L3 AS A CROSS JOIN L3 AS B),
    L5 AS (SELECT 1 AS c FROM L4 AS A CROSS JOIN L4 AS B),
    Nums AS (SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS rownum FROM L5)
    SELECT TOP(@high - @low + 1) @low + rownum - 1 AS n FROM Nums ORDER BY rownum;
Read about CTEs. They are defined with a WITH statement, and their definitions are comma-separated.
TechNet
WITH expression_name [ ( column_name [,...n] ) ]
AS
( CTE_query_definition )
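Here is a minimal standalone example of the same pattern, several comma-separated CTE definitions where later ones reference earlier ones, runnable outside any function:

WITH
A(n) AS (SELECT 1 UNION ALL SELECT 2), -- first CTE: two rows
B(n) AS (SELECT n * 10 FROM A)         -- second CTE builds on the first
SELECT n FROM B; -- returns 10 and 20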
You might consider a whole different approach too:
DECLARE @i integer = 0
DECLARE @l integer = 0
DECLARE @h integer = 100
DECLARE @output integer
DECLARE @mytable table (m_output int)
WHILE (@i >= @l AND @i < @h)
BEGIN
    SET @output = @i
    INSERT INTO @mytable(m_output) VALUES(@output);
    SET @i = @i + 1;
END
SELECT * FROM @mytable;
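For completeness, the original inline table-valued function is simply queried like any table:

SELECT n FROM dbo.GetNums(5, 10); -- returns 5, 6, 7, 8, 9, 10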

Faster approach to enter rows in a time table in MS SQL

I need to fill a time table to use it for joining data in Reporting Services.
Generally I do this with the following code:
TRUNCATE TABLE tqTimeTable
DECLARE @CNT int
DECLARE @DATE datetime
DECLARE @END int
SET @CNT = 1
SET @DATE = 25567 -- 01.01.1970
SET @END = 20000 -- + 20k days => year 2024
WHILE (@CNT < @END)
BEGIN
    INSERT INTO tqTimeTable (Tag, Monat, Jahr)
    VALUES (DATEADD(day, @CNT, @DATE), MONTH(DATEADD(day, @CNT, @DATE)), YEAR(DATEADD(day, @CNT, @DATE)))
    SET @CNT = @CNT + 1
END;
But this takes a while (around 2 minutes on my test system), so I am hoping someone has had the same issue and solved it better than I did.
As I fire this statement from a .NET connection, I need a faster solution, or failing that, I'll have to raise the timeout on my connection.
Simply adding
BEGIN TRAN
WHILE (@CNT < @END)
BEGIN
    INSERT INTO tqTimeTable (Tag, Monat, Jahr)
    VALUES (DATEADD(day, @CNT, @DATE), MONTH(DATEADD(day, @CNT, @DATE)), YEAR(DATEADD(day, @CNT, @DATE)))
    SET @CNT = @CNT + 1
END;
COMMIT
will speed it up, as you are doing many individual commits (each of which requires the log to be written to disk). I would do a set-based insert in a single statement, though:
DECLARE @DATE datetime = 25567 -- 01.01.1970, as in the original loop
;WITH E1(N) AS
(
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
) -- 1*10^1 or 10 rows
, E2(N) AS (SELECT 1 FROM E1 a, E1 b) -- 1*10^2 or 100 rows
, E4(N) AS (SELECT 1 FROM E2 a, E2 b) -- 1*10^4 or 10,000 rows
, E8(N) AS (SELECT 1 FROM E4 a, E4 b) -- 1*10^8 or 100,000,000 rows
, NUMS(N) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) FROM E8)
INSERT INTO tqTimeTable
    (Tag,
     Monat,
     Jahr)
SELECT DATEADD(day, N, @DATE),
       MONTH(DATEADD(day, N, @DATE)),
       YEAR(DATEADD(day, N, @DATE))
FROM NUMS
WHERE N <= 20000
