SQL Server INSERT statement with loop needs optimizing - sql-server

I have a SQL statement running on SQL Server 2014.
The table has 3 columns: id, text1, text2, and I am inserting records into text1 and text2.
text1 is nvarchar and text2 is varchar.
So far it has inserted 1.2 million rows in about 3.5 hours. I need to insert 3 million rows, so I need help reducing the insert time.
CODE:
DECLARE @i as int
SET @i = 0
WHILE @i < 3000000
BEGIN
    SET @i = @i + 1
    insert into test (text1 , text2)
    values(N'你好','VJ'+ cast(@i as varchar(20)))
END

Here is another way to do it. It finishes pretty fast, less than 5 seconds on my SQL Server:
if object_id('tempdb..#Numbers') is not null drop table #Numbers
create table #Numbers (Num int)
insert into #Numbers (Num)
SELECT TOP (3000000) n = CONVERT(INT, ROW_NUMBER() OVER (ORDER BY s1.[object_id]))
FROM sys.all_objects AS s1 CROSS JOIN sys.all_objects AS s2
OPTION (MAXDOP 1);
if object_id('tempdb..#test') is not null drop table #test
create table #test (text1 nvarchar(50), text2 nvarchar(50))
insert into #test (text1, text2)
select N'你好' [text1], 'VJ' + cast(Num as nvarchar) [text2] from #Numbers

You can try it like this:
WITH CTE_TEST
AS
(
SELECT N'你好' AS CODE,'VJ'+ cast(1 as varchar(20)) NAME, 1 AS VCOUNT
UNION ALL
SELECT N'你好' ,'VJ'+ cast(VCOUNT+1 as varchar(20)) NAME, VCOUNT+1 AS VCOUNT
FROM CTE_TEST
WHERE VCOUNT+1 < 3000000
)
INSERT INTO test (text1 , text2)
SELECT CODE,NAME FROM CTE_TEST
OPTION (MAXRECURSION 0)
You can put your additional logic inside the CTE and simply insert the result set into the actual table. Here the INSERT statement is outside the loop (one INSERT statement with 3,000,000 records), so it will be faster than inserting the records inside a loop 3,000,000 times (3,000,000 INSERT statements with one record each).
By default MAXRECURSION is 100 to avoid infinite looping; here you need to override it (which is not generally good practice).

An alternative that does not use a recursive CTE is to use a known table with enough records, so that you can iterate against it:
-- generation parameters
declare @batchCount INT = 100000
declare @totalCount INT = 3000000
declare @loopCount INT = @totalCount / @batchCount
DECLARE @i as int = 0
-- loops are slow, but here we have only a few
WHILE (@i < @loopCount)
BEGIN
    -- insert can be put just here to actually perform the insert
    -- ROW_NUMBER gives us the numbering, but order does not matter, so using SELECT 1
    select TOP (@batchCount) N'你好','VJ'+ cast(@i * @batchCount + ROW_NUMBER() OVER (ORDER BY (SELECT 1)) as varchar(20))
    from sys.messages
    SET @i = @i + 1
END
sys.messages is a pretty large table (at least 200K records), so it can safely be used for batches of 100K.
Time using recursive CTE: 51s
Time using above solution: 28s
(tested on a SQL Server 2014 Express instance, SELECT only)
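For completeness, here is a sketch of the same loop with the INSERT actually performed against the question's table (assuming the table is named test with columns text1/text2, as in the question):
-- minimal sketch, assuming test (text1 nvarchar, text2 varchar) from the question
DECLARE @batchCount INT = 100000;
DECLARE @totalCount INT = 3000000;
DECLARE @loopCount INT = @totalCount / @batchCount;
DECLARE @i INT = 0;
WHILE (@i < @loopCount)
BEGIN
    INSERT INTO test (text1, text2)
    SELECT TOP (@batchCount)
           N'你好',
           'VJ' + CAST(@i * @batchCount + ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS varchar(20))
    FROM sys.messages;
    SET @i = @i + 1;
END
A side benefit of batching is that each INSERT is its own 100,000-row transaction, so the log never has to hold a single 3,000,000-row insert at once.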

Related

Substring is slow with while loop in SQL Server

One of my table columns stores ~650,000 characters (each value of the column contains an entire table). I know it's bad design; however, the client will not be able to change it.
I am tasked with converting the column into multiple columns.
I chose to use the dbo.DelimitedSplit8K function.
Unfortunately, it can only handle 8K characters at most.
So I decided to split the column into 81 batches of 8K characters using a WHILE loop and store the results in a table variable (a temp or normal table made no improvement):
DECLARE @tab1 table ( serialnumber int, etext nvarchar(1000))
declare @scriptquan int = (select MAX(len (errortext)/8000) from mytable)
DECLARE @Counter INT
DECLARE @A bigint = 1
DECLARE @B bigint = 8000
SET @Counter = 1
WHILE ( @Counter <= @scriptquan + 1)
BEGIN
    insert into @tab1 select ItemNumber, Item from dbo.mytable cross apply dbo.DelimitedSplit8K(substring(errortext, @A, @B), CHAR(13)+CHAR(10))
    SET @A = @A + 8000
    SET @B = @B + 8000
    SET @Counter = @Counter + 1
END
This is followed by the code below:
declare @tab2 table (Item nvarchar(max),itemnumber int, Colseq varchar(10)) -- declare table variable
;with cte as (
select [etext] ,ItemNumber, Item from @tab1 -- insert table name
cross apply dbo.DelimitedSplit8K(etext,' ')) -- insert table columns name that contains text
insert into @tab2 Select Item,itemnumber, 'a'+ cast (ItemNumber as varchar) colseq
from cte -- insert values to table variable
;WITH Tbl(item, colseq) AS(
select item, colseq from @tab2
),
CteRn AS(
SELECT item, colseq,
Rn = ROW_NUMBER() OVER(PARTITION BY colseq ORDER BY colseq)
FROM Tbl
)
SELECT
a1 Time,a2 Number,a3 Type,a4 Remarks
FROM CteRn r
PIVOT(
MAX(item)
FOR colseq IN(a1,a2,a3,a4)
)p
where a3 = 'error'
This gives the desired output. However, just the loop takes 15 minutes to complete, and the overall query completes in 27 minutes. Is there any way I can make it faster? The total row count in my table is 2, so I don't think an index can help.
The client uses Azure SQL Database, so I can't use PowerShell or Python to accomplish this either.
Please let me know if more information is needed. I tried my best to mention everything I could.
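One set-based idea (not from the original thread, so treat it as a sketch): generate the 8,000-character offsets with a tally query instead of the WHILE loop, so the whole chunking happens in a single statement. It assumes the dbo.mytable/errortext and dbo.DelimitedSplit8K names from the question.
-- minimal sketch, assuming dbo.mytable(errortext) and dbo.DelimitedSplit8K as in the question
DECLARE @tab1 table (serialnumber int, etext nvarchar(1000)); -- same table variable as in the question
DECLARE @chunks int = (SELECT MAX(LEN(errortext)) / 8000 + 1 FROM dbo.mytable);
;WITH Offsets AS
(
    -- one row per 8,000-character chunk of the longest value
    SELECT TOP (@chunks)
           (ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1) * 8000 + 1 AS StartPos
    FROM sys.all_objects
)
INSERT INTO @tab1 (serialnumber, etext)
SELECT s.ItemNumber, s.Item
FROM dbo.mytable t
CROSS JOIN Offsets o
CROSS APPLY dbo.DelimitedSplit8K(SUBSTRING(t.errortext, o.StartPos, 8000), CHAR(13)+CHAR(10)) AS s;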

How to insert multiple rows into a table based on a range of numbers

I have to insert a specific number of rows into a SQL Server table.
DECLARE @val AS INT = 20,
@val2 AS VARCHAR(50),
@Date AS DATETIME = CONVERT(DATETIME,'02-05-2016'),
@i AS INT = 0
SET @val2 = 'abc'
DECLARE @tbl TABLE
(
[ID] [int] IDENTITY(1,1) NOT NULL,
[val2] VARCHAR(50) NULL,
[datum] [datetime] NULL
)
--INSERT INTO @tbl
SELECT @val2, DATEADD(DAY, @i, @Date)
UNION ALL
SELECT @val2, DATEADD(DAY, @i, @Date)
In this query, I have to insert dates starting from a given date, up to the number of rows given by the variable '@val'. So in this case, 20 rows need to be inserted into the table, starting from '02-05-2016' and increasing the date by 1 day for each row.
How can I do it in a single statement without any looping or multiple insert statements?
You can use a numbers table if you have one, use master.dbo.spt_values if you want one that has values up to 2047, or create one of your own. In this case, you could use master.dbo.spt_values:
DECLARE @val AS INT=20, @val2 AS VARCHAR(50);
DECLARE @Date AS DATETIME = CONVERT(DATETIME,'02-05-2016');
SET @val2 = 'abc'
INSERT INTO dbo.YourTable
SELECT @val2, DATEADD(DAY,number,@Date)
FROM master.dbo.spt_values
WHERE type = 'P'
AND number <= @val;
Though since this starts at zero, you'll get 21 rows as a result
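If exactly @val rows are needed, a small tweak (not part of the original answer) is to make the upper bound exclusive, reusing the declarations above:
-- 'number' then runs 0..19, giving exactly @val = 20 rows starting at @Date
INSERT INTO dbo.YourTable
SELECT @val2, DATEADD(DAY, number, @Date)
FROM master.dbo.spt_values
WHERE type = 'P'
  AND number < @val;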
Besides the detailed answer I pointed to in my comment, this is the idea in short:
DECLARE @start INT=0;
DECLARE @end INT=19; --0 to 19 are 20 days
DECLARE @StartDate DATE={d'2016-01-01'};
--Create a list of up to 1,000,000,000 rows on the fly
--This is limited by the start and end parameters
;WITH x AS(SELECT 1 AS N FROM(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS tbl(N))--10^1
,N3 AS (SELECT 1 AS N FROM x CROSS JOIN x AS N2 CROSS JOIN x N3) --10^3
,Tally AS(SELECT TOP(@end-@start +1) ROW_NUMBER() OVER(ORDER BY(SELECT NULL)) + @start -1 AS Nr FROM N3
CROSS JOIN N3 N6 CROSS JOIN N3 AS N9)
--INSERT INTO your_table
SELECT @val2 --your @val2 here as a constant value
,DATEADD(DAY,Nr,@StartDate)
FROM Tally
You could use a recursive CTE.
DECLARE @i INT = 1
, @m INT = 19
, @d DATETIME2 = '2016-05-02';
WITH i AS (
SELECT 0 AS increment
UNION ALL
SELECT i.increment + @i
FROM i
WHERE i.increment < @m
)
SELECT i.increment
, DATEADD(DAY, i.increment, @d)
FROM i
OPTION (MAXRECURSION 100);
Note the OPTION (MAXRECURSION 100) hint at the bottom, which is not strictly necessary, but I have included it to illustrate how it works. By default there is a limit of 100 recursion levels with this method, so without this hint, and if @m were a large number, e.g. 1000, SQL Server would generate an error. You can set the limit to 0, which means unbounded, but only do this after testing your code, because the query can get stuck in an infinite loop this way (which is why the limit exists by default).

How to bulk insert a bunch of sequential numbers quickly?

I recently inherited this SQL:
TRUNCATE TABLE [tb_Whitelist]
--DECLARE @Counter INT, @Max INT
SELECT @Counter = 10000000
SELECT @Max = 19999900
WHILE @Counter <= @Max
BEGIN
    INSERT [tb_Whitelist] ([AvailableId]) VALUES(@Counter)
    SELECT @Counter = @Counter + 1
END
It's taking 7-ish hours to run, which I'm told is too slow. Are there any other bulk insert strategies that would allow me to insert a bunch of sequential numbers more quickly, or anything I can do to make this one run faster?
Try something like this....
INSERT [tb_Whitelist] ([AvailableId])
SELECT TOP (9999900)
10000000 + ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM master..spt_values t1
cross join master..spt_values t2
cross join master..spt_values t3
A WHILE loop will be slow. I just ran this query on my laptop, which has 8 GB of RAM and a pretty standard processor, and it took 1 min 45 sec to insert these records.
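If the loop itself has to stay for some reason, a common mitigation (not part of the original answer, so treat it as a rough sketch) is to commit in batches inside an explicit transaction, so the log is not flushed after every single-row insert:
-- minimal sketch: keep the original loop but commit every 50,000 rows instead of after every row
DECLARE @Counter INT = 10000000, @Max INT = 19999900;
BEGIN TRANSACTION;
WHILE @Counter <= @Max
BEGIN
    INSERT [tb_Whitelist] ([AvailableId]) VALUES (@Counter);
    IF @Counter % 50000 = 0
    BEGIN
        COMMIT TRANSACTION;
        BEGIN TRANSACTION;
    END
    SET @Counter = @Counter + 1;
END
COMMIT TRANSACTION;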
Avoid RBAR (row-by-agonizing-row processing). Give it a try with a CTE, inserting in a single statement.
DECLARE
@counter INT = 1,
@max INT = 50000;
with i (i) AS (
select @counter i
UNION ALL
select i+1 from i
where
i < @max
)
insert into [tb_Whitelist]
([AvailableId])
select
i
from i
OPTION(MAXRECURSION 0)

SQL Server query with pagination and count

I want to make a database query with pagination. So I used a common table expression and a ranking function to achieve this. Look at the example below.
declare @table table (name varchar(30));
insert into @table values ('Jeanna Hackman');
insert into @table values ('Han Fackler');
insert into @table values ('Tiera Wetherbee');
insert into @table values ('Hilario Mccray');
insert into @table values ('Mariela Edinger');
insert into @table values ('Darla Tremble');
insert into @table values ('Mammie Cicero');
insert into @table values ('Raisa Harbour');
insert into @table values ('Nicholas Blass');
insert into @table values ('Heather Hayashi');
declare @pagenumber int = 2;
declare @pagesize int = 3;
declare @total int;
with query as
(
select name, ROW_NUMBER() OVER(ORDER BY name ASC) as line from @table
)
select top (@pagesize) name from query
where line > (@pagenumber - 1) * @pagesize
Here, I can set the @pagesize and @pagenumber variables to get just the records that I want. However, this example (which comes from a stored procedure) is used for grid pagination in a web application, and the web application needs to show the page numbers. For instance, if I have 12 records in the database and the page size is 3, then I'll have to show 4 links, each one representing a page.
But I can't do this without knowing how many records there are, and this example just gives me a subset of the records.
Then I changed the stored procedure to return the count(*).
declare @pagenumber int = 2;
declare @pagesize int = 3;
declare @total int;
with query as
(
select name, ROW_NUMBER() OVER(ORDER BY name ASC) as line, total = count(*) over() from @table
)
select top (@pagesize) name, total from query
where line > (@pagenumber - 1) * @pagesize
So, along with each line, it will show the total number of records. But I didn't like it.
My question is whether there's a better way (performance-wise) to do this, maybe setting the @total variable without returning this information in the SELECT. Or is this total column something that won't harm performance too much?
Thanks
Assuming you are using SQL Server 2012 or later, you can use OFFSET and FETCH, which cleans up server-side paging greatly. We've found performance is fine, and in most cases better. As far as getting the total row count, just use the window function below inline; it will not include the limits imposed by OFFSET and FETCH.
For Row_Number, you can use window functions the way you did, but I would recommend that you calculate that client side as (pagenumber*pagesize + resultsetRowNumber), so if you're on the 5th page of 10 results and on the third row you would output row 53.
When applied to an Orders table with about 2 million orders, I found the following:
FAST VERSION
This ran in under a second. The nice thing about it is that you can do your filtering in the common table expression once and it applies both to the paging process and the count. When you have many predicates in the where clause, this keeps things simple.
declare @skipRows int = 25,
@takeRows int = 100,
@count int = 0
;WITH Orders_cte AS (
SELECT OrderID
FROM dbo.Orders
)
SELECT
OrderID,
tCountOrders.CountOrders AS TotalRows
FROM Orders_cte
CROSS JOIN (SELECT Count(*) AS CountOrders FROM Orders_cte) AS tCountOrders
ORDER BY OrderID
OFFSET @skipRows ROWS
FETCH NEXT @takeRows ROWS ONLY;
SLOW VERSION
This took about 10 sec, and it was the Count(*) that caused the slowness. I'm surprised this is so slow, but I suspect it's simply calculating the total for each row. It's very clean though.
declare @skipRows int = 25,
@takeRows int = 100,
@count int = 0
SELECT
OrderID,
Count(*) Over() AS TotalRows
FROM Location.Orders
ORDER BY OrderID
OFFSET @skipRows ROWS
FETCH NEXT @takeRows ROWS ONLY;
CONCLUSION
We've gone through this performance tuning process before and actually found that it depended on the query, predicates used, and indexes involved. For instance, the second we introduced a view it chugged, so we actually query off the base table and then join up the view (which includes the base table) and it actually performs very well.
I would suggest having a couple of straight-forward strategies and applying them to high-value queries that are chugging.
DECLARE @pageNumber INT = 1 ,
@RowsPerPage INT = 20
SELECT *
FROM TableName
ORDER BY Id
OFFSET ( ( @pageNumber - 1 ) * @RowsPerPage ) ROWS
FETCH NEXT @RowsPerPage ROWS ONLY;
What if you calculate the count beforehand?
declare @pagenumber int = 2;
declare @pagesize int = 3;
declare @total int;
SELECT @total = count(*)
FROM @table;
with query as
(
select name, ROW_NUMBER() OVER(ORDER BY name ASC) as line from @table
)
select top (@pagesize) name, @total total from query
where line > (@pagenumber - 1) * @pagesize
Another way is to calculate max(line). Check the link:
Return total records from SQL Server when using ROW_NUMBER
UPD:
For single query, check marc_s's answer on the link above.
with query as
(
select name, ROW_NUMBER() OVER(ORDER BY name ASC) as line from @table
)
select top (@pagesize) name,
(SELECT MAX(line) FROM query) AS total
from query
where line > (@pagenumber - 1) * @pagesize
@pagenumber = 5
@pagesize = 5
Create a common table expression and write logic like this:
BETWEEN ((@pagenumber - 1) * @pagesize) + 1 AND (@pagenumber * @pagesize)
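Spelled out against the sample @table from the question, that idea would look roughly like this (a sketch of what the answer describes, not code from the answer):
-- @table is the sample table variable declared in the question
DECLARE @pagenumber INT = 5;
DECLARE @pagesize INT = 5;
;WITH query AS
(
    SELECT name, ROW_NUMBER() OVER (ORDER BY name ASC) AS line
    FROM @table
)
SELECT name
FROM query
WHERE line BETWEEN ((@pagenumber - 1) * @pagesize) + 1
               AND (@pagenumber * @pagesize);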
There are many ways we can achieve pagination; I hope this information is useful to you and others.
Example 1: using the OFFSET-FETCH NEXT clause, introduced in SQL Server 2012
declare @table table (name varchar(30));
insert into @table values ('Jeanna Hackman');
insert into @table values ('Han Fackler');
insert into @table values ('Tiera Wetherbee');
insert into @table values ('Hilario Mccray');
insert into @table values ('Mariela Edinger');
insert into @table values ('Darla Tremble');
insert into @table values ('Mammie Cicero');
insert into @table values ('Raisa Harbour');
insert into @table values ('Nicholas Blass');
insert into @table values ('Heather Hayashi');
declare @pagenumber int = 1;
declare @pagesize int = 3;
--this is a CTE (common table expression, introduced in SQL Server 2005)
with query as
(
select ROW_NUMBER() OVER(ORDER BY name ASC) as line, name from @table
)
--an order by clause is required to use offset-fetch
select * from query
order by name
offset ((@pagenumber - 1) * @pagesize) rows
fetch next @pagesize rows only
Example 2: using the ROW_NUMBER() function and BETWEEN
declare @table table (name varchar(30));
insert into @table values ('Jeanna Hackman');
insert into @table values ('Han Fackler');
insert into @table values ('Tiera Wetherbee');
insert into @table values ('Hilario Mccray');
insert into @table values ('Mariela Edinger');
insert into @table values ('Darla Tremble');
insert into @table values ('Mammie Cicero');
insert into @table values ('Raisa Harbour');
insert into @table values ('Nicholas Blass');
insert into @table values ('Heather Hayashi');
declare @pagenumber int = 2
declare @pagesize int = 3
SELECT *
FROM
(select ROW_NUMBER() OVER (ORDER BY PRODUCTNAME) AS RowNum, * from Products)
as Product
where RowNum between (((@pagenumber - 1) * @pagesize) + 1)
and (@pagenumber * @pagesize)
I hope these will be helpful to all
I don't like the other solutions for being too complex, so here is my version.
Execute three select queries in one go and use output parameters for getting the count values. This query returns the total count, the filter count, and the page rows. It supports sorting, searching, and filtering the source data. It's easy to read and modify.
Let's say you have two tables with a one-to-many relationship: items and their prices, which change over time, so the example query is not too trivial.
create table shop.Items
(
Id uniqueidentifier not null primary key,
Name nvarchar(100) not null
);
create table shop.Prices
(
ItemId uniqueidentifier not null,
Updated datetime not null,
Price money not null,
constraint PK_Prices primary key (ItemId, Updated),
constraint FK_Prices_Items foreign key (ItemId) references shop.Items(Id)
);
Here is the query:
select @TotalCount = count(*) over()
from shop.Items i;
select @FilterCount = count(*) over()
from shop.Items i
outer apply (select top 1 p.Price, p.Updated from shop.Prices p where p.ItemId = i.Id order by p.Updated desc) as p
where (@Search is null or i.Name like '%' + @Search + '%')/**where**/;
select i.Id as ItemId, i.Name, p.Price, p.Updated
from shop.Items i
outer apply (select top 1 p.Price, p.Updated from shop.Prices p where p.ItemId = i.Id order by p.Updated desc) as p
where (@Search is null or i.Name like '%' + @Search + '%')/**where**/
order by /**orderby**/i.Id
offset @SkipCount rows fetch next @TakeCount rows only;
You need to provide the following parameters to the query:
@SkipCount - how many records to skip, calculated from the page number.
@TakeCount - how many records to return, calculated from or equal to the page size.
@Search - a text to search for in some columns, provided by the grid search box.
@TotalCount - the total number of records in the data source, the output parameter.
@FilterCount - the number of records after the search and filtering operations, the output parameter.
You can replace the /**orderby**/ comment with a list of columns and their ordering directions if the grid must support sorting the rows by columns. You get this information from the grid and translate it into an SQL expression. We still need to order the records by some column initially; I usually use the ID column for that.
If the grid must support filtering the data by each column individually, you can replace the /**where**/ comment with an SQL expression for that, as in the sketch below.
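For illustration only, here is the page query with both placeholders filled in; the @MinPrice filter and the price-descending sort are hypothetical examples of what a grid might request:
-- the third query from above, with /**where**/ and /**orderby**/ replaced
-- @MinPrice is a made-up extra filter parameter for this example; the other parameters are as above
select i.Id as ItemId, i.Name, p.Price, p.Updated
from shop.Items i
outer apply (select top 1 p.Price, p.Updated from shop.Prices p where p.ItemId = i.Id order by p.Updated desc) as p
where (@Search is null or i.Name like '%' + @Search + '%')
  and (@MinPrice is null or p.Price >= @MinPrice)
order by p.Price desc, i.Id
offset @SkipCount rows fetch next @TakeCount rows only;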
If the user is not searching and filtering the data, but only clicks through the grid pages, this query doesn't change at all and the database server executes it very quickly.

How to expand rows from count in tsql only

I have a table that contains a number and a range value. For instance, one column has the value 40 and the other column has the value 100, meaning that starting at 40 the range has 100 values, ending at 139 (inclusive of 40). I want to write a T-SQL statement that expands my data into individual rows.
I think I need a CTE for this but do not know how to achieve it.
Note: when expanded, I am expecting 7 million rows.
If you want a CTE, here is an example.
Initial insert:
insert into rangeTable (StartValue, RangeValue)
select 40,100
union all select 150,10
go
the query:
with r_CTE (startVal, rangeVal, generatedVal)
as
(
select r.startValue, r.rangeValue, r.startValue
from rangeTable r
union all
select r.startValue, r.rangeValue, generatedVal+1
from rangeTable r
inner join r_CTE rc
on r.startValue = rc.startVal
and r.rangeValue = rc.rangeVal
and r.startValue + r.rangeValue > rc.generatedVal + 1
)
select * from r_CTE
order by startVal, rangeVal, generatedVal
Just be aware that the default maximum number of recursions is 100. You can change it to the maximum of 32767 by calling
option (maxrecursion 32767)
or to no limit
option (maxrecursion 0)
See BOL for details
I don't know how this could be done with common table expressions, but here is a solution using a temporary table:
SET NOCOUNT ON
DECLARE @MaxValue INT
SELECT @MaxValue = max(StartValue + RangeValue) FROM MyTable
DECLARE @Numbers table (
Number INT IDENTITY(1,1) PRIMARY KEY
)
INSERT @Numbers DEFAULT VALUES
WHILE COALESCE(SCOPE_IDENTITY(), 0) <= @MaxValue
INSERT @Numbers DEFAULT VALUES
SELECT n.Number
FROM @Numbers n
WHERE EXISTS(
SELECT *
FROM MyTable t
WHERE n.Number BETWEEN t.StartValue AND t.StartValue + t.RangeValue - 1
)
SET NOCOUNT OFF
This could be optimized if the Numbers table were a regular table, so you wouldn't have to fill the temporary table on every call.
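A sketch of that optimization, assuming a permanent dbo.Numbers table is acceptable in the database (filled once, then reused):
-- one-time setup: a permanent numbers table holding 0..999,999
CREATE TABLE dbo.Numbers (Number INT NOT NULL PRIMARY KEY);
INSERT INTO dbo.Numbers (Number)
SELECT TOP (1000000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1
FROM sys.all_objects AS a
CROSS JOIN sys.all_objects AS b;
-- the query from above, now with no filling loop at all
SELECT n.Number
FROM dbo.Numbers n
WHERE EXISTS (
    SELECT *
    FROM MyTable t
    WHERE n.Number BETWEEN t.StartValue AND t.StartValue + t.RangeValue - 1
);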
You could try this approach:
create function [dbo].[fRange](@a int, @b int)
returns @ret table (val int)
as
begin
    declare @val int
    declare @end int
    set @val = @a
    set @end = @a + @b
    while @val < @end
    begin
        insert into @ret(val)
        select @val
        set @val = @val + 1
    end
    return
end
go
declare @ranges table(start int, noOfEntries int)
insert into @ranges (start, noOfEntries)
select 40,100
union all select 150, 10
select * from @ranges r
cross apply dbo.fRange(start, noOfEntries) fr
Not the fastest, but it should work.
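Applied to the question's actual table (assuming the MyTable / StartValue / RangeValue names used in the other answers), the same function expands every range in one statement:
-- expands every range in MyTable into individual values
select t.StartValue, t.RangeValue, fr.val
from MyTable t
cross apply dbo.fRange(t.StartValue, t.RangeValue) fr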
I would do something slightly different from splattne...
SET NOCOUNT ON
DECLARE @MaxValue INT
DECLARE @Numbers table (
Number INT IDENTITY(1,1) PRIMARY KEY CLUSTERED
)
SELECT @MaxValue = max(RangeValue) FROM MyTable
INSERT @Numbers DEFAULT VALUES
WHILE COALESCE(SCOPE_IDENTITY(), 0) <= @MaxValue
INSERT @Numbers DEFAULT VALUES
SELECT
t.startValue + n.Number
FROM
MyTable t
INNER JOIN
@Numbers n
ON n.Number < t.RangeValue
SET NOCOUNT OFF
This will minimise the number of rows you need to insert into the table variable, then use a join to 'multiply' one table by the other...
By the nature of the query, the source table doesn't need indexing, but the "numbers" table should have an index (or primary key). Clustered indexes refer to how rows are stored on disk, so I can't see CLUSTERED being relevant here, but I left it in as I just copied it from splattne.
(Large joins like this may be slow, but still much faster than millions of inserts.)
