How to chunk updates to SQL Server?

How to chunk updates to SQL Server? - sql-server

I want to update a table in SQL Server by setting a FLAG column to 1 for all values since the beginning of the year:
TABLE
DATE ID FLAG (more columns...)
2016/01/01 1 0 ...
2016/01/01 2 0 ...
2016/01/02 3 0 ...
2016/01/02 4 0 ...
(etc)
Problem is that this table contains hundreds of millions of records and I've been advised to chunk the updates 100,000 rows at a time to avoid blocking other processes.
I need to remember which rows I update because there are background processes which immediately flip the FLAG back to 0 once they're done processing it.
Does anyone have suggestions on how I can do this?
Each day's worth of data has over a million records, so I can't simply loop using the DATE as a counter. I am thinking of using the ID

Assuming the date column and the ID column are sequential you could do a simple loop. By this I mean that if there is a record id=1 and date=2016-1-1 then record id=2 date=2015-12-31 could not exist. If you are worried about locks/exceptions you should add a transaction in the WHILE block and commit or rollback on failure.
Change the #batchSize to whatever you feel is right after some experimentation.
DECLARE #currentId int, #maxId int, #batchSize int = 10000
SELECT #currentId = MIN(ID), #maxId = MAX(ID) FROM YOURTABLE WHERE DATE >= '2016-01-01'
WHILE #currentId < #maxId
BEGIN
UPDATE YOURTABLE SET FLAG = 1 WHERE ID BETWEEN #currentId AND (#currentId + #batchSize)
SET #currentId = #currentId + #batchSize
END
As this as the update will never flag the same record to 1 twice I do not see a need to track which records were touched unless you are going to manually stop the process partway through.
You should also ensure that the ID column has an index on it so the retrieval is fast in each update statement.

Looks like a simple question or maybe I'm missing something.
You can create a temp/permanent table to keep track of updated rows.
create tbl (Id int) -- or temp table based on your case
insert into tbl values (0)
declare #lastId int = (select Id from tbl)
;with cte as (
select top 100000
from YourMainTable
where Id > #lastId
ORDER BY Id
)
update cte
set Flag = 1
update tbl set Id = #lastId + 100000
You can do this process in a loop (except the table creation part)

create table #tmp_table
(
id int ,
row_number int
)
insert into #tmp_table
(
id,
row_number
)
--logic to load records from base table
select
bt.id,
row_number() over(partition by id order by id ) as row_number
from
dbo.bas_table bt
where
--ur logic to limit the records
declare #batch_size int = 100000;
declare #start_row_number int,#end_row_number int;
select
#start_row_number = min(row_number),
#end_row_number = max(row_number)
from
#tmp_table
while(#start_row_number < #end_row_number)
begin
update top #batch_size
bt
set
bt.flag = 1
from
dbo.base_table bt
inner join #tmp_table tt on
tt.Id = bt.Id
where
bt.row_number between #start_row_number and (#start_row_number + #batch_size)
set #start_row_number = #start_row_number + #batch_size
end

Related

How to fiind out the missing records (ID) from an indexed [order] table in sql

I have a table [Order] that has records with sequential ID (in odd number only, i.e. 1,3,5,7...989, 991, 993, 995, 997, 999), it is seen that a few records were accidentally deleted and should be inserted back, first thing is to find out what records are missing in the current table, there are hundreds of records in this table
Don't know how to write the query, can anyone kindly help, please?
I am thinking if I have to write a stored procedure or function but would be better if I can avoid them for environment reasons.
Below peuso code is what I am thinking:
set #MaxValue = Max(numberfield)
set #TestValue = 1
open cursor on recordset ordered by numberfield
foreach numberfield
while (numberfield != #testvalue) and (#testvalue < #MaxValue) then
Insert #testvalue into #temp table
set #testvalue = #textvalue + 2
Next
Next
UPDATE:
Expected result:
Order ID = 7 should be picked up as the only missing record.
Update 2:
If I use
WHERE
o.id IS NULL;
It returns nothing:

Since I didn't get a response from you, in the comments, I've altered the script for you to fill in accordingly:
declare #id int
declare #maxid int
set #id = 1
select #maxid = max([Your ID Column Name]) from [Your Table Name]
declare #IDseq table (id int)
while #id < #maxid --whatever you max is
begin
insert into #IDseq values(#id)
set #id = #id + 1
end
select
s.id
from #IDseq s
left join [Your Table Name] t on s.id = t.[Your ID Column Name]
where t.[Your ID Column Name] is null
Where you see [Your ID Column Name], replace everything with your column name and the same goes for [Your Table Name].
I'm sure this will give you the results you seek.

We can try joining to a number table, which contains all the odd numbers which you might expect to appear in your own table.
DECLARE #start int = 1
DECLARE #end int = 1000
WITH cte AS (
SELECT #start num
UNION ALL
SELECT num + 2 FROM cte WHERE num < #end
)
SELECT num
FROM cte t
LEFT JOIN [Order] o
ON t.num = o.numberfield
WHERE
o.numberfield IS NULL;

How to UPDATE TOP(n) with ORDER BY giving a predictable result?

I'm trying to read the top 100 items of a database table that is being used like a queue. As I do this I'm trying to mark the items as done like this:
UPDATE TOP(#qty)
QueueTable WITH (READPAST)
SET
IsDone = 1
OUTPUT
inserted.Id,
inserted.Etc
FROM
QueueTable
WHERE
IsDone = 0
ORDER BY
CreatedDate ASC;
The only problem is, according to UPDATE (Transact-SQL) on MSDN, the ORDER BY is not valid in an UPDATE and:
The rows referenced in the TOP expression used with INSERT, UPDATE, or
DELETE are not arranged in any order.
How can I achieve what I need which is to update the items at the top of the queue while also selecting them?

SQL Server allows you to update a derived table, CTE or view:
UPDATE x
SET
IsDone = 1
OUTPUT
inserted.Id,
inserted.Etc
FROM (
select TOP (N) *
FROM
QueueTable
WHERE
IsDone = 0
ORDER BY
CreatedDate ASC;
) x
No need to compute a set of IDs first. This is faster and usually has more desirable locking behavior.

Tested in SSMS, it works fine. You may need to do some modification accordingly.
--create table structure
create table #temp1 (
id int identity(1,1),
value int
)
go
--insert sample data
insert #temp1 values (1)
go 20
--below is solution
declare #qty int = 10
declare #cmd nvarchar(2000) =
N'update #temp1
set value= 100
output inserted.value
where id in
(
select top '+ cast(#qty as nvarchar(5)) +' id from #temp1
order by id
)';
execute sp_executesql #cmd

You can use ranking function (for example row_number).
update top (100) q
set IsDone = 1
output
inserted.Id,
inserted.Etc
from (
select *, row_number() over(order by CreatedDate asc, (select 0)) rn
from QueueTable) q
where rn <= 100

Generate a repetitive sequential number using SQL Server 2008

Can anyone help me to generate a repetitive sequential number using SQL Server 2008. Say I have a table of 1000 rows and a new field (int) added to the table. All I need is to auto fill that particular field with sequential numbers 1-100 all the way to the last row.
I have this but doesnt seem that it is working. You help is much appreciated.
DECLARE #id INT
SET #id = 0
while #id < 101
BEGIN
UPDATE Names SET id=#id
set #id=#id+1
END

USE tempdb
GO
DROP TABLE tableof1000rows
GO
CREATE TABLE tableof1000rows (id int identity(1,1), nb int, value varchar(128))
GO
INSERT INTO tableof1000rows (value)
SELECT TOP 1000 o1.name
FROM sys.objects o1
CROSS JOIN sys.objects o2
GO
UPDATE t1
SET nb = t2.nb
FROM tableof1000rows t1
JOIN (SELECT id, (ROW_NUMBER() OVER (ORDER BY id) % 100) + 1 as nb FROM tableof1000rows) t2 ON t1.id = t2.id
GO
SELECT *
FROM tableof1000rows

Use ROW_NUMBER to generate a number. Use modulo maths to get values from 1 to 100.
go
create table dbo.Demo1
(
DID int not null identity primary key,
RepetitiveSequentialNumber int not null
) ;
go
insert into dbo.Demo1 values ( 0 )
go 1000 -- This is just to get 1,000 rows into the table.
-- Get a sequential number.
select DID, row_number() over ( order by DID ) as RSN
into #RowNumbers
from dbo.Demo1 ;
-- Take the last two digits. This gives us values from 0 to 99.
update #RowNumbers
set RSN = RSN % 100 ;
-- Change the 0 values to 100.
update #RowNumbers
set RSN = case when RSN = 0 then 100 else RSN end ;
-- Update the main table.
update dbo.Demo1
set RepetitiveSequentialNumber = r.RSN
from dbo.Demo1 as d inner join #RowNumbers as r on r.DID = d.DID ;
select *
from dbo.Demo1 ;

Not pretty or elegant, but..
while exists(select * from tbl where new_col is null)
update top(1) tbl
set new_col=(select ISNULL(max(new_col),0)+1 from tbl WHERE new_col is null)

Your way isn't working because you are trying to set a value rather than insert one. I'm sure you have found a solution by now but if not then try this instead.
DECLARE #id INT
SET #id = 0
while #id < 101
BEGIN
INSERT Names select #id
set #id=#id+1
END

Do Inserted Records Always Receive Contiguous Identity Values

Consider the following SQL:
CREATE TABLE Foo
(
ID int IDENTITY(1,1),
Data nvarchar(max)
)
INSERT INTO Foo (Data)
SELECT TOP 1000 Data
FROM SomeOtherTable
WHERE SomeColumn = #SomeParameter
DECLARE #LastID int
SET #LastID = SCOPE_IDENTITY()
I would like to know if I can depend on the 1000 rows that I inserted into table Foo having contiguous identity values. In order words, if this SQL block produces a #LastID of 2000, can I know for certain that the ID of the first record I inserted was 1001? I am mainly curious about multiple statements inserting records into table Foo concurrently.
I know that I could add a serializable transaction around my insert statement to ensure the behavior that I want, but do I really need to? I'm worried that introducing a serializable transaction will degrade performance, but if SQL Server won't allow other statements to insert into table Foo while this statement is running, then I don't have to worry about it.

I disagree with the accepted answer. This can easily be tested and disproved by running the following.
Setup
USE tempdb
CREATE TABLE Foo
(
ID int IDENTITY(1,1),
Data nvarchar(max)
)
Connection 1
USE tempdb
SET NOCOUNT ON
WHILE NOT EXISTS(SELECT * FROM master..sysprocesses WHERE context_info = CAST('stop' AS VARBINARY(128) ))
BEGIN
INSERT INTO Foo (Data)
VALUES ('blah')
END
Connection 2
USE tempdb
SET NOCOUNT ON
SET CONTEXT_INFO 0x
DECLARE #Output TABLE(ID INT)
WHILE 1 = 1
BEGIN
/*Clear out table variable from previous loop*/
DELETE FROM #Output
/*Insert 1000 records*/
INSERT INTO Foo (Data)
OUTPUT inserted.ID INTO #Output
SELECT TOP 1000 NEWID()
FROM sys.all_columns
IF EXISTS(SELECT * FROM #Output HAVING MAX(ID) - MIN(ID) <> 999 )
BEGIN
/*Set Context Info so other connection inserting
a single record in a loop terminates itself*/
DECLARE #stop VARBINARY(128)
SET #stop = CAST('stop' AS VARBINARY(128))
SET CONTEXT_INFO #stop
/*Return results for inspection*/
SELECT ID, DENSE_RANK() OVER (ORDER BY Grp) AS ContigSection
FROM
(SELECT ID, ID - ROW_NUMBER() OVER (ORDER BY [ID]) AS Grp
FROM #Output) O
ORDER BY ID
RETURN
END
END

Yes, they will be contiguous because the INSERT is atomic: complete success or full rollback. It is also performed as a single unit of work: you wont get any "interleaving" with other processes
However (or to put your mind at rest!), consider the OUTPUT clause
DECLARE #KeyStore TABLE (ID int NOT NULL)
INSERT INTO Foo (Data)
OUTPUT INSERTED.ID INTO #KeyStore (ID) --this line
SELECT TOP 1000 Data
FROM SomeOtherTable
WHERE SomeColumn = #SomeParameter

If you want the Identity values for multiple rows use OUTPUT:
DECLARE #NewIDs table (PKColumn int)
INSERT INTO Foo (Data)
OUTPUT INSERTED.PKColumn
INTO #NewIDs
SELECT TOP 1000 Data
FROM SomeOtherTable
WHERE SomeColumn = #SomeParameter
you now have the entire set of values in the #NewIDs table. You can add any columns from the Foo table into the #NewIDs table and insert those columns as well.

It is not good practice to attach any sort of meaning whatsoever to identity values. You should assume that they are nothing more than integers guaranteed to be unique within the scope of your table.

Try adding the following:
option(maxdop 1)

SQL Server: UPDATE a table by using ORDER BY

I would like to know if there is a way to use an order by clause when updating a table. I am updating a table and setting a consecutive number, that's why the order of the update is important. Using the following sql statement, I was able to solve it without using a cursor:
DECLARE #Number INT = 0
UPDATE Test
SET #Number = Number = #Number +1
now what I'd like to to do is an order by clause like so:
DECLARE #Number INT = 0
UPDATE Test
SET #Number = Number = #Number +1
ORDER BY Test.Id DESC
I've read: How to update and order by using ms sql The solutions to this question do not solve the ordering problem - they just filter the items on which the update is applied.
Take care,
Martin

No.
Not a documented 100% supported way. There is an approach sometimes used for calculating running totals called "quirky update" that suggests that it might update in order of clustered index if certain conditions are met but as far as I know this relies completely on empirical observation rather than any guarantee.
But what version of SQL Server are you on? If SQL2005+ you might be able to do something with row_number and a CTE (You can update the CTE)
With cte As
(
SELECT id,Number,
ROW_NUMBER() OVER (ORDER BY id DESC) AS RN
FROM Test
)
UPDATE cte SET Number=RN

You can not use ORDER BY as part of the UPDATE statement (you can use in sub-selects that are part of the update).
UPDATE Test
SET Number = rowNumber
FROM Test
INNER JOIN
(SELECT ID, row_number() OVER (ORDER BY ID DESC) as rowNumber
FROM Test) drRowNumbers ON drRowNumbers.ID = Test.ID

Edit
Following solution could have problems with clustered indexes involved as mentioned here. Thanks to Martin for pointing this out.
The answer is kept to educate those (like me) who don't know all side-effects or ins and outs of SQL Server.
Expanding on the answer gaven by Quassnoi in your link, following works
DECLARE #Test TABLE (Number INTEGER, AText VARCHAR(2), ID INTEGER)
DECLARE #Number INT
INSERT INTO #Test VALUES (1, 'A', 1)
INSERT INTO #Test VALUES (2, 'B', 2)
INSERT INTO #Test VALUES (1, 'E', 5)
INSERT INTO #Test VALUES (3, 'C', 3)
INSERT INTO #Test VALUES (2, 'D', 4)
SET #Number = 0
;WITH q AS (
SELECT TOP 1000000 *
FROM #Test
ORDER BY
ID
)
UPDATE q
SET #Number = Number = #Number + 1

The row_number() function would be the best approach to this problem.
UPDATE T
SET T.Number = R.rowNum
FROM Test T
JOIN (
SELECT T2.id,row_number() over (order by T2.Id desc) rowNum from Test T2
) R on T.id=R.id

update based on Ordering by the order of values in a SQL IN() clause
Solution:
DECLARE #counter int
SET #counter = 0
;WITH q AS
(
select * from Products WHERE ID in (SELECT TOP (10) ID FROM Products WHERE ID IN( 3,2,1)
ORDER BY ID DESC)
)
update q set Display= #counter, #counter = #counter + 1
This updates based on descending 3,2,1
Hope helps someone.

I had a similar problem and solved it using ROW_NUMBER() in combination with the OVER keyword. The task was to retrospectively populate a new TicketNo (integer) field in a simple table based on the original CreatedDate, and grouped by ModuleId - so that ticket numbers started at 1 within each Module group and incremented by date. The table already had a TicketID primary key (a GUID).
Here's the SQL:
UPDATE Tickets SET TicketNo=T2.RowNo
FROM Tickets
INNER JOIN
(select TicketID, TicketNo,
ROW_NUMBER() OVER (PARTITION BY ModuleId ORDER BY DateCreated) AS RowNo from Tickets)
AS T2 ON T2.TicketID = Tickets.TicketID
Worked a treat!

I ran into the same problem and was able to resolve it in very powerful way that allows unlimited sorting possibilities.
I created a View using (saving) 2 sort orders (*explanation on how to do so below).
After that I simply applied the update queries to the View created and it worked great.
Here are the 2 queries I used on the view:
1st Query:
Update MyView
Set SortID=0
2nd Query:
DECLARE #sortID int
SET #sortID = 0
UPDATE MyView
SET #sortID = sortID = #sortID + 1
*To be able to save the sorting on the View I put TOP into the SELECT statement. This very useful workaround allows the View results to be returned sorted as set when the View was created when the View is opened. In my case it looked like:
(NOTE: Using this workaround will place an big load on the server if using a large table and it is therefore recommended to include as few fields as possible in the view if working with large tables)
SELECT TOP (600000)
dbo.Items.ID, dbo.Items.Code, dbo.Items.SortID, dbo.Supplier.Date,
dbo.Supplier.Code AS Expr1
FROM dbo.Items INNER JOIN
dbo.Supplier ON dbo.Items.SupplierCode = dbo.Supplier.Code
ORDER BY dbo.Supplier.Date, dbo.Items.ID DESC
Running: SQL Server 2005 on a Windows Server 2003
Additional Keywords: How to Update a SQL column with Ascending or Descending Numbers - Numeric Values / how to set order in SQL update statement / how to save order by in sql view / increment sql update / auto autoincrement sql update / create sql field with ascending numbers

SET #pos := 0;
UPDATE TABLE_NAME SET Roll_No = ( SELECT #pos := #pos + 1 ) ORDER BY First_Name ASC;
In the above example query simply update the student Roll_No column depending on the student Frist_Name column. From 1 to No_of_records in the table. I hope it's clear now.

IF OBJECT_ID('tempdb..#TAB') IS NOT NULL
BEGIN
DROP TABLE #TAB
END
CREATE TABLE #TAB(CH1 INT,CH2 INT,CH3 INT)
DECLARE #CH2 INT = NULL , #CH3 INT=NULL,#SPID INT=NULL,#SQL NVARCHAR(4000)='', #ParmDefinition NVARCHAR(50)= '',
#RET_MESSAGE AS VARCHAR(8000)='',#RET_ERROR INT=0
SET #ParmDefinition='#SPID INT,#CH2 INT OUTPUT,#CH3 INT OUTPUT'
SET #SQL='UPDATE T
SET CH1=#SPID,#CH2= T.CH2,#CH3= T.CH3
FROM #TAB T WITH(ROWLOCK)
INNER JOIN (
SELECT TOP(1) CH1,CH2,CH3
FROM
#TAB WITH(NOLOCK)
WHERE CH1 IS NULL
ORDER BY CH2 DESC) V ON T.CH2= V.CH2 AND T.CH3= V.CH3'
INSERT INTO #TAB
(CH2 ,CH3 )
SELECT 1,2 UNION ALL
SELECT 2,3 UNION ALL
SELECT 3,4
BEGIN TRY
WHILE EXISTS(SELECT TOP 1 1 FROM #TAB WHERE CH1 IS NULL)
BEGIN
EXECUTE #RET_ERROR = sp_executesql #SQL, #ParmDefinition,#SPID =##SPID, #CH2=#CH2 OUTPUT,#CH3=#CH3 OUTPUT;
SELECT * FROM #TAB
SELECT #CH2,#CH3
END
END TRY
BEGIN CATCH
SET #RET_ERROR=ERROR_NUMBER()
SET #RET_MESSAGE = '#ERROR_NUMBER : ' + CAST(ERROR_NUMBER() AS VARCHAR(255)) + '#ERROR_SEVERITY :' + CAST( ERROR_SEVERITY() AS VARCHAR(255))
+ '#ERROR_STATE :' + CAST(ERROR_STATE() AS VARCHAR(255)) + '#ERROR_LINE :' + CAST( ERROR_LINE() AS VARCHAR(255))
+ '#ERROR_MESSAGE :' + ERROR_MESSAGE() ;
SELECT #RET_ERROR,#RET_MESSAGE;
END CATCH

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

How to chunk updates to SQL Server? - sql-server

Related

How to fiind out the missing records (ID) from an indexed [order] table in sql

How to UPDATE TOP(n) with ORDER BY giving a predictable result?

Generate a repetitive sequential number using SQL Server 2008

Do Inserted Records Always Receive Contiguous Identity Values

SQL Server: UPDATE a table by using ORDER BY

Categories

Resources