MERGE in SQL Server - sql-server

MERGE in SQL Server - sql-server

I am trying to perform the following using a MERGE statement
I have a table that has two columns (TagId is an Identity (PK) and Name as a VARCHAR). I would like to check first if Name exists before I insert it. If it does exist, I would like to get the Identity value. Otherwise, I would insert it and pick up the inserted value.
The beauty of MERGE is that it is transactional in nature, so I won't have to worry about getting a UNIQUE index violation due to timing. Any suggestions? I prefer not to use transactions.
-- Look up a tag by name and insert it only when no row exists yet.
-- NOTE: this check-then-insert sequence is not atomic; two sessions can both pass the
-- lookup and then both attempt the INSERT.
DECLARE
    @TagId INT,
    @Name VARCHAR(100) = 'TagName';

SELECT TOP (1)
    @TagId = T.TagId
FROM dbo.Tag AS T
WHERE T.Name = @Name;

IF @TagId IS NULL
BEGIN
    INSERT INTO dbo.Tag (Name) VALUES (@Name);
    -- Pick up the identity value generated by the INSERT above.
    SELECT @TagId = SCOPE_IDENTITY();
END
After some experimenting, the following seems to work, but it doesn't seem right. The MATCHED clause is required; otherwise, @Table won't have a value.
-- Get-or-create a tag in one MERGE and capture the resulting row through OUTPUT.
DECLARE @Table TABLE
(
    TagId INT,
    Name VARCHAR(100)
);

DECLARE
    @TagId INT,
    @Name VARCHAR(100) = 'TdagNamed122';

-- HOLDLOCK keeps the key range locked between the existence check and the insert, so a
-- concurrent session cannot insert the same Name in between.
MERGE dbo.Tag WITH (HOLDLOCK) AS Target
USING (SELECT @Name) AS Source (Name)
    ON Source.Name = Target.Name
WHEN MATCHED THEN
    -- Dummy assignment: it changes no column, but makes the matched row flow through OUTPUT
    -- (and stores the existing id in @TagId).
    UPDATE SET @TagId = Target.TagId
WHEN NOT MATCHED THEN
    INSERT (Name) VALUES (Source.Name)
OUTPUT INSERTED.TagId, INSERTED.Name INTO @Table (TagId, Name)
;

SELECT TagId, Name FROM @Table;

Yes, as per the documentation, it should execute all statements in an atomic fashion.
I didn't face any concurrency issues.
But there are some concerns, as described in this link:
http://www.mssqltips.com/sqlservertip/3074/use-cauti

Related

Which approach is good to insert a record: IF NOT EXISTS vs. @@ROWCOUNT = 0

I need to insert a record into table if the record not exists.
I have below table
-- Local temporary employee table, seeded with two sample rows.
CREATE TABLE #EMP (
    EmpID       INT IDENTITY(1, 1),
    EmpName     VARCHAR(100),
    Designation VARCHAR(10)
);

INSERT INTO #EMP (EmpName, Designation)
VALUES
    ('Ramesh', 'Teamlead'),
    ('Suresh', 'Manager');
Now I want to insert a record after checking whether the record already exists.
I can use one of these two approaches:
Approach #1:
-- Approach 1: try to read the row, then insert when the SELECT matched nothing.
DECLARE @EmpID INT;

SELECT @EmpID = EmpID
FROM #EMP
WHERE EmpName = 'Ramesh'
  AND Designation = 'Teamlead';

-- @@ROWCOUNT reflects the SELECT above; it must be read before any other statement runs.
IF @@ROWCOUNT = 0
    INSERT INTO #EMP (EmpName, Designation)
    VALUES ('Ramesh', 'Teamlead');
Approach #2:
-- Approach 2: insert only when no matching row is found.
IF NOT EXISTS (
    SELECT NULL
    FROM #EMP
    WHERE EmpName = 'Ramesh'
      AND Designation = 'Teamlead'
)
BEGIN
    INSERT INTO #EMP (EmpName, Designation)
    VALUES ('Ramesh', 'Teamlead');
END
Can anyone suggest which one is the better of the two?
Thank you

Your second approach is flawed, you check for 'Ramesh' but if they don't exist also INSERT 'Suresh', who might already exist.
I would personally suggest doing it all in one statement. Something like this:
-- Insert each candidate row only when the same (EmpName, Designation) pair is not present yet.
INSERT INTO #EMP (EmpName, Designation)
SELECT
    candidate.EmpName,
    candidate.Designation
FROM (
    VALUES
        ('Ramesh', 'Teamlead'),
        ('Suresh', 'Manager')
) AS candidate (EmpName, Designation)
WHERE NOT EXISTS (
    SELECT 1
    FROM #EMP AS existing
    WHERE existing.EmpName = candidate.EmpName
      AND existing.Designation = candidate.Designation
);
As mentioned by @DanGuzman, this does pose a problem (as do yours) in that it could suffer from race conditions (though with a local temporary table this would not be an issue). One method would therefore be to change the isolation level to serializable prior to the statement:
-- Raise the isolation level before running the INSERT ... WHERE NOT EXISTS statement.
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
Alternatively, you could use the hint HOLDLOCK on the table in the NOT EXISTS, which would be by using WITH (HOLDLOCK).

How do I update multiple rows in a stored procedure

I get batches of inventory items to update and I would like to eliminate calling the stored procedure multiple times and instead call it once with multiple values. I have done similar in oracle with the parameters as an array trick. I would like to do something similar for SQL Server.
I have a comma separated list of Sku
I have a comma separated list of Quantity.
I have a comma separated list of StoreIds.
The standard update is
-- Single-row update driven by three scalar parameters.
UPDATE Inventory
SET Quantity = @Quantity
WHERE Sku = @Sku
  AND StoreId = @StoreId;
Table definition
-- Inventory quantity per (Sku, StoreId).
-- NOTE(review): DECIMAL without precision/scale defaults to DECIMAL(18, 0), so fractional
-- quantities would be rounded - confirm whether a scale is intended.
CREATE TABLE Inventory
(
    [Sku] NVARCHAR(50) NOT NULL,
    [Quantity] DECIMAL NULL DEFAULT 0.0,
    [StoreId] INT NOT NULL
)
My bad attempt at doing this
-- Non-working attempt, kept as posted apart from restoring the @ variable sigils.
-- It does not compile: a table variable cannot be declared and filled with
-- "DECLARE ... = SELECT", CONVERT needs a target type, and a table variable has no
-- .Count property and cannot be compared to a column as if it were a scalar.
ALTER PROCEDURE UpdateList
(@Sku varchar(max),
@Quantity varchar(max),
@StoreId varchar(max))
AS
BEGIN
DECLARE @n int = 0;
DECLARE @skuTable TABLE = SELECT CONVERT(value) FROM STRING_SPLIT(@Sku, ',');
DECLARE @quantityTable = SELECT CONVERT(value) FROM STRING_SPLIT(@Quantity, ',');
DECLARE @StoreIdTable = SELECT CONVERT(value) FROM STRING_SPLIT(@StoreId , ',');
WHILE @n < @skuTable.Count
BEGIN
UPDATE inventoryItem
SET Quantity = @quantityTable
WHERE Sku = @skuTable AND StoreId = @StoreIdTable;
SELECT @n = @n + 1;
END
END
I am open to using temp tables as parameters instead of comma separated. This is being called from an Entity Framework 6 context object from the front end system.

It's bad practice to pass tabular values in this way.
The best solution is to pass them as a "user-defined table type", if possible;
otherwise, it's better to accept a JSON/XML parameter,
and then you can update your table like this:
--[ Parameters ]--
DECLARE @json AS NVARCHAR(MAX) = '[{"Sku":"A","Quantity":1.4,"StoreId":1},{"Sku":"B","Quantity":2.5,"StoreId":2},{"Sku":"C","Quantity":3.6,"StoreId":3}]';

--[ Bulk Update ]--
-- Shred the JSON array into rows and update every matching (Sku, StoreId) pair in one statement.
UPDATE T
SET Quantity = I.Quantity
FROM inventoryItem AS T
INNER JOIN OPENJSON(@json)
    WITH (
        Sku      NVARCHAR(50),
        Quantity DECIMAL(5, 1),
        StoreId  INT
    ) AS I
    ON I.Sku = T.Sku
   AND I.StoreId = T.StoreId;

It's bad practice to pass tabular values as comma-separated VARCHAR parameters,
but if you still want to go this way, here is working code:
--[ Parameters ]--
DECLARE @Sku      VARCHAR(MAX) = 'A,B,C',
        @Quantity VARCHAR(MAX) = '1.4,2.5,3.6',
        @StoreId  VARCHAR(MAX) = '1,2,3';

--[ Converting VARCHAR Parameters to Table #Inventory ]--
-- Each list is split into its own temp table with a RowNum column, and the three temp
-- tables are then joined on RowNum to rebuild one row per list position.
-- NOTE(review): this relies on STRING_SPLIT returning items in list order and on
-- IDENTITY() numbering them in that order; neither is stated here - verify on the
-- target SQL Server version.
DROP TABLE IF EXISTS #Sku;
SELECT IDENTITY(INT, 1, 1) AS RowNum,
       T.value
INTO #Sku
FROM STRING_SPLIT(@Sku, ',') AS T;

DROP TABLE IF EXISTS #Quantity;
SELECT IDENTITY(INT, 1, 1) AS RowNum,
       T.value
INTO #Quantity
FROM STRING_SPLIT(@Quantity, ',') AS T;

DROP TABLE IF EXISTS #StoreId;
SELECT IDENTITY(INT, 1, 1) AS RowNum,
       T.value
INTO #StoreId
FROM STRING_SPLIT(@StoreId, ',') AS T;

DROP TABLE IF EXISTS #Inventory;
SELECT Sku.value      AS Sku,
       Quantity.value AS Quantity,
       StoreId.value  AS StoreId
INTO #Inventory
FROM #Sku AS Sku
INNER JOIN #Quantity AS Quantity ON Quantity.RowNum = Sku.RowNum
INNER JOIN #StoreId AS StoreId ON StoreId.RowNum = Sku.RowNum;

--[ Bulk Update ]--
UPDATE T
SET Quantity = I.Quantity
FROM inventoryItem AS T
INNER JOIN #Inventory AS I
    ON I.Sku = T.Sku
   AND I.StoreId = T.StoreId;

The above answers are correct for updates and answered my question. But I wanted to add the insert here, as I am sure someone will be looking for both. Maybe I will come back and make a new question and answer it myself.
I think the JSON version is best for my issue because I am using Entity Framework, and serializing an object to JSON is a trivial task. The basic process is to create an inline rowset from the JSON string, addressing the object's properties via simple dot-notation paths. I would suggest making the object passed in as simple as possible, preferably with one level of properties.
-- Bulk-inserts inventory rows described by a JSON array of objects with Sku, Quantity and
-- StoreId properties.
--   @json: the JSON array text to shred and insert.
CREATE OR ALTER PROCEDURE bulkInventoryInsert (@json NVARCHAR(MAX))
AS
BEGIN
    -- Explicit column list so the insert does not depend on the table's column order.
    INSERT INTO inventory (Sku, Quantity, StoreId)
    SELECT Sku, Quantity, StoreId
    FROM OPENJSON(@json)
        WITH (
            Sku      VARCHAR(200)  '$.Sku',
            Quantity DECIMAL(5, 1) '$.Quantity',
            StoreId  INT           '$.StoreId'
        );
END
GO
-- The batch separator above ends the procedure definition; without it the test call
-- below would be compiled as part of the procedure body.
DECLARE @json AS NVARCHAR(MAX) = '[{"Sku":"A","Quantity":1.4,"StoreId":2},{"Sku":"B","Quantity":2.5,"StoreId":3},{"Sku":"C","Quantity":3.6,"StoreId":2}]';
EXECUTE bulkInventoryInsert @json;
The key part to recognize is this section here:
-- Shred the JSON text into typed columns; each path names the property to read.
SELECT Sku, Quantity, StoreId
FROM OPENJSON(@json)
    WITH (
        Sku      VARCHAR(200)  '$.Sku',
        Quantity DECIMAL(5, 1) '$.Quantity',
        StoreId  INT           '$.StoreId'
    );
This produces a rowset (effectively an inline temporary table) with columns that match the table it will be inserted into. The "WITH" portion specifies each column's name, its type, and where in the JSON string to get the value.
I hope this will help. Maybe when I get time I will do a question and answer for this.

Insert random text into columns from a reference table variable

I have a table ABSENCE that has 40 employee ids and need to add two columns from a table variable, which acts as a reference table. For each emp id, I need to randomly assign the values from the table variable. Here's the code I tried without randomizing:
-- Attempt as posted (only the @ variable sigils are restored; the logic is unchanged).
USE TSQL2012;
GO
DECLARE @MAX SMALLINT;
DECLARE @MIN SMALLINT;
DECLARE @RECODE SMALLINT;
DECLARE @RE CHAR(100);
-- Reference table variable: absence code plus its description.
DECLARE @rearray table (recode smallint,re char(100));
insert into @rearray values (100,'HIT BY BEER TRUCK')
,(200,'BAD HAIR DAY')
,(300,'ASPIRIN OVERDOSE')
,(400,'MAKEUP DISASTER')
,(500,'GOT LOCKED IN THE SALOON')
DECLARE @REFCURSOR AS CURSOR;
SET @REFCURSOR = CURSOR FOR
SELECT RECODE,RE FROM @REARRAY;
OPEN @REFCURSOR;
SET @MAX = (SELECT DISTINCT @@ROWCOUNT FROM ABSENCE);
SET @MIN = 0;
ALTER TABLE ABSENCE ADD CODE SMALLINT, REASONING CHAR(100);
WHILE (@MIN <= @MAX)
BEGIN
FETCH NEXT FROM @REFCURSOR INTO @RECODE,@RE;
-- INSERT adds brand-new rows (no EMPID is supplied); it does not fill CODE/REASONING
-- on the rows that already exist.
INSERT INTO ABSENCE (CODE, REASONING) VALUES (@RECODE,@RE);
SET @MIN+=1;
END
CLOSE @REFCURSOR
DEALLOCATE @REFCURSOR
SELECT EMPID,CODE,REASONING FROM ABSENCE
Though I am inserting into two columns only, it attempts to insert into empid (which has already been filled), and as it cannot be NULL, the insertion fails.
Also, how to randomize the values from the REARRAY table variable to insert them into the ABSENCE table?

Since this is a small dataset, one approach might be to use CROSS APPLY with a SELECT TOP(1) ... FROM @rearray ORDER BY NEWID() approach. This will essentially join your ABSENCE table with your reference table in an UPDATE statement, selecting a random row each time in the join. In full, it would look like:
-- Fill CODE and REASONING on the existing ABSENCE rows from a randomly chosen row of the
-- @rearray reference table variable; both values come from the same reference row.
-- NOTE(review): the subquery does not reference the outer row; check on real data that
-- each ABSENCE row gets its own random pick rather than one pick reused for all rows.
UPDATE a
SET CODE = x.recode,
    REASONING = x.re
FROM ABSENCE AS a
CROSS APPLY (
    SELECT TOP (1) r.recode, r.re
    FROM @rearray AS r
    ORDER BY NEWID()
) AS x;

INSERT INTO View, INSTEAD OF Trigger, Identity, multiple tables?

Here are my tables (Entier = Integer // Caractère long variable = Varchar):
http://i.stack.imgur.com/lNjyy.jpg
I created a view V_Enterprise(idContact, phoneNumber, email, name, city, adress)
I tried to create a Trigger on that View to allow users to update the view :
-- Attempt as posted (only the @ sigils are restored; the logic is unchanged).
CREATE TRIGGER test
ON V_Entreprise
INSTEAD OF INSERT
AS
DECLARE @T_ContactId INT
BEGIN
INSERT INTO T_Contact
SELECT i.phoneNumber, i.email
FROM Inserted i
-- A single identity value is captured here, so on a multi-row insert every
-- T_Entreprise row below receives the same contact id.
SELECT @T_ContactId = @@IDENTITY
INSERT INTO T_Entreprise
SELECT @T_ContactId, i.name, i.city, i.adress
FROM Inserted i
END ;
As I expected, it works on simple inserts, but when I add several rows at once, it fails because @T_ContactId only contains a single id. Can someone help me fix it? I feel like I should use INNER JOIN inserts, but I can't figure out how to deal with it.

OK, you should never set scalar variables to a value from inserted or deleted in a trigger.
Use the OUTPUT clause instead to get your id values back.

This trigger uses a loop over a cursor and won't require any particular uniqueness in the tables;
-- INSTEAD OF INSERT trigger on the view: walks the inserted rows one at a time, creating
-- the T_Contact row first and then the T_Enterprise row that points at it.
CREATE TRIGGER test
ON V_Enterprise
INSTEAD OF INSERT
AS
BEGIN
    DECLARE @name VARCHAR(32);
    DECLARE @city VARCHAR(32);
    DECLARE @address VARCHAR(32);
    DECLARE @pn VARCHAR(32);
    DECLARE @email VARCHAR(32);
    DECLARE @contactId INT;

    -- LOCAL keeps the cursor name private to this trigger invocation.
    DECLARE cursor1 CURSOR LOCAL FAST_FORWARD FOR
        SELECT name, city, address, phoneNumber, email FROM inserted;

    OPEN cursor1;
    FETCH NEXT FROM cursor1 INTO @name, @city, @address, @pn, @email;
    WHILE @@FETCH_STATUS = 0
    BEGIN
        INSERT INTO T_Contact (phoneNumber, email) VALUES (@pn, @email);
        -- SCOPE_IDENTITY() is limited to this scope, so an identity generated by a
        -- trigger on T_Contact cannot be picked up by mistake (as @@IDENTITY could).
        SET @contactId = SCOPE_IDENTITY();

        INSERT INTO T_Enterprise (idcontact, name, city, address)
        VALUES (@contactId, @name, @city, @address);

        FETCH NEXT FROM cursor1 INTO @name, @city, @address, @pn, @email;
    END
    CLOSE cursor1;
    DEALLOCATE cursor1;
END
GO

I don't know if this is a good way to do it, but you can do this without relying on unique columns or using a cursor using the OUTPUT clause for INSERT. This approach does make use of an in-memory temporary table that could get big with large inserts.
-- Capture the ids generated for the new contacts, then create one enterprise row per id.
DECLARE @Table TABLE ([NewID] BIGINT);

INSERT INTO T_Contact (PhoneNumber)
OUTPUT Inserted.ID INTO @Table ([NewID])
SELECT PhoneNumber
FROM inserted;  -- the original had a dangling WHERE with no predicate here

INSERT INTO T_Enterprise (Contact_ID)
SELECT [NewID]
FROM @Table;

If phoneNumber and email are a unique key in T_Contact then you could do this:
-- INSTEAD OF INSERT trigger on the view: inserts the contacts, then finds each new contact
-- id again through (phoneNumber, email), which must be unique in T_Contact for the scalar
-- subquery to return a single row.
CREATE TRIGGER test
ON V_Entreprise
INSTEAD OF INSERT
AS
BEGIN
    INSERT INTO T_Contact
    SELECT i.phoneNumber, i.email
    FROM Inserted AS i;

    -- The scalar @T_ContactId / @@IDENTITY read was unused in this version and is removed.
    INSERT INTO T_Entreprise
    SELECT
        (SELECT c.idContact
         FROM T_Contact AS c
         WHERE c.phoneNumber = i.phoneNumber
           AND c.email = i.email),
        i.name,
        i.city,
        i.adress
    FROM Inserted AS i;
END ;

Using merge..output to get mapping between source.id and target.id

Very simplified, I have two tables Source and Target.
-- Two table variables with non-overlapping identity sequences (odd vs. even ids), so a
-- SourceID can never be mistaken for a TargetID in the results.
declare @Source table (SourceID int identity(1,2), SourceName varchar(50))
declare @Target table (TargetID int identity(2,2), TargetName varchar(50))
insert into @Source (SourceName) values ('Row 1'), ('Row 2')
I would like to move all rows from @Source to @Target and know the TargetID for each SourceID, because there are also the tables SourceChild and TargetChild that need to be copied as well, and I need to add the new TargetID into the TargetChild.TargetID FK column.
There are a couple of solutions to this.
Use a while loop or cursors to insert one row (RBAR) to Target at a time and use scope_identity() to fill the FK of TargetChild.
Add a temp column to @Target and insert SourceID. You can then join that column to fetch the TargetID for the FK in TargetChild.
SET IDENTITY_INSERT OFF for @Target and handle assigning new values yourself. You get a range that you then use in TargetChild.TargetID.
I'm not all that fond of any of them. The one I used so far is cursors.
What I would really like to do is to use the output clause of the insert statement.
-- Desired form; it is rejected because the OUTPUT clause of this INSERT cannot bind
-- S.SourceID from the source query.
insert into @Target(TargetName)
output inserted.TargetID, S.SourceID
select SourceName
from @Source as S
But it is not possible
The multi-part identifier "S.SourceID" could not be bound.
But it is possible with a merge.
-- The always-false join condition sends every source row to the NOT MATCHED branch, and
-- MERGE lets OUTPUT reference source columns, which yields the SourceID -> TargetID map.
merge @Target as T
using @Source as S
on 0=1
when not matched then
insert (TargetName) values (S.SourceName)
output inserted.TargetID, S.SourceID;
Result
TargetID SourceID
----------- -----------
2 1
4 3
I want to know if you have used this? If you have any thoughts about the solution or see any problems with it? It works fine in simple scenarios but perhaps something ugly could happen when the query plan get really complicated due to a complicated source query. Worst scenario would be that the TargetID/SourceID pairs actually isn't a match.
MSDN has this to say about the from_table_name of the output clause.
Is a column prefix that specifies a table included in the FROM clause of a DELETE, UPDATE, or MERGE statement that is used to specify the rows to update or delete.
For some reason they don't say "rows to insert, update or delete" only "rows to update or delete".
Any thoughts are welcome and totally different solutions to the original problem is much appreciated.

In my opinion this is a great use of MERGE and output. I've used in several scenarios and haven't experienced any oddities to date.
For example, here is test setup that clones a Folder and all Files (identity) within it into a newly created Folder (guid).
-- Test setup: one folder (GUID key), one file (identity key) and the link between them.
DECLARE @FolderIndex TABLE (FolderId UNIQUEIDENTIFIER PRIMARY KEY, FolderName varchar(25));
INSERT INTO @FolderIndex
    (FolderId, FolderName)
    VALUES(newid(), 'OriginalFolder');

DECLARE @FileIndex TABLE (FileId int identity(1,1) PRIMARY KEY, FileName varchar(10));
INSERT INTO @FileIndex
    (FileName)
    VALUES('test.txt');

DECLARE @FileFolder TABLE (FolderId UNIQUEIDENTIFIER, FileId int, PRIMARY KEY(FolderId, FileId));
INSERT INTO @FileFolder
    (FolderId, FileId)
    SELECT FolderId,
           FileId
    FROM @FolderIndex
    CROSS JOIN @FileIndex; -- just to illustrate

-- Old-id -> new-id maps filled by the OUTPUT clauses of the two MERGE statements below.
DECLARE @sFolder TABLE (FromFolderId UNIQUEIDENTIFIER, ToFolderId UNIQUEIDENTIFIER);
DECLARE @sFile TABLE (FromFileId int, ToFileId int);

-- copy Folder Structure
-- d.Dummy is always 1, so "d.Dummy = 0" never matches and every source row is inserted.
MERGE @FolderIndex fi
USING ( SELECT 1 [Dummy],
               FolderId,
               FolderName
        FROM @FolderIndex [fi]
        WHERE FolderName = 'OriginalFolder'
      ) d ON d.Dummy = 0
WHEN NOT MATCHED
THEN INSERT
    (FolderId, FolderName)
    VALUES (newid(), 'copy_'+FolderName)
OUTPUT d.FolderId,
       INSERTED.FolderId
INTO @sFolder (FromFolderId, toFolderId);

-- copy File structure
MERGE @FileIndex fi
USING ( SELECT 1 [Dummy],
               fi.FileId,
               fi.[FileName]
        FROM @FileIndex fi
        INNER
        JOIN @FileFolder fm ON
             fi.FileId = fm.FileId
        INNER
        JOIN @FolderIndex fo ON
             fm.FolderId = fo.FolderId
        WHERE fo.FolderName = 'OriginalFolder'
      ) d ON d.Dummy = 0
WHEN NOT MATCHED
THEN INSERT ([FileName])
    VALUES ([FileName])
OUTPUT d.FileId,
       INSERTED.FileId
INTO @sFile (FromFileId, toFileId);

-- link new files to Folders
INSERT INTO @FileFolder (FileId, FolderId)
    SELECT sfi.toFileId, sfo.toFolderId
    FROM @FileFolder fm
    INNER
    JOIN @sFile sfi ON
         fm.FileId = sfi.FromFileId
    INNER
    JOIN @sFolder sfo ON
         fm.FolderId = sfo.FromFolderId

-- return
SELECT *
FROM @FileIndex fi
JOIN @FileFolder ff ON
     fi.FileId = ff.FileId
JOIN @FolderIndex fo ON
     ff.FolderId = fo.FolderId

I would like to add another example alongside @Nathan's example, as I found it somewhat confusing.
Mine uses real tables for the most part, and not temp tables.
I also got my inspiration from here: another example
-- Copy the FormSectionInstance
DECLARE #FormSectionInstanceTable TABLE(OldFormSectionInstanceId INT, NewFormSectionInstanceId INT)
;MERGE INTO [dbo].[FormSectionInstance]
USING
(
SELECT
fsi.FormSectionInstanceId [OldFormSectionInstanceId]
, #NewFormHeaderId [NewFormHeaderId]
, fsi.FormSectionId
, fsi.IsClone
, #UserId [NewCreatedByUserId]
, GETDATE() NewCreatedDate
, #UserId [NewUpdatedByUserId]
, GETDATE() NewUpdatedDate
FROM [dbo].[FormSectionInstance] fsi
WHERE fsi.[FormHeaderId] = #FormHeaderId
) tblSource ON 1=0 -- use always false condition
WHEN NOT MATCHED
THEN INSERT
( [FormHeaderId], FormSectionId, IsClone, CreatedByUserId, CreatedDate, UpdatedByUserId, UpdatedDate)
VALUES( [NewFormHeaderId], FormSectionId, IsClone, NewCreatedByUserId, NewCreatedDate, NewUpdatedByUserId, NewUpdatedDate)
OUTPUT tblSource.[OldFormSectionInstanceId], INSERTED.FormSectionInstanceId
INTO #FormSectionInstanceTable(OldFormSectionInstanceId, NewFormSectionInstanceId);
-- Copy the FormDetail
INSERT INTO [dbo].[FormDetail]
(FormHeaderId, FormFieldId, FormSectionInstanceId, IsOther, Value, CreatedByUserId, CreatedDate, UpdatedByUserId, UpdatedDate)
SELECT
#NewFormHeaderId, FormFieldId, fsit.NewFormSectionInstanceId, IsOther, Value, #UserId, CreatedDate, #UserId, UpdatedDate
FROM [dbo].[FormDetail] fd
INNER JOIN #FormSectionInstanceTable fsit ON fsit.OldFormSectionInstanceId = fd.FormSectionInstanceId
WHERE [FormHeaderId] = #FormHeaderId

Here's a solution that doesn't use MERGE (which I've had problems with many times, so I try to avoid it if possible). It relies on two memory tables (you could use temp tables if you want) with IDENTITY columns that get matched, and importantly, on using ORDER BY when doing the INSERT, with WHERE conditions that match between the two INSERTs... the first one holds the source IDs and the second one holds the target IDs.
-- Setup... We have a table that we need to know the old IDs and new IDs after copying.
-- We want to copy all of DocID=1
DECLARE @newDocID int = 99;
DECLARE @tbl table (RuleID int PRIMARY KEY NOT NULL IDENTITY(1, 1), DocID int, Val varchar(100));
INSERT INTO @tbl (DocID, Val) VALUES (1, 'RuleA-2'), (1, 'RuleA-1'), (2, 'RuleB-1'), (2, 'RuleB-2'), (3, 'RuleC-1'), (1, 'RuleA-3')
-- Create a break in IDENTITY values.. just to simulate more realistic data
INSERT INTO @tbl (Val) VALUES ('DeleteMe'), ('DeleteMe');
DELETE FROM @tbl WHERE Val = 'DeleteMe';
INSERT INTO @tbl (DocID, Val) VALUES (6, 'RuleE'), (7, 'RuleF');
SELECT * FROM @tbl t;
-- Declare TWO table variables each with an IDENTITY - one will hold the RuleID of the items we are copying, other will hold the RuleID that we create
DECLARE @input table (RID int IDENTITY(1, 1), SourceRuleID int NOT NULL, Val varchar(100));
DECLARE @output table (RID int IDENTITY(1,1), TargetRuleID int NOT NULL, Val varchar(100));
-- Capture the IDs of the rows we will be copying by inserting them into the @input table
-- Important - we must specify the sort order - best thing is to use the IDENTITY of the source table (t.RuleID) that we are copying
INSERT INTO @input (SourceRuleID, Val) SELECT t.RuleID, t.Val FROM @tbl t WHERE t.DocID = 1 ORDER BY t.RuleID;
-- Copy the rows, and use the OUTPUT clause to capture the IDs of the inserted rows.
-- Important - we must use the same WHERE and ORDER BY clauses as above
-- NOTE(review): the pairing relies on OUTPUT rows reaching @output in the same order as the
-- ORDER BY; confirm against the OUTPUT clause documentation before relying on it.
INSERT INTO @tbl (DocID, Val)
OUTPUT Inserted.RuleID, Inserted.Val INTO @output(TargetRuleID, Val)
SELECT @newDocID, t.Val FROM @tbl t
WHERE t.DocID = 1
ORDER BY t.RuleID;
-- Now @input and @output should have the same # of rows, and the order of both inserts was the same, so the IDENTITY columns (RID) can be matched
-- Use this as the map from old-to-new when you are copying sub-table rows
-- Technically, @input and @output don't even need the 'Val' columns, just RID and RuleID - they were included here to prove that the rules matched
SELECT i.*, o.* FROM @output o
INNER JOIN @input i ON i.RID = o.RID
-- Confirm the matching worked
SELECT * FROM @tbl t

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight