Insert two columns from different tables - sql-server

I need to write an INSERT query that adds rows to a table using data from different tables. I have:
A variable @ID which contains the ID of the newly inserted row in table (1).
A table (2) which contains some IDs.
A table (3) which defines the relation between the two tables above.
Now I need to insert, for each ID in (2), a new row into table (3).
So if @ID = 2 and the IDs are 1, 2, 3, 4, 5, 6, I want to insert the following rows into table (3):
table1_ID  table2_ID
---------  ---------
1          1
1          2
1          3
1          4
1          5

Assuming that the value 6 was not meant to be discarded (if it was, please explain the logic). Also assuming that you simply want a row in table (3) for every row in table (2):
INSERT dbo.Table3(table1_ID, table2_ID)
SELECT @ID, ID
FROM dbo.Table2
-- WHERE ID <> 6???
;
If the list of IDs is really a CSV, then:
First create a split function (several alternatives are described in the article linked at the end of this answer, though they output strings instead of integers), e.g.
CREATE FUNCTION dbo.SplitInts
(
    @List VARCHAR(MAX),
    @Delimiter VARCHAR(255)
)
RETURNS @t TABLE(Item INT)
AS
BEGIN
    INSERT @t(Item) SELECT CONVERT(INT, SUBSTRING(@List, Number,
        CHARINDEX(@Delimiter, @List + @Delimiter, Number) - Number))
    FROM (SELECT ROW_NUMBER() OVER (ORDER BY [object_id])
          FROM sys.all_objects) AS n(Number)
    WHERE Number <= CONVERT(INT, LEN(@List))
      AND SUBSTRING(@Delimiter + @List, Number, 1) = @Delimiter;
    RETURN;
END
GO
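A quick smoke test of the function (illustrative only; the list is the one from the question):
SELECT Item FROM dbo.SplitInts('1,2,3,4,5,6', ',');
-- returns six rows: 1, 2, 3, 4, 5, 6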
Now your stored procedure can simply say:
CREATE PROCEDURE dbo.whatever
    @ID INT,
    @IDs VARCHAR(MAX)
AS
BEGIN
    SET NOCOUNT ON;
    INSERT dbo.Table3(table1_ID, table2_ID)
    SELECT @ID, Item
    FROM dbo.SplitInts(@IDs, ',');
END
GO
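For illustration, the CSV version would then be called like this (values taken from the question's example):
EXEC dbo.whatever @ID = 2, @IDs = '1,2,3,4,5,6';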
However, if you are on SQL Server 2008 or above, and this list of comma-separated IDs is coming from your application (e.g. from a DataTable or some other set), you could use a table-valued parameter and avoid the need for splitting.
CREATE TYPE dbo.SetOfIntegers AS TABLE
    ( Number INT );
GO
CREATE PROCEDURE dbo.whatever
    @ID INT,
    @IDs dbo.SetOfIntegers READONLY
AS
BEGIN
    SET NOCOUNT ON;
    INSERT dbo.Table3(table1_ID, table2_ID)
    SELECT @ID, Number
    FROM @IDs;
END
GO
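Before touching the application code, you can exercise the TVP version straight from T-SQL; a minimal sketch using the question's sample values:
DECLARE @t dbo.SetOfIntegers;
INSERT @t (Number) VALUES (1),(2),(3),(4),(5),(6);
EXEC dbo.whatever @ID = 2, @IDs = @t;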
Then you have to change the C# code to pass your DataTable in as SqlDbType.Structured instead of passing your CSV in as a string. More info here:
http://www.sqlperformance.com/2012/08/t-sql-queries/splitting-strings-now-with-less-t-sql

Related

SQL Server: Fastest way to transform millions of rows of data from one table to multiple others

I have a staging table with more than 6 million rows of flattened data, bulk inserted from a CSV file.
I need to take each row, convert various column values from varchar to int/decimal/datetime, and insert each row into numerous new database tables, all with foreign key relationships. For now I'll simplify these to: (Parent, Child, OptionalChild).
I don't need to read it row by row, as each single row contains the parent/child/optional-child flattened data.
I am currently going through the records row by row using a SELECT TOP 1, then deleting each row after it's processed, but this is obviously taking hours.
I would appreciate some faster / more efficient approaches.
DECLARE @Id UNIQUEIDENTIFIER;
DECLARE @Date DATETIME2;
DECLARE @Code VARCHAR(10);
DECLARE @Number INT;
DECLARE @OptionalChildCode VARCHAR(10);
DECLARE @OptionalChildNumber INT;
WHILE EXISTS(SELECT * FROM Records)
BEGIN
    SELECT TOP 1
        @Id = Id,
        @Date = Date,
        @Code = Code,
        @Number = Number,
        @OptionalChildCode = OptionalChildCode,
        @OptionalChildNumber = OptionalChildNumber
    FROM Records

    DECLARE @OutputTable AS TABLE(Id UNIQUEIDENTIFIER, Name VARCHAR(10))
    DELETE FROM @OutputTable -- clear the previous iteration's row

    INSERT INTO Parent (ParentDate)
    OUTPUT INSERTED.Id, 'ParentId' INTO @OutputTable(Id, Name)
    VALUES (CONVERT(DATETIME2, @Date, 20))

    INSERT INTO Child (ParentId, ChildCode, ChildNumber)
    VALUES (
        (SELECT Id FROM @OutputTable WHERE Name = 'ParentId'),
        @Code,
        CONVERT(INT, @Number)
    )

    IF (@OptionalChildCode IS NOT NULL)
    BEGIN
        INSERT INTO Child (ParentId, ChildCode, ChildNumber)
        VALUES (
            (SELECT Id FROM @OutputTable WHERE Name = 'ParentId'),
            @OptionalChildCode,
            CONVERT(INT, @OptionalChildNumber)
        )
    END

    DELETE FROM Records WHERE Id = @Id
END
Records table (all columns from CSV bulk import):
Columns: Id INT, Date VARCHAR(50), Code VARCHAR(50), Number VARCHAR(50), OptionalChildCode VARCHAR(50), OptionalChildNumber VARCHAR(50)
Target tables:
--Parent
Columns: (Id UNIQUEIDENTIFIER, ParentDate DATETIME2)
--Child
Columns: (Id UNIQUEIDENTIFIER, ParentId UNIQUEIDENTIFIER, ChildCode VARCHAR(10), ChildNumber INT)
Sample data (a row from Records table):
1, "2020-01-01-00-00", "Code123", "55", "OptionalCode456", "66"
Expected results:
--Row from Parent table:
111-222-333, 2020-01-01-00-00
--Row from Child table:
333-333-333, 111-222-333, "Code123", 55
--Row from Child table from optional child:
444-444-444, 111-222-333, "OptionalCode456", 66
The issue here is mainly that you need to get the inserted identity numbers matched against the original table, at the same time as inserting multiple child rows. You cannot use OUTPUT in an INSERT to output anything other than inserted columns.
We can start by using a hack involving MERGE to output the other columns.
We can then conditionally unpivot those rows in order to get one or two child rows to insert.
DECLARE @OutputTable AS TABLE(
    Id UNIQUEIDENTIFIER,
    Code VARCHAR(10),
    Number INT,
    OptionalChildCode VARCHAR(10),
    OptionalChildNumber INT);

MERGE Parent p
USING Records r
ON 1 = 0 -- never match
WHEN NOT MATCHED THEN
    INSERT (ParentDate)
    VALUES (CONVERT(DATETIME2, r.[Date], 20))
OUTPUT inserted.Id, r.Code, CONVERT(INT, r.Number), r.OptionalChildCode, CONVERT(INT, r.OptionalChildNumber)
INTO @OutputTable (Id, Code, Number, OptionalChildCode, OptionalChildNumber)
;

INSERT INTO Child
    (ParentId, ChildCode, ChildNumber)
SELECT t.Id, v.Code, v.Number
FROM @OutputTable t
CROSS APPLY (
    SELECT t.Code, t.Number
    UNION ALL
    SELECT t.OptionalChildCode, t.OptionalChildNumber
    WHERE t.OptionalChildCode IS NOT NULL
) v;
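Why MERGE works here: unlike INSERT, the OUTPUT clause of a MERGE may reference source-table (r.) columns, and ON 1 = 0 guarantees every Records row takes the WHEN NOT MATCHED branch, so the MERGE behaves exactly like a plain INSERT. A rough sanity check afterwards (a sketch, assuming only the single sample row from the question):
SELECT COUNT(*) AS ParentRows FROM Parent; -- expect one row per Records row (1 here)
SELECT COUNT(*) AS ChildRows FROM Child;   -- expect one extra row per non-NULL OptionalChildCode (2 here)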

How to pass multiple input IDs into a stored procedure and get the IDs that have all input IDs?

My goal is that when the user chooses products, the application returns the suppliers that provide all of the chosen products. However, a stored procedure's input must be a fixed parameter, so I can't pass in a list of IDs the way I would in C#.
So, how can I pass a list of IDs into the stored procedure and get back the supplier IDs that supply every added product ID? See my table below (InventoryOfSentoId is the ProductId):
If the input is 1, then the output is supplier IDs 1 and 3.
If the input is 2, 3 then the output is supplier ID 3.
If you are using SQL Server 2014 (which predates the built-in STRING_SPLIT function added in 2016), it is easy to create your own split function. The one I use is the following (but you can google for others that are more efficient, particularly if your list is long):
CREATE FUNCTION [dbo].[fnSplit](
    @sDelimiter VARCHAR(5)    -- delimiter that separates items
  , @sInputList VARCHAR(MAX)  -- list of delimited items
) RETURNS @List TABLE (item VARCHAR(MAX))
AS
BEGIN
    DECLARE @sItem VARCHAR(MAX)
    WHILE CHARINDEX(@sDelimiter, @sInputList, 0) <> 0
    BEGIN
        SELECT
            @sItem = RTRIM(LTRIM(SUBSTRING(@sInputList, 1, CHARINDEX(@sDelimiter, @sInputList, 0) - 1))),
            @sInputList = RTRIM(LTRIM(SUBSTRING(@sInputList, CHARINDEX(@sDelimiter, @sInputList, 0) + LEN(@sDelimiter), LEN(@sInputList))))
        IF LEN(@sItem) > 0
            INSERT INTO @List SELECT @sItem
    END
    IF LEN(@sInputList) > 0
        INSERT INTO @List SELECT @sInputList -- put the last item in
    RETURN
END
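A quick check of the function against the question's input (expected output shown as a comment):
SELECT item FROM dbo.fnSplit(',', '2,3');
-- returns two rows: 2 and 3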
You will then be able to achieve what you want with:
DECLARE @demo TABLE (SupplierID INT, InventoryOfSentoID INT)
DECLARE @list VARCHAR(10) = '2,3'
DECLARE @ids TABLE (SentoID INT)
DECLARE @cnt INT

INSERT INTO @demo VALUES (1,1),(1,2),(2,5),(3,3),(3,2),(3,1)

INSERT INTO @ids
SELECT * FROM dbo.fnSplit(',', @list)

SELECT @cnt = COUNT(*) FROM @ids

SELECT SupplierID FROM @demo d
INNER JOIN @ids i
    ON i.SentoID = d.InventoryOfSentoID
GROUP BY SupplierID
HAVING COUNT(*) = @cnt
By way of explanation, note the use of the HAVING restriction. This is to ensure that only those suppliers who supply all the elements in the list are returned. If you did not have this restriction, the resultset would include supplierid 1, which from your question I presumed you wanted to exclude.
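One defensive tweak worth considering (an assumption on my part, since the question doesn't say whether a supplier can carry the same product twice): count distinct matched IDs so duplicate rows can't inflate the tally:
SELECT SupplierID FROM @demo d
INNER JOIN @ids i
    ON i.SentoID = d.InventoryOfSentoID
GROUP BY SupplierID
HAVING COUNT(DISTINCT i.SentoID) = @cnt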

Adding constraints to list items in SQL Server database

I have a table holding items for a given list ID in my MS SQL Server database (2008 R2).
I would like to add constraints so that no two list IDs have the same item list. Below illustrates my schema.
ListID , ItemID
1 a
1 b
2 a
3 a
3 b
In the above example, ListID 3 should fail because its item list {a, b} duplicates ListID 1's. I guess you can't put a constraint/check within the database itself (triggers, check constraints), and the logic can only be enforced from the frontend?
Thanks in advance for any help.
Create a function that performs the logic you want and then create a check constraint or index that leverages that function.
Here is a functional example; the final insert fails. The function is evaluated row by row, so if you need to insert a set and evaluate afterwards, you'd need an INSTEAD OF trigger:
CREATE TABLE dbo.Test(ListID INT, ItemID CHAR(1))
GO
CREATE FUNCTION dbo.TestConstraintPassed(@ListID INT, @ItemID CHAR(1))
RETURNS TINYINT
AS
BEGIN
    DECLARE @retVal TINYINT = 0;
    DECLARE @data TABLE (ListID INT, ItemID CHAR(1), [Match] INT)
    INSERT INTO @data(ListID, ItemID, [Match])
    SELECT ListID, ItemID, -1 AS [Match] FROM dbo.Test

    UPDATE @data
    SET [Match] = 1
    WHERE ItemID IN (SELECT ItemID FROM @data WHERE ListID = @ListID)

    DECLARE @MatchCount INT
    SELECT @MatchCount = SUM([Match]) FROM @data WHERE ListID = @ListID

    IF NOT EXISTS(
        SELECT *
        FROM (
            SELECT ListID, SUM([Match]) AS [MatchCount]
            FROM @data
            WHERE ListID <> @ListID
            GROUP BY ListID
        ) dat
        WHERE @MatchCount = [MatchCount]
    )
    BEGIN
        SET @retVal = 1;
    END
    RETURN @retVal;
END
GO
ALTER TABLE dbo.Test
    ADD CONSTRAINT chkTest
    CHECK (dbo.TestConstraintPassed(ListID, ItemID) = 1);
GO
INSERT INTO dbo.Test(ListID, ItemID) SELECT 1, 'a'
INSERT INTO dbo.Test(ListID, ItemID) SELECT 1, 'b'
INSERT INTO dbo.Test(ListID, ItemID) SELECT 2, 'a'
INSERT INTO dbo.Test(ListID, ItemID) SELECT 2, 'b'
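If the constraint works as intended, the first three inserts succeed and the fourth is rejected, because list 2 would then hold exactly the same item set {a, b} as list 1. The failure surfaces as a CHECK constraint violation, roughly:
-- Msg 547: The INSERT statement conflicted with the CHECK constraint "chkTest".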
Related

Count # of Rows in Stored Procedure Result, then Insert result into table

I have an SSIS package which first runs my sp_doSomething. This stored procedure selects data from several different tables and joins them for possible storage in dbo.someTable, but I only want that to happen IF the select returns more than one row of data.
I want a precedence constraint that looks at the number of rows my stored procedure returned.
If my row count > 1, then I want to take the results of the stored procedure and insert them into one of my tables.
Otherwise, I will record an error/send an email, or whatever.
I really don't want to run this stored procedure more than once, but that is the only way I could think to do it (run it and count the rows; then run it again and insert the result).
I'm a complete TSQL/SSIS newb, so I'm sorry if this question is trivial. I can't find a good answer anywhere.
Create a variable with package scope, of type Int32, named rowcount.
(The original answer illustrated the rest with Data Flow and Control Flow screenshots, which are not reproduced here.)
you can try this:
DECLARE @tableVar TABLE (col1 VARCHAR(100))
DECLARE @Counter INT
INSERT INTO @tableVar (col1) EXEC CompanyNames
SET @Counter = (SELECT COUNT(*) FROM @tableVar)
INSERT INTO Anytable (col) VALUES (@Counter)
Within the stored proc, write the results to a #Temp table. Then SELECT COUNT(*) from the #Temp table into a variable:
SELECT @intRows = COUNT(*) FROM #Temp
Then evaluate the value of @intRows:
IF @intRows > 1
BEGIN
    INSERT INTO dbo.SomeTable
    SELECT * FROM #Temp
END
Will a #temp table work for you?
IF OBJECT_ID('tempdb..#Holder') IS NOT NULL
BEGIN
    DROP TABLE #Holder
END

CREATE TABLE #Holder
(ID INT)

DECLARE @MyRowCount INT
DECLARE @MyTotalCount INT = 0

/* simulate your insert; you would read from your real table(s) here */
INSERT INTO #Holder (ID)
SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4

SELECT @MyRowCount = @@ROWCOUNT, @MyTotalCount = @MyTotalCount + @MyRowCount
SELECT 'TheMagicValue1' = @MyRowCount, 'TheMagicTotal' = @MyTotalCount

INSERT INTO #Holder (ID)
SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8

/* note that I am NOT doing a COUNT(*) here... which would be another strain on the procedure */
SELECT @MyRowCount = @@ROWCOUNT, @MyTotalCount = @MyTotalCount + @MyRowCount
SELECT 'TheMagicValue1' = @MyRowCount, 'TheMagicTotal' = @MyTotalCount

/* optional index if needed */
CREATE INDEX IDX_TempHolder_ID ON #Holder (ID)
/* CREATE CLUSTERED INDEX IDX_TempHolder_ID ON #Holder (ID) */

IF @MyTotalCount > 0
BEGIN
    SELECT 'Put your INSERT statement here'
END

/* this will return the data to the report */
SELECT ID FROM #Holder

IF OBJECT_ID('tempdb..#Holder') IS NOT NULL
BEGIN
    DROP TABLE #Holder
END

Do Inserted Records Always Receive Contiguous Identity Values

Consider the following SQL:
CREATE TABLE Foo
(
ID int IDENTITY(1,1),
Data nvarchar(max)
)
INSERT INTO Foo (Data)
SELECT TOP 1000 Data
FROM SomeOtherTable
WHERE SomeColumn = @SomeParameter
DECLARE @LastID int
SET @LastID = SCOPE_IDENTITY()
I would like to know if I can depend on the 1000 rows that I inserted into table Foo having contiguous identity values. In other words, if this SQL block produces a @LastID of 2000, can I know for certain that the ID of the first record I inserted was 1001? I am mainly curious about multiple statements inserting records into table Foo concurrently.
I know that I could add a serializable transaction around my insert statement to ensure the behavior that I want, but do I really need to? I'm worried that introducing a serializable transaction will degrade performance, but if SQL Server won't allow other statements to insert into table Foo while this statement is running, then I don't have to worry about it.
I disagree with the accepted answer. This can easily be tested and disproved by running the following: Connection 1 inserts single rows in a tight loop, while Connection 2 inserts batches of 1000 rows and checks whether each batch's MAX(ID) - MIN(ID) equals 999; the first batch where it doesn't proves the identity values were interleaved.
Setup
USE tempdb
CREATE TABLE Foo
(
ID int IDENTITY(1,1),
Data nvarchar(max)
)
Connection 1
USE tempdb
SET NOCOUNT ON
WHILE NOT EXISTS(SELECT * FROM master..sysprocesses WHERE context_info = CAST('stop' AS VARBINARY(128) ))
BEGIN
INSERT INTO Foo (Data)
VALUES ('blah')
END
Connection 2
USE tempdb
SET NOCOUNT ON
SET CONTEXT_INFO 0x
DECLARE @Output TABLE(ID INT)
WHILE 1 = 1
BEGIN
    /* clear out table variable from previous loop */
    DELETE FROM @Output

    /* insert 1000 records */
    INSERT INTO Foo (Data)
    OUTPUT inserted.ID INTO @Output
    SELECT TOP 1000 NEWID()
    FROM sys.all_columns

    IF EXISTS(SELECT * FROM @Output HAVING MAX(ID) - MIN(ID) <> 999)
    BEGIN
        /* set CONTEXT_INFO so the other connection, inserting
           a single record in a loop, terminates itself */
        DECLARE @stop VARBINARY(128)
        SET @stop = CAST('stop' AS VARBINARY(128))
        SET CONTEXT_INFO @stop

        /* return results for inspection */
        SELECT ID, DENSE_RANK() OVER (ORDER BY Grp) AS ContigSection
        FROM (SELECT ID, ID - ROW_NUMBER() OVER (ORDER BY [ID]) AS Grp
              FROM @Output) O
        ORDER BY ID
        RETURN
    END
END
Yes, they will be contiguous because the INSERT is atomic: complete success or full rollback. It is also performed as a single unit of work: you won't get any "interleaving" with other processes.
However (or to put your mind at rest!), consider the OUTPUT clause
DECLARE @KeyStore TABLE (ID int NOT NULL)

INSERT INTO Foo (Data)
OUTPUT INSERTED.ID INTO @KeyStore (ID) -- this line
SELECT TOP 1000 Data
FROM SomeOtherTable
WHERE SomeColumn = @SomeParameter
If you want the identity values for multiple rows, use OUTPUT:
DECLARE @NewIDs table (PKColumn int)

INSERT INTO Foo (Data)
OUTPUT INSERTED.ID
    INTO @NewIDs (PKColumn)
SELECT TOP 1000 Data
FROM SomeOtherTable
WHERE SomeColumn = @SomeParameter
you now have the entire set of values in the @NewIDs table. You can add any columns from the Foo table into the @NewIDs table and insert those columns as well.
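For example, a sketch (using the same hypothetical Foo/SomeOtherTable names as above) that captures the Data column alongside each new ID:
DECLARE @NewRows table (PKColumn int, Data nvarchar(max))

INSERT INTO Foo (Data)
OUTPUT INSERTED.ID, INSERTED.Data
    INTO @NewRows (PKColumn, Data)
SELECT TOP 1000 Data
FROM SomeOtherTable
WHERE SomeColumn = @SomeParameter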
It is not good practice to attach any sort of meaning whatsoever to identity values. You should assume that they are nothing more than integers guaranteed to be unique within the scope of your table.
Try adding the following:
OPTION (MAXDOP 1)
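The hint goes at the end of the INSERT...SELECT and forces a serial plan. That removes parallelism as a source of out-of-order identity allocation, though it is still not a documented contiguity guarantee. A sketch:
INSERT INTO Foo (Data)
SELECT TOP 1000 Data
FROM SomeOtherTable
WHERE SomeColumn = @SomeParameter
OPTION (MAXDOP 1);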
