How to handle T-SQL missing columns when using OPENJSON function? - sql-server

Consider the three T-SQL scenarios below:
-- Three payloads that differ only in how SSN is supplied. Each one is shredded
-- with OPENJSON ... WITH, which maps every array element onto the declared columns.

-- scenario 1: SSN is specified with a value
DECLARE @Json_Array nvarchar(max) = '[{"Employee_ID":123,"Employee_Name":"John Doe","SSN":123456789}]';
SELECT Employee_ID, Employee_Name, SSN
FROM OPENJSON(@Json_Array)
WITH (Employee_ID int, Employee_Name nvarchar(60), SSN int);

-- scenario 2: SSN is specified with null for the value
SET @Json_Array = '[{"Employee_ID":123,"Employee_Name":"John Doe","SSN":null}]';
SELECT Employee_ID, Employee_Name, SSN
FROM OPENJSON(@Json_Array)
WITH (Employee_ID int, Employee_Name nvarchar(60), SSN int);

-- scenario 3: SSN is not specified
SET @Json_Array = '[{"Employee_ID":123,"Employee_Name":"John Doe"}]';
SELECT Employee_ID, Employee_Name, SSN
FROM OPENJSON(@Json_Array)
WITH (Employee_ID int, Employee_Name nvarchar(60), SSN int);
Both scenarios 2 and 3 return a NULL value for SSN.
How can I determine whether SSN was omitted from @Json_Array altogether? I need this to avoid updating the employee's SSN to NULL when SSN is simply not included in the JSON.

The answer is to select [value] from OPENJSON(@json_item) where [key]='Column_Name'; a row comes back only when that property is present in the JSON object. Below is a full example.
-- End-to-end example: apply a JSON array of partial changes to existing rows,
-- overwriting a column only when its property appears in the change record.
-- A property given as null is present (and so overwrites with NULL);
-- a property that is missing leaves the stored value untouched.

-- create table of original data that will be modified
DECLARE @Json_Array_Original nvarchar(max) = '[
{"Employee_ID":123,"Employee_Name":"John Doe","SSN":123456789},
{"Employee_ID":124,"Employee_Name":"John Smith","SSN":343434343},
{"Employee_ID":125,"Employee_Name":"Jim Doe","SSN":515151515}
]';
DECLARE @Original_Data table (Employee_ID int, Employee_Name nvarchar(20), SSN int);
INSERT INTO @Original_Data (Employee_ID, Employee_Name, SSN)
SELECT Employee_ID, Employee_Name, SSN
FROM OPENJSON(@Json_Array_Original)
WITH (Employee_ID int, Employee_Name nvarchar(20), SSN int);
SELECT Employee_ID, Employee_Name, SSN
FROM @Original_Data;

-- create table of data changes that will be applied
DECLARE @Json_Array_Changes nvarchar(max) = '[
{"Employee_ID":123,"Employee_Name":"Jon Doe","SSN":null},
{"Employee_ID":124,"Employee_Name":"John Smythe"},
{"Employee_ID":125,"SSN":555151515}
]';
DECLARE @Data_Changes table (Employee_ID int, Employee_Name nvarchar(20), SSN int);
INSERT INTO @Data_Changes (Employee_ID, Employee_Name, SSN)
SELECT Employee_ID, Employee_Name, SSN
FROM OPENJSON(@Json_Array_Changes)
WITH (Employee_ID int, Employee_Name nvarchar(20), SSN int);
SELECT Employee_ID, Employee_Name, SSN
FROM @Data_Changes;

-- create table containing json records of data changes
-- (OPENJSON without WITH returns one row per array element as key/value/type)
DECLARE @Json_Items table ([key] int, [value] nvarchar(max), [type] int);
INSERT INTO @Json_Items ([key], [value], [type])
SELECT [key], [value], [type]
FROM OPENJSON(@Json_Array_Changes);

-- create table that specifies with a 1 the columns (properties) of each json
-- record that have new values provided; a missing property yields NULL because
-- the scalar subquery finds no matching key
DECLARE @Json_Keys table (Employee_ID int, Employee_Name bit, SSN bit);
INSERT INTO @Json_Keys (Employee_ID, Employee_Name, SSN)
SELECT
    JSON_VALUE([value], '$.Employee_ID'),
    (SELECT 1 FROM OPENJSON([value]) WHERE [key] = 'Employee_Name'),
    (SELECT 1 FROM OPENJSON([value]) WHERE [key] = 'SSN')
FROM @Json_Items;
SELECT Employee_ID, Employee_Name, SSN
FROM @Json_Keys;

-- apply new values only for columns (properties) that have values specified;
-- a NULL flag fails the "= 1" test, so the ELSE branch keeps the stored value
UPDATE od
SET
    od.Employee_Name = CASE WHEN jk.Employee_Name = 1 THEN dc.Employee_Name ELSE od.Employee_Name END,
    od.SSN = CASE WHEN jk.SSN = 1 THEN dc.SSN ELSE od.SSN END
FROM @Original_Data AS od
INNER JOIN @Data_Changes AS dc
    ON od.Employee_ID = dc.Employee_ID
INNER JOIN @Json_Keys AS jk
    ON dc.Employee_ID = jk.Employee_ID;
SELECT Employee_ID, Employee_Name, SSN
FROM @Original_Data;

Related

SQL Server: Fastest way to transform millions of rows of data from one table to multiple others

I have a staging table with more than 6 million rows of flattened data from a CSV file that I bulk inserted.
I need to take each row, convert various column values from varchar to int/decimal/datetime, and insert each row into several new database tables, all with foreign key relationships. For now I'll simplify this to: (Parent, Child, OptionalChild).
I don't need to read it row by row, as each single row contains the parent/child/optional child flattened data.
I am currently going through the records row by row using a SELECT TOP 1, then deleting each row after it's processed, but this is obviously taking hours.
Would appreciate some faster / more efficient approaches.
-- Row-by-row baseline from the question: take one staging row, insert its
-- Parent, insert its Child (and the optional Child when one is supplied),
-- then delete the staging row. Repeats until Records is empty.
DECLARE @Id INT; -- Records.Id is described as INT in the table listing
DECLARE @Date DATETIME2;
DECLARE @Code VARCHAR(10);
DECLARE @Number INT;
DECLARE @OptionalChildCode VARCHAR(10);
DECLARE @OptionalChildNumber INT;
DECLARE @ParentId UNIQUEIDENTIFIER;
-- Declared once outside the loop: a table variable is not re-created per
-- iteration, so it is emptied explicitly at the top of each pass instead.
DECLARE @OutputTable AS TABLE (Id UNIQUEIDENTIFIER, Name VARCHAR(10));

WHILE EXISTS (SELECT * FROM Records)
BEGIN
    SELECT TOP 1
        @Id = Id,
        @Date = Date,
        @Code = Code,
        @Number = Number,
        @OptionalChildCode = OptionalChildCode,
        @OptionalChildNumber = OptionalChildNumber
    FROM Records;

    -- Drop the previous iteration's captured id so only this row's parent remains.
    DELETE FROM @OutputTable;

    INSERT INTO Parent (ParentDate)
    OUTPUT INSERTED.Id, 'ParentId' INTO @OutputTable (Id, Name)
    VALUES (CONVERT(DATETIME2, @Date, 20));

    SELECT @ParentId = Id
    FROM @OutputTable
    WHERE Name = 'ParentId';

    INSERT INTO Child (ParentId, ChildCode, ChildNumber)
    VALUES (@ParentId, @Code, CONVERT(INT, @Number));

    -- The optional child is written only when the staging row actually has one.
    IF (@OptionalChildCode IS NOT NULL)
    BEGIN
        INSERT INTO Child (ParentId, ChildCode, ChildNumber)
        VALUES (@ParentId, @OptionalChildCode, CONVERT(INT, @OptionalChildNumber));
    END

    DELETE FROM Records WHERE Id = @Id;
END
Records table (all columns from CSV bulk import):
Columns: Id INT, Date VARCHAR(50), Code VARCHAR(50), Number VARCHAR(50), OptionalChildCode VARCHAR(50), OptionalChildNumber VARCHAR(50)
Target tables:
--Parent
Columns: (Id UNIQUEIDENTIFIER, ParentDate DATETIME2)
--Child
Columns: (Id UNIQUEIDENTIFIER, ParentId UNIQUEIDENTIFIER, ChildCode VARCHAR(10), ChildNumber INT)
Sample data (a row from Records table):
1, "2020-01-01-00-00", "Code123", "55", "OptionalCode456", "66"
Expected results:
--Row from Parent table:
111-222-333, 2020-01-01-00-00
--Row from Child table:
333-333-333, 111-222-333, "Code123", 55
--Row from Child table from optional child:
444-444-444, 111-222-333, "OptionalCode456", 66
The issue here is mainly that you need to get the inserted identity numbers matched against the original table, at the same time as inserting multiple child rows. You cannot use OUTPUT in an INSERT to output anything other than inserted columns.
We can start by using a hack involving MERGE to output the other columns.
We can then conditionally unpivot those rows in order to get one or two child rows to insert.
-- Set-based load in two statements.
-- Step 1: insert one Parent per Records row and capture the new Parent id next
--         to the source columns. MERGE is used because its OUTPUT clause can
--         reference source (r.*) columns.
-- Step 2: expand each captured row into one or two Child rows.
DECLARE @OutputTable AS TABLE (
    Id UNIQUEIDENTIFIER,
    Code VARCHAR(10),
    Number INT,
    OptionalChildCode VARCHAR(10),
    OptionalChildNumber INT
);

MERGE Parent AS p
USING Records AS r
    ON 1 = 0 -- never match, so every source row takes the insert branch
WHEN NOT MATCHED THEN
    INSERT (ParentDate)
    VALUES (CONVERT(DATETIME2, r.[Date], 20))
OUTPUT
    inserted.Id,
    r.Code,
    CONVERT(INT, r.Number),
    r.OptionalChildCode,
    CONVERT(INT, r.OptionalChildNumber)
INTO @OutputTable (Id, Code, Number, OptionalChildCode, OptionalChildNumber);

INSERT INTO Child (ParentId, ChildCode, ChildNumber)
SELECT t.Id, v.Code, v.Number
FROM @OutputTable AS t
CROSS APPLY (
    -- mandatory child
    SELECT t.Code, t.Number
    UNION ALL
    -- optional child, emitted only when the staging row carried one
    SELECT t.OptionalChildCode, t.OptionalChildNumber
    WHERE t.OptionalChildCode IS NOT NULL
) AS v;

SQL Query to Append Past Order with Most Recent Order

I have an order table that has both past membership data and current data. I want to view this data in single row. I have a temp table for past data, but not exactly sure how to write this query to get current data in the same row. I know it has something to do with the MAX(order no). Here is the query to get the past membership data in a temp table
-- Copies the past-membership rows of one customer (cycle ending 29 Feb 2016)
-- from ORDER_DETAIL into the temp table #t1, shows them, then drops the table.
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;

-- Declared by the question but not referenced anywhere in this snippet.
DECLARE
    @ship_master_customer_id varchar(10),
    @cycle_begin_date datetime,
    @cycle_end_date datetime,
    @OrderNo varchar(10),
    @Description char(100);

CREATE TABLE #t1 (
    ShipMasterCustomerID varchar(10),
    OrderNo varchar(10),
    cycle_begin_date datetime,
    cycle_end_date datetime,
    Description char(100)
);

INSERT INTO #t1 (ShipMasterCustomerID, OrderNo, cycle_begin_date, cycle_end_date, Description)
SELECT SHIP_MASTER_CUSTOMER_ID, ORDER_NO, CYCLE_BEGIN_DATE, CYCLE_END_DATE, DESCRIPTION
FROM [ORDER_DETAIL]
WHERE SHIP_MASTER_CUSTOMER_ID = '11115555'
    AND CYCLE_END_DATE = '20160229'; -- yyyymmdd form does not depend on session language/DATEFORMAT

SELECT ShipMasterCustomerID, OrderNo, cycle_begin_date, cycle_end_date, Description
FROM #t1;

DROP TABLE #t1;
Here is my script.
-- Sample rows for the question, loaded into the temp table #t2.
-- Declared by the question but not referenced anywhere in this snippet.
DECLARE
    @ship_master_customer_id varchar(10),
    @cycle_begin_date datetime,
    @cycle_end_date datetime,
    @OrderNo varchar(10),
    @Description char(100);

CREATE TABLE #t2 (
    ShipMasterCustomerID varchar(10),
    OrderNo varchar(10),
    cycle_begin_date datetime,
    cycle_end_date datetime,
    Description char(100)
);

-- The dates were written unquoted (e.g. 2015/31/12), which T-SQL evaluates as
-- integer division rather than as a date. They are quoted here as yyyymmdd.
-- NOTE(review): the source values are read as year/day/month, inferred from
-- end dates such as 2015/31/12 and 2016/29/02 — confirm the begin dates
-- 2015/01/03 and 2016/01/03 really mean 1 March.
INSERT INTO #t2 (ShipMasterCustomerID, OrderNo, cycle_begin_date, cycle_end_date, Description)
VALUES
    ('1111555', '9004731815', '20150101', '20151231', 'Annual Mem'),
    ('1111555', '9005148308', '20160101', '20161231', 'Annual Mem'),
    ('1111222', '9005027152', '20150301', '20160229', 'Annual Mem'),
    ('1111222', '9005440369', '20160301', '20170331', 'Annual Mem'),
    ('2223333', '9005027152', '20140101', '20161231', 'Annual Mem'),
    ('2223333', '9005442116', '20160101', '20171231', 'Annual Mem');

SELECT ShipMasterCustomerID, OrderNo, cycle_begin_date, cycle_end_date, Description
FROM #t2;

DROP TABLE #t2;
Sample Data
You don't need a temp table. You can query the same table twice, giving it an alias and then use the alias to prefix your column names. Since you didn't give us a complete schema or a fiddle I'm simulating your database with a temp table but the essence is here. There are considerations that you didn't mention, though. Are you guaranteed that every customer will have both a historical AND a current record? If not, they will not appear in the query below because of the INNER JOIN. You could change it to an OUTER join but when customers don't have a new record you will see NULL values in those columns. My point is that here be dragons... this is by no means a complete or bulletproof solution, only a nudge in the right direction.
-- Stand-in for the real ORDER_DETAIL table: a table variable with two orders
-- for one customer, joined to itself so the historical order and the most
-- recent order come back on a single row.
DECLARE @ORDER_DETAIL AS TABLE (
    ShipMasterCustomerId varchar(20),
    OrderNo varchar(20),
    cycle_begin_date date,
    cycle_end_date date,
    Description varchar(100)
);

-- Dates are given as yyyymmdd so they parse the same under any DATEFORMAT.
-- NOTE(review): the second row's begin date equals its end date (28 Feb 2017)
-- although its description says 01-Mar-2016; kept as in the original sample.
INSERT INTO @ORDER_DETAIL (ShipMasterCustomerId, OrderNo, cycle_begin_date, cycle_end_date, Description)
VALUES
    ('11115555', '9005337015', '20150226', '20160229', 'Membership 26-Feb-2015 to 29-Feb-2016'),
    ('11115555', '9005743023', '20170228', '20170228', 'Membership 01-Mar-2016 to 28-Feb-2017');

SELECT
    hist.ShipMasterCustomerId,
    hist.OrderNo,
    hist.cycle_begin_date,
    hist.cycle_end_date,
    hist.[Description],
    curr.ShipMasterCustomerId,
    curr.OrderNo,
    curr.cycle_begin_date,
    curr.cycle_end_date,
    curr.[Description]
FROM @ORDER_DETAIL AS hist
-- "current" = the same customer's row with the latest cycle_end_date
INNER JOIN @ORDER_DETAIL AS curr
    ON curr.ShipMasterCustomerId = hist.ShipMasterCustomerId
    AND curr.cycle_end_date = (
        SELECT MAX(latest.cycle_end_date)
        FROM @ORDER_DETAIL AS latest
        WHERE latest.ShipMasterCustomerId = hist.ShipMasterCustomerId
    )
WHERE hist.ShipMasterCustomerId = '11115555'
    AND hist.cycle_end_date = '20160229';

Executing a stored procedure to populate a table column?

Can I use the EXEC keyword to run a stored procedure to populate the QuestionScore column?
-- Table definition as posted in the question. It is kept verbatim because the
-- QuestionScore line is the construct the question asks about.
CREATE TABLE Question_Result
(
ResultID INT IDENTITY (10000, 1) NOT NULL,
-- Default builds 'QRES' followed by the next value of the sequence.
-- NOTE(review): 'QRES' plus up to 10 digits may not fit VARCHAR(10) — confirm the sequence range.
QResultID VARCHAR (10) NOT NULL DEFAULT 'QRES' + CAST(NEXT VALUE
FOR non_identity_incrementer AS VARCHAR(10)),
-- NOTE(review): EXEC is not part of the column-definition grammar, so this line
-- does not parse as written; the answer that follows proposes a computed column
-- backed by a scalar function instead.
QuestionScore DECIMAL (4,2) EXEC dbo.update_question_result_question_score(),
Assessment INT NULL,
DateTimeModified DATETIME NOT NULL DEFAULT SYSDATETIME(),
PRIMARY KEY CLUSTERED (ResultID),
FOREIGN KEY (Assessment) REFERENCES Assessment_Result (ResultID)
);
My stored procedure is:
-- Reads the score and both answers from StudentAnswerQuestionScore into the
-- OUTPUT parameters and, when the two answers are equal, records the score in
-- Question_Result.
--   @QuestionScore   OUTPUT  Score read from StudentAnswerQuestionScore
--   @StudentAnswer   OUTPUT  [Student Answer] read from the same source
--   @AssessingAnswer OUTPUT  [Assessing Answer] read from the same source
CREATE PROCEDURE update_question_result_question_score (
    @QuestionScore DECIMAL(4,2) OUTPUT,
    @StudentAnswer NVARCHAR(MAX) OUTPUT,
    @AssessingAnswer NVARCHAR(MAX) OUTPUT
)
AS
BEGIN
    -- Also returns the full source rows to the caller as a result set.
    SELECT *
    FROM StudentAnswerQuestionScore;

    -- NOTE(review): each scalar subquery presumes StudentAnswerQuestionScore
    -- yields at most one row — confirm, otherwise the assignment raises an error.
    SET @QuestionScore = (SELECT Score FROM StudentAnswerQuestionScore);
    SET @StudentAnswer = (SELECT [Student Answer] FROM StudentAnswerQuestionScore);
    SET @AssessingAnswer = (SELECT [Assessing Answer] FROM StudentAnswerQuestionScore);

    IF (@StudentAnswer = @AssessingAnswer)
        INSERT INTO Question_Result (QuestionScore)
        VALUES (@QuestionScore);
END
You can use a scalar function as the definition of a computed column. Hopefully the code below gives you some ideas:
-- Demo: a computed column whose value comes from a scalar function.
-- Answer.Score is looked up from Question each time it is read, and is NULL
-- when the given answer does not match the assigned answer.
use staging
go
create table Question
(
    QuestionKey int identity(1,1),
    QuestionDesc varchar(50),
    AssignedAnswer varchar(50),
    Score int
)
go
insert into Question (QuestionDesc, AssignedAnswer, Score)
select 'Capital city of AU', 'ACT', 10
go
-- Returns the Score of the Question row matching both the question text and
-- the answer; returns NULL when no such row exists.
create function ufnAssignScore (@Question varchar(50), @Answer varchar(50))
returns int
as
begin
    declare @return int

    select @return = Score
    from Question
    where QuestionDesc = @Question
        and AssignedAnswer = @Answer

    return @return
end
go
create table Answer
(
    AnswerKey int identity(1,1),
    StudentID varchar(50),
    QuestionDesc varchar(50),
    AnswerDesc varchar(50),
    -- computed column: evaluated through the function, not stored by this definition
    Score as dbo.ufnAssignScore(QuestionDesc, AnswerDesc)
)
go
-- 'Mel' does not match the assigned answer; 'ACT' does.
insert into Answer (StudentID, QuestionDesc, AnswerDesc)
select 'a1', 'Capital city of AU', 'Mel'
union all
select 'a2', 'Capital city of AU', 'ACT'
go
select AnswerKey, StudentID, QuestionDesc, AnswerDesc, Score
from Answer

Working with Temp Tables and Stored Procedure to get some data

I have a project, the aim is to get result analysed as shown in image:
However, My table looks like this in the image here:
I have searched using the title of this question to find out how to produce the result shown in image 1 from my table in image 2, and I wrote the stored procedure below, but I am stuck and need your help.
-- Procedure as posted in the question (excerpt; no closing END is visible).
-- For one school and level it fills six temp tables from tb_schlist — subjects,
-- a student count, and one table per grade A1/B2/B3/C4 — then joins them.
-- NOTE(review): #NameofSchool and #levelName are used as parameters, but # is
-- the temp-table prefix in T-SQL; these were presumably @NameofSchool and
-- @levelName before the text was extracted — confirm.
CREATE proc [dbo].[getScoreClassificationbySchools]
#NameofSchool nvarchar(31), #levelName nvarchar(5)
as
Begin
-- Working tables; #A..#F are genuine temp tables.
create table #A(LevelNames varchar(50), SchoolSubject varchar (50))
create table #B(LevelNames varchar(50), TotalNostudent int, SchoolSubjectt varchar (50))
create table #C(LevelNames varchar(50), ScoreClassDistinction varchar (10), SchoolSubject varchar (50))
create table #D(LevelNames varchar(50), ScoreClassCredit varchar (10), SchoolSubject varchar (50))
create table #E(LevelNames varchar(50), ScoreClassPass varchar (10), SchoolSubject varchar (50))
create table #F(LevelNames varchar(50), ScoreClassFail varchar (10), SchoolSubject varchar (50))
-- Distinct level/subject pairs for the requested school and level.
insert into #A(LevelNames, SchoolSubject)
select distinct LevelName, Subject from tb_schlist where #levelName =LevelName AND #NameofSchool = SchoolName
-- NOTE(review): COUNT is mixed with non-aggregated columns and there is no
-- GROUP BY; presumably GROUP BY LevelName, Subject was intended — confirm.
insert into #B(LevelNames, TotalNostudent,SchoolSubjectt)
select distinct LevelName, count (LevelName) as TotalNoOfStudent, Subject from tb_schlist where #levelName =LevelName AND #NameofSchool = SchoolName
-- One table per grade value: A1, B2, B3, C4.
insert into #C(LevelNames, ScoreClassDistinction, SchoolSubject)
select distinct LevelName, Grades, Subject from tb_schlist where #levelName =LevelName and #NameofSchool = SchoolName and Grades='A1'
insert into #D(LevelNames, ScoreClassCredit, SchoolSubject)
select distinct LevelName, Grades, Subject from tb_schlist where #levelName =LevelName and #NameofSchool = SchoolName and Grades='B2'
insert into #E(LevelNames, ScoreClassPass, SchoolSubject)
select distinct LevelName, Grades, Subject from tb_schlist where #levelName =LevelName and #NameofSchool = SchoolName and Grades='B3'
insert into #F(LevelNames, ScoreClassFail, SchoolSubject)
select distinct LevelName, Grades, Subject from tb_schlist where #levelName =LevelName and #NameofSchool = SchoolName and Grades='C4'
-- NOTE(review): t1.lnames is not a column of #A (its columns are LevelNames and
-- SchoolSubject), #B names its subject column SchoolSubjectt, and there are five
-- JOINs but only three ON clauses — the intended join conditions need confirming.
SELECT t1.lnames , t2.SchoolSubject, t3.ScoreClassDistinction, t4.ScoreClassCredit, t5.ScoreClassPass, t6.ScoreClassFail from #A t1 join #B t2 on t1.LevelNameS = t2.LevelNames join #C t3 join #D t4 on t3.LevelNames = t4.LevelNames join #E t5 join #F t6 on t5.LevelNames = t6.LevelNames
Try this :
-- One output row per pivot group of marks: a count column for every grade,
-- plus the overall total and the A1-C6 and D7-E8 band totals.
SELECT
    SUBJECT,
    [A1] + [B2] + [B3] + [C4] + [C5] + [C6] + [D7] + [E8] + [F9] AS TotalNoOfStudentsThatSat,
    [A1],
    [B2],
    [B3],
    [C4],
    [C5],
    [C6],
    [A1] + [B2] + [B3] + [C4] + [C5] + [C6] AS [A1-C6],
    [D7],
    [E8],
    [D7] + [E8] AS [D7-E8],
    [F9]
FROM marks
PIVOT (
    COUNT(grade)
    FOR grade IN ([A1], [B2], [B3], [C4], [C5], [C6], [D7], [E8], [F9])
) PVT
SQL Fiddle

Inserting batch of rows into two tables in SQL Server 2008

I have a requirement to insert multiple rows into table1 and at the same time insert a row into table2 with a pkID from table1 and a value that comes from a SP parameter.
I created a stored procedure that performs a batch insert with a table valued parameter which contains the rows to be inserted into table1. But I have a problem with inserting the row into table2 with the corresponding Id (identity) from table1, along with parameter value that I have passed.
Is there anyone who implemented this, or what is the good solution for this?
-- Procedure as posted in the question (excerpt; no closing END is visible).
-- It batch-inserts the rows of a table-valued parameter into table1 and then
-- tries to write a table2 row for the new table1 id together with the type.
-- NOTE(review): the leading # on the parameters and variables is presumably a
-- garbled @ (# is the temp-table prefix in T-SQL) — confirm.
CREATE PROCEDURE [dbo].[oSP_TV_Insert]
#uID int
,#IsActive int
,#Type int -- value to be written to table2 alongside each new table1 id
,#dTableGroup table1 READONLY -- table-valued parameter holding the rows for table1
AS
DECLARE #SQL varchar(2000)
DECLARE #table1Id int
BEGIN
INSERT INTO dbo.table1
(uID
,Name
,Contact
,Address
,City
,State
,Zip
,Phone
,Active)
-- NOTE(review): the select list below has ten values (it ends with Active and
-- #G_Active) for nine target columns, and #G_Active is not declared in this
-- excerpt — confirm which one belongs.
SELECT
#uID
,Name
,Contact
,Address
,City
,State
,Zip
,Phone
,Active
,#G_Active
FROM #dTableGroup
--the above query will perform batch insert using the records from dTableGroup which is table valued
-- SCOPE_IDENTITY() yields a single value, so after a multi-row insert it can
-- only describe one of the inserted rows; this is the problem being asked about.
SET #table1ID = SCOPE_IDENTITY()
-- this below will perform inserting records to table2 with every Id inserted in table1.
-- NOTE(review): values appear where INSERT expects a column list, and there is
-- no VALUES or SELECT clause, so this statement does not parse as written.
Insert into table2(#table1ID , #type)
You need to temporarily store the inserted identity values and then create a second INSERT statement - using the OUTPUT clause.
Something like:
-- declare table variable to hold the ID's that are being inserted
DECLARE @InsertedIDs TABLE (ID INT);

-- insert values into table1 - output the inserted ID's into @InsertedIDs
-- The target list uses uID / @uID to match the procedure in the question.
-- NOTE(review): INSERTED.ID presumes table1's identity column is named ID — confirm.
-- NOTE(review): the original select list ended with an extra, undeclared
-- @G_Active that had no matching target column; it is dropped here so the
-- column counts agree — confirm whether it was meant to replace Active.
INSERT INTO dbo.table1 (uID, Name, Contact, Address, City, State, Zip, Phone, Active)
OUTPUT INSERTED.ID INTO @InsertedIDs (ID)
SELECT
    @uID, Name, Contact, Address, City, State, Zip, Phone, Active
FROM @dTableGroup;
and then you can have your second INSERT statement:
-- One table2 row per captured table1 id, each carrying the procedure's @Type value.
INSERT INTO dbo.table2 (Table1ID, Type)
SELECT ID, @Type
FROM @InsertedIDs;
See the MSDN docs on the OUTPUT clause for more details on what you can do with the OUTPUT clause - one of the most underused and most "unknown" features of SQL Server these days!
Another approach using OUTPUT clause and only one statement for inserting data in both destination tables:
-- Self-contained demo of inserting into two tables with a single statement:
-- the inner INSERT ... OUTPUT feeds its new identity values to the outer INSERT.
-- Test1 uses a valid @Type; Test2 uses a @Type that violates the CHECK constraint.

--Parameters
DECLARE @TableGroup TABLE
(
    Name NVARCHAR(100) NOT NULL
    ,Phone VARCHAR(10) NOT NULL
);
DECLARE @Type INT;
--End Of parameters

--Destination tables
DECLARE @FirstDestinationTable TABLE
(
    FirstDestinationTableID INT IDENTITY(1,1) PRIMARY KEY
    ,Name NVARCHAR(100) NOT NULL
    ,Phone VARCHAR(10) NOT NULL
);
DECLARE @SecondDestinationTable TABLE
(
    SecondDestinationTable INT IDENTITY(2,2) PRIMARY KEY
    ,FirstDestinationTableID INT NOT NULL
    ,[Type] INT NOT NULL
    ,CHECK([Type] > 0)
);
--End of destination tables

--Test1
--initialization
INSERT @TableGroup (Name, Phone)
VALUES ('Bogdan SAHLEAN', '0721200300')
    ,('Ion Ionescu', '0211002003')
    ,('Vasile Vasilescu', '0745600800');
SET @Type = 9;

--execution
INSERT @SecondDestinationTable (FirstDestinationTableID, [Type])
SELECT FirstINS.FirstDestinationTableID, @Type
FROM
(
    INSERT @FirstDestinationTable (Name, Phone)
    OUTPUT inserted.FirstDestinationTableID
    SELECT tg.Name, tg.Phone
    FROM @TableGroup tg
) FirstINS;

--check records
SELECT *
FROM @FirstDestinationTable;
SELECT *
FROM @SecondDestinationTable;
--End of test1

--Test2
--initialization
DELETE @TableGroup;
DELETE @FirstDestinationTable;
DELETE @SecondDestinationTable;
INSERT @TableGroup (Name, Phone)
VALUES ('Ion Ionescu', '0210000000')
    ,('Vasile Vasilescu', '0745000000');
SET @Type = 0; --Wrong value: fails CHECK([Type] > 0)

--execution
INSERT @SecondDestinationTable (FirstDestinationTableID, [Type])
SELECT FirstINS.FirstDestinationTableID, @Type
FROM
(
    INSERT @FirstDestinationTable (Name, Phone)
    OUTPUT inserted.FirstDestinationTableID
    SELECT tg.Name, tg.Phone
    FROM @TableGroup tg
) FirstINS;

--check records
-- @@ROWCOUNT is read immediately after each SELECT, so the message reports how
-- many rows each table holds after the failed statement.
DECLARE @rc1 INT, @rc2 INT;
SELECT *
FROM @FirstDestinationTable;
SET @rc1 = @@ROWCOUNT;
SELECT *
FROM @SecondDestinationTable;
SET @rc2 = @@ROWCOUNT;
RAISERROR('[Test2 results] @FirstDestinationTable: %d rows; @SecondDestinationTable: %d rows;',1,1,@rc1,@rc2);
--End of test2
Since you need all inserted identity values, look at the output clause of the insert statement: http://msdn.microsoft.com/en-us/library/ms177564.aspx

Resources