Getting XML to feed into SQL Server table - sql-server

I am trying to get a (for right now) simple XML to feed into a SQL Server table.
The XML is:
<?xml version="1.0" encoding="utf-8"?>
<ArrayOfSafeEODBalance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<SafeEODBalance>
<Lane>1</Lane>
<PouchId>06292019053041001</PouchId>
<BusinessDay>6/29/2019</BusinessDay>
<BusinessStartingTime>6/29/2019 5:36:58 AM</BusinessStartingTime>
<BusinessEndingTime>6/30/2019 12:15:55 AM</BusinessEndingTime>
<StartingBalance>0.0000</StartingBalance>
<EndingBalance>8</EndingBalance>
</SafeEODBalance>
<SafeEODBalance>
<Lane>2</Lane>
<PouchId>06292019053042002</PouchId>
<BusinessDay>6/29/2019</BusinessDay>
<BusinessStartingTime>6/29/2019 5:36:58 AM</BusinessStartingTime>
<BusinessEndingTime>6/30/2019 12:15:55 AM</BusinessEndingTime>
<StartingBalance>100.0000</StartingBalance>
<EndingBalance>2</EndingBalance>
</SafeEODBalance>
</ArrayOfSafeEODBalance>
And saved to C:\Users\cj\Documents\EodBalance.xml
I have set up the SQL Server table [dbo].[EndofDay] which has the columns of each of these exactly:
Here is the query I am trying:
-- Insert one row per shredded XML node into [dbo].[EndofDay].
INSERT INTO [dbo].[EndofDay] ([PouchID], [Lane], [BusinessDay], BusinessStartingTime, BusinessEndingTime, [StartingBalance], [EndingBalance])
SELECT
-- Each column: query() selects the named child element, value('.') converts its content to the given SQL type.
MY_XML.SafeEODBalance.query('PouchId').value('.', 'VARCHAR(25)'),
MY_XML.SafeEODBalance.query('Lane').value('.', 'NCHAR(2)'),
MY_XML.SafeEODBalance.query('BusinessDay').value('.', 'DATE'),
MY_XML.SafeEODBalance.query('BusinessStartingTime').value('.', 'DATETIME'),
MY_XML.SafeEODBalance.query('BusinessEndingTime').value('.', 'DATETIME'),
MY_XML.SafeEODBalance.query('StartingBalance').value('.', 'NCHAR(10)'),
MY_XML.SafeEODBalance.query('EndingBalance').value('.', 'NCHAR(10)')
FROM
-- Derived table: the file is read as a single BLOB and cast to XML.
(SELECT CAST(MY_XML AS XML)
FROM OPENROWSET(BULK 'C:\Users\cj\Documents\EodBalance.xml',SINGLE_BLOB) AS T(MY_XML)) AS T(MY_XML)
-- NOTE(review): the nodes() path below starts at SafeEODBalance and looks for a child named SafeEODBalances;
-- the sample document's root is ArrayOfSafeEODBalance with SafeEODBalance children, so confirm this path against the file.
CROSS APPLY MY_XML.nodes('SafeEODBalance/SafeEODBalances') AS MY_XML (SafeEODBalance);
When I run this I get:
(0 rows affected)
Completion time: 2019-08-29T16:07:12.3361442-04:00
Which obviously should feed two lines into this, but it is giving nothing in the table.

Here is adjusted working SQL. Just uncomment the INSERT lines when you are ready.
SQL
-- Load the whole file as one BLOB and cast it to a single XML value.
WITH XmlFile (xmlData) AS
(
    SELECT CAST(BulkColumn AS XML)
    FROM OPENROWSET(BULK 'C:\Users\cj\Documents\EodBalance.xml', SINGLE_BLOB) AS x
)
-- Uncomment the two INSERT lines below once the SELECT returns the expected rows.
--INSERT INTO [dbo].[EndofDay]
--([PouchID], [Lane], [BusinessDay], BusinessStartingTime, BusinessEndingTime, [StartingBalance], [EndingBalance])
-- One output row per /ArrayOfSafeEODBalance/SafeEODBalance element.
-- Every column addresses the element's text() node directly, so all seven
-- columns are extracted the same way.
SELECT c.value('(PouchId/text())[1]', 'VARCHAR(25)') AS [PouchId]
     , c.value('(Lane/text())[1]', 'NCHAR(2)') AS [Lane]
     , c.value('(BusinessDay/text())[1]', 'DATE') AS [BusinessDay]
     , c.value('(BusinessStartingTime/text())[1]', 'datetime') AS [BusinessStartingTime]
     , c.value('(BusinessEndingTime/text())[1]', 'datetime') AS [BusinessEndingTime]
     , c.value('(StartingBalance/text())[1]', 'MONEY') AS [StartingBalance]
     , c.value('(EndingBalance/text())[1]', 'MONEY') AS [EndingBalance]
FROM XmlFile
CROSS APPLY xmlData.nodes('/ArrayOfSafeEODBalance/SafeEODBalance') AS t(c);

** EDIT ** As pointed out in the comments below, this answer uses legacy functions and stored procedures, so it should not be used unless you are running a pre-2005 version of SQL Server.
Here is a slightly different approach, using a variable to store the XML from OPENROWSET and the stored procedure sp_xml_preparedocument to convert it into an XML document.
Once in XML document form it can be queried using OPENXML(). This has the possible advantage that if you have a large or complex XML structure from which you wish to make several extracts, you can re-use the XML document repeatedly without having to reload the original XML file.
Be sure to remove the XML document using sp_xml_removedocument when you have finished with it to free up the server cache.
-- Load the XML file and convert it to an XML document.
-- @XML holds the file content; @hXML receives the handle of the parsed document.
DECLARE @XML AS XML, @hXML AS INT;
SELECT @XML = CONVERT(XML, x.BulkColumn)
FROM OPENROWSET(BULK 'C:\Users\cj\Documents\EodBalance.xml', SINGLE_BLOB) AS x;
EXEC sp_xml_preparedocument @hXML OUTPUT, @XML;
-- Select data from the XML document: one row per SafeEODBalance element.
-- The selected column names match the WITH clause exactly (including case).
SELECT Lane, PouchId, BusinessDay, BusinessStartingTime, BusinessEndingTime, StartingBalance, EndingBalance
FROM OPENXML(@hXML, 'ArrayOfSafeEODBalance/SafeEODBalance') WITH
(
    Lane [varchar](2) 'Lane',
    PouchId [varchar](50) 'PouchId',
    BusinessDay [date] 'BusinessDay',
    BusinessStartingTime [datetime] 'BusinessStartingTime',
    BusinessEndingTime [datetime] 'BusinessEndingTime',
    StartingBalance [varchar](50) 'StartingBalance',
    EndingBalance [varchar](50) 'EndingBalance'
);
-- Remove the XML document from the cache
EXEC sp_xml_removedocument @hXML;

Related

What is the best way to import this very large xml file into a SQL Server database

I have a XML file with 95 gb of data (1444 mio rows). I need to import some of the data into a SQL Server table.
I have made a sample file that I'm trying to import into my SQL Server with the following code. I don't get any errors, but I also don't get any data in the table.
Sample file: https://1drv.ms/u/s!AnJeuk8W8KbEjblueb6bjKaDWJ5XAw?e=2molDZ
CREATE DATABASE DMR_DB2
GO
USE DMR_DB2
GO
-- Staging table: one row per loaded XML file.
CREATE TABLE XMLwithOpenXML
(
Id INT IDENTITY PRIMARY KEY,
XMLData XML,
LoadedDateTime DATETIME
)
-- Read the file as a single BLOB, convert it to XML and store it with the load time.
INSERT INTO XMLwithOpenXML(XMLData, LoadedDateTime)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE()
FROM OPENROWSET(BULK 'C:\Users\kn\Desktop\ESStatistikListeModtag-20220911-222128\Test.xml', SINGLE_BLOB) AS x;
--SELECT * FROM XMLwithOpenXML
DECLARE @XML AS XML, @hDoc AS INT, @SQL NVARCHAR (MAX)
SELECT @XML = XMLData FROM XMLwithOpenXML
-- Parse the XML and obtain a document handle for OPENXML.
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
SELECT KoeretoejIdent, KoeretoejArtNummer, KoeretoejArtNavn
FROM OPENXML(@hDoc, 'ESStatistikListeModtag_I/StatistikSamling/Statistik')
WITH
(
-- '@name' patterns address attributes; a bare name addresses a child element.
KoeretoejIdent [varchar](50) '@KoeretoejIdent',
KoeretoejArtNummer [varchar](100) '@KoeretoejArtNummer',
KoeretoejArtNavn [varchar](100) 'KoeretoejArtNavn'
)
-- Release the parsed document.
EXEC sp_xml_removedocument @hDoc
GO
Please try the following solution.
It works for your small XML sample.
Microsoft's proprietary OPENXML() and its companions sp_xml_preparedocument and sp_xml_removedocument are kept only for backward compatibility with the obsolete SQL Server 2000. Their use is now limited to a very few fringe cases.
Starting from SQL Server 2005 onwards, it is strongly recommended to re-write your SQL and switch it to XQuery.
As @Lamu already pointed out, a SQL Server XML type column can hold up to 2 GB of XML.
For 95 GB you would need to use SQL Server Integration Services (SSIS).
SSIS has no limitation on the XML file size. It will handle whatever the Operating System (OS) file system allows.
SQL
USE tempdb;
GO
-- Staging table holding the raw XML document; created in the same schema the DROP targets.
DROP TABLE IF EXISTS dbo.tbl;
CREATE TABLE dbo.tbl (
    ID INT IDENTITY(1, 1) PRIMARY KEY,
    XmlColumn XML
);
-- Load the file as a single BLOB; the column is named explicitly instead of SELECT *.
INSERT INTO dbo.tbl (XmlColumn)
SELECT x.BulkColumn
FROM OPENROWSET(BULK N'c:\Downloads\Test.xml', SINGLE_BLOB) AS x;
-- Declare the default namespace used to resolve the unprefixed element names in the paths below.
WITH XMLNAMESPACES (DEFAULT 'http://skat.dk/dmr/2007/05/31/')
SELECT c.value('(KoeretoejIdent/text())[1]', 'VARCHAR(50)') AS KoeretoejIdent
     , c.value('(KoeretoejArtNummer/text())[1]', 'INT') AS KoeretoejArtNummer
     , c.value('(KoeretoejArtNavn/text())[1]', 'NVARCHAR(50)') AS KoeretoejArtNavn
FROM dbo.tbl
CROSS APPLY XmlColumn.nodes('/ESStatistikListeModtag_I/StatistikSamling/Statistik') AS t(c);

SQL Server 2012 - cannot Import XML with xmlns into table

I'm trying to import some nodes into a sql server table from an xml file with ns
<?xml version="1.0" encoding="UTF-8"?>
<GetSellerTransactionsResponse xmlns="urn:ebay:apis:eBLBaseComponents">
<Timestamp>2019-08-03T17:51:45.081Z</Timestamp>
...
</GetSellerTransactionsResponse>
with this query:
DECLARE @XML AS XML, @hDoc AS INT, @SQL NVARCHAR (MAX)
SELECT @XML = XMLData FROM XMLwithOpenXML
-- Parse the XML and obtain a document handle for OPENXML.
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
;WITH XMLNAMESPACES (DEFAULT 'urn:ebay:apis:eBLBaseComponents' )
insert into Users (EIASToken,Email, UserID,RegistrationDate)
SELECT distinct EIASToken,Email, UserID,RegistrationDate
FROM OPENXML(@hDoc, 'GetSellerTransactionsResponse/TransactionArray/Transaction/Buyer')
WITH
(
EiasToken[nvarchar](100) 'EIASToken',
Email[nvarchar](100) 'Email',
UserID [nvarchar](100) 'UserID',
RegistrationDate [datetime] 'RegistrationDate',
Name [nvarchar] (100) 'BuyerInfo/ShippingAddress/Name'
)
-- Skip buyers whose token is already stored in Users.
where EIASToken not in (select EIASToken from Users)
-- Release the parsed document.
EXEC sp_xml_removedocument @hDoc
I solved it this way: creating a prefix for the xmlns (ebl), passing @xmlns as the third parameter to sp_xml_preparedocument, adding ebl: to all nodes and subnodes, and removing the ;WITH XMLNAMESPACES statement:
-- Namespace declaration document: binds the prefix ebl to the eBay namespace for the XPath patterns below.
SET @xmlns = '<GetSellerTransactionsResponse xmlns:ebl="urn:ebay:apis:eBLBaseComponents" />'
SELECT @XML = XMLData FROM XMLwithOpenXML
-- The third argument supplies the namespace declarations used by OPENXML.
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML, @xmlns
insert into Users (EIASToken,Email, UserID,RegistrationDate,Name)
SELECT distinct EIASToken,Email, UserID,RegistrationDate,Name
FROM OPENXML(@hDoc, 'ebl:GetSellerTransactionsResponse/ebl:TransactionArray/ebl:Transaction/ebl:Buyer')
WITH
(
EiasToken[nvarchar](100) 'ebl:EIASToken',
Email[nvarchar](100) 'ebl:Email',
UserID [nvarchar](100) 'ebl:UserID',
RegistrationDate [datetime] 'ebl:RegistrationDate',
Name [nvarchar] (100) 'ebl:BuyerInfo/ebl:ShippingAddress/ebl:Name'
)
-- Skip buyers whose token is already stored in Users.
where EIASToken not in (select EIASToken from Users)
but I'm quite sure there's a way to avoid all that ebl:
can somebody suggest the solution?
Thanks
-----------------
Following the suggestion of marc_s, I tried to build the queries with XQuery (actually not using @XML.query):
-- Shred every Transaction directly from the XML variable (column lists elided in the original post).
;WITH XMLNAMESPACES (DEFAULT 'urn:ebay:apis:eBLBaseComponents' )
INSERT INTO dbo.Sales
(...)
SELECT
...
FROM @XML.nodes('//OrderArray/Order/TransactionArray/Transaction') as O(X)
and
-- Shred Orders first, then Transactions via CROSS APPLY (column lists elided in the original post).
;WITH XMLNAMESPACES (DEFAULT 'urn:ebay:apis:eBLBaseComponents' )
INSERT INTO dbo.Sales
(...)
SELECT
...
FROM @XML.nodes('//OrderArray/Order') as O(X)
-- NOTE(review): the inner path starts with '//'; confirm whether it was meant to be relative to each Order
-- (i.e. 'TransactionArray/Transaction'), which would make the WHERE filter below unnecessary.
CROSS APPLY O.X.nodes('//TransactionArray/Transaction') as I(X)
WHERE O.X.value('(TransactionArray/Transaction/TransactionID/text())[1]', 'nvarchar(70)')=I.X.value('(TransactionID/text())[1]', 'nvarchar(70)')
The first solution returned the same number of records as OPENXML, while the second is missing some 10%.
But the biggest issue is performance:
Parsing the same 12MB XML with 1237 "Records" I got the following results:
OPENXML
(1237 rows affected)
SQL Server Execution Times:
CPU time = 687 ms, elapsed time = 986 ms.
-
XQuery without Cross Apply
(1237 rows affected)
SQL Server Execution Times:
CPU time = 14.106.641 ms, elapsed time = 7.788.378 ms.
-
XQuery with Cross Apply
(1134 rows affected)
SQL Server Execution Times:
CPU time = 1.661.968 ms, elapsed time = 1.668.166 ms.
Therefore, is the OPENXML solution the only reasonable one, or have I made some mistakes, or should I use @XML.query()?

Retrieve specific fields of the imported to SQL Server multiple XMLs

I have multiple XML files with the same structure, I have imported them to SQL Server 2017 with the following commands:
DDL:
-- Create the database and a table that stores one XML document per row.
CREATE DATABASE xmlFiles
GO
USE xmlFiles
CREATE TABLE tblXMLFiles
(
    IntCol int,
    XmlData xml
);
GO
DML:
USE xmlFiles
-- One INSERT per file. OPENROWSET(BULK ..., SINGLE_BLOB) exposes the content as BulkColumn,
-- which is selected explicitly instead of SELECT *.
INSERT INTO [dbo].[tblXMLFiles](XmlData) SELECT x.BulkColumn FROM OPENROWSET(BULK 'C:\xmls\1.xml', SINGLE_BLOB) AS x;
INSERT INTO [dbo].[tblXMLFiles](XmlData) SELECT x.BulkColumn FROM OPENROWSET(BULK 'C:\xmls\2.xml', SINGLE_BLOB) AS x;
…
INSERT INTO [dbo].[tblXMLFiles](XmlData) SELECT x.BulkColumn FROM OPENROWSET(BULK 'C:\xmls\N.xml', SINGLE_BLOB) AS x;
Now I want to query the data:
USE xmlFiles
GO
DECLARE @XML AS XML, @hDoc AS INT, @SQL NVARCHAR (MAX)
-- Assigns the XML column of tblXMLFiles to a single scalar variable.
SELECT @XML = XmLData FROM tblXMLFiles
-- Parse the XML and obtain a document handle for OPENXML.
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
SELECT Surname , GivenNames
FROM OPENXML(@hDoc, 'article/ref-list/ref/mixed-citation')
WITH
(
Surname [varchar](100) 'string-name/surname',
GivenNames [varchar](100) 'string-name/given-names'
)
-- Release the parsed document.
EXEC sp_xml_removedocument @hDoc
GO
The query is working, but the problem is that it returns the data only when there is only one row in a data source table — tblXMLFiles. If I add more than one row, I get empty result set.
Important:
The situation changes if I add a TOP clause to the outer SELECT (SELECT @XML = XmLData…): it then returns the queried data of a specific row, according to the TOP value.
How can I retrieve the data not only when there is one line in the table, but many rows?
FROM OPENXML with the corresponding SPs to prepare and to remove a document is outdated and should not be used any more. Rather use the appropriate methods the XML data type provides.
Without an example of your XML it is quite difficult to offer a solution, but my magic crystal ball tells me, that it might be something like this:
-- One row per mixed-citation element of every stored document;
-- OUTER APPLY keeps documents in which the path matches nothing.
SELECT
    files.IntCol,
    cite.mc_node.value('(string-name/surname)[1]', 'nvarchar(max)') AS Surname,
    cite.mc_node.value('(string-name/given-names)[1]', 'nvarchar(max)') AS GivenNames
FROM dbo.tblXMLFiles AS files
OUTER APPLY files.XmlData.nodes('article/ref-list/ref/mixed-citation') AS cite(mc_node)

Creating XML from SQL Table (Unique XML formatting)

So, I'm trying to create a XML file from a SQL table. I do know of the route of using...
-- Serialize every row of the table as XML.
SELECT *
FROM dbo.[db_name]
FOR XML PATH
But the issue at hand is that the XML styling/formatting is quite odd...
<ID>170607A13</ID>
<MaterialActual>
<MaterialLotID>170607A13</MaterialLotID>
<MaterialActualProperty>
<ID>CreationDate</ID>
<Value>
<ValueString>2017-06-07T12:26:27.667-05:00</ValueString>
</Value>
</MaterialActualProperty>
Therefore, I decided I could go the route of concatenating it and inserting into the XML file. Like so...
-- @NAME and @LOCATION are declared but never assigned here, so both are NULL.
DECLARE @NAME varchar(50)
DECLARE @LOCATION varchar(50)
DECLARE @SearchXML xml
-- Builds the XML by string concatenation; with the default CONCAT_NULL_YIELDS_NULL setting
-- a NULL operand makes the whole concatenated string NULL.
SET @SearchXML = '<Root>
<CallerInformation>
<LastName>' + @LOCATION + '</LastName>
<FirstName>' + @NAME + '</FirstName>
</CallerInformation>
</Root>'
SELECT @SearchXML;
But when doing this I get returned...
If I could get pointed in the right direction or even a example that would be great!
But the issue at hand is that the XML styling/formatting is quite odd...
What is odd there? The only thing odd I can see is the attempt to solve this on string level...
Your question is missing sample data and expected output. The simple select you provide tells us nothing, the XML you provide is an incomplete fragment, and the actual example is something completely different...
Just some hints:
Do not concatenate XML on string level!
please read How to ask a good SQL question
and How to create a MCVE
Your simple example should be done like this:
DECLARE @NAME varchar(50)
DECLARE @LOCATION varchar(50)
DECLARE @SearchXML xml
-- Let the engine build the XML: each column alias becomes an element name,
-- PATH names the row element, ROOT wraps the result and TYPE returns it as xml.
SET @SearchXML =
(
    SELECT @LOCATION AS LastName
         , @NAME AS FirstName
    FOR XML PATH('CallerInformation'), ROOT('Root'), TYPE
);
SELECT @SearchXML;
This will lead to an almost empty (but valid!) XML, put any value into the variables and you will see the XML filled.
UPDATE: Your odd XML...
Try something like this:
-- Slashes in a column alias create nested elements; adjacent columns that share
-- a path prefix are placed under the same parent element.
-- The second column is named MaterialLotID to match the target XML shown in the question.
SET @xml=
(
    SELECT '170607A13' AS ID
         , '170607A13' AS [MaterialActual/MaterialLotID]
         , 'CreationDate' AS [MaterialActual/MaterialActualProperty/ID]
         , GETDATE() AS [MaterialActual/MaterialActualProperty/Value/ValueString]
    FOR XML PATH('')
);
SELECT @xml;
UPDATE 2: Very long XPath...
This is your error: the name length is more than 128 characters.
DECLARE @xml XML;
-- Failing variant kept for illustration: the long path aliases exceed the 128-character name limit.
--SET @xml=
--(
-- SELECT '170607A13' AS ID
-- ,'170607A13' AS [MaterialActual1234567890/MaterialLot1234567890]
-- ,'CreationDate' AS [MaterialActual1234567890/SomeMore1234567890/EvenMore1234567890/StillMore1234567890/MaterialActualProperty1234567890/ID1234567890]
-- ,GETDATE() AS [MaterialActual1234567890/SomeMore1234567890/EvenMore1234567890/StillMore1234567890/MaterialActualProperty1234567890/ValueString1234567890]
-- FOR XML PATH('')
--);
--SELECT @xml
--This is a solution: nested sub-select:
-- The deep part of the path is produced by an inner FOR XML ... TYPE query,
-- so every individual alias stays short.
SET @xml=
(
    SELECT '170607A13' AS ID
         , '170607A13' AS [MaterialActual1234567890/MaterialLot1234567890]
         , (
               SELECT 'CreationDate' AS [EvenMore1234567890/StillMore1234567890/MaterialActualProperty1234567890/ID1234567890]
                    , GETDATE() AS [EvenMore1234567890/StillMore1234567890/MaterialActualProperty1234567890/ValueString1234567890]
               FOR XML PATH('SomeMore1234567890'), TYPE
           ) AS [MaterialActual1234567890]
    FOR XML PATH('')
);
SELECT @xml;
UPDATE 3: Your follow-up question in comment
HINT: Avoid follow-up questions. Next time please add a new question!
Both return the result requested:
-- Variant 1: an alias ending in /@format puts the value into a format attribute of PublishedDate;
-- the following column supplies the element content.
SELECT 'yyyy-MM-dd''T''HH:mm:ss.SSSXXX' AS [PublishedDate/@format]
     , GETDATE() AS PublishedDate
FOR XML PATH('');
-- Variant 2: PATH names the element, [@format] is its attribute and [*] its content.
SELECT 'yyyy-MM-dd''T''HH:mm:ss.SSSXXX' AS [@format]
     , GETDATE() AS [*]
FOR XML PATH('PublishedDate');
In my eyes there's no need for the format. Within XML a datetime should be in this format (which is ISO8601) anyway. This is the standard format...

SQL process XML performance: Insert into columns in a table

I am having an issue in a SQL procedure and I can't seem to find the proper solution.
The stored procedure has one parameter of the XML datatype (name = @data).
An example of the incoming message is the following (the actual message is containing a lot more nodes, but I left them out for simplicity):
<Suppliers xmlns="">
<Supplier>
<IDCONO>3</IDCONO>
<IDSUNO>009999</IDSUNO>
<IDSUTY>0</IDSUTY>
</Supplier>
</Suppliers>
In my SQL database I have a table called "Supplier" and it contains the exact same columns as the nodes in the XML (IDCONO, IDSUNO, IDSUTY,..)
I need to loop over the nodes and insert the data in the columns.
I have implemented the procedure below, but it is giving me a lot of performance issues on the bigger files (long processing times, even timeouts):
-- Shred each /Suppliers/Supplier element of the @data parameter into one SUPPLIER row.
INSERT INTO SUPPLIER
(IDCONO
,IDSUNO
,IDSUTY)
SELECT
T.C.value('IDCONO[1]', 'VARCHAR(50)') as IDCONO,
T.C.value('IDSUNO[1]', 'VARCHAR(50)') as IDSUNO,
T.C.value('IDSUTY[1]', 'VARCHAR(50)') as IDSUTY
from @data.nodes('/Suppliers/Supplier') T(C)
Any help is appreciated!
Note that the SQL version is SQL server 2012.
Thanks in advance.
The first thing I would try is to specify the text() node when using the XML datatype, to prevent SQL Server from doing a deep search for text elements.
-- Same shredding as the question, but each value addresses the element's text() node directly.
INSERT INTO SUPPLIER
(IDCONO
,IDSUNO
,IDSUTY)
SELECT
T.C.value('(IDCONO/text())[1]', 'VARCHAR(50)') as IDCONO,
T.C.value('(IDSUNO/text())[1]', 'VARCHAR(50)') as IDSUNO,
T.C.value('(IDSUTY/text())[1]', 'VARCHAR(50)') as IDSUTY
FROM @data.nodes('/Suppliers/Supplier') T(C)
If that is not good enough I would try OPENXML instead.
-- Parse @data once and keep the document handle in @idoc.
DECLARE @idoc INT
EXEC sp_xml_preparedocument @idoc OUT, @data
INSERT INTO SUPPLIER
(IDCONO
,IDSUNO
,IDSUTY)
SELECT IDCONO, IDSUNO, IDSUTY
-- Flag 2 selects element-centric mapping, so the WITH column names are matched to child elements.
FROM OPENXML(@idoc, '/Suppliers/Supplier', 2) WITH
(IDCONO VARCHAR(50),
IDSUNO VARCHAR(50),
IDSUTY VARCHAR(50))
-- Release the parsed document.
EXEC sp_xml_removedocument @idoc

Resources