SQL Server 2012 - cannot Import XML with xmlns into table - sql-server

I'm trying to import some nodes into a sql server table from an xml file with ns
<?xml version="1.0" encoding="UTF-8"?>
<GetSellerTransactionsResponse xmlns="urn:ebay:apis:eBLBaseComponents">
<Timestamp>2019-08-03T17:51:45.081Z</Timestamp>
...
</GetSellerTransactionsResponse>
with this query:
-- Original attempt from the question, logic kept as posted.
-- NOTE(review): OPENXML does not use WITH XMLNAMESPACES; namespaces for OPENXML are
-- passed as the third argument of sp_xml_preparedocument, so the unprefixed paths
-- below do not match the elements of the namespaced document.
DECLARE @XML AS XML, @hDoc AS INT, @SQL NVARCHAR (MAX)
SELECT @XML = XMLData FROM XMLwithOpenXML
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
;WITH XMLNAMESPACES (DEFAULT 'urn:ebay:apis:eBLBaseComponents' )
insert into Users (EIASToken,Email, UserID,RegistrationDate)
SELECT distinct EIASToken,Email, UserID,RegistrationDate
FROM OPENXML(@hDoc, 'GetSellerTransactionsResponse/TransactionArray/Transaction/Buyer')
WITH
(
EiasToken[nvarchar](100) 'EIASToken',
Email[nvarchar](100) 'Email',
UserID [nvarchar](100) 'UserID',
RegistrationDate [datetime] 'RegistrationDate',
Name [nvarchar] (100) 'BuyerInfo/ShippingAddress/Name'
)
where EIASToken not in (select EIASToken from Users)
-- Release the parsed document held by the server.
EXEC sp_xml_removedocument @hDoc
I solved it this way: I created a prefix (ebl) for the xmlns, passed @xmlns as the third parameter to sp_xml_preparedocument, added ebl: to all nodes and subnodes, and removed the ;WITH XMLNAMESPACES statement:
-- OPENXML variant that works with the namespaced document: the prefix "ebl" is bound to the
-- document namespace through the third argument of sp_xml_preparedocument, and every step of
-- the row pattern and column patterns is qualified with that prefix.
DECLARE @XML AS XML, @hDoc AS INT, @xmlns NVARCHAR(MAX)
SET @xmlns = '<GetSellerTransactionsResponse xmlns:ebl="urn:ebay:apis:eBLBaseComponents" />'
SELECT @XML = XMLData FROM XMLwithOpenXML
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML, @xmlns
insert into Users (EIASToken,Email, UserID,RegistrationDate,Name)
SELECT distinct EIASToken,Email, UserID,RegistrationDate,Name
FROM OPENXML(@hDoc, 'ebl:GetSellerTransactionsResponse/ebl:TransactionArray/ebl:Transaction/ebl:Buyer')
WITH
(
EiasToken[nvarchar](100) 'ebl:EIASToken',
Email[nvarchar](100) 'ebl:Email',
UserID [nvarchar](100) 'ebl:UserID',
RegistrationDate [datetime] 'ebl:RegistrationDate',
Name [nvarchar] (100) 'ebl:BuyerInfo/ebl:ShippingAddress/ebl:Name'
)
where EIASToken not in (select EIASToken from Users)
-- Release the parsed document; the handle otherwise stays allocated for the session.
EXEC sp_xml_removedocument @hDoc
but I'm quite sure there's a way to avoid all that ebl:
can somebody suggest the solution?
Thanks
-----------------
Following the suggestion of marc_s, I tried to build the queries with XQuery (actually without using @XML.query):
-- XQuery variant without CROSS APPLY: one row per Transaction element.
-- "(...)" and "..." stand for the column lists elided in the question.
;WITH XMLNAMESPACES (DEFAULT 'urn:ebay:apis:eBLBaseComponents' )
INSERT INTO dbo.Sales
(...)
SELECT
...
FROM @XML.nodes('//OrderArray/Order/TransactionArray/Transaction') as O(X)
and
-- XQuery variant with CROSS APPLY, logic kept as posted.
-- "(...)" and "..." stand for the column lists elided in the question.
-- NOTE(review): the inner path starts with '//', which presumably searches the whole document
-- for every Order instead of only that Order's children; the WHERE clause then keeps only the
-- Transaction whose ID equals the Order's FIRST TransactionID. That would explain both the
-- missing rows and the long run time. A relative path ('TransactionArray/Transaction') with no
-- WHERE clause is the usual form; confirm against the data.
;WITH XMLNAMESPACES (DEFAULT 'urn:ebay:apis:eBLBaseComponents' )
INSERT INTO dbo.Sales
(...)
SELECT
...
FROM @XML.nodes('//OrderArray/Order') as O(X)
CROSS APPLY O.X.nodes('//TransactionArray/Transaction') as I(X)
WHERE O.X.value('(TransactionArray/Transaction/TransactionID/text())[1]', 'nvarchar(70)')=I.X.value('(TransactionID/text())[1]', 'nvarchar(70)')
The first solution returned the same number of records, while the second is missing about 10%.
But the biggest issue is performance:
Parsing the same 12MB XML with 1237 "Records" I got the following results:
OPENXML
(1237 rows affected)
SQL Server Execution Times:
CPU time = 687 ms, elapsed time = 986 ms.
-
XQuery without Cross Apply
(1237 rows affected)
SQL Server Execution Times:
CPU time = 14.106.641 ms, elapsed time = 7.788.378 ms.
-
XQuery with Cross Apply
(1134 rows affected)
SQL Server Execution Times:
CPU time = 1.661.968 ms, elapsed time = 1.668.166 ms.
Therefore, is the OPENXML solution the only reasonable one, or have I made some mistakes, or should I use @XML.query()?

Related

What is the best way to import this very large xml file into a SQL Server database

I have a XML file with 95 gb of data (1444 mio rows). I need to import some of the data into a SQL Server table.
I have made a sample file that I'm trying to import into my SQL Server with the following code. I don't get any errors, but I also don't get any data in the table.
Sample file: https://1drv.ms/u/s!AnJeuk8W8KbEjblueb6bjKaDWJ5XAw?e=2molDZ
-- Question's script, logic kept as posted: create a database and staging table, bulk-load the
-- XML file into one row, then try to shred it with OPENXML.
CREATE DATABASE DMR_DB2
GO
USE DMR_DB2
GO
CREATE TABLE XMLwithOpenXML
(
Id INT IDENTITY PRIMARY KEY,
XMLData XML,
LoadedDateTime DATETIME
)
-- SINGLE_BLOB returns the whole file as a single column named BulkColumn.
INSERT INTO XMLwithOpenXML(XMLData, LoadedDateTime)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE()
FROM OPENROWSET(BULK 'C:\Users\kn\Desktop\ESStatistikListeModtag-20220911-222128\Test.xml', SINGLE_BLOB) AS x;
--SELECT * FROM XMLwithOpenXML
DECLARE @XML AS XML, @hDoc AS INT, @SQL NVARCHAR (MAX)
SELECT @XML = XMLData FROM XMLwithOpenXML
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
-- NOTE(review): no namespace declaration is passed to sp_xml_preparedocument, and the '@...'
-- column patterns select attributes. The XQuery answer to this question reads the same values
-- as child elements under the default namespace 'http://skat.dk/dmr/2007/05/31/', which is
-- presumably why this query returns no data.
SELECT KoeretoejIdent, KoeretoejArtNummer, KoeretoejArtNavn
FROM OPENXML(@hDoc, 'ESStatistikListeModtag_I/StatistikSamling/Statistik')
WITH
(
KoeretoejIdent [varchar](50) '@KoeretoejIdent',
KoeretoejArtNummer [varchar](100) '@KoeretoejArtNummer',
KoeretoejArtNavn [varchar](100) 'KoeretoejArtNavn'
)
EXEC sp_xml_removedocument @hDoc
GO
Please try the following solution.
It works for your small XML sample.
Microsoft proprietary OPENXML() and its companions sp_xml_preparedocument and sp_xml_removedocument are kept just for backward compatibility with the obsolete SQL
Server 2000. Their use is diminished just to very few fringe cases.
Starting from SQL Server 2005 onwards, it is strongly recommended to re-write your SQL and switch it to XQuery.
As @Lamu already pointed out, a SQL Server XML type column can hold up to 2 GB of XML.
For 95 GB you would need to use SQL Server Integration Services (SSIS).
SSIS has no limitation on the XML file size. It will handle whatever the operating system (OS) file system allows.
SQL
USE tempdb;
GO

-- Staging table: one XML document per row.
DROP TABLE IF EXISTS dbo.tbl;
CREATE TABLE dbo.tbl (
    ID INT IDENTITY(1, 1) PRIMARY KEY,
    XmlColumn XML
);

-- SINGLE_BLOB exposes the file as a single column named BulkColumn; name it explicitly
-- instead of relying on SELECT *.
INSERT INTO dbo.tbl (XmlColumn)
SELECT x.BulkColumn
FROM OPENROWSET(BULK N'c:\Downloads\Test.xml', SINGLE_BLOB) AS x;

-- Declare the document's default namespace once so the paths below can stay unprefixed.
-- One output row is produced per Statistik element.
WITH XMLNAMESPACES (DEFAULT 'http://skat.dk/dmr/2007/05/31/')
SELECT
    c.value('(KoeretoejIdent/text())[1]', 'VARCHAR(50)') AS KoeretoejIdent,
    c.value('(KoeretoejArtNummer/text())[1]', 'INT') AS KoeretoejArtNummer,
    c.value('(KoeretoejArtNavn/text())[1]', 'NVARCHAR(50)') AS KoeretoejArtNavn
FROM dbo.tbl AS src
CROSS APPLY src.XmlColumn.nodes('/ESStatistikListeModtag_I/StatistikSamling/Statistik') AS t(c);

Getting XML to feed into SQL Server table

I am trying to get a (for right now) simple XML to feed into a SQL Server table.
The XML is:
<?xml version="1.0" encoding="utf-8"?>
<ArrayOfSafeEODBalance xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<SafeEODBalance>
<Lane>1</Lane>
<PouchId>06292019053041001</PouchId>
<BusinessDay>6/29/2019</BusinessDay>
<BusinessStartingTime>6/29/2019 5:36:58 AM</BusinessStartingTime>
<BusinessEndingTime>6/30/2019 12:15:55 AM</BusinessEndingTime>
<StartingBalance>0.0000</StartingBalance>
<EndingBalance>8</EndingBalance>
</SafeEODBalance>
<SafeEODBalance>
<Lane>2</Lane>
<PouchId>06292019053042002</PouchId>
<BusinessDay>6/29/2019</BusinessDay>
<BusinessStartingTime>6/29/2019 5:36:58 AM</BusinessStartingTime>
<BusinessEndingTime>6/30/2019 12:15:55 AM</BusinessEndingTime>
<StartingBalance>100.0000</StartingBalance>
<EndingBalance>2</EndingBalance>
</SafeEODBalance>
</ArrayOfSafeEODBalance>
And saved to C:\Users\cj\Documents\EodBalance.xml
I have set up the SQL Server table [dbo].[EndofDay] which has the columns of each of these exactly:
Here is the query I am trying:
-- Question's original attempt, kept exactly as posted; it inserts 0 rows.
-- NOTE(review): the nodes() path 'SafeEODBalance/SafeEODBalances' does not occur in the sample
-- document, whose structure is ArrayOfSafeEODBalance/SafeEODBalance, so CROSS APPLY yields no rows.
INSERT INTO [dbo].[EndofDay] ([PouchID], [Lane], [BusinessDay], BusinessStartingTime, BusinessEndingTime, [StartingBalance], [EndingBalance])
SELECT
MY_XML.SafeEODBalance.query('PouchId').value('.', 'VARCHAR(25)'),
MY_XML.SafeEODBalance.query('Lane').value('.', 'NCHAR(2)'),
MY_XML.SafeEODBalance.query('BusinessDay').value('.', 'DATE'),
MY_XML.SafeEODBalance.query('BusinessStartingTime').value('.', 'DATETIME'),
MY_XML.SafeEODBalance.query('BusinessEndingTime').value('.', 'DATETIME'),
MY_XML.SafeEODBalance.query('StartingBalance').value('.', 'NCHAR(10)'),
MY_XML.SafeEODBalance.query('EndingBalance').value('.', 'NCHAR(10)')
FROM
-- Load the file as one XML value.
(SELECT CAST(MY_XML AS XML)
FROM OPENROWSET(BULK 'C:\Users\cj\Documents\EodBalance.xml',SINGLE_BLOB) AS T(MY_XML)) AS T(MY_XML)
CROSS APPLY MY_XML.nodes('SafeEODBalance/SafeEODBalances') AS MY_XML (SafeEODBalance);
When I run this I get:
(0 rows affected)
Completion time: 2019-08-29T16:07:12.3361442-04:00
Which obviously should feed two lines into this, but it is giving nothing in the table.
Here is adjusted working SQL. Just uncomment the INSERT lines when you are ready.
SQL
-- Read the file once into a single-row CTE, then produce one output row per
-- SafeEODBalance element. Uncomment the INSERT lines to load the target table.
WITH XmlFile (xmlData) AS
(
    SELECT CAST(BulkColumn AS XML)
    FROM OPENROWSET(BULK 'C:\Users\cj\Documents\EodBalance.xml', SINGLE_BLOB) AS x
)
--INSERT INTO [dbo].[EndofDay]
--([PouchID], [Lane], [BusinessDay], BusinessStartingTime, BusinessEndingTime, [StartingBalance], [EndingBalance])
SELECT c.value('(PouchId/text())[1]', 'VARCHAR(25)') AS [PouchId]
    , c.value('(Lane/text())[1]', 'NCHAR(2)') AS [Lane]
    , c.value('(BusinessDay/text())[1]', 'DATE') AS [BusinessDay]
    -- text() step added so this column is read the same way as every other column
    , c.value('(BusinessStartingTime/text())[1]', 'datetime') AS [BusinessStartingTime]
    , c.value('(BusinessEndingTime/text())[1]', 'datetime') AS [BusinessEndingTime]
    , c.value('(StartingBalance/text())[1]', 'MONEY') AS [StartingBalance]
    , c.value('(EndingBalance/text())[1]', 'MONEY') AS [EndingBalance]
FROM XmlFile CROSS APPLY xmlData.nodes('/ArrayOfSafeEODBalance/SafeEODBalance') AS t(c);
** EDIT ** As pointed out in the comments below, this answer uses legacy functions and SPs, so it should not be used unless you are running on a pre-2005 version of SQL Server.
Here is a slightly different approach, using a variable to store the XML from OPENROWSET and the stored procedure sp_xml_preparedocument to convert it into an XML document.
Once in XML document form it can be queried using OPENXML(). This has the possible advantage that if you have a large or complex XML structure from which you wish to make several extracts, you can re-use the XML document repeatedly without having to reload the original XML file.
Be sure to remove the XML document using sp_xml_removedocument when you have finished with it to free up the server cache.
-- Load the XML file and convert it to an XML document
DECLARE @XML AS XML, @hXML AS INT;
-- The file path matches the location given in the question (the file name was duplicated before).
SELECT @XML = CONVERT(XML, x.BulkColumn)
FROM OPENROWSET(BULK 'C:\Users\cj\Documents\EodBalance.xml', SINGLE_BLOB) AS x;
EXEC sp_xml_preparedocument @hXML OUTPUT, @XML
-- Select data from the XML document: one row per SafeEODBalance element, with each column
-- read from the child element named in its column pattern.
SELECT Lane, PouchId, BusinessDay, BusinessStartingTime, BusinessEndingTime, StartingBalance, EndingBalance
FROM OPENXML(@hXML, 'ArrayOfSafeEODBalance/SafeEODBalance') WITH
(
Lane [varchar](2) 'Lane',
PouchId [varchar](50) 'PouchId',
BusinessDay [date] 'BusinessDay',
BusinessStartingTime [datetime] 'BusinessStartingTime',
BusinessEndingTime [datetime] 'BusinessEndingTime',
StartingBalance [varchar](50) 'StartingBalance',
EndingBalance [varchar](50) 'EndingBalance'
);
-- Remove the XML document from the cache
EXEC sp_xml_removedocument @hXML;

MS SQL Server - OpenXML - Multiple elements

XML example:
<POLICY>
<RISKS>
<RISK>
<DRV>1</DRV>
</RISK>
<RISK>
<DRV>2</DRV>
</RISK>
</RISKS>
</POLICY>
I want to select both Risk elements with this query:
-- Question's query, logic kept as posted: the row pattern matches the single RISKS element,
-- so one row comes back and the 'RISK' column pattern yields only the first RISK child.
SELECT RISK
FROM OPENXML(@hDOC, 'POLICY/RISKS', 2)
WITH(
RISK XML 'RISK'
) AS Z
Expected:
1. <RISK><DRV>1</DRV></RISK>
2. <RISK><DRV>2</DRV></RISK>
Result:
1. <RISK><DRV>1</DRV></RISK>
(only first element was returned)
For comparison this query returns two rows as expected:
-- Comparison query from the question: the row pattern matches each RISK element,
-- so one row (with its DRV child) is returned per RISK.
SELECT DRV
FROM OPENXML(@hDOC, 'POLICY/RISKS/RISK', 2)
WITH(
DRV XML 'DRV'
) AS Z
Result:
1. <DRV>1</DRV>
2. <DRV>2</DRV>
So the question is how can I get two Risk-rows?
Why are you not using the native XQuery support provided by SQL Server. OpenXML is old and having lot of issues.
You can write your query like following using XQuery Support
-- XQuery version: nodes() returns one row per RISK element and query('.') returns
-- that element itself as XML.
DECLARE @hDOC xml
SET @hDOC='<POLICY>
<RISKS>
<RISK>
<DRV>1</DRV>
</RISK>
<RISK>
<DRV>2</DRV>
</RISK>
</RISKS>
</POLICY>'
SELECT T.c.query('.') AS result
FROM @hDOC.nodes('/POLICY/RISKS/RISK') T(c)
GO
You will get output as
1. <RISK><DRV>1</DRV></RISK>
2. <RISK><DRV>2</DRV></RISK>
Edit: If you still want to do with OpenXml, use query like following.
-- OPENXML version: make RISK the row pattern and return the current element ('.') as XML.
DECLARE @DocHandle int
-- NVARCHAR to match the N'...' literal assigned below.
DECLARE @hDOC NVARCHAR(1000)
SET @hDOC=N'<POLICY>
<RISKS>
<RISK>
<DRV>1</DRV>
</RISK>
<RISK>
<DRV>2</DRV>
</RISK>
</RISKS>
</POLICY>'
EXEC sp_xml_preparedocument @DocHandle OUTPUT, @hDOC
SELECT RISK
FROM OPENXML(@DocHandle, 'POLICY/RISKS/RISK', 2)
WITH(
RISK XML '.'
) AS Z
-- Release the parsed document.
EXEC sp_xml_removedocument @DocHandle
You will get the desired output.

Retrieve specific fields of the imported to SQL Server multiple XMLs

I have multiple XML files with the same structure, I have imported them to SQL Server 2017 with the following commands:
DDL:
-- Create the demo database and the table that stores one imported XML file per row.
CREATE DATABASE xmlFiles;
GO
USE xmlFiles;
CREATE TABLE tblXMLFiles
(
    IntCol  int,
    XmlData xml
);
GO
DML:
-- Load each file into its own row. SINGLE_BLOB exposes the file content as a single
-- column named BulkColumn, selected explicitly here instead of via SELECT *.
USE xmlFiles
INSERT INTO [dbo].[tblXMLFiles](XmlData) SELECT x.BulkColumn FROM OPENROWSET(BULK 'C:\xmls\1.xml', SINGLE_BLOB) AS x;
INSERT INTO [dbo].[tblXMLFiles](XmlData) SELECT x.BulkColumn FROM OPENROWSET(BULK 'C:\xmls\2.xml', SINGLE_BLOB) AS x;
-- … (one INSERT per remaining file)
INSERT INTO [dbo].[tblXMLFiles](XmlData) SELECT x.BulkColumn FROM OPENROWSET(BULK 'C:\xmls\N.xml', SINGLE_BLOB) AS x;
Now I want to query the data:
-- Question's query, logic kept as posted.
-- NOTE(review): @XML is a single variable, so "SELECT @XML = XmlData FROM tblXMLFiles" can
-- hold only one row's document when the table has several rows; only that document is
-- prepared and shredded below.
USE xmlFiles
GO
DECLARE @XML AS XML, @hDoc AS INT, @SQL NVARCHAR (MAX)
SELECT @XML = XmLData FROM tblXMLFiles
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
SELECT Surname , GivenNames
FROM OPENXML(@hDoc, 'article/ref-list/ref/mixed-citation')
WITH
(
Surname [varchar](100) 'string-name/surname',
GivenNames [varchar](100) 'string-name/given-names'
)
EXEC sp_xml_removedocument @hDoc
GO
The query is working, but the problem is that it returns the data only when there is only one row in a data source table — tblXMLFiles. If I add more than one row, I get empty result set.
Important:
The situation is changing if I add to the outer SELECT clause (SELECT #XML = XmLData…) the TOP statement, then it returns the queried data of the specific row number, according to the TOP value.
How can I retrieve the data not only when there is one line in the table, but many rows?
FROM OPENXML with the corresponding SPs to prepare and to remove a document is outdated and should not be used any more. Rather use the appropriate methods the XML data type provides.
Without an example of your XML it is quite difficult to offer a solution, but my magic crystal ball tells me, that it might be something like this:
-- Shred every stored document: one output row per mixed-citation element.
-- OUTER APPLY also keeps table rows whose XML has no such element.
SELECT
    f.IntCol,
    citation.mc.value('(string-name/surname)[1]', 'nvarchar(max)') AS Surname,
    citation.mc.value('(string-name/given-names)[1]', 'nvarchar(max)') AS GivenNames
FROM dbo.tblXMLFiles AS f
OUTER APPLY f.XmlData.nodes('article/ref-list/ref/mixed-citation') AS citation(mc)

SQL process XML performance: Insert into columns in a table

I am having an issue in a SQL procedure and I can't seem to find the proper solution.
The stored procedure has one parameter of the XML datatype (name = @data).
An example of the incoming message is the following (the actual message is containing a lot more nodes, but I left them out for simplicity):
<Suppliers xmlns="">
<Supplier>
<IDCONO>3</IDCONO>
<IDSUNO>009999</IDSUNO>
<IDSUTY>0</IDSUTY>
</Supplier>
</Suppliers>
In my SQL database I have a table called "Supplier" and it contains the exact same columns as the nodes in the XML (IDCONO, IDSUNO, IDSUTY,..)
I need to loop over the nodes and insert the data in the columns.
I have implemented the procedure below, but it is giving me a lot of performance issues on the bigger files (long processing time, even timeouts):
-- Question's original insert: one row per Supplier element of the @data parameter.
INSERT INTO SUPPLIER
(IDCONO
,IDSUNO
,IDSUTY)
SELECT
T.C.value('IDCONO[1]', 'VARCHAR(50)') as IDCONO,
T.C.value('IDSUNO[1]', 'VARCHAR(50)') as IDSUNO,
T.C.value('IDSUTY[1]', 'VARCHAR(50)') as IDSUTY
from @data.nodes('/Suppliers/Supplier') T(C)
Any help is appreciated!
Note that the SQL version is SQL server 2012.
Thanks in advance.
The first thing I would try is to specify the text() node when using the XML datatype, to prevent SQL Server from doing a deep search for text elements.
-- Same insert as in the question, but each value is read from the element's text() node directly.
INSERT INTO SUPPLIER
(IDCONO
,IDSUNO
,IDSUTY)
SELECT
T.C.value('(IDCONO/text())[1]', 'VARCHAR(50)') as IDCONO,
T.C.value('(IDSUNO/text())[1]', 'VARCHAR(50)') as IDSUNO,
T.C.value('(IDSUTY/text())[1]', 'VARCHAR(50)') as IDSUTY
FROM @data.nodes('/Suppliers/Supplier') T(C)
If that is not good enough I would try OPENXML instead.
-- OPENXML alternative: parse @data once, insert one row per Supplier element, then
-- release the parsed document.
DECLARE @idoc INT
EXEC sp_xml_preparedocument @idoc OUT, @data
INSERT INTO SUPPLIER
(IDCONO
,IDSUNO
,IDSUTY)
SELECT IDCONO, IDSUNO, IDSUTY
-- Flag 2 selects element-centric mapping: each column is read from the child element of the same name.
FROM OPENXML(@idoc, '/Suppliers/Supplier', 2) WITH
(IDCONO VARCHAR(50),
IDSUNO VARCHAR(50),
IDSUTY VARCHAR(50))
EXEC sp_xml_removedocument @idoc

Resources