SQL query to retrieve XML data - sql-server

I have some XML in SQL Server:
Code to retrieve:
-- Load one stored response into an XML variable, then read its content.
DECLARE @xmlresponse xml;

-- TOP (1) without ORDER BY: which row of dto.t is picked is not defined.
SELECT TOP (1) @xmlresponse = xmlresponse
FROM dto.t;

-- DEFAULT makes every unprefixed element name in the XQuery resolve in 'http://www3'.
WITH XMLNAMESPACES (DEFAULT 'http://www3')
SELECT inputRefId = Node.Data.value('.', 'varchar(50)')  -- '.' is the whole context node, so all of its text is concatenated
FROM @xmlresponse.nodes('.') AS Node(Data);
This returns both fields (inputrefid and crn) as one string.
I would like to retrieve the values in separate fields starting with the inputrefid, but no matter what I try I get NULL.
e.g
-- Attempt from the question (yields NULL): the path is rooted at an inputRefId
-- element, but the document's top-level element is <resultDTO>.
SELECT inputRefId = Node.Data.value('(/inputRefId)[1]', 'varchar(50)')

Try it with this XQuery:
-- Bind the namespace to the prefix ns instead of making it the default, so it is
-- applied only to the elements that actually carry it (not to <resultDTO>).
;WITH XMLNAMESPACES ('http://www3' AS ns)
SELECT
    inputRefId = Node.Data.value('(ns:inputRefId)[1]', 'varchar(50)'),
    crn        = Node.Data.value('(ns:crn)[1]', 'varchar(50)')
FROM @xmlresponse.nodes('/resultDTO') AS Node(Data);
The point is that your top-level node - <resultDTO> does not have the http://www3 XML namespace - so you cannot really use the http://www3 as the default XML namespace for all the nodes in the XML - you need to be more specific and only apply it where it's really been set.

Related

Slow XML import with SQL server

I have a XML file with a size of 1GB.
I use the following code to load the data into sql server.
-- Read the whole file into a single XML variable, then shred it into testtbl.
DECLARE @xmlvar xml;

SELECT @xmlvar = x.BulkColumn
FROM OPENROWSET(BULK 'C:\Data\demo.xml', SINGLE_BLOB) AS x;

WITH XMLNAMESPACES (
    DEFAULT 'ux:no::ehe:v5:actual:aver',
    'ux:no:ehe:v5:move' AS ns4,
    'ux:no:ehe:v5:cat:fill' AS ns3,
    'ux:no:ehe:v5:centre' AS ns2
)
SELECT
    zs.value(N'(../@versionCode)', 'VARCHAR(100)') AS versionCode,  -- read from the parent ns4:Content element
    zs.value(N'(@start)', 'VARCHAR(50)') AS Start_date,             -- attribute names are case-sensitive; the sample XML uses lowercase
    zs.value(N'(@end)', 'VARCHAR(50)') AS End_date
INTO testtbl
FROM @xmlvar.nodes('/ns4:Dataview1/ns4:Content/ns4:gen') AS A(zs);
It now takes more than 2 hours to run the query, and it still has not finished.
I have tested the query with a smaller version of the XML file and that works.
Any tips on improving the loading speed?
Thank you.
Update XML file:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ns4:Dataview1 xmlns="ux:no::ehe:v5:actual:aver" xmlns:ns4="ux:no:ehe:v5:move">
<ns4:Content versionCode="16000">
<ns4:gen start="1961-07-01" end="1961-07-01">
</ns4:gen>
<ns4:gen start="2017-09-19">
</ns4:gen>
<ns4:gen start="1961-07-02" end="2016-09-30">
</ns4:gen>
<ns4:gen start="2016-10-01" end="2017-09-18">
</ns4:gen>
</ns4:Content>
</ns4:Dataview1>
(1) As @Stu already pointed out, loading the XML file into a single-row table first will speed up the loading process significantly.
(2) it is not a good idea to traverse XML up in the XPath expressions. Like here:
-- Pattern to avoid: '..' walks back up to the parent for every row.
c.value('../@versionCode', 'VARCHAR(100)') as versionCode
But the XML structure was not shared in the question. So, it is impossible to suggest anything concrete.
2nd CROSS APPLY is simulating 1-to-many relationship in the XML hierarchy.
Check it out below.
SQL
-- Staging table: one row per imported XML document.
CREATE TABLE tbl (
    ID INT IDENTITY(1, 1) PRIMARY KEY,
    XmlColumn XML
);

-- Load the file as a single row; the bulk column is named explicitly instead of SELECT *.
INSERT INTO tbl (XmlColumn)
SELECT x.BulkColumn
FROM OPENROWSET(BULK N'C:\Data\demo.xml', SINGLE_BLOB) AS x;

WITH XMLNAMESPACES (
    DEFAULT 'ux:no::ehe:v5:actual:aver',
    'ux:no:ehe:v5:move' AS ns4,
    'ux:no:ehe:v5:cat:fill' AS ns3,
    'ux:no:ehe:v5:centre' AS ns2
)
SELECT
    c.value('@versionCode', 'VARCHAR(100)') AS versionCode,  -- read on the Content node itself, no '..' step
    x.value('@start', 'DATE') AS Start_date,
    x.value('@end', 'DATE') AS End_date
INTO dbo.testtbl
FROM tbl
-- First APPLY: one row per ns4:Content element.
CROSS APPLY XmlColumn.nodes('/ns4:Dataview1/ns4:Content') AS t1(c)
-- Second APPLY: one row per ns4:gen child of that Content (the 1-to-many step).
CROSS APPLY t1.c.nodes('ns4:gen') AS t2(x);
In my opinion it's better to use an SSIS Package for importing XML files.
It has a component named "XML Source" for loading XML file.
There is a useful article at : https://www.sqlshack.com/import-xml-documents-into-sql-server-tables-using-ssis-packages/

SQL Server parse XML to table - multiple node with the same name

I would like to parse XML into a table in SQL Server 2012 where my XML has nodes with the same name.
My SQL query which return only the first row:
-- One row per <subject>; '(name)[1]' picks only the first <name> under it.
SELECT
    [date] = Node.Data.value('(date)[1]', 'NVARCHAR(MAX)'),
    name   = Node.Data.value('(name)[1]', 'VARCHAR(MAX)')
FROM @xml.nodes('result/subject') AS Node(Data)
XML sample
<result>
<subject>
<date>2019-06-03</date>
<name>AZGREX</name>
<name>ABGDFC</name>
<name>WWGDFW</name>
<name>FDSFSD</name>
<name>FSDWEW</name>
<name>CXZCXZ</name>
<name>GWGRE</name>
</subject>
</result>
You need to use nodes in the FROM:
-- Sample document: one <subject> holding a date and several <name> elements.
DECLARE @XML xml = '<result>
<subject>
<date>2019-06-03</date>
<name>AZGREX</name>
<name>ABGDFC</name>
<name>WWGDFW</name>
<name>FDSFSD</name>
<name>FSDWEW</name>
<name>CXZCXZ</name>
<name>GWGRE</name>
</subject>
</result>';

SELECT
    r.[subject].value('(date/text())[1]', 'date') AS [date],
    s.[name].value('(./text())[1]', 'varchar(6)') AS [name] --obviously, you'll likely need a larger length
FROM (VALUES (@XML)) AS V(X)
-- One row per <subject> ...
CROSS APPLY V.X.nodes('result/subject') AS r([subject])
-- ... multiplied by one row per <name> inside that subject.
CROSS APPLY r.[subject].nodes('name') AS s([name]);

Need help to format output of SQL Server XML sibling query

Consider the following SQL Server XML output:
<CUSTOMER>
<CUST_ID>TEST_CUSTOMER_01</CUST_ID>
<ORG_CODE>MY_ORG</ORG_CODE>
<CUSTOMER_TYPE CUST_TYPE="RETAIL" />
<CUSTOMER_COUNTRY CTRY_CODE="US" />
</CUSTOMER>
It was generated by the following SQL statement.
-- Builds one <CUSTOMER> document for a single customer.
-- The outer query is element-centric (ELEMENTS); the two nested queries use
-- plain FOR XML AUTO, so their columns are emitted as attributes.
SELECT
    CUSTOMER.CUST_ID,
    CUSTOMER.ORG_CODE,
    (
        SELECT CUSTOMER_TYPE.CUST_TYPE
        FROM CUSTOMER_TYPE
        WHERE CUSTOMER_TYPE.CUSTOMER_ID = CUSTOMER.CUST_ID
        FOR XML AUTO, TYPE
    ),
    (
        SELECT CUSTOMER_COUNTRY.CTRY_CODE
        FROM CUSTOMER_COUNTRY
        WHERE CUSTOMER_COUNTRY.CUSTOMER_ID = CUSTOMER.CUST_ID
        FOR XML AUTO, TYPE
    )
FROM CUSTOMER
WHERE CUSTOMER.CUST_ID = 'TEST_CUSTOMER_01'
FOR XML AUTO, ELEMENTS
GO
It's required that the output look like the output below. Substituting ELEMENTS for the two TYPE words in the query above doesn't do it.
How then do I do it?
<CUSTOMER>
<CUST_ID>TEST_CUSTOMER_01</CUST_ID>
<ORG_CODE>MY_ORG</ORG_CODE>
<CUSTOMER_TYPE>
<CUST_TYPE>SHIP_TO</CUST_TYPE>
</CUSTOMER_TYPE>
<CUSTOMER_COUNTRY>
<CTRY_CODE>US</CTRY_CODE>
</CUSTOMER_COUNTRY>
</CUSTOMER>
Thanks!
The best (best in usage and performance!) is FOR XML PATH. It is very intuitive and easy to define any output you want simply by naming them.
-- Literal values stand in for the real columns. With FOR XML PATH, a column
-- alias containing '/' is emitted as a nested element path.
SELECT
    'TEST_CUSTOMER_01' AS [CUST_ID],
    'MY_ORG' AS [ORG_CODE],
    'SHIP_TO' AS [CUSTOMER_TYPE/CUST_TYPE],
    'US' AS [CUSTOMER_COUNTRY/CTRY_CODE]
-- FROM
--     CUSTOMER
-- WHERE
--     CUSTOMER.CUST_ID = 'TEST_CUSTOMER_01'
FOR XML PATH('CUSTOMER')
Try this query.
-- Each nested query uses FOR XML PATH('') so it contributes only its column
-- element; the subquery's alias then supplies the wrapping element name.
SELECT
    CUST_ID,
    ORG_CODE,
    (
        SELECT CUSTOMER_TYPE.CUST_TYPE AS CUST_TYPE
        FROM #customer_type AS CUSTOMER_TYPE
        WHERE CUSTOMER_TYPE.CUSTOMER_ID = CUSTOMER.CUST_ID
        FOR XML PATH(''), TYPE
    ) AS 'CUSTOMER_TYPE',
    (
        SELECT CUSTOMER_COUNTRY.CTRY_CODE AS CTRY_CODE
        FROM #customer_country AS CUSTOMER_COUNTRY
        WHERE CUSTOMER_COUNTRY.CUSTOMER_ID = CUSTOMER.CUST_ID
        FOR XML PATH(''), TYPE
    ) AS 'CUSTOMER_COUNTRY'
FROM #customer AS CUSTOMER
FOR XML AUTO, ELEMENTS
Using the above query you will get the result as
<CUSTOMER>
<CUST_ID>TEST_CUSTOMER_01</CUST_ID>
<ORG_CODE>MY_ORG</ORG_CODE>
<CUSTOMER_TYPE>
<CUST_TYPE>SHIP_TO</CUST_TYPE>
</CUSTOMER_TYPE>
<CUSTOMER_COUNTRY>
<CTRY_CODE>US</CTRY_CODE>
</CUSTOMER_COUNTRY>
</CUSTOMER>

Is it possible to Parse this XML file using SQL Server or SSIS?

I have an XML file I receive from a financial data provider called MDM. It shows which dividends I have downloaded. It includes information like Dividend rate, date and the security requested.
I have tried to parse this with SSIS using the XML source and Merge Join. I also tried SQL Server 2012 using Open XML and couldn't do it. I gathered these techniques from Youtube, google and searching this board. My goal is for this to be in an easy to read table format so I can see which securities received dividend information.
When I tried to Parse it with SSIS the following output names were given
FIELD
SECURITY
APP_PARAM
MDM_MESSAGE
The XML file is too big to post entirely here but I have included a Dropbox link that hopefully works. Hopefully someone can help. I am not sure if this file, though XML can be parsed or not.
https://dl.dropboxusercontent.com/u/29851290/parse_file.xml
Ideally the output will be something like this.
SYM_TYPE_ID SEC_SYMBOL SEC_TYPE_ID FOR DATE EX_DT PAY_DT WASH_AMOUNT RATE TICKER
aapl csus (Not sure) 5/15/2013 6/1/2013 (Not Sure) 0.25
Try this one -
-- Load the file into an XML variable.
DECLARE @XML XML;

SELECT @XML = CONVERT(XML, [XmlData].[BulkColumn])
FROM OPENROWSET(BULK N'C:\parse_file.xml', SINGLE_BLOB) AS [XmlData];

-- Part 1: one row per FIELD element. Security-level attributes are read from
-- the parent SECURITY element via '..'.
SELECT
    PROV_ID     = t.c.value('../@PROV_ID', 'VARCHAR(25)'),
    SYM_TYPE_ID = t.c.value('../@SYM_TYPE_ID', 'VARCHAR(25)'),
    SEC_SYMBOL  = t.c.value('../@SEC_SYMBOL', 'VARCHAR(25)'),
    SEC_TYPE_ID = t.c.value('../@SEC_TYPE_ID', 'VARCHAR(25)'),
    LOCAL_NAME  = t.c.value('@LOCAL_NAME', 'VARCHAR(25)'),
    FOR_DATE    = t.c.value('@FOR_DATE', 'DATETIME'),
    FIELD       = t.c.value('.', 'VARCHAR(25)')
FROM @XML.nodes('root/MDM_MESSAGE[2]/SECURITY/FIELD') AS t(c)
-- NOTE(review): this filter reads @SEC_SYMBOL on FIELD itself, while the select
-- list reads it from the parent (../@SEC_SYMBOL) - confirm which element carries it.
WHERE t.c.value('@SEC_SYMBOL', 'VARCHAR(25)') = '57636Q104'

UNION ALL

-- Part 2: SECURITY elements that have no FIELD child, padded with NULLs so the
-- column list matches part 1.
SELECT
    PROV_ID     = t.c.value('@PROV_ID', 'VARCHAR(25)'),
    SYM_TYPE_ID = t.c.value('@SYM_TYPE_ID', 'VARCHAR(25)'),
    SEC_SYMBOL  = t.c.value('@SEC_SYMBOL', 'VARCHAR(25)'),
    SEC_TYPE_ID = t.c.value('@SEC_TYPE_ID', 'VARCHAR(25)'),
    LOCAL_NAME  = NULL,
    FOR_DATE    = NULL,
    FIELD       = NULL
FROM @XML.nodes('root/MDM_MESSAGE[2]/SECURITY') AS t(c)
WHERE t.c.exist('FIELD') = 0
    AND t.c.value('@SEC_SYMBOL', 'VARCHAR(25)') = '57636Q104';

SQL Server XML Type Select Where Attribute = X From Any Tag

-- Compares only the FIRST <section>'s value attribute: '[1]' keeps a single item.
select *
from tablename
where CONVERT(xml, Sections).value('(/sections/section/@value)[1]', 'varchar(1)') = 'f'
will properly retrieve a record with the following value in the Sections column:
<sections><section value="f" priority="4" /><section value="a" priority="4" /></sections>
But misses this:
<sections><section value="w" priority="4" /><section value="f" priority="4" /></sections>
Obviously this is the problem "(/sections/section/@value)[1]" but I don't understand the syntax and Google hasn't been too helpful. I found some code that got me this far, but I don't know how to modify it so that it will look through all tags instead of just the first one. I tried dropping the [1] but that gave the following error:
XQuery [value()]: 'value()' requires a singleton (or empty sequence), found operand of type 'xdt:untypedAtomic *'
You can use exist().
-- exist() returns 1 when ANY <section> has value="f", regardless of its position.
select *
from tablename
where CONVERT(xml, Sections).exist('/sections/section[@value = "f"]') = 1
If you want to use a dynamic value instead of the hard-coded f in the query, you can use sql:variable().
-- Same test as the hard-coded version, with the value supplied by a T-SQL variable.
declare @Value varchar(10) = 'f'
select *
from tablename
where CONVERT(xml, Sections).exist('/sections/section[@value = sql:variable("@Value")]') = 1
If you have multiple entries of an XML tag, you need to use the .nodes() XQuery method:
-- One output row per <section> element of each record.
select
    tablename.*,  -- not a bare *: the column produced by nodes() cannot be selected directly
    Sections.Section.value('(@value)[1]', 'varchar(1)') as section_value
from tablename
-- Convert once in a derived table so nodes() is invoked on an xml column.
cross apply (select CONVERT(xml, tablename.Sections)) AS converted(SectionsXml)
cross apply converted.SectionsXml.nodes('/sections/section') AS Sections(Section)
With this, you create a "pseudo-table" called Sections(Section) that contains one XML row for each element that matches your XPath (for each <section> under <sections>). You can then reach into this pseudo-table and extract individual bits of information from those XML "rows" using the usual .value() method.

Resources