T-SQL: transform unstructured XML into columns - sql-server

I have a table from a vendor application that stores some xml data into a column of type varchar(200).
The table structure and sample data are here:
-- Table variable holding each merchant's XML fragment as plain varchar text.
-- Table variables must be named with a leading @.
declare @table table
(
MerchantID int not null
,Data varchar(200) not null)
-- Explicit column list so the insert does not depend on column order.
insert into @table (MerchantID, Data)
select 1, '<product><productID>1</productID><pdesc>ProductDesc</pdesc></product>'
union all
select 2, '<product><itemid>1</itemid><itemname>name of item</itemname></product>'
Is there a way to transform the raw XML data into a relational format like the one below in a stored procedure?
For example, when the merchantID passed is 1:
MerchantID productID pdesc
1 1 Product Desc
When the MerchantID passed is 2, the output should be:
MerchantID itemid itemname
2 1 name of item

You can use XPath in SQL Server to access XML data nodes.
Here's an example, using your data.
-- Load one sample fragment into an xml variable and read two child elements from it.
declare @test xml
set @test = '<product><productID>1</productID><pdesc>ProductDesc</pdesc></product>'
-- value() needs a singleton, hence the (...)[1] wrapper around each path.
SELECT
@test.value('(/product/productID/node())[1]', 'nvarchar(max)') as productID,
@test.value('(/product/pdesc/node())[1]', 'nvarchar(max)') as pdesc
From there, you should be able to perform your union like so:
-- xmlfield1 / xmlfield2 presumably stand for the xml values of merchant 1 and merchant 2.
-- Every path is wrapped as (...)[1] so value() receives a singleton.
SELECT 1 as MerchantID,
xmlfield1.value('(/product/productID/node())[1]', 'int') as id,
xmlfield1.value('(/product/pdesc/node())[1]', 'nvarchar(max)') as [desc] -- DESC is a reserved word, so the alias is bracketed
union all -- the two branches cannot overlap (MerchantID 1 vs 2), so no de-duplication is needed
SELECT 2,
xmlfield2.value('(/product/itemid/node())[1]', 'int') as id,
xmlfield2.value('(/product/itemname/node())[1]', 'nvarchar(max)') as [desc]
If your data is in the same column, you can use a CASE expression to resolve it.
-- Pick the id element that matches the merchant's XML layout.
-- Assumes data is of type xml; if it is stored as varchar, cast(data as xml) first.
case
when merchantId = 1 then data.value('(/product/productID/node())[1]', 'int')
else data.value('(/product/itemid/node())[1]', 'int')
end as id

Related

SQL Server extract data from XML column without tag names

I have an XML string:
<XML>
<xml_line>
<col1>1</col1>
<col2>foo 1</col2>
</xml_line>
<xml_line>
<col1>2</col1>
<col2>foo 2</col2>
</xml_line>
</XML>
I am extracting data from that string (stored in @data_xml) by storing it in a SQL Server table and parsing it:
-- create table, insert XML string
CREATE TABLE table1 (data_xml XML)
INSERT table1 (data_xml)
SELECT @data_xml
-- parse XML string: one result row per <xml_line> element
SELECT
N.C.value('col1[1]', 'int') AS col1_name,
N.C.value('col2[1]', 'varchar(31)') AS col2_name
FROM
table1
CROSS APPLY
data_xml.nodes('//xml_line') N(C)
I would like to know if there is a generic way to accomplish the same without specifying column names (i.e. col1[1], col2[1])
You can use something like:
-- Turn every child element of every <xml_line> into a (LineNumber, name, value) row.
SELECT
    -- position of the current <xml_line>: number of xml_line nodes before it in document order, plus one
    Line.Node.value('let $i := . return count(//xml_line[. << $i]) + 1', 'int') AS LineNumber,
    Child.Node.value('local-name(.)', 'varchar(max)') AS name,
    Child.Node.value('.', 'varchar(max)') AS value
FROM table1
CROSS APPLY data_xml.nodes('//xml_line') AS Line(Node)
CROSS APPLY Line.Node.nodes('*') AS Child(Node)
To get:
LineNumber
name
value
1
col1
1
1
col2
foo 1
2
col1
2
2
col2
foo 2
See this db<>fiddle.
However, to spread columns horizontally, you will need to generate dynamic SQL after querying for distinct element names.
ADDENDUM: Here is an updated db<>fiddle that also shows a dynamic SQL example.
The above maps all values as VARCHAR(MAX). If you have NVARCHAR data you can make the appropriate changes. If you have a need to map specific columns to specific types, you will need to explicitly define and populate a name-to-type mapping table and incorporate that into the dynamic SQL logic. The same may be necessary if you prefer that the result columns be in a specific order.
ADDENDUM 2: This updated db<>fiddle now includes column type and ordering logic.
--------------------------------------------------
-- Extract column names
--------------------------------------------------
-- Distinct element names found directly under any <xml_line>; each becomes a result column.
DECLARE @Names TABLE (name VARCHAR(100))
INSERT @Names (name)
SELECT DISTINCT Item.Node.value('local-name(.)', 'varchar(max)')
FROM table1
CROSS APPLY data_xml.nodes('//xml_line/*') Item(Node)
--SELECT * FROM @Names
--------------------------------------------------
-- Define column-to-type mapping
--------------------------------------------------
-- Optional SQL type and display order per column. Names without a mapping use
-- @ColumnTypeDefault and sort after the mapped ones (order 999), then by name.
DECLARE @ColumnTypeMap TABLE ( ColumnName SYSNAME, ColumnType SYSNAME, ColumnOrder INT)
INSERT @ColumnTypeMap (ColumnName, ColumnType, ColumnOrder)
VALUES
('col1', 'int', 1),
('col2', 'varchar(10)', 2)
DECLARE @ColumnTypeDefault SYSNAME = 'varchar(max)'
--------------------------------------------------
-- Define SQL Templates
--------------------------------------------------
-- One select-list entry per column; <colpath>, <coltype> and <colname> are filled in below.
DECLARE @SelectItemTemplate VARCHAR(MAX) =
' , N.C.value(<colpath>, <coltype>) <colname>
'
-- Outer query; <SelectItems> is replaced by the concatenated select-list entries.
DECLARE @SqlTemplate VARCHAR(MAX) =
'SELECT
N.C.value(''let $i := . return count(//xml_line[. << $i]) + 1'', ''int'') as LineNumber
<SelectItems>
FROM
table1
CROSS APPLY
data_xml.nodes(''//xml_line'') N(C)
'
--------------------------------------------------
-- Expand SQL templates into SQL
--------------------------------------------------
-- QUOTENAME(..., '''') wraps the path and type as string literals;
-- QUOTENAME(name) brackets the element name for use as a column alias.
DECLARE @SelectItems VARCHAR(MAX) = (
SELECT STRING_AGG(SI.SelectItem, '')
WITHIN GROUP(ORDER BY ISNULL(T.ColumnOrder, 999), N.Name)
FROM @Names N
LEFT JOIN @ColumnTypeMap T ON T.ColumnName = N.name
CROSS APPLY (
SELECT SelectItem = REPLACE(REPLACE(REPLACE(
@SelectItemTemplate
, '<colpath>', QUOTENAME(N.name + '[1]', ''''))
, '<colname>', QUOTENAME(N.name))
, '<coltype>', QUOTENAME(ISNULL(T.ColumnType, @ColumnTypeDefault), ''''))
) SI(SelectItem)
)
-- STRING_AGG returns NULL when @Names is empty; fall back to '' so @Sql stays a
-- valid statement instead of becoming NULL (EXEC of NULL silently does nothing).
DECLARE @Sql VARCHAR(MAX) = REPLACE(@SqlTemplate, '<SelectItems>', ISNULL(@SelectItems, ''))
--------------------------------------------------
-- Execute
--------------------------------------------------
-- Show the generated statement first, then run it.
SELECT DynamicSql = @Sql
EXEC (@Sql)
Result (with some additional data):
LineNumber
col1
col2
bar
foo
1
1
foo 1
null
More
2
2
foo 2
Stuff
null

How to parse XML from table in SQL Server

I have data in XML in column in table
SELECT ObjectXML
FROM DispOps_Events
[ObjectXML] [nvarchar](max) NOT NULL
A sample of the XML data:
<Document>
<DocumentId>3352597</DocumentId>
<DocumentFullPath>xxx</DocumentFullPath>
<Category>xxx</Category>
<ClientId>xxx</ClientId>
<ApplicationNumber>xxx</ApplicationNumber>
<ContractNumber>xxx</ContractNumber>
<Created>xxx</Created>
<Creator>xxx</Creator>
</Document>
I need to get the data from <DocumentId>XXXX</DocumentId> and insert it into #tmpTable.
So, step 1: I cast the nvarchar(max) value to xml:
select CAST(ObjectXML as XML) as fileXML
INTO #tmpXML
FROM DispOps_Events T WHERE MetastormMapName = 'DocumentsMap'
I tried
-- Attempt from the question. '@DocumentId' addresses an attribute named DocumentId,
-- but DocumentId is an element in the sample XML, so this returns NULL.
select
m.c.value('@DocumentId', 'varchar(max)') as DocumentId
--into #tmpTable
from #tmpXML as s
outer apply s.fileXML.nodes('Document/DocumentId') as m(c)
Error:
null data in the table
You don't need #temp tables to do this, you can just cast the nvarchar(max) data to the xml data type in a single query, e.g.:
/*
* Setup test data...
*/
drop table if exists dbo.DispOps_Events;
create table dbo.DispOps_Events (
ID int not null identity(1,1),
ObjectXML nvarchar(max)
);
insert dbo.DispOps_Events (ObjectXML) values
(N'<Document><DocumentId>2554742</DocumentId><!--...--></Document>'),
(N'<Document><DocumentId>2576868</DocumentId><!--...--></Document>'),
(N'<Document><DocumentId>2576869</DocumentId><!--...--></Document>'),
(N'<Document><DocumentId>2576870</DocumentId><!--...--></Document>');
/*
* Query XML...
*/
-- The first cross apply converts ObjectXML to xml inline, so no temp table is needed.
-- The second yields one row per /Document/DocumentId element; text()[1] reads its text.
select ID, [DocumentId] = Document.DocumentId.value('text()[1]', 'nvarchar(50)')
from dbo.DispOps_Events
cross apply ( select try_cast(ObjectXML as xml) ) Transformers(RoolyTroolyXml)
cross apply RoolyTroolyXml.nodes('/Document/DocumentId') as Document(DocumentId);
ID
DocumentId
1
2554742
2
2576868
3
2576869
4
2576870

SQL Server XML output with CDATA into xml variable

Following this question, I need to put the select result into a xml variable. How to do this avoiding the error message "The FOR XML and FOR JSON clauses are invalid in views, inline functions, derived tables, and subqueries when they contain a set operator. To work around, wrap the SELECT containing a set operator using derived table or common table expression or view and apply FOR XML or FOR JSON on top of it.", please?
Here the test code:
-- Sample agents held in a table variable (named with a leading @).
declare @agent table
(
AgentID int,
Fname varchar(5),
SSN varchar(11)
)
insert into @agent (AgentID, Fname, SSN)
select 1, 'Vimal', '123-23-4521' union all
select 2, 'Jacob', '321-52-4562' union all
select 3, 'Tom', '252-52-4563'
-- Universal table for FOR XML EXPLICIT: the Tag 1 row is the <Agents> root,
-- the Tag 2 rows are its <Agent> children (SSN emitted as CDATA).
SELECT
1 AS Tag,
NULL AS Parent,
NULL AS 'Agents!1!',
NULL AS 'Agent!2!AgentID',
NULL AS 'Agent!2!Fname!Element',
NULL AS 'Agent!2!SSN!cdata'
UNION ALL
SELECT
2 AS Tag,
1 AS Parent,
NULL,
AgentID,
Fname,
SSN
FROM @agent
FOR XML EXPLICIT
And here an example of what I want to do:
-- Desired form. SQL Server rejects it with the error quoted above, because
-- FOR XML is applied inside a subquery that contains a set operator.
Declare @xml xml
...
set @xml= (SELECT
1 AS Tag,
NULL AS Parent,
NULL AS 'Agents!1!',
NULL AS 'Agent!2!AgentID',
NULL AS 'Agent!2!Fname!Element',
NULL AS 'Agent!2!SSN!cdata'
UNION ALL
SELECT
2 AS Tag,
1 AS Parent,
NULL,
AgentID,
Fname,
SSN
FROM @agent
FOR XML EXPLICIT)
This is tricky...
You can move the UNION ALL part to a CTE like here. The problem is not the FOR XML but rather the UNION (be careful, it might be necessary to add an ORDER BY clause):
DECLARE @xml XML;
-- The set operator lives in the CTE, so FOR XML is applied to a plain SELECT over it.
WITH UnionAllCte AS
(
SELECT
1 AS Tag,
NULL AS Parent,
NULL AS 'Agents!1!',
NULL AS 'Agent!2!AgentID',
NULL AS 'Agent!2!Fname!Element',
NULL AS 'Agent!2!SSN!cdata'
UNION ALL
SELECT
2 AS Tag,
1 AS Parent,
NULL,
AgentID,
Fname,
SSN
FROM @agent
)
SELECT @xml=
(
SELECT * FROM UnionAllCte
ORDER BY Tag -- FOR XML EXPLICIT needs the parent row (Tag 1) ahead of its children (Tag 2)
FOR XML EXPLICIT
);
SELECT @xml;
You should also be aware that CDATA as well as FOR XML EXPLICIT are outdated. Rather use FOR XML PATH() and, for reading, the appropriate methods the XML data type provides.
You might read this and read the following links too!

Get the Missing and Excess tags from a XML field

I have a table, Customer(Id int,Name nvarchar(100),Detail xml)
Sample Data:
1,'Abc','<ROOT> <TAG1>False</TAG1> <TAG3>value</TAG3> <TAG14>value</TAG14> </ROOT>'
2,'Pqr','<ROOT> <TAG2>False</TAG2> <TAG8>value</TAG8> <TAG11>value</TAG11> </ROOT>'
Also, I have an XML variable, @v_xml = '<ROOT> <TAG1>value</TAG1> <TAG2>value</TAG2> <TAG8>False</TAG8> <TAG14>False</TAG14> </ROOT>'.
Now I want to get the missing tags and excess tags (in XML format) of each customer compared to the XML variable @v_xml (there is no need to consider the values, whatever they may be).
Expected Result:
Id Name Missing Excess
1,'Abc','<ROOT><TAG2>value</TAG2> <TAG8>value</TAG8> </ROOT>','<ROOT><TAG3>value</TAG3> </ROOT>'
2,'Pqr','<ROOT><TAG1>value</TAG1> <TAG14>False</TAG14> </ROOT>','<ROOT><TAG11>value</TAG11> </ROOT>'
There are no nested nodes/levels in the XML, only direct child elements under the ROOT tag, but the number of child tags will vary. I am looking for a simple and common logic to resolve this (with or without a SQL query).
The main idea: parse each tag name (local-name(.)) and combine the differences into XML.
-- Sample customers; X holds each customer's XML.
DECLARE @t TABLE (
Id INT PRIMARY KEY,
Name VARCHAR(50),
X XML
)
INSERT INTO @t (Id, Name, X)
VALUES
(1, 'Abc', N'<ROOT><TAG1>False</TAG1><TAG3>value</TAG3><TAG14>value</TAG14></ROOT>'),
(2, 'Pqr', N'<ROOT><TAG2>False</TAG2><TAG8>value</TAG8><TAG11>value</TAG11></ROOT>')
-- Reference XML that every customer is compared against.
DECLARE @x XML = N'<ROOT><TAG1>value</TAG1><TAG2>value</TAG2><TAG8>False</TAG8><TAG14>False</TAG14></ROOT>'
-- Per customer: full-join the customer's tags with the reference tags on tag name and keep
-- only the unmatched sides. Tags only in @x are "Missing", tags only in the customer are "Excess".
SELECT t.Id, t.Name, t2.val.query('Missing/*') AS Missing, t2.val.query('Excess/*') AS Excess
FROM @t t
CROSS APPLY (
SELECT
Missing = Missing.query,
Excess = Excess.query
FROM (
SELECT
query = t.c.query('.'),
tag = t.c.value('local-name(.)', 'SYSNAME')
-- x is unqualified here; presumably it resolves to the outer row's X column (case-insensitive collation)
FROM x.nodes('*/*') t(c)
) Excess
FULL JOIN (
SELECT
query = t.c.query('.'),
tag = t.c.value('local-name(.)', 'SYSNAME')
FROM @x.nodes('*/*') t(c)
) Missing ON Missing.tag = Excess.tag
WHERE Missing.tag IS NULL
OR Excess.tag IS NULL
-- wraps each unmatched element in a <Missing> or <Excess> element, read back by query() above
FOR XML PATH(''), TYPE
) t2 (val)
Output -
----------- ---------- ------------------------- ------------------------------------------
1 Abc <TAG3>value</TAG3> <TAG2>value</TAG2><TAG8>False</TAG8>
2 Pqr <TAG11>value</TAG11> <TAG1>value</TAG1><TAG14>False</TAG14>

Grouping XML Elements in FOR XML Clause

I am trying to create a structured XML document from my temp table. The temp table is in the following format:
CREATE TABLE #Temp1 ( Name Char( 30 ), seqid integer, salary int );
INSERT INTO #Temp1 VALUES('DEAL' ,123,6)
INSERT INTO #Temp1 VALUES('DEAL' ,56,6)
INSERT INTO #Temp1 VALUES('TRACNHE' ,1253,56)
INSERT INTO #Temp1 VALUES('TRACNHE' ,5,65)
INSERT INTO #Temp1 VALUES('ASSET' ,56,23)
I am trying to create an xml format in the following form :
<Response>
<Deal>
<seqid="123" salary="6" />
<seqid="56" salary="6" />
</Deal>
<TRACNHE>
<seqid="1253" salary="56"/>
<seqid="5" salary="65"/>
</TRACNHE>
<ASSET>
<seqid="56" salary="23"/>
</ASSET>
</Response>
SELECT Name, (SELECT SEQID FROM #TEMP1 T WHERE T.Name = T1.Name)
FROM (SELECT DISTINCT NAME FROM #TEMP1 ) T1
FOR XML PATH('rEPONSE')
DROP TABLE #Temp1
DROP TABLE #Temp1
I tried the above query, but it fails with an error saying that the subquery returned more than 1 value.
Could you let me know what I am missing in this query?
Is there a better way to handle this scenario?
Thanks in advance.
Based on your requirements, I see two complications:
1. You are trying to get the XML with grouped items.
2. For each group, you are trying to create an XML element with two attributes
but without a proper element name:
<seqid="1253" salary="56"/>
instead of
<ss seqid="1253" salary="56"/>
Have a look at the query below; it may help:
-- One subquery per group. Each row becomes an <ss> element carrying seqid and salary
-- as attributes; ROOT() wraps all rows of the group in a single group element.
-- (PATH('Deal') with 'ss/@seqid' would emit a separate <Deal> per row instead.)
SELECT
(SELECT
seqid AS '@seqid'
, salary AS '@salary'
FROM #Temp1 AS t WHERE t.Name = 'Deal'
FOR XML PATH('ss'), ROOT('Deal'), TYPE
) ,
(SELECT
seqid AS '@seqid'
, salary AS '@salary'
FROM #Temp1 AS t WHERE t.Name = 'TRACNHE'
FOR XML PATH('ss'), ROOT('TRACNHE'), TYPE
) ,
(SELECT
seqid AS '@seqid'
, salary AS '@salary'
FROM #Temp1 AS t WHERE t.Name = 'ASSET'
FOR XML PATH('ss'), ROOT('ASSET'), TYPE
)
-- The unnamed xml-typed columns are inlined under the <Response> root.
FOR XML PATH(''), ROOT('Response');

Resources