Hi I been trying to insert into two tables (groups and fields) from XML in SQL. But the solution either doesn't fix my problem or performance is slow as Groups and Fields can number in hundreds of thousands.
A sample of the XML:
<?xml version="1.0" encoding="utf-16"?>
<FB_Flow
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema" id="1">
<groups>
<FB_FlowGroup counter="1125" position="2" positionparent="0" id="0">
<fields>
<FB_FlowField>
<value>TEST1</value>
<counter>111</counter>
<lineposition>1</lineposition>
</FB_FlowField>
<FB_FlowField>
<value>TEST2</value>
<counter>222</counter>
<lineposition>2</lineposition>
<groupid>0</groupid>
</FB_FlowField>
<FB_FlowField>
<value>TEST3</value>
<counter>333</counter>
<lineposition>3</lineposition>
</FB_FlowField>
</fields>
</FB_FlowGroup>
<FB_FlowGroup counter="1126" position="3" positionparent="2" id="0">
<fields>
<FB_FlowField>
<value>TEST1</value>
<counter>18</counter>
<lineposition>1</lineposition>
</FB_FlowField>
</fields>
</FB_FlowGroup>
</groups>
</FB_Flow>
The first part works fine (To get a list of all groups)
insert into #Groups (intGroupCounter,intGroupPosition,intGroupPositionParent)
SELECT
gcounter = Groups.value('#counter[1]', 'int'),
gposition = Groups.value('#position[1]', 'int'),
gpositionparent = Groups.value('#positionparent[1]', 'int')
FROM
#FlowXML.nodes('/FB_Flow/groups/FB_FlowGroup') AS XTbl(Groups)
This second part fails for the most part (To get all fields with the parent group position):
insert into #Fields (intGroupPosition,vFieldValue,intFieldCounter,intFieldPosition)
SELECT
gposition = XTbl.Groups.value('#position', 'int'),
fValue = XTbl2.Fields.value('value[1]', 'varchar(max)'),
fcounter = XTbl2.Fields.value('counter[1]', 'int'),
fposition = XTbl2.Fields.value('lineposition[1]', 'int')
FROM
#FlowXML.nodes('/FB_Flow/groups/FB_FlowGroup') AS XTbl(Groups)
cross APPLY
Groups.nodes('fields/FB_FlowField') AS XTbl2(Fields)
I have been getting around this by using a cursor and selecting the group by the position attribute but the performance is very poor.
DECLARE #GroupCounter int,
#GroupPosition int,
#GroupPositionParent int,
#GroupID int
DECLARE #Groups table
(
intGroupCounter int not null,
intGroupPosition int not null,
intGroupPositionParent int null default 0
)
insert into #Groups (intGroupCounter,intGroupPosition,intGroupPositionParent)
SELECT
gcounter = Groups.value('#counter[1]', 'int'),
gposition = Groups.value('#position[1]', 'int'),
gpositionparent = Groups.value('#positionparent[1]', 'int')
FROM
#FlowXML.nodes('/FB_Flow/groups/FB_FlowGroup') AS XTbl(Groups)
DECLARE cur cursor for
SELECT
intGroupCounter,
intGroupPosition,
intGroupPositionParent
FROM
#Groups
OPEN cur
FETCH NEXT FROM cur INTO #GroupCounter, #GroupPosition, #GroupPositionParent
WHILE ##FETCH_STATUS = 0
BEGIN
insert into FB_T_FlowGroups (FH_ID,DTC_GroupCounter,Position,PositionParent)
values (#FlowHeaderID,#GroupCounter,#GroupPosition,#GroupPositionParent)
select #GroupID = ##IDENTITY
--declare #Path varchar(max) = '/FB_Flow/groups/FB_FlowGroup[#position="sql:variable("#GroupPosition")"]/fields/FB_FlowField'
insert into FB_T_FlowGroupField (FlowGroupID,ItemValue,DTC_ItemCounter)
SELECT
#GroupID,
XTbl.Fields.value('value[1]', 'varchar(max)'),
XTbl.Fields.value('counter[1]', 'int')
FROM
#FlowXML.nodes('/FB_Flow/groups/FB_FlowGroup[#position=sql:variable("#GroupPosition")]/fields/FB_FlowField') AS XTbl(Fields)
FETCH NEXT FROM cur INTO #GroupCounter, #GroupPosition, #GroupPositionParent
END
CLOSE cur
DEALLOCATE cur
Any Ideas?
What is your SQL Server version (SELECT ##VERSION;)?
Please try the following approach without a cursor. It should give you a tremendous performance improvement:
XML attributes don't need [1] position. Attributes are always unique.
XML elements need an adjustment in the XPath expression - text().
SQL
DECLARE #FlowXML XML =
N'<FB_Flow xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema" id="1">
<groups>
<FB_FlowGroup counter="1125" position="2" positionparent="0" id="0">
<fields>
<FB_FlowField>
<value>TEST1</value>
<counter>111</counter>
<lineposition>1</lineposition>
</FB_FlowField>
<FB_FlowField>
<value>TEST2</value>
<counter>222</counter>
<lineposition>2</lineposition>
<groupid>0</groupid>
</FB_FlowField>
<FB_FlowField>
<value>TEST3</value>
<counter>333</counter>
<lineposition>3</lineposition>
</FB_FlowField>
</fields>
</FB_FlowGroup>
<FB_FlowGroup counter="1126" position="3" positionparent="2" id="0">
<fields>
<FB_FlowField>
<value>TEST1</value>
<counter>18</counter>
<lineposition>1</lineposition>
</FB_FlowField>
</fields>
</FB_FlowGroup>
</groups>
</FB_Flow>';
-- insert into #Groups (intGroupCounter,intGroupPosition,intGroupPositionParent)
SELECT gcounter = Groups.value('#counter', 'INT')
, gposition = Groups.value('#position', 'INT')
, gpositionparent = Groups.value('#positionparent', 'INT')
FROM #FlowXML.nodes('/FB_Flow/groups/FB_FlowGroup') AS XTbl(Groups);
--insert into #Fields (intGroupPosition,vFieldValue,intFieldCounter,intFieldPosition)
SELECT gposition = XTbl.Groups.value('#position', 'INT')
, fValue = XTbl2.Fields.value('(value/text())[1]', 'VARCHAR(MAX)')
, fcounter = XTbl2.Fields.value('(counter/text())[1]', 'INT')
, fposition = XTbl2.Fields.value('(lineposition/text())[1]', 'INT')
FROM #FlowXML.nodes('/FB_Flow/groups/FB_FlowGroup') AS XTbl(Groups)
CROSS APPLY Groups.nodes('fields/FB_FlowField') AS XTbl2(Fields);
Related
Can someone please help... I cannot seem to figure this out... I need to parse this using T-SQL. I have it saved as a file in the C:\Temp directory, named "OpenPos.XML". Any help would be very greatly appreciated. I need to be able to parse the XML and write it to a SQL table.
If I do an outer apply at all levels, it applies every PO # to every line instead of just for the lines related to that PO #.
XML File:
<?xml version="1.0" encoding="utf-8"?>
<acXML lang="en-us" xmlns="https://www.autocrib.net/acXMLSchema.xsd">
<Header>
<From>
<Company>Rolls Royce ATC (Indianapolis)</Company>
<Identity>rollsroyceatc-indianapolis</Identity>
<DatabaseName>AutoCribNet2</DatabaseName>
</From>
</Header>
<Request>
<OpenPurchaseOrderRequest ReqType="GET">
<PoNo>1716W</PoNo>
<ExternalPoNo />
<LineItems>
<Item>
<Line>1</Line>
<TagNo>1716</TagNo>
<VendorID>QMS</VendorID>
<ItemID>CARD BC6266</ItemID>
<ItemType>Expendable</ItemType>
<ItemRFID />
<Station>01</Station>
<Bin>0825-04-B-06</Bin>
<OrderQty>1</OrderQty>
<Received>1</Received>
<ReceivedBurn>0</ReceivedBurn>
<PackQty>1</PackQty>
<UnitCost>0.0000</UnitCost>
<UnitPrice>0.0000</UnitPrice>
<Lot />
<IsSpotBuy>False</IsSpotBuy>
<SpotTranCode>0</SpotTranCode>
<Inspect>False</Inspect>
<InspDate />
<InspOnHand>0</InspOnHand>
<InspBurn>0</InspBurn>
<OrderDate>01-13-2022</OrderDate>
<DueDate>01-25-2022</DueDate>
<PromiseDt>01-25-2022</PromiseDt>
<ReceiveDt />
<Department />
<Job />
<Machine />
<Reason />
<Part />
<Processed>False</Processed>
</Item>
</LineItems>
</OpenPurchaseOrderRequest>
<OpenPurchaseOrderRequest ReqType="GET">
SQL:
--Open POs
SELECT PO_X = CAST(BulkColumn AS xml)
INTO #PO
FROM OPENROWSET(BULK 'C:\AutoCrib WebServices\XML Files\ATC\OpenPOs.XML', SINGLE_CLOB) t
SELECT [Company] = p.PO_X.value('(//*:Header/*:From/*:Company)[1]', 'varchar(150)')
, [Identity] = f.value('(*:Identity)[1]', 'varchar(200)')
, [DatabaseNM] = f.value('(*:DatabaseName)[1]', 'varchar(50)')
, [PoNo] = r.value('(*:PoNo)[1]', 'varchar(10)')
, [ItemLine] = i.value('(*:Line)[1]', 'int')
, [ItemTagNo] = i.value('(*:TagNo)[1]', 'varchar(10)')
, [ItemVendorID] = i.value('(*:VendorID)[1]', 'varchar(100)')
, [ItemID] = i.value('(*:ItemID)[1]', 'varchar(30)')
, [ItemRFID] = i.value('(*:ItemRFID)[1]', 'varchar(50)')
, [ItemStation] = i.value('(*:Station)[1]', 'varchar(5)')
, [ItemBin] = i.value('(*:Bin)[1]', 'varchar(30)')
, [ItemOrderQty] = i.value('(*:OrderQty)[1]', 'int')
, [ItemReceived] = i.value('(*:Received)[1]', 'int')
, [ItemReceivedBurn] = i.value('(*:ReceivedBurn)[1]', 'int')
, [ItemPackQty] = i.value('(*:PackQty)[1]', 'int')
, [ItemUnitCost] = i.value('(*:UnitCost)[1]', 'money')
, [ItemUnitPrice] = i.value('(*:UnitPrice)[1]', 'money')
, [ItemOrderDate] = i.value('(*:OrderDate)[1]', 'datetime')
, [ItemDueDate] = i.value('(*:DueDate)[1]', 'datetime')
, [ItemPromiseDate] = i.value('(*:PromiseDt)[1]', 'datetime')
, [ItemReceiveDate] = i.value('(*:ReceiveDt)[1]', 'datetime')
, [ItemProcessed] = i.value('(*:Processed)[1]', 'varchar(10)')
FROM #PO p
OUTER APPLY p.PO_X.nodes('//*:Header/*:From') a(f)
OUTER APPLY p.PO_X.nodes('//*:Request/*:OpenPurchaseOrderRequest') c(r)
OUTER APPLY p.PO_X.nodes('//*:Request/*:OpenPurchaseOrderRequest/*:LineItems/*:Item') l(i)
DROP TABLE #PO
Start with loading the file. This uses OPENROWSET, but you might find BULK INSERT more flexible.
SELECT DepX = CAST(BulkColumn AS xml)
INTO #Departments
FROM OPENROWSET(BULK 'C:\Temp\Departments.XML', SINGLE_CLOB) t
--SINGLE_CLOB|SINGLE_NCLOB for Ascii vs Unicode
Now, we can query the xml
SELECT [From.Company] = d.DepX.value('(//*:Header/*:From/*:Company)[1]', 'varchar(99)')
/* Shorter paths if we OUTER APPLY the root we're interested in */
, [From.Company Shortcut] = f.value('(*:Company)[1]', 'varchar(99)')
FROM #Departments d
OUTER APPLY d.DepX.nodes('//*:Header/*:From') a(f)
Results
From.Company
From.Company Shortcut
Test Company (Indianapolis)
Test Company (Indianapolis)
For help with xml shredding, try Jacob Sebastian's SELECT * FROM XML
The output is helpful, but taking the time to go through and understand how it works is very educational.
This uses namespace wildcard *:
More complex xml might require WITH XMLNAMESPACE
I was finally able to figure it out. I was missing the namespace. Here's my SQL logic:
DECLARE #XML XML =
(SELECT *
FROM OPENROWSET(BULK 'C:\Temp\OpenPOs.XML', SINGLE_CLOB) t)
;WITH XMLNAMESPACES (DEFAULT 'https://www.autocrib.net/acXMLSchema.xsd')
SELECT A.evnt.value('(PoNo/text())[1]','varchar(10)') AS Event_DriverId
,B.rec.value('(Line/text())[1]','int') AS Record_RecordId
FROM #XML.nodes('/acXML/Request/OpenPurchaseOrderRequest') A(evnt)
OUTER APPLY A.evnt.nodes('LineItems/Item') B(rec);
I'm querying using OpenXML to retrieve the cap elements between the subject elements in XML I don't want the cap between the support elemements. The query works great to retrieve one value but fails when there are multiple element nodes.
<First>
<Test id="83847">
<subject>
<cap>15</cap>
<cap>25</cap>
<cap>100</cap>
</subject>
<support>
<cap>9</cap>
</support>
</Test>
<Test id="83848">
<subject>
<cap>150</cap>
<cap>2</cap>
<cap>10</cap>
</subject>
<support>
<cap>9</cap>
</support>
</Test>
</First>
CREATE Table #XmlTemp(XmlField Xml);
Set Nocount On;
Insert Into #XmlTemp(XmlField)
Select '<First>
<Test id="83847">
<subject>
<cap>15</cap>
<cap>25</cap>
<cap>100</cap>
</subject>
<support>
<cap>9</cap>
</support>
</Test>
<Test id="83848">
<subject>
<cap>150</cap>
<cap>2</cap>
<cap>10</cap>
</subject>
<support>
<cap>9</cap>
</support>
</Test>
</First>'As XmlField;
Declare #xmlData Xml;
Select #xmlData = XmlField From #XmlTemp;
Declare #document int;
Exec sp_xml_preparedocument #document Output, #xmlData, NULL;
SELECT ID,Cap FROM(
SELECT ID,Cap FROM OpenXml(#document,'./First/Test', 0) With (ID varchar(max)'./#id', Cap Varchar(max) './subject/cap')) alias
drop table #xmltemp
It'd be fairly time consuming to change the query to use .nodes method more so because of the testing involved so I'd like it to stay as OpenXML if possible.
I'd only like to retrieve out the ID and then the multiple cap element values.
Thank you for your time.
I can't see why the query using .nodes is complex. Just
SELECT t.n.value('(/First/Test/#id)[1]', 'int') id
, t.n.value('(.)[1]', 'int') cap
from #xmlData.nodes('./First/Test/subject/cap') t(n);
And OpenXML version
SELECT ID,Cap FROM(
SELECT ID,Cap
FROM OpenXml(#document,'./First/Test/subject/cap', 0)
With (ID varchar(max) '/First/Test/#id'
, Cap Varchar(max) '.')) alias
Version for the edited question
SELECT ID,Cap FROM(
SELECT ID,Cap
FROM OpenXml(#document,'/First/Test/subject/cap', 0)
With (ID varchar(max) '../../#id'
, Cap Varchar(max) '.')) alias
It returns only subject/cap and #id of the proper parent:
ID Cap
1 83847 15
2 83847 25
3 83847 100
4 83848 150
5 83848 2
6 83848 10
Your XML is double nested. You have 1:n of <Test> elements within <First> and again 1:n of <cap> elements within <subject>.
The proper way to query this is diving into the XML strictly forward:
CREATE Table #XmlTemp(XmlField Xml);
Set Nocount On;
Insert Into #XmlTemp(XmlField)
Select '<First>
<Test id="83847">
<subject>
<cap>15</cap>
<cap>25</cap>
<cap>100</cap>
</subject>
<support>
<cap>9</cap>
</support>
</Test>
<Test id="83848">
<subject>
<cap>150</cap>
<cap>2</cap>
<cap>10</cap>
</subject>
<support>
<cap>9</cap>
</support>
</Test>
</First>'As XmlField;
--The query will use .nodes() to get all <Test> elements and again .nodes() to get the related <cap> elements:
SELECT t.value('#id', 'int') id
,c.value('text()[1]', 'int') cap
from #XmlTemp AS tbl
CROSS APPLY tbl.XmlField.nodes('/First/Test') AS A(t)
CROSS APPLY A.t.nodes('subject/cap') AS B(c);
GO
DROP TABLE #XmlTemp;
What am I not getting here? I can't get any return except NULL...
DECLARE #xml xml
SELECT #xml = '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<SOAP-ENV:Body>
<webregdataResponse>
<result>0</result>
<regData />
<errorFlag>99</errorFlag>
<errorResult>Not Processed</errorResult>
</webregdataResponse>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>'
DECLARE #nodeVal int
SELECT #nodeVal = #xml.value('(errorFlag)[1]', 'int')
SELECT #nodeVal
Here is the solution:
DECLARE #xml xml
SELECT #xml = '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<SOAP-ENV:Body>
<webregdataResponse>
<result>0</result>
<regData />
<errorFlag>99</errorFlag>
<errorResult>Not Processed</errorResult>
</webregdataResponse>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>'
declare #table table (data xml);
insert into #table values (#xml);
WITH xmlnamespaces (
'http://schemas.xmlsoap.org/soap/envelope/' as [soap])
SELECT Data.value('(/soap:Envelope/soap:Body/webregdataResponse/errorFlag)[1]','int') AS ErrorFlag
FROM #Table ;
Running the above SQL will return 99.
Snapshot of the result is given below,
That's because errorFlag is not the root element of your XML document. You can either specify full path from root element to errorFlag, for example* :
SELECT #nodeVal = #xml.value('(/*/*/*/errorFlag)[1]', 'int')
or you can use descendant-or-self axis (//) to get element by name regardless of it's location in the XML document, for example :
SELECT #nodeVal = #xml.value('(//errorFlag)[1]', 'int')
*: I'm using * instead of actual element name just to simplify the expression. You can also use actual element names along with the namespaces, like demonstrated in the other answer.
I have a SOAP response with below structure and need to get the all the values for the below tags 1. result , documentNumber , costElementCode .
This is my XML sample :
DECLARE #myXML XML = '<commitmentsResponse xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<result xmlns="http://response.cim.its.test.edu.au/">SUCCESS</result>
<value>
<documentNumber xmlns="http://finance.cim.its.test.edu.au/">123456789</documentNumber>
<commitmentLine>
<lineNumber>2</lineNumber>
<costElementCode>costElementCode</costElementCode>
<internalOrderNumber>1000002</internalOrderNumber>
<costCentreCode>9999</costCentreCode>
<wbsCode>3000</wbsCode>
<lineDescription>2 packets of pencils</lineDescription>
<accountNumber>100000</accountNumber>
<itemAmount>105.5</itemAmount>
<fundsDueDate>2015-06-15</fundsDueDate>
</commitmentLine>
<commitmentLine xmlns="http://finance.cim.its.test.edu.au/">
<lineNumber>2</lineNumber>
<costElementCode>costElementCode</costElementCode>
<internalOrderNumber>1000002</internalOrderNumber>
<costCentreCode>9999</costCentreCode>
<wbsCode>3000</wbsCode>
<lineDescription>2 packets of pencils</lineDescription>
<accountNumber>100000</accountNumber>
<itemAmount>105.5</itemAmount>
<fundsDueDate>2015-06-15</fundsDueDate>
</commitmentLine>
</value>
<value>
<documentNumber xmlns="http://finance.cim.its.test.edu.au/">12345</documentNumber>
<commitmentLine>
<lineNumber>2</lineNumber>
<costElementCode>costElementCode</costElementCode>
<internalOrderNumber>1000002</internalOrderNumber>
<costCentreCode>9999</costCentreCode>
<wbsCode>3000</wbsCode>
<lineDescription>2 packets of pencils</lineDescription>
<accountNumber>100000</accountNumber>
<itemAmount>105.5</itemAmount>
<fundsDueDate>2015-06-15</fundsDueDate>
</commitmentLine>
<commitmentLine xmlns="http://finance.cim.its.test.edu.au/">
<lineNumber>2</lineNumber>
<costElementCode>costElementCode</costElementCode>
<internalOrderNumber>1000002</internalOrderNumber>
<costCentreCode>9999</costCentreCode>
<wbsCode>3000</wbsCode>
<lineDescription>2 packets of pencils</lineDescription>
<accountNumber>100000</accountNumber>
<itemAmount>105.5</itemAmount>
<fundsDueDate>2015-06-15</fundsDueDate>
</commitmentLine>
</value>
</commitmentsResponse>'
I have tried using the below but only get the first value and not all nodes :
DECLARE #DocumentNumber INT
;WITH XMLNAMESPACES (N'http://finance.cim.its.test.edu.au/' as DYN)
SELECT #DocumentNumber = c.value('(DYN:documentNumber)[1]', 'INT')
FROM #myXML.nodes('/commitmentsResponse/value') t(c)
DECLARE #Result VARCHAR(256)
;WITH XMLNAMESPACES (N'http://response.cim.its.test.edu.au/' as DYN)
SELECT #Result = c.value('(DYN:result)[1]', 'VARCHAR(256)')
FROM #myXML.nodes('/commitmentsResponse') t(c)
DECLARE #CostElementCode VARCHAR(256)
SELECT #CostElementCode = c.value('(costElementCode)[1]', 'VARCHAR(256)')
FROM #myXML.nodes('/commitmentsResponse/value/commitmentLine') t(c)
SELECT #Result
SELECT #DocumentNumber
SELECT #CostElementCode
You can use OUTER APPLY to shred XML data type into multiple rows. This is one example for extracting multiple costElementCodes value (implementing for the other elements should be trivial) :
SELECT x.value('.[1]', 'VARCHAR(256)') as costElementCode
FROM #myXML.nodes('/commitmentsResponse/value/commitmentLine') t(c)
OUTER APPLY t.c.nodes('costElementCode') cec(x)
Please check below query.
declare #xmlRoot as xml
set #xmlRoot= '<Root>
<table1 col1="2012-03-02T16:42:55.777">
<table2Array>
<Table2 col2="abc">
</Table2>
<Table2 col2="def">
</Table2>
</table2Array>
</table1>
<table1 col1="2012-03-02T17:42:55.777">
<table2Array>
<Table2 col2="abc1">
</Table2>
<Table2 col2="def1">
</Table2>
</table2Array>
</table1>
</Root>'
declare #a as varchar(1)
set #a= '1'
SELECT
col1 = item.value('./#col2', 'varchar(10)')
FROM #xmlRoot.nodes('Root/table1[1]/table2Array/Table2' ) AS T(item);
--The above query return expected output
SELECT
col1 = item.value('./#col2', 'varchar(10)')
FROM #xmlRoot.nodes('Root/table1[*[local-name()=sql:variable("#a")]]/table2Array/Table2' )
AS T(item);
--The above query doesn't return expected output
what am I doing wrong here?
Since I dont have a key value in parent node to identify child node. I have to parse through index.
This worked for me:
DECLARE #a INT; -- data type is probably important!
SET #a = 1;
SELECT col1 = item.value('./#col2', 'varchar(10)')
FROM #xmlRoot.nodes('Root/table1[sql:variable("#a")]/table2Array/Table2') AS T(item);