Nesting XML elements with FOR XML, PATH - sql-server

I have a table EVENTS:
Name Event Gift Label
Jeff Birthday Card Happy Birthday
Jeff Birthday Present Jeff's prezzy
Maisey Promotion Cake Well Done!
Maisey Birthday Card Happy Birthday
Maisey Birthday Cake Another Year Older!
Here's the code to produce that table:
-- Sample data: one row per (person, event, gift) together with the gift's label.
-- This is a table variable, so it must be declared and referenced with the @ sigil.
DECLARE @TempTable TABLE
(
    [name]  VARCHAR(250),
    [event] VARCHAR(250),
    [gift]  VARCHAR(250),
    [label] VARCHAR(2500)
);

INSERT INTO @TempTable ([name], [event], [gift], [label])
VALUES
    ('Jeff',   'Birthday',  'Card',    'Happy Birthday'),
    ('Jeff',   'Birthday',  'Present', 'Jeff`s prezzy'),
    ('Maisey', 'Promotion', 'Cake',    'Well Done!'),
    ('Maisey', 'Birthday',  'Card',    'Happy Birthday'),
    ('Maisey', 'Birthday',  'Cake',    'Another Year Older!');
Which I would like to generate the following XML from:
<celebrations>
<person name="Jeff">
<events>
<event name="Birthday">
<gifts>
<gift name="Card" Value="Happy Birthday" />
<gift name="Present" Value="Jeff's prezzy" />
</gifts>
</event>
</events>
</person>
<person name="Maisey">
<events>
<event name="Promotion">
<gifts>
<gift name="Cake" value="Well Done!" />
</gifts>
</event>
<event name="Birthday">
<gifts>
<gift name="Card" value="Happy Birthday" />
<gift name="Cake" value="Another Year Older!" />
</gifts>
</event>
</events>
</person>
</celebrations>
My current SQLXML looks like this:
-- Current (unsatisfying) attempt: every source row becomes its own
-- <person><gifts>...</gifts></person> fragment, with no grouping by name or event.
-- Reads the @TempTable table variable declared above.
SELECT (
    SELECT (
        -- All columns of the current row as child elements, wrapped in <gifts>.
        SELECT T1.*
        FOR XML PATH(''), ROOT('gifts'), TYPE
    )
    FROM @TempTable AS T1
    FOR XML PATH('person'), TYPE
)
FOR XML PATH('persons');
Which is currently producing this:
<persons>
<person>
<gifts>
<name>Jeff</name>
<event>Birthday</event>
<gift>Card</gift>
<label>Happy Birthday</label>
</gifts>
</person>
<person>
<gifts>
<name>Jeff</name>
<event>Birthday</event>
<gift>Present</gift>
<label>Jeff`s prezzy</label>
</gifts>
</person>
<person>
<gifts>
<name>Maisey</name>
<event>Promotion</event>
<gift>Cake</gift>
<label>Well Done!</label>
</gifts>
</person>
<person>
<gifts>
<name>Maisey</name>
<event>Birthday</event>
<gift>Card</gift>
<label>Happy Birthday</label>
</gifts>
</person>
<person>
<gifts>
<name>Maisey</name>
<event>Birthday</event>
<gift>Cake</gift>
<label>Another Year Older!</label>
</gifts>
</person>
</persons>
How do I format it to look like the XML in the portion at the top of this post?

Try this, the magic is the correlated sub-query with GROUP BY:
-- Sample data: one row per (person, event, gift) together with the gift's label.
DECLARE @TempTable TABLE
(
    [name]  VARCHAR(250),
    [event] VARCHAR(250),
    [gift]  VARCHAR(250),
    [label] VARCHAR(2500)
);

INSERT INTO @TempTable ([name], [event], [gift], [label])
VALUES
    ('Jeff',   'Birthday',  'Card',    'Happy Birthday'),
    ('Jeff',   'Birthday',  'Present', 'Jeff`s prezzy'),
    ('Maisey', 'Promotion', 'Cake',    'Well Done!'),
    ('Maisey', 'Birthday',  'Card',    'Happy Birthday'),
    ('Maisey', 'Birthday',  'Cake',    'Another Year Older!');

-- Three nesting levels, each a correlated sub-query grouped on its own key.
-- Aliases starting with @ are emitted as attributes of the row element.
-- ORDER BY at each level makes the element order deterministic.
SELECT
    T1.[name] AS [@name],
    (
        -- One <event> per distinct event of the current person.
        SELECT
            T2.[event] AS [@name],
            (
                -- One <gift> per distinct gift/label of the current person and event.
                SELECT
                    T3.[gift]  AS [@name],
                    T3.[label] AS [@value]
                FROM @TempTable AS T3
                WHERE T3.[name] = T1.[name]
                  AND T3.[event] = T2.[event]
                GROUP BY T3.[gift], T3.[label]
                ORDER BY T3.[gift], T3.[label]
                FOR XML PATH('gift'), ROOT('gifts'), TYPE
            )
        FROM @TempTable AS T2
        WHERE T2.[name] = T1.[name]
        GROUP BY T2.[event]
        ORDER BY T2.[event]
        FOR XML PATH('event'), ROOT('events'), TYPE
    )
FROM @TempTable AS T1
GROUP BY T1.[name]
ORDER BY T1.[name]
FOR XML PATH('person'), ROOT('celebrations'), TYPE;
The result
<celebrations>
<person name="Jeff">
<events>
<event name="Birthday">
<gifts>
<gift name="Card" value="Happy Birthday" />
<gift name="Present" value="Jeff`s prezzy" />
</gifts>
</event>
</events>
</person>
<person name="Maisey">
<events>
<event name="Birthday">
<gifts>
<gift name="Cake" value="Another Year Older!" />
<gift name="Card" value="Happy Birthday" />
</gifts>
</event>
<event name="Promotion">
<gifts>
<gift name="Cake" value="Well Done!" />
</gifts>
</event>
</events>
</person>
</celebrations>

Related

Inserting XML data in to a table

I need to insert an external XML file data into a SQL Server table. I tried the below code. But this inserts a single record with NULL values for all the columns
-- Asker's original import script, kept as posted apart from restoring the
-- @ variable sigil.
-- NOTE: this version inserts a single all-NULL row. nodes('/tags') yields one
-- node, and paths such as 'ID[1]' look for child ELEMENTS, while the sample
-- file stores the values as ATTRIBUTES of <row>.
DECLARE @xml XML;

-- Read the whole file as one blob and convert it to XML.
SELECT @xml = CONVERT(XML, bulkcolumn, 2)
FROM OPENROWSET(BULK 'C:\Users\PC901\Downloads\Tags.xml', SINGLE_BLOB) AS X;

SET ARITHABORT ON;

TRUNCATE TABLE Tags;

INSERT INTO Tags
(
    ID, WikiPostId, ExcerptPostId, Count, TagName
)
SELECT
    P.value('ID[1]', 'BIGINT')            AS ID,
    P.value('WikiPostId[1]', 'BIGINT')    AS WikiPostId,
    P.value('ExcerptPostId[1]', 'BIGINT') AS ExcerptPostId,
    P.value('Count[1]', 'BIGINT')         AS Count,
    P.value('TagName[1]', 'VARCHAR(100)') AS TagName
FROM @xml.nodes('/tags') PropertyFeed(P);

SELECT * FROM Tags;
and the sample XML would be
<?xml version="1.0" encoding="utf-8"?>
<tags>
<row Id="1" TagName=".net" Count="283778" ExcerptPostId="3624959" WikiPostId="3607476" />
<row Id="2" TagName="html" Count="826083" ExcerptPostId="3673183" WikiPostId="3673182" />
<row Id="3" TagName="javascript" Count="1817846" ExcerptPostId="3624960" WikiPostId="3607052" />
<row Id="4" TagName="css" Count="588062" ExcerptPostId="3644670" WikiPostId="3644669" />
<row Id="5" TagName="php" Count="1286873" ExcerptPostId="3624936" WikiPostId="3607050" />
</tags>
Here you go:
-- Inline copy of the sample document so the query can be run stand-alone.
DECLARE @xml XML = '<?xml version="1.0" encoding="utf-8"?>
<tags>
<row Id="1" TagName=".net" Count="283778" ExcerptPostId="3624959" WikiPostId="3607476" />
<row Id="2" TagName="html" Count="826083" ExcerptPostId="3673183" WikiPostId="3673182" />
<row Id="3" TagName="javascript" Count="1817846" ExcerptPostId="3624960" WikiPostId="3607052" />
<row Id="4" TagName="css" Count="588062" ExcerptPostId="3644670" WikiPostId="3644669" />
<row Id="5" TagName="php" Count="1286873" ExcerptPostId="3624936" WikiPostId="3607050" />
</tags>';

-- One result row per <row> element; '@Xyz' addresses the attribute Xyz of that element.
SELECT
    P.value('@Id', 'BIGINT')            AS ID,
    P.value('@WikiPostId', 'BIGINT')    AS WikiPostId,
    P.value('@ExcerptPostId', 'BIGINT') AS ExcerptPostId,
    P.value('@Count', 'BIGINT')         AS Count,
    P.value('@TagName', 'VARCHAR(100)') AS TagName
FROM @xml.nodes('/tags/row') PropertyFeed(P);
outputs
ID WikiPostId ExcerptPostId Count TagName
----------- -------------------- -------------------- -------------------- ----------
1 3607476 3624959 283778 .net
2 3673182 3673183 826083 html
3 3607052 3624960 1817846 javascript
4 3644669 3644670 588062 css
5 3607050 3624936 1286873 php
(5 rows affected)

How can i generate a XML without repeating an field twice

I'm trying to generate XML using the code below, but I'm not satisfied with the output. The code uses a cursor to get the ids for which the XML is generated and then updates another table with the result. Any help is appreciated; I'm new to XML. Thanks
-- For every non-NULL id in xml_temp_table, build a <FormValue><Material>...
-- document and store it in LOCATION_TABLE_XML.XML_COL for the matching row.
DECLARE @xml_var XML;
DECLARE @ID INT;

DECLARE XML_CURSOR CURSOR FOR
    SELECT id
    FROM xml_temp_table
    WHERE id IS NOT NULL;

OPEN XML_CURSOR;

FETCH NEXT FROM XML_CURSOR INTO @ID;

WHILE @@FETCH_STATUS = 0
BEGIN
    SET @xml_var =
    (
        SELECT
            (
                SELECT 'Type'    AS ID,
                       'Initial' AS VALUE,
                       ''        AS TAG,
                       'true'    AS VISIBLE,
                       Getdate() AS HISTORY,
                       ''        AS DESCRIPTION,
                       ''        AS COMMENT
                FROM XML_TABLE d
                WHERE d.XML_ID = @ID
                FOR XML PATH('field'), TYPE
            ) AS 'field',   -- NOTE: this alias adds the extra outer <field> wrapper the question is about
            (
                SELECT 'OwnerName'   AS ID,
                       'Testing_XML' AS VALUE,
                       ''            AS TAG,
                       'true'        AS VISIBLE,
                       Getdate()     AS HISTORY,
                       ''            AS DESCRIPTION,
                       ''            AS COMMENT
                FROM XML_TABLE d
                WHERE d.XML_ID = @ID
                FOR XML PATH('field'), TYPE
            ) AS 'field'    -- NOTE: same alias as above, same extra wrapper
        FROM XML_TABLE p
        WHERE p.XML_ID = @ID
        FOR XML PATH('Material'), ROOT('FormValue')
    );

    -- No trailing comma after the last SET assignment; one there is a syntax error.
    UPDATE S
    SET S.XML_COL = @xml_var
    FROM LOCATION_TABLE_XML S
    WHERE S.ID = @ID;

    FETCH NEXT FROM XML_CURSOR INTO @ID;
END;

-- Release the cursor once the loop is done.
CLOSE XML_CURSOR;
DEALLOCATE XML_CURSOR;
The result i'm getting is this way
<FormValue>
<Material>
<field> ----- i dont want this
<field>
<id>Type</id>
<value>Initial</value>
<tag />
<visible>true</visible>
<history>2016-11-08T16:53:16.440</history>
<description />
<comment />
</field>
<field>
<id>OwnerName</id>
<value>Testing_XML</value>
<tag />
<visible>true</visible>
<history>2016-11-08T16:53:16.440</history>
<description />
<comment />
</field>
</field> ---- i dont want this
</Material>
</FormValue>
But I want the result in this way
<FormValue>
<Material>
<field>
<id>Type</id>
<value>Initial</value>
<tag />
<visible>true</visible>
<history>2016-11-08T16:53:16.440</history>
<description />
<comment />
</field>
<field>
<id>OwnerName</id>
<value>Testing_XML</value>
<tag />
<visible>true</visible>
<history>2016-11-08T16:53:16.440</history>
<description />
<comment />
</field>
</Material>
</FormValue>
It might be enough to leave out the AS 'field' alias. Your FOR XML PATH('field') will already wrap each row in a <field> element.
The XML returning sub-selects can be seen as scalar values handled like a normal column. By providing a column alias this whole node gets a name and this name is again translated into a wrapping <field> element.
You can either erase this, or replace it with AS [node()] or with AS [*]

sql query a varchar(max) column to select an element based on the value of one of its children children's

I am trying to search xml data stored in a varchar(max) column.
Below is an example of one of the xml data strings found in the varchar column
With this example I will have a SQL variable called @dsName which is to be matched against the <datasourceName> node: a node counts as a match when its value equals @dsName and it is subordinate to either a <User> or a <Server> node. The nodes in between, until you get to <datasourceName>, can vary.
<business_process>
<ProcessDefinition name="dawns test">
<StartState name="START" uniqueId="Node3304">
<Transition name="Node4532" to="Node4532"/>
</StartState>
<EndState name="END4694" uniqueId="Node4694"/>
<User name="Node4532" uniqueId="Node4532">
<Description>test</Description>
<Distribution config-type="field" type="CommonQueueDistribution">
<Priority>0</Priority>
<AutoCompleteJob>false</AutoCompleteJob>
<GroupId>Admin</GroupId>
<UseAttendance>false</UseAttendance>
<UseShifts>false</UseShifts>
<NotifyActors>false</NotifyActors>
</Distribution>
<DocFinityTask type="DocFinityTask">
<description>read e-mail and approve or deny</description>
<help/>
<required>false</required>
<redoable>false</redoable>
<condition/>
<properties>
<undoable>true</undoable>
</properties>
</DocFinityTask>
<DocFinityTask type="SimpleFormTask">
<description>lob lookup</description>
<help/>
<required>false</required>
<redoable>true</redoable>
<condition/>
<properties>
<autoRun>true</autoRun>
<form>
<title>lob</title>
<formElement>
<type>Combobox</type>
<variable>lob</variable>
<tooltip>lob lookup</tooltip>
<label>lob</label>
<required>false</required>
<prepopulateValues>
<datasourceName>lob lookup</datasourceName>
</prepopulateValues>
<userEnter>true</userEnter>
<dataType>STRING</dataType>
</formElement>
</form>
</properties>
</DocFinityTask>
<Transition name="Node128795" to="Node128795"/>
</User>
<Server name="Node128795" uniqueId="Node128795">
<Description/>
<Event type="node-enter">
<Action type="SetProcessInstancePropertyAction" config-type="field">
<description>Whatever</description>
<propertyName>source</propertyName>
<datasourceName>get datasource list</datasourceName>
</Action>
</Event>
<Transition name="Node4694" to="END4694"/>
</Server>
<Server name="Node250" uniqueId="Node250">
<Description />
<Event type="node-enter">
<Action type="SetProcessInstancePropertyAction" config-type="field">
<description>Whatever</description>
<propertyName>source</propertyName>
<datasourceName>stump</datasourceName>
</Action>
</Event>
<Transition name="Node4694" to="END4694" />
</Server>
</ProcessDefinition>
<Layout>
<annotations/>
<nodes>
<node name="START" uniqueId="Node3304" type="startNode" text="START" x="184.5" y="135.5" width="25" height="25"/>
<node name="END4694" uniqueId="Node4694" type="endNode" text="END4694" x="588.5" y="137.5" width="25" height="25"/>
<node name="Node4532" uniqueId="Node4532" type="userNode" text="Node4532" info="false" x="296" y="135" width="150" height="50"/>
<node name="Node128795" uniqueId="Node128795" type="serverNode" text="Node128795" info="false" x="286" y="244" width="150" height="50"/>
</nodes>
<edges>
<edge originNode="Node3304" targetNode="Node4532" text="" sketch="arrow"/>
<edge originNode="Node4532" targetNode="Node128795" text="" sketch="arrow"/>
<edge originNode="Node128795" targetNode="Node4694" text="" sketch="arrow"/>
</edges>
</Layout>
Here is an example of the select I used when trying to load into a variable of an XML data type. The varchar(max) column name is XML
-- Load the XML text of the ACTIVE process model whose text contains a
-- <datasourceName> element starting with @dsName.
-- @dsName is declared and set earlier (not shown in this snippet).
DECLARE @xml XML =
(
    SELECT [XML]
    FROM ProcessModels
    WHERE [XML] LIKE '%<datasourceName>' + @dsName + '%'
      AND [status] = 'ACTIVE'
);
The SQL select is the same if I load it into a table variable, and the @dsName variable is already set with the string to search for.
In this example I want to find the name of every Server node and/or User node when it has a <datasourceName> node with the value of 'get datasource list'. The string 'get datasource list' already exists in the variable @dsName.
The following query gets me half way there.
-- Name attribute of every <Server> node under the process definition (no filtering yet).
SELECT sn.value('@name', 'varchar(100)') AS ServerNodes
FROM @xml.nodes('/business_process/ProcessDefinition/Server') AS ServerNodes(sn);
Now I need to figure out how to limit the Server/@name values returned to just those where the child node //datasourceName value equals the sql:variable.
This worked:
-- Names of <Server> nodes that contain, anywhere below an <Event>, a
-- <datasourceName> equal to @searchVariable.
SELECT ServerNode.value('@name', 'varchar(max)') AS ServerNode
FROM @xml.nodes('/business_process/ProcessDefinition') AS ProcessDefinition(pd)
OUTER APPLY pd.nodes('Server[Event//datasourceName=sql:variable("@searchVariable")]') AS The(ServerNode)
WHERE ServerNode.value('@name', 'varchar(max)') IS NOT NULL;

-- Names of <User> nodes that contain, anywhere below a <DocFinityTask>, a
-- <datasourceName> equal to @searchVariable.
SELECT UserNode.value('@name', 'varchar(max)') AS UserNode
FROM @xml.nodes('/business_process/ProcessDefinition') AS ProcessDefinition(pd)
OUTER APPLY pd.nodes('User[DocFinityTask//datasourceName=sql:variable("@searchVariable")]') AS The(UserNode)
WHERE UserNode.value('@name', 'varchar(max)') IS NOT NULL;
As my first answer is already very crowded...
With this you'd get the User's and the Server's data out of the XML. If you set @searchVariable to a non-existent datasourceName value, the User data is still there, but the Server data will be NULL. Try it out!
-- Stand-alone example document (one User node, one Server node).
DECLARE @xml XML=
'<business_process>
<ProcessDefinition name="dawns test">
<StartState name="START" uniqueId="Node3304">
<Transition name="Node4532" to="Node4532" />
</StartState>
<EndState name="END4694" uniqueId="Node4694" />
<User name="Node4532" uniqueId="Node4532">
<Description>test</Description>
<Distribution config-type="field" type="CommonQueueDistribution">
<Priority>0</Priority>
<AutoCompleteJob>false</AutoCompleteJob>
<GroupId>Admin</GroupId>
<UseAttendance>false</UseAttendance>
<UseShifts>false</UseShifts>
<NotifyActors>false</NotifyActors>
</Distribution>
<DocFinityTask type="DocFinityTask">
<description>read e-mail and approve or deny</description>
<help />
<required>false</required>
<redoable>false</redoable>
<condition />
<properties>
<undoable>true</undoable>
</properties>
</DocFinityTask>
<DocFinityTask type="SimpleFormTask">
<description>lob lookup</description>
<help />
<required>false</required>
<redoable>true</redoable>
<condition />
<properties>
<autoRun>true</autoRun>
<form>
<title>lob</title>
<formElement>
<type>Combobox</type>
<variable>lob</variable>
<tooltip>lob lookup</tooltip>
<label>lob</label>
<required>false</required>
<prepopulateValues>
<datasourceName>lob lookup</datasourceName>
</prepopulateValues>
<userEnter>true</userEnter>
<dataType>STRING</dataType>
</formElement>
</form>
</properties>
</DocFinityTask>
<Transition name="Node128795" to="Node128795" />
</User>
<Server name="Node128795" uniqueId="Node128795">
<Description />
<Event type="node-enter">
<Action type="SetProcessInstancePropertyAction" config-type="field">
<description>Whatever</description>
<propertyName>source</propertyName>
<datasourceName>get datasource list</datasourceName>
</Action>
</Event>
<Transition name="Node4694" to="END4694" />
</Server>
</ProcessDefinition>
<Layout>
<annotations />
<nodes>
<node name="START" uniqueId="Node3304" type="startNode" text="START" x="184.5" y="135.5" width="25" height="25" />
<node name="END4694" uniqueId="Node4694" type="endNode" text="END4694" x="588.5" y="137.5" width="25" height="25" />
<node name="Node4532" uniqueId="Node4532" type="userNode" text="Node4532" info="false" x="296" y="135" width="150" height="50" />
<node name="Node128795" uniqueId="Node128795" type="serverNode" text="Node128795" info="false" x="286" y="244" width="150" height="50" />
</nodes>
<edges>
<edge originNode="Node3304" targetNode="Node4532" text="" sketch="arrow" />
<edge originNode="Node4532" targetNode="Node128795" text="" sketch="arrow" />
<edge originNode="Node128795" targetNode="Node4694" text="" sketch="arrow" />
</edges>
</Layout>
</business_process>';

DECLARE @searchVariable VARCHAR(100)='get datasource list';

-- The User columns are read directly from the ProcessDefinition node.
-- The Server columns come from the OUTER APPLY and are NULL when no Server
-- has an Event/Action/datasourceName equal to @searchVariable.
SELECT
    ServerNode.value('@name', 'varchar(max)')     AS ServerName,
    ServerNode.value('@uniqueId', 'varchar(max)') AS ServerId,
    pd.value('User[1]/@name', 'varchar(max)')     AS UserName,
    pd.value('User[1]/@uniqueId', 'varchar(max)') AS UserId
FROM @xml.nodes('/business_process/ProcessDefinition') AS ProcessDefinition(pd)
OUTER APPLY pd.nodes('Server[Event/Action/datasourceName=sql:variable("@searchVariable")]') AS The(ServerNode);
Your friend is sql:variable(), there is also a sql:column() if your search value comes from a table's column.
According to your comment I mocked up a table with the XML stored in a column of type varchar. The SELECT first uses CROSS APPLY to cast this to "real" XML, then .exist() is used to pick the rows fulfilling your criteria, and finally the value of /User/@name is returned.
If you change the lookup-variable to "another" you'll find the other XML, other strings will come back empty.
As examples of XPath I give you three queries all returning the same. This depends on your XML...
-- Mock-up table: the XML is stored as plain text in a VARCHAR(MAX) column.
DECLARE @tbl TABLE(ID INT IDENTITY, YourXMLAsVarchar VARCHAR(MAX));

-- Explicit column list: ID is an IDENTITY column and is generated automatically.
INSERT INTO @tbl (YourXMLAsVarchar) VALUES
('<User name="First Node" uniqueId="1332">
<Task type="Form">
<properties>
<form>
<formElement>
<populateValues>
<source>lookup</source>
</populateValues>
</formElement>
</form>
</properties>
</Task>
</User>')
,('<User name="First Node" uniqueId="1332">
<Task type="Form">
<properties>
<form>
<formElement>
<populateValues>
<source>another</source>
</populateValues>
</formElement>
</form>
</properties>
</Task>
</User>');

-- Search for "lookup"
DECLARE @SearchingFor VARCHAR(100)='lookup';

-- Search with the full path to <source>.
-- CROSS APPLY casts the text to real XML once per row so the XML methods can be used.
SELECT x.value('(/User/@name)[1]', 'varchar(max)')
FROM @tbl AS tbl
CROSS APPLY (SELECT CAST(YourXMLAsVarchar AS XML)) AS a(x)
WHERE x.exist('/User/Task/properties/form/formElement/populateValues/source[.=sql:variable("@SearchingFor")]') = 1;

-- Shorter: any <source> at any depth; usable if no other <source> element
-- exists that this could be mixed up with.
SELECT x.value('(/User/@name)[1]', 'varchar(max)')
FROM @tbl AS tbl
CROSS APPLY (SELECT CAST(YourXMLAsVarchar AS XML)) AS a(x)
WHERE x.exist('//source[.=sql:variable("@SearchingFor")]') = 1;

-- Even shorter: any element at any depth; usable if the lookup string cannot
-- appear as the value of any other element.
SELECT x.value('(/User/@name)[1]', 'varchar(max)')
FROM @tbl AS tbl
CROSS APPLY (SELECT CAST(YourXMLAsVarchar AS XML)) AS a(x)
WHERE x.exist('//*[.=sql:variable("@SearchingFor")]') = 1;
Yet another alternative. This doesn't use XML.exist but looks straight for User elements having a <source> element with the lookup variable. Then the path is reversed back to ancestor User and the name attribute is selected.
DECLARE @xml XML=
'<User name="First Node" uniqueId="1332">
<Task type="Form">
<properties>
<form>
<formElement>
<populateValues>
<source>lookup</source>
</populateValues>
</formElement>
</form>
</properties>
</Task>
</User>';

DECLARE @lookup NVARCHAR(128)='lookup';

-- Select the matching <source> elements directly, then walk six parent steps
-- (populateValues, formElement, form, properties, Task, User) back up to the
-- enclosing <User> and read its name attribute.
SELECT
    n.v.value('../../../../../../@name', 'NVARCHAR(128)') AS name
FROM
    @xml.nodes('//User/Task/properties/form/formElement/populateValues/source[.=sql:variable("@lookup")]') AS n(v);
Apparently XPath axes aren't fully supported (at least not on SQL Server 2012). If they were, then instead of ../../../../../../@name you could have written the simpler ancestor::User/@name.

How to Insert xml data into SQL Server table?

How to import below XML data into SQL Server table with three columns?
<dataset>
<metadata>
<item name="NAME_LAST" type="xs:string" length="62" />
<item name="NAME_FIRST" type="xs:string" length="62" />
<item name="NAME_MIDDLE" type="xs:string" length="32" />
</metadata>
<data>
<row>
<value>SMITH</value>
<value>MARY</value>
<value>N</value>
</row>
<row>
<value>SMITH2</value>
<value>MARY2</value>
<value>N2</value>
</row>
</data>
</dataset>
Try this:
DECLARE @input XML = '<dataset>
<metadata>
<item name="NAME_LAST" type="xs:string" length="62" />
<item name="NAME_FIRST" type="xs:string" length="62" />
<item name="NAME_MIDDLE" type="xs:string" length="32" />
</metadata>
<data>
<row>
<value>SMITH</value>
<value>MARY</value>
<value>N</value>
</row>
<row>
<value>SMITH2</value>
<value>MARY2</value>
<value>N2</value>
</row>
</data>
</dataset>';

-- One inserted row per <row>; the three <value> children are addressed by
-- position (1 = last name, 2 = first name, 3 = middle name per <metadata>).
INSERT INTO dbo.YourTable (ColName, ColFirstName, ColOther)
SELECT
    XCol.value('(value)[1]', 'varchar(25)') AS Name,
    XCol.value('(value)[2]', 'varchar(25)') AS FirstName,
    XCol.value('(value)[3]', 'varchar(25)') AS OtherValue
FROM
    @input.nodes('/dataset/data/row') AS XTbl(XCol);
Insert XML Data into sql Server table
-- Status flag: starts as 'Failed' and is set to 'SUCCESS' if the INSERT affects any rows.
-- The declared name now matches the name used in the assignments below.
DECLARE @retValue VARCHAR(50);
DECLARE @XmlStr XML;

SET @XmlStr='<Customers>
<customer>
<ID>111589</ID>
<FirstName>name1</FirstName>
<LastName>Lname1</LastName>
<Company>ABC</Company>
</customer>
<customer>
<ID>12345</ID>
<FirstName>name2</FirstName>
<LastName>Lname2</LastName>
<Company>ABC</Company>
</customer>
<customer>
<ID>14567</ID>
<FirstName>name3</FirstName>
<LastName>Lname3</LastName>
<Company>DEF</Company>
</customer>
</Customers>';

SET @retValue = 'Failed';

-- One inserted row per <customer>; a missing <ID> is stored as 0.
INSERT INTO [test_xmlinsert]
(
    [id],
    [firstName],
    [lastName],
    [company]
)
SELECT
    COALESCE(c.value('ID[1]', 'int'), 0)   AS ID,
    c.value('FirstName[1]', 'varchar(50)') AS FirstName,
    c.value('LastName[1]', 'varchar(50)')  AS LastName,
    c.value('Company[1]', 'varchar(50)')   AS Company
FROM @XmlStr.nodes('/Customers/customer') AS t(c);

-- @@ROWCOUNT must be read immediately after the INSERT it refers to.
IF (@@ROWCOUNT > 0)
    SET @retValue = 'SUCCESS';

Validating individual XML elements in SQL Server 2008R2

I'm writing a stored procedure to process XML data uploaded by the user:
<People>
<Person Id="1" FirstName="..." LastName="..." />
<Person Id="2" FirstName="..." LastName="..." />
<Person Id="3" FirstName="..." LastName="..." />
<Person Id="4" FirstName="..." LastName="..." />
<Person Id="5" FirstName="..." LastName="..." />
</People>
I would like to use a schema to make sure that the entities are valid, but I don't want the entire process to fail just because of one invalid entity. Instead, I would like to log all invalid entities to a table and process the valid entities as normal.
Is there a recommended way to do this?
A pure SQL approach would be:
Create a schema collection that defines <Person>:
-- One-time setup: schema collection describing a single <Person> element.
-- Id must be an xs:int; FirstName and LastName are strings; all three
-- attributes are required.
CREATE XML SCHEMA COLLECTION [dbo].[testtest] AS
N'<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="Person">
<xs:complexType>
<xs:attribute name="Id" type="xs:int" use="required"/>
<xs:attribute name="FirstName" type="xs:string" use="required"/>
<xs:attribute name="LastName" type="xs:string" use="required"/>
</xs:complexType>
</xs:element>
</xs:schema>
'
(one-time operation)
Have an XML query that selects each <Person> node from <People> as a separate row.
Declare a cursor on that query and select each row into an untyped xml variable. After the select, try to assign to a typed xml variable from within a try-catch block.
Resulting code would look like:
-- Sample upload; the third <Person> has a non-integer Id and should fail validation.
declare @source xml = N'
<People>
<Person Id="1" FirstName="..." LastName="..." />
<Person Id="2" FirstName="..." LastName="..." />
<Person Id="f" FirstName="..." LastName="..." />
<Person Id="4" FirstName="..." LastName="..." />
<Person Id="5" FirstName="..." LastName="..." />
</People>';

-- One cursor row per <Person>, each as its own stand-alone XML fragment.
declare foo cursor
    local
    forward_only
    read_only
for
    select t.p.query('.')
    from @source.nodes('People/Person') as t(p)
;

declare @x xml (dbo.testtest);  -- typed: assignment validates against the schema collection
declare @x_raw xml;             -- untyped: accepts any well-formed fragment

open foo;
fetch next from foo into @x_raw;

while @@fetch_status = 0
begin
    begin try
        -- Assigning to the typed variable raises an error for an invalid <Person>,
        -- which is caught per row so the remaining rows are still processed.
        set @x = @x_raw;
        print cast(@x_raw as nvarchar(max)) + ': OK';
    end try
    begin catch
        print cast(@x_raw as nvarchar(max)) + ': FAILED';
    end catch;

    fetch next from foo into @x_raw;
end;

close foo;
deallocate foo;
Result:
<Person Id="1" FirstName="..." LastName="..."/>: OK
<Person Id="2" FirstName="..." LastName="..."/>: OK
<Person Id="f" FirstName="..." LastName="..."/>: FAILED
<Person Id="4" FirstName="..." LastName="..."/>: OK
<Person Id="5" FirstName="..." LastName="..."/>: OK
A simpler option is to create a CLR stored procedure that would parse XML in a .NET language.

Resources