How to reduce cost of parsing small XML in SQL Server? - sql-server

I have following T-SQL code:
-- Shreds the XML held in @XMLCategoryIDs (one <node> element per category)
-- into a table variable. The query()/value() pairs below are the pattern
-- whose cost the question is asking about, so they are kept as written.
DECLARE @DesiredCategories TABLE (CategoryID INT, Search BIT, SearchText NVARCHAR(50));

INSERT INTO @DesiredCategories (CategoryID, Search, SearchText)
SELECT
    X.Col.query('id').value('.', 'INT') AS CategoryID,
    X.Col.value('@search', 'BIT') AS Search,  -- @search reads the node's "search" attribute
    X.Col.query('text').value('.', 'NVARCHAR(MAX)') AS SearchText
FROM @XMLCategoryIDs.nodes('root/node') AS X(Col);
@XMLCategoryIDs contains XML like this:
<root>
<node search="0"><id>10088</id><text></text></node>
<node search="0"><id>10087</id><text></text></node>
<node search="0"><id>10090</id><text></text></node>
</root>
Sadly, this code has a very high subtree cost (for this code snippet it's ~900) and seems to be a performance bottleneck.
Execution plan is included - http://pastebin.com/ptnqJ4jX
We are using XML to send a varying number of parameters (1-5) to a stored procedure.
And yes, I'm a total beginner with XML manipulation in SQL.

If your xml is stored in a table, you can create a primary xml index to reduce subtree cost for queries against that xml at a later time. You're basically shifting the workload to earlier in the process, but this might help your situation.
-- Demo: store the XML in a temp table that carries a primary XML index, then
-- shred it into a table variable. The index shifts the parsing work to insert
-- time so that later queries against the column are cheaper.
IF OBJECT_ID('tempdb..#Table') IS NOT NULL
    DROP TABLE #Table;

CREATE TABLE #Table
(
    ID INT IDENTITY PRIMARY KEY
    ,DocumentId INT
    ,Xml XML
);

CREATE PRIMARY XML INDEX PXML_Table_Xml
    ON #Table (Xml);
GO

INSERT INTO #Table (DocumentId, Xml)
VALUES
(
    1
    ,'
<root>
<node search="0"><id>10088</id><text></text></node>
<node search="0"><id>10087</id><text></text></node>
<node search="0"><id>10090</id><text></text></node>
</root>
'
);

DECLARE @DesiredCategories TABLE (DocumentId INT, CategoryID INT, Search BIT, SearchText NVARCHAR(50));

-- One row per <node>. Each column is read with a single value() call on a
-- singleton path instead of query() followed by value(), and SearchText is
-- extracted with the same length as the target column.
-- NOTE: with the text() paths an empty or missing element yields NULL
-- (the query().value('.') form yielded an empty string / 0 instead).
INSERT INTO @DesiredCategories (DocumentId, CategoryID, Search, SearchText)
SELECT
    T.DocumentId
    ,A.CategoryID
    ,A.Search
    ,A.SearchText
FROM #Table AS T
CROSS APPLY
(
    SELECT
        X.Col.value('(id/text())[1]', 'INT') AS CategoryID,
        X.Col.value('@search', 'BIT') AS Search,
        X.Col.value('(text/text())[1]', 'NVARCHAR(50)') AS SearchText
    FROM T.Xml.nodes('root/node') AS X(Col)
) AS A;

SELECT DocumentId, CategoryID, Search, SearchText
FROM @DesiredCategories;

Related

T-SQL: Parse XML data from classes which inherit from a common base

I'm trying to parse XML data in SQL Server. I have a XML column in a table, the XML stored in it can vary by type, but they all inherit from the same base type.
Row 1: has XML like so:
<Form>
<TaskType>1</TaskType>
--Other Properties ...
</Form>
Row 2: has XML like so:
<License>
<TaskType>2</TaskType>
--Other Properties ...
</License>
Normally I might parse XML with this T-SQL code snippet:
-- Reads TaskType from the @XMLToParse variable; the path is tied to a
-- <License> root element, which is the limitation the question describes.
SELECT
    xmlData.A.value('.', 'INT') AS Animal
FROM
    @XMLToParse.nodes('License/TaskType') AS xmlData(A)
This doesn't work in a view, since I'm dependent on the root element's name to find the node.
How can I always find the TaskType XML element in my XML content?
Please try the following solution.
XPath is using asterisk * as a wildcard.
http://www.tizag.com/xmlTutorial/xpathwildcard.php
SQL
-- DDL and sample data population, start
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, xmldata XML);

INSERT @tbl (xmldata) VALUES
(N'<Form>
<TaskType>1</TaskType>
<TaskName>Clone</TaskName>
<!--Other XML elements-->
</Form>'),
(N'<License>
<TaskType>2</TaskType>
<TaskName>Copy</TaskName>
<!--Other XML elements-->
</License>');
-- DDL and sample data population, end

-- '/*' selects the root element whatever it is called (Form, License, ...),
-- so the child elements can be read without knowing the root's name.
SELECT ID
    , c.value('(TaskType/text())[1]', 'INT') AS TaskType
    , c.value('(TaskName/text())[1]', 'VARCHAR(20)') AS TaskName
FROM @tbl
CROSS APPLY xmldata.nodes('/*') AS t(c);
Output
ID
TaskType
TaskName
1
1
Clone
2
2
Copy
Apparently you can just iterate the nodes like so without being aware of their name:
-- node() stands in for the unknown root element, so TaskType is found
-- regardless of the root's name.
SELECT xmlData.A.value('.', 'INT') AS Animal
FROM @XMLToParse.nodes('node()/TaskType') AS xmlData(A)

How to extract schema from XML variable using XQuery

Technologies: T-SQL, XML, XQuery
I have an XML @variable in a database table which has a schema section and a data section. I would like to extract only the schema section and create an XML Schema Collection from it. It appears XQuery would be the quickest way. How do I specify the starting tag and ending tag in the following file (I only want to extract everything between <xs:schema xmlns and </xs:schema>)?
-- Returns the embedded <xs:schema> section of the XML document stored for the
-- given blob id.
--   @DataLakeBlobId  key of the row in [landing].[v_tbForm] to read.
--   Returns          the first xs:schema element found in [XmlData]; empty XML
--                    when the document has none, NULL when no row matches.
CREATE FUNCTION [etl].[ufn_GetXmlSchema]
(
    @DataLakeBlobId uniqueidentifier
)
RETURNS xml
AS
BEGIN
    -- Full document (schema section plus data section) for the requested blob.
    DECLARE @XmlDocument xml;

    SET @XmlDocument = ( SELECT [XmlData]
                         FROM [landing].[v_tbForm] WITH (NOLOCK)
                         WHERE [DataLakeBlobId] = @DataLakeBlobId
                       );

    -- A literal <xs:schema ...></xs:schema> inside query() constructs a new,
    -- empty element rather than selecting from the document, which is why the
    -- earlier attempt returned nothing in between the tags. Select the existing
    -- element through its namespace instead.
    -- NOTE(review): the schema's position in the document is not shown here,
    -- so the descendant axis (//) is used - tighten the path once it is known.
    RETURN @XmlDocument.query('declare namespace xs="http://www.w3.org/2001/XMLSchema"; (//xs:schema)[1]');
END
GO
SELECT [etl].[ufn_GetXmlSchema]('A257667D-C3AA-471C-9F82-91FA35181833')
Any help is appreciated.
While waiting for a real scenario, here is a good jump start for you. As end result, it creates an XML Schema Collection named dbo.StateAndCities.
SQL
USE tempdb;
GO
-- DDL and sample data population, start
IF EXISTS (SELECT * FROM sys.xml_schema_collections
           WHERE name = N'StateAndCities'
             AND schema_id = SCHEMA_ID(N'dbo'))
    DROP XML SCHEMA COLLECTION dbo.StateAndCities;

DECLARE @tbl TABLE (
    ID INT IDENTITY PRIMARY KEY
    , state CHAR(2)
    , city VARCHAR(30)
);

INSERT INTO @tbl (state, city)
VALUES
    ('FL', 'Miami')
    , ('CA', 'Los Angeles')
    , ('TX', 'Austin');
-- DDL and sample data population, end

DECLARE @xml XML
    , @xsd XML;

-- Generate XML plus embedded XSD schema, wrapped in a single <root> element
SET @xml = (SELECT NULL,
    (
        SELECT [row].ID, [row].state, [row].city
        FROM @tbl AS [row]
        FOR XML AUTO, ELEMENTS, TYPE, XMLSCHEMA('MyURI'))
    FOR XML PATH(''), TYPE, ROOT('root')
);

-- just to see, XML plus embedded XSD schema
SELECT @xml;

-- retrieve just the XSD; the schema element lives in the XML Schema namespace,
-- so the path must be namespace-qualified
;WITH XMLNAMESPACES ('http://www.w3.org/2001/XMLSchema' AS xsd)
SELECT @xsd = @xml.query('/root/xsd:schema');

-- just to see, XSD schema
SELECT @xsd AS xsd;

-- create schema collection
CREATE XML SCHEMA COLLECTION dbo.StateAndCities AS @xsd;

Pulling a value from Xml in a column

I have a table in Sql server that stores Xml data in one of its columns.
The Xml column data looks like this:
<TestDef Weight="0" FailValue="2" ConceptID="-327">
<ToleranceDef ObjectType="SomeName" TargetValue="0" TargetRange="2" />
</TestDef>
I need to write a query that fetches the ConceptID from each row's Xml column.
Here it would be -327
I know I can cast the Xml column to a nvarchar(max) then use some reg exp to get the value but not sure how to use the regular expression
Here's an example using a table variable. It will be the same concept with an actual table:
-- Sample table variable holding one XML document per row.
Declare @XmlTable table (
    Id Integer Identity,
    XmlValue XML
);

Insert Into @XmlTable (XmlValue) values ('<TestDef Weight="0" FailValue="2" ConceptID="-327"><ToleranceDef ObjectType="SomeName" TargetValue="0" TargetRange="2" /></TestDef>');
Insert Into @XmlTable (XmlValue) values ('<TestDef Weight="0" FailValue="2" ConceptID="-325"><ToleranceDef ObjectType="SomeName" TargetValue="0" TargetRange="2" /></TestDef>');

-- @ConceptID addresses the ConceptID attribute of the root TestDef element;
-- [1] makes the path a singleton, as value() requires.
select
    Id,
    XmlValue,
    XmlValue.value('(/TestDef/@ConceptID)[1]', 'integer') as ConceptId
from
    @XmlTable;

Using SQL to transpose/flatten XML structure to columns

I am using SQL Server (2008/2012) and I know there are similar answers from lots of searching, however I can't seem to find the appropriate example/pointers for my case.
I have an XML column in a SQL Server table holding this data:
<Items>
<Item>
<FormItem>
<Text>FirstName</Text>
<Value>My First Name</Value>
</FormItem>
<FormItem>
<Text>LastName</Text>
<Value>My Last Name</Value>
</FormItem>
<FormItem>
<Text>Age</Text>
<Value>39</Value>
</FormItem>
</Item>
<Item>
<FormItem>
<Text>FirstName</Text>
<Value>My First Name 2</Value>
</FormItem>
<FormItem>
<Text>LastName</Text>
<Value>My Last Name 2</Value>
</FormItem>
<FormItem>
<Text>Age</Text>
<Value>40</Value>
</FormItem>
</Item>
</Items>
So even though the structure of <FormItem> is going to be the same, I can have multiple (most commonly no more than 20-30) sets of form items..
I am essentially trying to return a query from SQL in the format below, i.e. dynamic columns based on /FormItem/Text:
FirstName LastName Age ---> More columns as new `<FormItem>` are returned
My First Name My Last Name 39 Whatever value etc..
My First Name 2 My Last Name 2 40
So, at the moment I had the following:
-- Returns one (Question, Answer) row per <FormItem> in the @Questions XML variable.
select
    Tab.Col.value('Text[1]','nvarchar(100)') as Question,
    Tab.Col.value('Value[1]','nvarchar(100)') as Answer
from
    @Questions.nodes('/Items/Item/FormItem') as Tab(Col)
Of course that hasn't transposed my XML rows into columns, and obviously is fixed with fields anyway.. I have been trying various "Dynamic SQL" approaches where the SQL performs a distinct selection of (in my case) the <Text> node, and then uses some sort of Pivot? but I couldn't seem to find the magic combination to return the results I need as a dynamic set of columns for each row (<Item> within the collection of <Items>).
I'm sure it can be done having seen so many very similar examples, however again the solution eludes me!
Any help gratefully received!!
Parsing the XML is fairly expensive so instead of parsing once to build a dynamic query and once to get the data you can create a temporary table with a Name-Value list and then use that as the source for a dynamic pivot query.
dense_rank is there to create the ID to pivot around.
To build the column list in the dynamic query it uses the for xml path('') trick.
This solution requires that your table has a primary key (ID). If you have the XML in a variable it can be somewhat simplified.
-- Step 1: shred the XML once into a Name/Value list in temp table #T.
-- dense_rank() numbers each <Item> (per source row) so the pivot has an ID to group on.
select dense_rank() over(order by ID, I.N) as ID,
       F.N.value('(Text/text())[1]', 'varchar(max)') as Name,
       F.N.value('(Value/text())[1]', 'varchar(max)') as Value
into #T
from YourTable as T
  cross apply T.XMLCol.nodes('/Items/Item') as I(N)
  cross apply I.N.nodes('FormItem') as F(N);

declare @SQL nvarchar(max);
declare @Col nvarchar(max);

-- Step 2: build the comma-separated, quoted column list from the distinct names.
-- for xml path('') concatenates the rows; substring(..., 2) drops the leading comma.
select @Col =
  (
  select distinct ','+quotename(Name)
  from #T
  for xml path(''), type
  ).value('substring(text()[1], 2)', 'nvarchar(max)');

-- Step 3: pivot #T with the generated column list.
set @SQL = 'select '+@Col+'
            from #T
            pivot (max(Value) for Name in ('+@Col+')) as P';

exec (@SQL);

drop table #T;
SQL Fiddle
-- Fixed-column variant: one row per <Item>, picking each <FormItem> by its <Text> value.
select
    Tab.Col.value('(FormItem[Text = "FirstName"]/Value)[1]', 'varchar(32)') as FirstName,
    Tab.Col.value('(FormItem[Text = "LastName"]/Value)[1]', 'varchar(32)') as LastName,
    Tab.Col.value('(FormItem[Text = "Age"]/Value)[1]', 'int') as Age
from @Questions.nodes('/Items/Item') as Tab(Col)
I wanted to add my "own answer" really just for completeness to possibly help others.. however it is most definitely based on the great help from @Mikael above!! so again, this is really for completeness only - all kudos to @Mikael.
Basically I ended up with the following proc. I needed to select some data/filter, and get some joined data too and allow some boolean filtering on some of the input params. Then drop into the next section which was create a temp table of my relational data and the required xml nodes via the cross apply. The final step was to then pivot the results/dynamically create the columns from the selected XML node..
-- Extracts entries matching the filters, flattens the XML form data of each
-- entry into name/value rows, and returns them pivoted so that every distinct
-- field name becomes a column.
--   @CompanyID          company to report on; -1 means all companies.
--   @MainSelector       optional filter on entries.wizard (null = no filter).
--   @SecondarySelector  optional filter on entries.name (null = no filter).
--   @DateFrom/@DateTo   inclusive RecordDate range.
--   @SysReference       optional filter on entries.SysReference (null = no filter).
CREATE PROCEDURE [dbo].[usp_RPT_ExtractFlattenentries]
    @CompanyID int,
    @MainSelector nvarchar(50) = null,
    @SecondarySelector nvarchar(255) = null,
    -- Same dates as before (1 Jan 2012 / 31 Dec 2100), written as yyyymmdd so
    -- they parse the same way under every language/dateformat setting.
    @DateFrom datetime = '20120101',
    @DateTo datetime = '21001231',
    @SysReference nvarchar(20) = null
AS
BEGIN
    SET NOCOUNT ON;

    -- Create the table var to hold the XML form data from the entries
    declare @FeedbackXml table (
        ID int identity primary key,
        XMLCol xml,
        CompanyName nvarchar(20),
        SysReference nvarchar(20),
        RecordDate datetime,
        EntryName nvarchar(255),
        MainSelector nvarchar(50)
    );

    -- STEP 1: Get the raw submission data based on the params passed in
    -- *Note (from the original author): the double cast is needed because the
    --  "form" field is not stored as xml and has to go through nvarchar(max) first.
    insert into @FeedbackXml
        (XMLCol, CompanyName, SysReference, RecordDate, EntryName, MainSelector)
    select cast(cast(e.form as nvarchar(max)) as xml), c.name, e.SysReference, e.RecordDate, e.name, e.wizard
    from
        entries as e          -- aliases now match the e./c. prefixes used throughout
    left join
        companies as c on e.companies = c.ID
    where
        (@CompanyID = -1 or @CompanyID = e.companies)
        and
        (@MainSelector is null or @MainSelector = e.wizard)
        and
        (@SecondarySelector is null or @SecondarySelector = e.name)
        and
        (@SysReference is null or @SysReference = e.SysReference)
        and
        (e.RecordDate >= @DateFrom and e.RecordDate <= @DateTo);

    -- STEP 2: Flatten the required XML structure to provide a base for the pivot,
    -- and include other fields we wish to output
    select dense_rank() over(order by T.ID) as ID,
           T.RecordDate, T.CompanyName, T.SysReference, T.EntryName, T.MainSelector,
           F.N.value('(FieldNameNode/text())[1]', 'nvarchar(max)') as FieldName,
           F.N.value('(FieldNameValue/text())[1]', 'nvarchar(max)') as FieldValue
    into #TempData
    from @FeedbackXml as T
        cross apply T.XMLCol.nodes('/root/companies') as I(N) -- XPath to the desired node start point (no trailing slash: a path cannot end in '/')
        cross apply I.N.nodes('company') as F(N);             -- The actual node collection that forms the "field name" and "field value" data

    -- STEP 3: Pivot the #TempData table creating a dynamic column structure
    -- based on the selected XML nodes in step 2
    declare @SQL nvarchar(max);
    declare @Col nvarchar(max);

    -- quotename() brackets each field name; substring(..., 2) drops the leading comma
    select @Col =
        (
        select distinct ','+quotename(FieldName)
        from #TempData
        for xml path(''), type
        ).value('substring(text()[1], 2)', 'nvarchar(max)');

    set @SQL = 'select CompanyName, SysReference, EntryName, MainSelector, RecordDate, '+@Col+'
                from #TempData
                pivot (max(FieldValue) for FieldName in ('+@Col+')) as P';

    exec (@SQL);

    drop table #TempData;
END
Again, really only added this answer to provide a complete picture from my perspective, and may help others.

SQL Server table to xml

This time I have a question about how to convert an MSSQL table to XML.
My source SQL table:
+-----------+-----------------+
|atributname|atributvalue |
+-----------+-----------------+
|phone |222 |
|param4 |bbbbcdsfceecc |
|param3 |bbbbcdsfceecc |
|param2 |bbbbcdsfccc |
+-----------+-----------------+
Expected result sample:
<items>
<phone>222</phone>
<param4>bbbbcdsfceecc</param4>
<param3>bbbbcdsfceecc</param3>
<param2>bbbbcdsfccc</param2>
</items>
I tried lot of variations of the following query
-- Attempted query: FOR XML PATH turns each selected column into an element
-- named after the column (atributname / atributvalue), so the values stored in
-- atributname do not become element names here.
SELECT atributname,atributvalue
FROM sampletable FOR XML PATH (''), ROOT ('items');
but results are not good :( should be exactly like in "Expected result sample"
any help
ps
Script to create sampletable:
-- Sample attribute name/value table used by the question.
create table sampletable
(
    atributname varchar(20),
    atributvalue varchar(20)
);

-- Load the four sample rows with a single multi-row insert.
insert into sampletable (atributname, atributvalue)
values
    ('phone', '222'),
    ('param4', 'bbbbcdsfceecc'),
    ('param3', 'bbbbcdsfceecc'),
    ('param2', 'bbbbcdsfccc');
That's not how FOR XML works. It's columns that get turned into XML elements, not rows. In order to obtain the expected result, you would need to have columns named phone, param4, and so on - not rows with these values in attributename.
If there are specific elements you want in the XML, you could perform a pivot on the data first, then use FOR XML.
Example of a pivot would be:
-- Pivot the name/value rows into one row with a column per attribute, then let
-- FOR XML turn each column into an element under <items>.
SELECT [phone], [param2], [param3], [param4]
FROM
(
    SELECT attributename, attributevalue
    FROM attributes
) a
PIVOT
(
    MAX(attributevalue)
    FOR attributename IN ([phone], [param2], [param3], [param4])
) AS pvt
-- FOR XML needs a mode keyword; PATH('') emits the columns as elements with no
-- per-row wrapper, and ROOT adds the enclosing <items> element.
FOR XML PATH(''), ROOT('items')
Of course the aggregate will only work if attributevalue is a numeric data type. If it's a character-type column, then you'll have some trouble with the pivot, as there are no built-in string aggregates in SQL server AFAIK...
ok
finally i have done this in several ways,
but this is simplest version suitable for medium dataset
-- Builds each <name>value</name> pair as plain text and lets FOR XML PATH('')
-- concatenate the rows under an <items> root.
declare @item nvarchar(max);

set @item = (SELECT '<' + atributname + '>' +
                    cast(atributvalue as nvarchar(max)) + '</' + atributname + '>'
             FROM sampletable FOR XML PATH (''), ROOT ('items'));

-- FOR XML entitizes the hand-built angle brackets as &lt; / &gt;, so turn them
-- back into real markup.
-- NOTE(review): a value that itself contains < > or & would presumably not
-- survive this round trip as well-formed XML - verify against real data.
select replace(replace(@item, '&lt;', '<'), '&gt;', '>');

Resources