SQL Server 2014: Extracting data from XML using OpenXML and BulkColumn - sql-server

I am trying to extract data from a big xml file using OpenXML and BulkColumn and then save it into a new table called badges.
I am also executing a select statement to show the content of the table.
The file is stored locally. The file uses the attribute-centric mapping and has tens of thousands of rows.
The code I am using is:
-- Target table for the badge rows shredded from Badges.xml.
CREATE TABLE dbo.badges (
    Id       int,
    Name     NVARCHAR(1000),
    Date     date,
    Class    smallint,
    TagBased nvarchar(10)
);

DECLARE @XMLDoc XML;
DECLARE @XMLDocID INT;

-- Read the whole file as a single value into the XML variable.
-- OPENROWSET(BULK ...) requires a correlation name, hence "AS x".
SELECT @XMLDoc = BulkColumn
FROM OPENROWSET(BULK 'C:\Users\Zuhair\Desktop\Badges.xml', SINGLE_BLOB) AS x;

-- Parse the document and obtain a handle for OPENXML.
EXEC sys.sp_xml_preparedocument @XMLDocID OUTPUT, @XMLDoc;

-- NOTE(review): the ColPattern values below have no leading @, so they
-- address child elements of <row> rather than its attributes. This is the
-- behavior the question asks about and is deliberately left as posted.

-- Preview of the rows that will be inserted.
SELECT Id, Name, Date, Class, TagBased
FROM OPENXML(@XMLDocID, '/badges/row')
WITH (Id int 'Id',
      Name NVARCHAR(1000) 'Name',
      Date date 'Date',
      Class smallint 'Class',
      TagBased nvarchar(10) 'TagBased');

-- Persist the same rowset; columns are listed explicitly instead of SELECT *.
INSERT INTO dbo.badges (Id, Name, Date, Class, TagBased)
SELECT Id, Name, Date, Class, TagBased
FROM OPENXML(@XMLDocID, '/badges/row')
WITH (Id int 'Id',
      Name NVARCHAR(1000) 'Name',
      Date date 'Date',
      Class smallint 'Class',
      TagBased nvarchar(10) 'TagBased');

-- Release the parsed document held by the server.
EXEC sys.sp_xml_removedocument @XMLDocID;
However, when I execute the above code I get the following result:
Here is a sample of the XML data that I am using:
<badges>
<row Id="1" UserId="2" Name="Autobiographer" Date="2010-08-11T18:25:03.937" Class="3" TagBased="False" />
<row Id="2" UserId="3" Name="Autobiographer" Date="2010-08-11T18:25:03.997" Class="3" TagBased="False" />
<row Id="3" UserId="4" Name="Autobiographer" Date="2010-08-11T18:25:04.107" Class="3" TagBased="False" />
<row Id="4" UserId="22" Name="Autobiographer" Date="2010-08-11T19:35:05.283" Class="3" TagBased="False" />
<row Id="5" UserId="33" Name="Autobiographer" Date="2010-08-11T19:35:05.330" Class="3" TagBased="False" />
<row Id="6" UserId="27" Name="Autobiographer" Date="2010-08-11T19:40:05.490" Class="3" TagBased="False" />
...
</badges>
Why am I getting this result rather than a table that has the desired data?

Using FROM OPENXML together with the related procedures to prepare and remove a document is outdated and should not be used any more.
Try this:
-- Sample document held in an XML variable (same shape as Badges.xml).
DECLARE @xml XML =
'<badges>
<row Id="1" UserId="2" Name="Autobiographer" Date="2010-08-11T18:25:03.937" Class="3" TagBased="False" />
<row Id="2" UserId="3" Name="Autobiographer" Date="2010-08-11T18:25:03.997" Class="3" TagBased="False" />
<row Id="3" UserId="4" Name="Autobiographer" Date="2010-08-11T18:25:04.107" Class="3" TagBased="False" />
<row Id="4" UserId="22" Name="Autobiographer" Date="2010-08-11T19:35:05.283" Class="3" TagBased="False" />
<row Id="5" UserId="33" Name="Autobiographer" Date="2010-08-11T19:35:05.330" Class="3" TagBased="False" />
<row Id="6" UserId="27" Name="Autobiographer" Date="2010-08-11T19:40:05.490" Class="3" TagBased="False" />
</badges>';

-- One result row per <row> element; each column reads one attribute (@name).
SELECT r.value('@Id','int') AS Id
      ,r.value('@UserId','int') AS UserId
      ,r.value('@Name','varchar(max)') AS Name
      ,r.value('@Date','datetime') AS [Date]
      ,r.value('@Class','int') AS Class
      ,r.value('@TagBased','bit') AS TagBased
FROM @xml.nodes('/badges/row') AS A(r);
UPDATE The full (minimal) code
DECLARE @XMLDoc XML;

-- Read the file as a single value; the bulk rowset needs an alias (x).
SELECT @XMLDoc = BulkColumn
FROM OPENROWSET(BULK 'C:\Users\Zuhair\Desktop\Badges.xml', SINGLE_BLOB) AS x;

-- One result row per <row> element; each column reads one attribute (@name).
SELECT r.value('@Id','int') AS Id
      ,r.value('@UserId','int') AS UserId
      ,r.value('@Name','varchar(max)') AS Name
      ,r.value('@Date','datetime') AS [Date]
      ,r.value('@Class','int') AS Class
      ,r.value('@TagBased','bit') AS TagBased
FROM @XMLDoc.nodes('/badges/row') AS A(r);

First and foremost, I like Shnugo's answer and I believe that is the path you should follow.
For your specific question, the reason you got all the NULLs is that you are extracting data from attributes and you forgot the leading @ in every column pattern.
Try the code below:
-- Sample document held in an XML variable (same shape as Badges.xml).
DECLARE @XMLDoc XML =
'<badges>
<row Id="1" UserId="2" Name="Autobiographer" Date="2010-08-11T18:25:03.937" Class="3" TagBased="False" />
<row Id="2" UserId="3" Name="Autobiographer" Date="2010-08-11T18:25:03.997" Class="3" TagBased="False" />
<row Id="3" UserId="4" Name="Autobiographer" Date="2010-08-11T18:25:04.107" Class="3" TagBased="False" />
<row Id="4" UserId="22" Name="Autobiographer" Date="2010-08-11T19:35:05.283" Class="3" TagBased="False" />
<row Id="5" UserId="33" Name="Autobiographer" Date="2010-08-11T19:35:05.330" Class="3" TagBased="False" />
<row Id="6" UserId="27" Name="Autobiographer" Date="2010-08-11T19:40:05.490" Class="3" TagBased="False" />
</badges>';
DECLARE @XMLDocID INT;

-- Parse the document and obtain a handle for OPENXML.
EXEC sys.sp_xml_preparedocument @XMLDocID OUTPUT, @XMLDoc;

-- The leading @ in each ColPattern selects an attribute of <row>.
SELECT Id, Name, Date, Class, TagBased
FROM OPENXML(@XMLDocID, '/badges/row')
WITH (Id int '@Id',
      Name NVARCHAR(1000) '@Name',
      Date date '@Date',
      Class smallint '@Class',
      TagBased nvarchar(10) '@TagBased');

-- Release the parsed document held by the server.
EXEC sys.sp_xml_removedocument @XMLDocID;

Related

Inserting XML data in to a table

I need to insert an external XML file data into a SQL Server table. I tried the below code. But this inserts a single record with NULL values for all the columns
DECLARE @xml XML;

-- Read the file and convert it to XML (CONVERT style 2 is kept as posted).
SELECT @xml =
    CONVERT(XML, bulkcolumn, 2)
FROM OPENROWSET(BULK
    'C:\Users\PC901\Downloads\Tags.xml', SINGLE_BLOB) AS X;

SET ARITHABORT ON;

-- Empty the target table before reloading it.
TRUNCATE TABLE Tags;

-- NOTE(review): as posted, nodes('/tags') yields a single row and the paths
-- 'ID[1]', 'WikiPostId[1]', ... address child elements, while the sample XML
-- stores the values in attributes of <row>. This is the behavior the question
-- asks about and is deliberately left unchanged here.
INSERT INTO Tags
(
    ID, WikiPostId, ExcerptPostId, Count, TagName
)
SELECT
    P.value('ID[1]','BIGINT') AS ID,
    P.value('WikiPostId[1]','BIGINT') AS WikiPostId,
    P.value('ExcerptPostId[1]','BIGINT') AS ExcerptPostId,
    P.value('Count[1]','BIGINT') AS Count,
    P.value('TagName[1]','VARCHAR(100)') AS TagName
FROM @xml.nodes('/tags') PropertyFeed(P);

SELECT * FROM Tags;
and the sample XML would be
<?xml version="1.0" encoding="utf-8"?>
<tags>
<row Id="1" TagName=".net" Count="283778" ExcerptPostId="3624959" WikiPostId="3607476" />
<row Id="2" TagName="html" Count="826083" ExcerptPostId="3673183" WikiPostId="3673182" />
<row Id="3" TagName="javascript" Count="1817846" ExcerptPostId="3624960" WikiPostId="3607052" />
<row Id="4" TagName="css" Count="588062" ExcerptPostId="3644670" WikiPostId="3644669" />
<row Id="5" TagName="php" Count="1286873" ExcerptPostId="3624936" WikiPostId="3607050" />
</tags>
Here you go:
-- Sample document with the same shape as Tags.xml.
declare @xml xml = '<?xml version="1.0" encoding="utf-8"?>
<tags>
  <row Id="1" TagName=".net" Count="283778" ExcerptPostId="3624959" WikiPostId="3607476" />
  <row Id="2" TagName="html" Count="826083" ExcerptPostId="3673183" WikiPostId="3673182" />
  <row Id="3" TagName="javascript" Count="1817846" ExcerptPostId="3624960" WikiPostId="3607052" />
  <row Id="4" TagName="css" Count="588062" ExcerptPostId="3644670" WikiPostId="3644669" />
  <row Id="5" TagName="php" Count="1286873" ExcerptPostId="3624936" WikiPostId="3607050" />
</tags>'

-- One result row per <row> element; each column reads one attribute (@name).
Select
    P.value('@Id','BIGINT') AS ID,
    P.value('@WikiPostId','BIGINT') AS WikiPostId,
    P.value('@ExcerptPostId','BIGINT') AS ExcerptPostId,
    P.value('@Count','BIGINT') AS Count,
    P.value('@TagName','VARCHAR(100)') AS TagName
From @xml.nodes('/tags/row') PropertyFeed(P)
outputs
ID WikiPostId ExcerptPostId Count TagName
----------- -------------------- -------------------- -------------------- ----------
1 3607476 3624959 283778 .net
2 3673182 3673183 826083 html
3 3607052 3624960 1817846 javascript
4 3644669 3644670 588062 css
5 3607050 3624936 1286873 php
(5 rows affected)

Convert XML to SQL Server 2008R2 table [duplicate]

This question already has answers here:
Import 'xml' into Sql Server
(5 answers)
Closed 5 years ago.
I want to import below XML file into SQL table. (SQL Server 2008R2)
<table>
<id>{72cbb5ab-dbb3-4de7-9010-5dd1192a1851}</id>
<rows>
<row>
<columns>
<column name="itemcode" value="0984-22-301" type="System.String" />
<column name="date" value="08-November-2017" type="System.DateTime" />
<column name="amount" value="10" type="System.Decimal" />
<column name="DefaultKey" value="1" type="System.Int32" />
</columns>
</row>
<row>
<columns>
<column name="itemcode" value="0984-33-101" type="System.String" />
<column name="date" value="08-November-2017" type="System.DateTime" />
<column name="amount" value="11" type="System.Decimal" />
<column name="DefaultKey" value="2" type="System.Int32" />
</columns>
</row>
</rows>
<key>DefaultKey</key>
<total>0</total>
<data />
<parameters />
</table>
It should look like a sql table with columns id, itemcode, date and amount.
What should my query look like?
Solved the question.
-- Stage the XML document(s) in a table variable.
declare @xmltable table (data xml)

insert into @xmltable (data)
select DATA from [MyData]..myxml

-- One output row per <row>; the id comes from the enclosing <table>.
-- Each value is taken from the @value attribute of the <column> whose
-- @name attribute matches.
SELECT
    LineId = t.c.value('id[1]', 'nvarchar(max)'),
    ColumnItemCode = b.l.value('(columns/column[@name="itemcode"]/@value)[1]', 'varchar(20)'),
    ColumnDate = b.l.value('(columns/column[@name="date"]/@value)[1]', 'varchar(20)'),
    ColumnAmount = b.l.value('(columns/column[@name="amount"]/@value)[1]', 'varchar(20)')
FROM
    @xmltable x
    CROSS APPLY x.data.nodes('table') t(c)
    -- Rows are navigated from their own <table> node so that a value holding
    -- several <table> elements cannot pair rows with the wrong id.
    CROSS APPLY t.c.nodes('rows/row') b(l)
This resulted in:
LineId ColumnItemCode ColumnDate ColumnAmount
{72cbb5ab-dbb3-4de7-9010-5dd1192a1851} 0984-22-301 08-November-2017 10
{72cbb5ab-dbb3-4de7-9010-5dd1192a1851} 0984-33-101 08-November-2017 11
Thanks for your help.

Query Optimization while updating XML String in SQL Server?

I'm working with XML string shown below.
I have to update the XML string as follows:
If the XML string contains 1000 records or more, it kills the query
If the XML string contains < 1000 records, let it continue.
How can I do this?
example data
<root xmlns:json="http://james.newtonking.com/projects/json">
<row json:Array="true" RowNumber="1">
<Column json:Array="true" Name="Number" Value="1" />
<Column json:Array="true" Name="HourFrom" Value="13.2" />
<Column json:Array="true" Name="HourTo" Value="13.3" />
<Column json:Array="true" Name="Rate" Value="0.895" />
</row>
<row json:Array="true" RowNumber="2">
<Column json:Array="true" Name="Number" Value="1" />
<Column json:Array="true" Name="HourFrom" Value="13.3" />
<Column json:Array="true" Name="HourTo" Value="13.4" />
<Column json:Array="true" Name="Rate" Value="0.907" />
</row>
</root>
Temp table creation
-- Temp table with an identity key and one XML value per row.
CREATE TABLE #xmltable
(
    Id        INT IDENTITY(1, 1) PRIMARY KEY CLUSTERED,
    DataValue XML
);

-- Primary XML index on the XML column.
CREATE PRIMARY XML INDEX indexratesheet
    ON #xmltable (DataValue)
Inserting data into table
-- TheXMLfromAbove is a placeholder for the example document shown earlier.
INSERT INTO #xmltable (DataValue) VALUES (TheXMLfromAbove)
updating the XML string in the table
-- Adds a ValidationComments <Column> to one <row> per iteration.
-- NOTE(review): @validationcomments is not declared in this snippet;
-- presumably it is declared and set earlier - confirm.
DECLARE @i INT = 1
WHILE (@i <= 1000)
BEGIN
    -- NOTE(review): the predicate is written against @RowNumber because that is
    -- the attribute the sample rows carry; the posted text compared an
    -- attribute named "i" - confirm against the real data.
    UPDATE #xmltable SET DataValue.modify('insert <Column Name="ValidationComments" Value="{sql:variable("@validationcomments")}"></Column>
into (/root/row[@RowNumber=sql:variable("@i")])[1]')
    SET @i = @i + 1
END
If you have to keep this with XML it should be much faster to shred the whole XML into a derivedTable and re-build it from scratch.
Try this:
-- Temp table with an identity key and one XML value per row.
CREATE TABLE #xmltable(
    Id INT Identity (1,1) PRIMARY KEY CLUSTERED,
    DataValue XML
);
CREATE PRIMARY XML INDEX indexratesheet ON #xmltable
(
    DataValue
);

--Your test XML
INSERT INTO #xmltable (DataValue ) VALUES(N'<root xmlns:json="http://james.newtonking.com/projects/json">
<row json:Array="true" RowNumber="1">
<Column json:Array="true" Name="Number" Value="1" />
<Column json:Array="true" Name="HourFrom" Value="13.2" />
<Column json:Array="true" Name="HourTo" Value="13.3" />
<Column json:Array="true" Name="Rate" Value="0.895" />
</row>
<row json:Array="true" RowNumber="2">
<Column json:Array="true" Name="Number" Value="1" />
<Column json:Array="true" Name="HourFrom" Value="13.3" />
<Column json:Array="true" Name="HourTo" Value="13.4" />
<Column json:Array="true" Name="Rate" Value="0.907" />
</row>
</root>');

--The query to shred it
--One row per <row>; each value comes from the @Value attribute of the
--<Column> whose @Name attribute matches.
SELECT r.value(N'@RowNumber','int') AS RowNumber
      ,r.value(N'(Column[@Name="Number"]/@Value)[1]','int') AS Number
      ,r.value(N'(Column[@Name="HourFrom"]/@Value)[1]','decimal(10,4)') AS HourFrom
      ,r.value(N'(Column[@Name="HourTo"]/@Value)[1]','decimal(10,4)') AS HourTo
      ,r.value(N'(Column[@Name="Rate"]/@Value)[1]','decimal(10,4)') AS Rate
INTO #derivedTable
FROM #xmltable AS t
CROSS APPLY t.DataValue.nodes(N'/root/row') AS A(r);

--The query to re-build it
--Aliases starting with @ become attributes. The unnamed empty-string columns
--separate consecutive <Column> elements so they are not merged into one.
WITH XMLNAMESPACES('http://james.newtonking.com/projects/json' AS json)
SELECT 'true' AS [@json:Array]
      ,t.RowNumber AS [@RowNumber]
      ,'true' AS [Column/@json:Array]
      ,'Number' AS [Column/@Name]
      ,t.Number AS [Column/@Value]
      ,''
      ,'true' AS [Column/@json:Array]
      ,'HourFrom' AS [Column/@Name]
      ,t.HourFrom AS [Column/@Value]
      ,''
      ,'true' AS [Column/@json:Array]
      ,'HourTo' AS [Column/@Name]
      ,t.HourTo AS [Column/@Value]
      ,''
      ,'true' AS [Column/@json:Array]
      ,'Rate' AS [Column/@Name]
      ,t.Rate AS [Column/@Value]
      ,''
      ,'ValidationComments' AS [Column/@Name]
      ,'SomeValue' AS [Column/@Value]
FROM #derivedTable AS t
FOR XML PATH('row'),ROOT('root');

--Clean up (careful with real data!)
GO
DROP TABLE #derivedTable;
DROP TABLE #xmltable
This is the result
<root xmlns:json="http://james.newtonking.com/projects/json">
<row json:Array="true" RowNumber="1">
<Column json:Array="true" Name="Number" Value="1" />
<Column json:Array="true" Name="HourFrom" Value="13.2000" />
<Column json:Array="true" Name="HourTo" Value="13.3000" />
<Column json:Array="true" Name="Rate" Value="0.8950" />
<Column Name="ValidationComments" Value="SomeValue" />
</row>
<row json:Array="true" RowNumber="2">
<Column json:Array="true" Name="Number" Value="1" />
<Column json:Array="true" Name="HourFrom" Value="13.3000" />
<Column json:Array="true" Name="HourTo" Value="13.4000" />
<Column json:Array="true" Name="Rate" Value="0.9070" />
<Column Name="ValidationComments" Value="SomeValue" />
</row>
</root>
UPDATE
Try this query, it will work for all different column lists, but it will repeat the namespace declaration. This is not wrong, but very annoying. At the moment I do not have the time to think about a hack. Let me know, if this works for you.
-- Rebuilds every <row> with its existing children copied as-is and one extra
-- ValidationComments <Column> appended; reads from #xmltable.
WITH XMLNAMESPACES('http://james.newtonking.com/projects/json' AS json)
,CTE AS
(
    -- One row per <row>: its RowNumber attribute and all child nodes.
    SELECT r.value(N'@RowNumber','int') AS RowNumber
          ,r.query('./*') AS TheContent
    FROM #xmltable AS t
    CROSS APPLY t.DataValue.nodes(N'/root/row') AS A(r)
)
SELECT CTE.TheContent AS [*]
      ,'ValidationComments' AS [Column/@Name]
      ,'SomeValue' AS [Column/@Value]
FROM CTE
FOR XML PATH('row'),ROOT('root')

How to Insert xml data into SQL Server table?

How to import below XML data into SQL Server table with three columns?
<dataset>
<metadata>
<item name="NAME_LAST" type="xs:string" length="62" />
<item name="NAME_FIRST" type="xs:string" length="62" />
<item name="NAME_MIDDLE" type="xs:string" length="32" />
</metadata>
<data>
<row>
<value>SMITH</value>
<value>MARY</value>
<value>N</value>
</row>
<row>
<value>SMITH2</value>
<value>MARY2</value>
<value>N2</value>
</row>
</data>
</dataset>
Try this:
-- Sample document: each <row> holds three positional <value> elements.
DECLARE @input XML = '<dataset>
<metadata>
<item name="NAME_LAST" type="xs:string" length="62" />
<item name="NAME_FIRST" type="xs:string" length="62" />
<item name="NAME_MIDDLE" type="xs:string" length="32" />
</metadata>
<data>
<row>
<value>SMITH</value>
<value>MARY</value>
<value>N</value>
</row>
<row>
<value>SMITH2</value>
<value>MARY2</value>
<value>N2</value>
</row>
</data>
</dataset>'

-- One inserted row per <row>; the 1st, 2nd and 3rd <value> map to the
-- three target columns in order.
INSERT INTO dbo.YourTable(ColName, ColFirstName, ColOther)
SELECT
    Name = XCol.value('(value)[1]','varchar(25)'),
    FirstName = XCol.value('(value)[2]','varchar(25)'),
    OtherValue = XCol.value('(value)[3]','varchar(25)')
FROM
    @input.nodes('/dataset/data/row') AS XTbl(XCol)
Insert XML Data into sql Server table
-- Status flag: stays 'Failed' unless the INSERT below affects at least one row.
-- The variable is declared under the name the rest of the script uses.
DECLARE @retValue varchar(50);
DECLARE @XmlStr XML;

SET @XmlStr='<Customers>
<customer>
<ID>111589</ID>
<FirstName>name1</FirstName>
<LastName>Lname1</LastName>
<Company>ABC</Company>
</customer>
<customer>
<ID>12345</ID>
<FirstName>name2</FirstName>
<LastName>Lname2</LastName>
<Company>ABC</Company>
</customer>
<customer>
<ID>14567</ID>
<FirstName>name3</FirstName>
<LastName>Lname3</LastName>
<Company>DEF</Company>
</customer>
</Customers>';

SET @retValue='Failed';

-- One inserted row per <customer>; a missing <ID> is stored as 0.
INSERT INTO [test_xmlinsert](
    [id],
    [firstName],
    [lastName],
    [company]
)
SELECT
    COALESCE([Table].[Column].value('ID[1]', 'int'),0) AS [ID],
    [Table].[Column].value('FirstName[1]', 'varchar(50)') AS [FirstName],
    [Table].[Column].value('LastName[1]', 'varchar(50)') AS [LastName],
    [Table].[Column].value('Company[1]', 'varchar(50)') AS [Company]
FROM @XmlStr.nodes('/Customers/customer') AS [Table]([Column]);

-- @@ROWCOUNT must be read immediately after the INSERT it refers to.
IF (@@ROWCOUNT > 0)
    SET @retValue='SUCCESS';

Validating individual XML elements in SQL Server 2008R2

I'm writing a stored procedure to process XML data uploaded by the user:
<People>
<Person Id="1" FirstName="..." LastName="..." />
<Person Id="2" FirstName="..." LastName="..." />
<Person Id="3" FirstName="..." LastName="..." />
<Person Id="4" FirstName="..." LastName="..." />
<Person Id="5" FirstName="..." LastName="..." />
</People>
I would like to use a schema to make sure that the entities are valid, but I don't want the entire process to fail just because of one invalid entity. Instead, I would like to log all invalid entities to a table and process the valid entities as normal.
Is there a recommended way to do this?
A pure SQL approach would be:
Create a schema collection that defines <Person>:
-- Schema collection describing a single <Person> element whose Id (xs:int),
-- FirstName and LastName (xs:string) attributes are all required.
CREATE XML SCHEMA COLLECTION dbo.testtest
AS N'<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="Person">
<xs:complexType>
<xs:attribute name="Id" type="xs:int" use="required"/>
<xs:attribute name="FirstName" type="xs:string" use="required"/>
<xs:attribute name="LastName" type="xs:string" use="required"/>
</xs:complexType>
</xs:element>
</xs:schema>
'
(one-time operation)
Have an XML query that selects each <Person> node from <People> as a separate row.
Declare a cursor on that query and select each row into an untyped xml variable. After the select, try to assign to a typed xml variable from within a try-catch block.
Resulting code would look like:
-- Input document; the third <Person> has a non-integer Id on purpose.
declare @source xml = N'
<People>
<Person Id="1" FirstName="..." LastName="..." />
<Person Id="2" FirstName="..." LastName="..." />
<Person Id="f" FirstName="..." LastName="..." />
<Person Id="4" FirstName="..." LastName="..." />
<Person Id="5" FirstName="..." LastName="..." />
</People>';

-- Cursor yielding each <Person> element as its own XML value.
declare foo cursor
    local
    forward_only
    read_only
for
    select t.p.query('.')
    from @source.nodes('People/Person') as t(p)
;

declare @x xml (dbo.testtest);   -- typed: assignment is checked against the schema collection
declare @x_raw xml;              -- untyped: holds the fetched element as-is

open foo;
fetch next from foo into @x_raw;
while @@fetch_status = 0
begin
    begin try
        -- Assigning to the typed variable raises an error when the element
        -- does not satisfy dbo.testtest; the catch block reports it.
        set @x = @x_raw;
        print cast(@x_raw as nvarchar(max)) + ': OK';
    end try
    begin catch
        print cast(@x_raw as nvarchar(max)) + ': FAILED';
    end catch;
    fetch next from foo into @x_raw;
end;
close foo;
deallocate foo;
Result:
<Person Id="1" FirstName="..." LastName="..."/>: OK
<Person Id="2" FirstName="..." LastName="..."/>: OK
<Person Id="f" FirstName="..." LastName="..."/>: FAILED
<Person Id="4" FirstName="..." LastName="..."/>: OK
<Person Id="5" FirstName="..." LastName="..."/>: OK
A simpler option is to create a CLR stored procedure that would parse XML in a .NET language.

Resources