T-SQL Regex range in XML method nodes() - sql-server

I have a table T with two columns. Column A is a varchar column and Column B is a XML column.
Somewhere inside Column B there is always the following parent tag: <Documents> ... </Documents>. Inside there are some <Document>...</Document> children.
I would like to get a result set with two columns:
Column 1 should contain the same values of Column A;
Column 2 should contain the content of one <Document>...</Document> only.
Also, in column 2 there should only be <Document>s with attribute equal to 1, 2 or 3.
E.g. Starting table T:
Column A | Column B
--------------------------------------------------------------------------
abc | <Documents><Document ID="1">Doc Foo</Document><Document ID="4">Doc Bar</Document></Documents>
def | <Documents><Document ID="2">Doc Foo2</Document><Document ID="3">Doc Bar2</Document></Documents>
Expected result:
Column 1 | Column 2
-------------------------------------
abc |<Document ID="1">Doc Foo</Document>
def |<Document ID="2">Doc Foo2</Document>
def |<Document ID="3">Doc Bar2</Document>
I can get the expected result like this:
SELECT
[Column A] AS [Column 1]
,T2.c.query('.') AS [Column 2]
FROM T AS tbl
CROSS APPLY T.nodes('*/Documents/Document[#ID="1" or #ID="2" or #ID="3"]') AS T2(c)
But when filter conditions are more complex it can get out of control without regex.
This does not work:
SELECT
[Column A] AS [Column 1]
,T2.c.query('.') AS [Column 2]
FROM T AS tbl
CROSS APPLY T.nodes('*/Documents/Document[#ID="[1-3]"]') AS T2(c)
How to make it work?

To expand on what #JeroenMostert suggests, and if he's correct, then perhaps what you are after is:
WITH YourTable AS(
SELECT *
FROM (VALUES('abc',CONVERT(xml,'<Documents><Document ID="1">Doc Foo</Document><Document ID="4">Doc Bar</Document></Documents>')),
('def',CONVERT(xml,'<Documents><Document ID="2">Doc Foo2</Document><Document ID="3">Doc Bar2</Document></Documents>')))V(Col1,Col2))
SELECT YT.Col1,
Ds.D.query('.')
FROM YourTable YT
CROSS APPLY YT.Col2.nodes('Documents/Document') Ds(D)
WHERE Ds.D.value('#ID','int') IN (1,2,3);

You could filter on the attribute to appear in a sequence of values:
Document[#ID = (1,2,3)]

You were very close. XQuery is based on the notion of sequences. So you need to check if the #ID attribute value is a member of the sequence.
SQL
-- DDL and sample data population, start
DECLARE #tbl TABLE (ID CHAR(3), xmldata XML);;
INSERT INTO #tbl (ID, xmldata)
VALUES
('abc', '<Documents><Document ID="1">Doc Foo</Document><Document ID="4">Doc Bar</Document></Documents>')
, ('def', '<Documents><Document ID="2">Doc Foo2</Document><Document ID="3">Doc Bar2</Document></Documents>');
-- DDL and sample data population, end
SELECT ID
, c.query('.') AS [Column 2]
FROM #tbl AS tbl
CROSS APPLY tbl.xmldata.nodes('/Documents/Document[#ID=("1","2","3")]') AS t(c);
Output
+-----+--------------------------------------+
| ID | Column 2 |
+-----+--------------------------------------+
| abc | <Document ID="1">Doc Foo</Document> |
| def | <Document ID="2">Doc Foo2</Document> |
| def | <Document ID="3">Doc Bar2</Document> |
+-----+--------------------------------------+

Related

Split a string in SQL by hyphen in 2012 version

I have multiple string in a column where I have get last string after column
Below are three example like same I have different number hyphen that can occur in a string but desired result is I have string before last hyphen
1. abc-def-Opto
2. abc-def-ijk-5C-hello-Opto
3. abc-def-ijk-4C-hi-Build
4. abc-def-ijk-4C-123-suppymanagement
Desired result set is
def
hello
hi
123
How to do this in SQL query to get this result set. I have MSSQL 2012 version
Require a generic sql which can get the result set
There are many ways to split/parse a string. ParseName() would fail because you may have more than 4 positions.
One option (just for fun), is to use a little XML.
We reverse the string
Convert into XML
Grab the second node
Reverse the desired value for the final presentation
Example
Declare #YourTable Table ([SomeCol] varchar(50))
Insert Into #YourTable Values
('abc-def-Opto')
,('abc-def-ijk-5C-hello-Opto')
,('abc-def-ijk-4C-hi-Build')
,('abc-def-ijk-4C-123-suppymanagement')
Select *
,Value = reverse(convert(xml,'<x>'+replace(reverse(SomeCol),'-','</x><x>')+'</x>').value('x[2]','varchar(150)'))
from #YourTable
Returns
SomeCol Value
abc-def-Opto def
abc-def-ijk-5C-hello-Opto hello
abc-def-ijk-4C-hi-Build hi
abc-def-ijk-4C-123-suppymanagement 123
Without getting into XML stuff, simply using string functions of sql server.
Declare #YourTable Table ([SomeCol] varchar(50))
Insert Into #YourTable Values
('abc-def-Opto')
,('abc-def-ijk-5C-hello-Opto')
,('abc-def-ijk-4C-hi-Build')
,('abc-def-ijk-4C-123-suppymanagement');
SELECT *
,RTRIM(LTRIM(REVERSE(
SUBSTRING(
SUBSTRING(REVERSE([SomeCol]) , CHARINDEX('-', REVERSE([SomeCol])) +1 , LEN([SomeCol]) )
, 1 , CHARINDEX('-', SUBSTRING(REVERSE([SomeCol]) , CHARINDEX('-', REVERSE([SomeCol])) +1 , LEN([SomeCol]) ) ) -1
)
)))
FROM #YourTable
i am not sure this script will exactly useful to your requirement but i am just trying to give an idea how to split the data
IF OBJECT_ID('tempdb..#Temp')IS NOT NULL
DROP TABLE #Temp
;WITH CTE(Id,data)
AS
(
SELECT 1,'abc-def-Opto' UNION ALL
SELECT 2,'abc-def-ijk-5C-hello-Opto' UNION ALL
SELECT 3,'abc-def-ijk-4C-hi-Build' UNION ALL
SELECT 4,'abc-def-ijk-4C-123-suppymanagement'
)
,Cte2
AS
(
SELECT Id, CASE WHEN Id=1 AND Setdata=1 THEN data
WHEN Id=2 AND Setdata=2 THEN data
WHEN Id=3 AND Setdata=3 THEN data
WHEN Id=4 AND Setdata=4 THEN data
ELSE NULL
END AS Data
FROM
(
SELECT Id,
Split.a.value('.','nvarchar(1000)') AS Data,
ROW_NUMBER()OVER(PARTITION BY id ORDER BY id) AS Setdata
FROM(
SELECT Id,
CAST('<S>'+REPLACE(data ,'-','</S><S>')+'</S>' AS XML) AS data
FROM CTE
) AS A
CROSS APPLY data.nodes('S') AS Split(a)
)dt
)
SELECT * INTO #Temp FROM Cte2
SELECT STUFF((SELECT DISTINCT ', '+ 'Set_'+CAST(Id AS VARCHAR(10))+':'+Data
FROM #Temp WHERE ISNULL(Data,'')<>'' FOR XML PATH ('')),1,1,'')
Result
Set_1:abc, Set_2:def, Set_3:ijk, Set_4:4C
You can do like
WITH CTE AS
(
SELECT 1 ID,'abc-def-Opto' Str
UNION
SELECT 2, 'abc-def-ijk-5C-hello-Opto'
UNION
SELECT 3, 'abc-def-ijk-4C-hi-Build'
UNION
SELECT 4, 'abc-def-ijk-4C-123-suppymanagement'
)
SELECT ID,
REVERSE(LEFT(REPLACE(P2, P1, ''), CHARINDEX('-', REPLACE(P2, P1, ''))-1)) Result
FROM (
SELECT LEFT(REVERSE(Str), CHARINDEX('-', REVERSE(Str))) P1,
REVERSE(Str) P2,
ID
FROM CTE
) T;
Returns:
+----+--------+
| ID | Result |
+----+--------+
| 1 | def |
| 2 | hello |
| 3 | hi |
| 4 | 123 |
+----+--------+
Demo

Returning Field names as part of a SQL Query

I need to write a Sql Satement that gets passed any valid SQL subquery, and return the the resultset, WITH HEADERS.
Somehow i need to interrogate the resultset, get the fieldnames and return them as part of a "Union" with the origional data, then pass the result onwards for exporting.
Below my attempt: I have a Sub-Query Callled "A", wich returns a dataset and i need to query it for its fieldnames. ?ordinally maybe?
select A.fields[0].name, A.fields[1].name, A.fields[2].name from
(
Select 'xxx1' as [Complaint Mechanism] , 'xxx2' as [Actual Achievements]
union ALL
Select 'xxx3' as [Complaint Mechanism] , 'xxx4' as [Actual Achievements]
union ALL
Select 'xxx5' as [Complaint Mechanism] , 'xxx6' as [Actual Achievements] ) as A
Any pointers would be appreciated (maybe i am just missing the obvious...)
The Resultset should look like the table below:
F1 F2
--------------------- ---------------------
[Complaint Mechanism] [Actual Achievements]
xxx1 xxx2
xxx3 xxx4
xxx5 xxx6
If you have a static number of columns, you can put your data into a temp table and then query tempdb.sys.columns to get the column names, which you can then union on top of your data. If you will have a dynamic number of columns, you will need to use dynamic SQL to build your pivot statement but I'll leave that up to you to figure out.
The one caveat here is that all data under your column names will need to be converted to strings:
select 1 a, 2 b
into #a;
select [1] as FirstColumn
,[2] as SecondColumn
from (
select column_id
,name
from tempdb.sys.columns
where object_id = object_id('tempdb..#a')
) d
pivot (max(name)
for column_id in([1],[2])
) pvt
union all
select cast(a as nvarchar(100))
,cast(b as nvarchar(100))
from #a;
Query Results:
| FirstColumn | SecondColumn |
|-------------|--------------|
| a | b |
| 1 | 2 |

SQL - Selecting Multi Keyword Values From Single Field

So I'm building a SQL view for more readable data I'll use to report on. I have one table that houses field data (keyword ids) for questions within a section on a website. Column3 is a multi-keyword field and stored in the DB delimited by chr(185).
Table 1
Column1 | Column2 | Column3
4456 | 2323 | ¹8661¹8662¹
I have a second table that houses keyword ids and their values.
Table 2
Column1 | Column2
4456 | val1
2323 | val2
8661 | val3
8662 | val4
The view joins the tables to display the keyword values, but I'm not sure how to handle the multi-keyword field (looking to format the result like below).
View Table
Column1 | Column2 | Column3
val1 | val2 | val3; val4
Would I need some sort of function to accomplish this or is there another way?
Here's one way to do it using some of SQL Server's XML features ..sorry the column and table names make this kind of difficult to read, here's a fiddle:
with mapped (c1, c2, val) as (
select t1.column1, t1.column2, t2.column2
from table1 t1
cross apply (select convert(xml, '<_' + replace(substring(column3,2,len(column3)-2), '¹', '/><_') + '/>')) s(c)
cross apply c.nodes('*') x(n)
join table2 t2 on t2.column1=n.value('fn:substring(local-name(.),2)', 'int'))
select
column1 = c1.column2,
column2 = c2.column2,
column3 = (select val + '; ' from mapped where c1=t1.column1 and c2=t1.column2 for xml path(''))
from table1 t1
join table2 c1 on c1.column1=t1.column1
join table2 c2 on c2.column1=t1.column2

TSQL - View with cross apply and pivot

this is my base table:
docID | rowNumber | Column1 | Column2 | Column3
I use cross apply and pivot to transform the records in Column1 to actual columns and use the values in column2 and column3 as records for the new columns. In my fiddle you can see base and transformed select statement.
I have columns like Plant and Color which are numbered, e.g. Plant1, Plant2, Plant3, Color1, Color2 etc.
For each plant that exists in all plant columns I want to create a new row with a comma separated list of colors in one single column.
What I want to achieve is also in below screenshot:
This should become a view to use in Excel. How do I need to modify the view to get to the desired result?
Additional question: The Length-column is numeric. Is there any way to switch the decimal separator from within Excel as a user and apply it to this or all numeric column(s) so that it will be recognized by Excel as a number?
I used to have an old php web query where I would pass the separator from a dropdown cell in Excel as a parameter.
Thank you.
First off, man the way your data is stored is a mess. I would recommend reading up on good data structures and fixing yours if you can. Here's a TSQL query that gets you the data in the correct format.
WITH CTE_no_nums
AS
(
SELECT docID,
CASE
WHEN PATINDEX('%[0-9]%',column1) > 0
THEN SUBSTRING(column1,0,PATINDEX('%[0-9]%',column1))
ELSE column1
END AS cols,
COALESCE(column2,column3) AS vals
FROM miscValues
WHERE column2 IS NOT NULL
OR column3 IS NOT NULL
),
CTE_Pivot
AS
(
SELECT docID,partNumber,prio,[length],material
FROM CTE_no_nums
PIVOT
(
MAX(vals) FOR cols IN (partNumber,prio,[length],material)
) pvt
)
SELECT A.docId + ' # ' + B.vals AS [DocID # Plant],
A.docID,
A.partNumber,
A.prio,
B.vals AS Plant,
A.partNumber + '#' + A.material + '#' + A.[length] AS Identification,
A.[length],
SUBSTRING(CA.colors,0,LEN(CA.colors)) colors --substring removes last comma
FROM CTE_Pivot A
INNER JOIN CTE_no_nums B
ON A.docID = B.docID
AND B.cols = 'Plant'
CROSS APPLY ( SELECT vals + ','
FROM CTE_no_nums C
WHERE cols = 'Color'
AND C.docID = A.docID
FOR XML PATH('')
) CA(colors)
Results:
DocID # Plant docID partNumber prio Plant Identification length colors
---------------- ------ ---------- ---- ---------- ------------------ ------- -------------------------
D0001 # PlantB D0001 X001 1 PlantB X001#MA123#10.87 10.87 white,black,blue
D0001 # PlantC D0001 X001 1 PlantC X001#MA123#10.87 10.87 white,black,blue
D0002 # PlantA D0002 X002 2 PlantA X002#MA456#16.43 16.43 black,yellow
D0002 # PlantC D0002 X002 2 PlantC X002#MA456#16.43 16.43 black,yellow
D0002 # PlantD D0002 X002 2 PlantD X002#MA456#16.43 16.43 black,yellow

Query to find the record with most matching columns, where the number of columns and names of columns is unknown?

I have two tables, X and Y, with identical schema but different records. Given a record from X, I need a query to find the closest matching record in Y that contains NULL values for non-matching columns. Identity columns should be excluded from the comparison. For example, if my record looked like this:
------------------------
id | col1 | col2 | col3
------------------------
0 |'abc' |'def' | 'ghi'
And table Y looked like this:
------------------------
id | col1 | col2 | col3
------------------------
6 |'abc' |'def' | 'zzz'
8 | NULL |'def' | NULL
Then the closest match would be record 8, since where the columns don't match, there are NULL values. 6 WOULD have been the closest match, but the 'zzz' disqualified it.
What's unique about this problem is that the schema of the tables is unknown besides the id column and the data types. There could be 4 columns, or there could be 7 columns. We just don't know - it's dynamic. All we know is that there is going to be an 'id' column and that the columns will be strings, either varchar or nvarchar.
What is the best query in this case to pick the closest matching record out of Y, given a record from X? I'm actually writing a function. The input is an integer (the id of a record in X) and the output is an integer (the id of a record in Y, or NULL). I'm an SQL novice, so a brief explanation of what's happening in your solution would help me greatly.
There could be 4 columns, or there could be 7 columns.... I'm actually writing a function.
This is an impossible task. Because functions are deterministic, so you cannot have a function that will work on an arbitrary table structure, using dynamic SQL. A stored procedure, sure, but not a function.
However, the below shows you a way using FOR XML and some decomposing of the XML to unpivot rows into column names and values which can then be compared. The technique used here and the queries can be incorporated into a stored procedure.
MS SQL Server 2008 Schema Setup:
-- this is the data table to match against
create table t1 (
id int,
col1 varchar(10),
col2 varchar(20),
col3 nvarchar(40));
insert t1
select 6, 'abc', 'def', 'zzz' union all
select 8, null , 'def', null;
-- this is the data with the row you want to match
create table t2 (
id int,
col1 varchar(10),
col2 varchar(20),
col3 nvarchar(40));
insert t2
select 0, 'abc', 'def', 'ghi';
GO
Query 1:
;with unpivoted1 as (
select n.n.value('local-name(.)','nvarchar(max)') colname,
n.n.value('.','nvarchar(max)') value
from (select (select * from t2 where id=0 for xml path(''), type)) x(xml)
cross apply x.xml.nodes('//*[local-name()!="id"]') n(n)
), unpivoted2 as (
select x.id,
n.n.value('local-name(.)','nvarchar(max)') colname,
n.n.value('.','nvarchar(max)') value
from (select id,(select * from t1 where id=outr.id for xml path(''), type) from t1 outr) x(id,xml)
cross apply x.xml.nodes('//*[local-name()!="id"]') n(n)
)
select TOP(1) WITH TIES
B.id,
sum(case when A.value=B.value then 1 else 0 end) matches
from unpivoted1 A
join unpivoted2 B on A.colname = B.colname
group by B.id
having max(case when A.value <> B.value then 1 end) is null
ORDER BY matches;
Results:
| ID | MATCHES |
----------------
| 8 | 1 |

Resources