Related
Can I return a value at a particular position with the STRING_SPLIT function in SQL Server 2016 or higher?
I know the order from a select is not guaranteed, but is it with STRING_SPLIT?
DROP TABLE IF EXISTS #split
SELECT 'z_y_x' AS splitIt
INTO #split UNION
SELECT 'a_b_c'
SELECT * FROM #split;
WITH cte
AS (
SELECT ROW_NUMBER() OVER ( PARTITION BY s.splitIt ORDER BY s.splitIt ) AS position,
s.splitIt,
value
FROM #split s
CROSS APPLY STRING_SPLIT(s.splitIt, '_')
)
SELECT * FROM cte WHERE position = 2
Will this always return the value at the 2nd element? b for a_b_c and y for z_y_x?
I don't understand why Microsoft doesn't return a position indicator column alongside the value for this function.
There is - starting with v2016 - a solution via FROM OPENJSON():
-- Split a comma-separated list with OPENJSON: wrapping the list as a JSON
-- array of strings makes OPENJSON return each element's zero-based array
-- index in [key] next to its [value].
-- NOTE: elements containing JSON-special characters (double quotes,
-- backslashes, control characters) must be escaped before this works.
DECLARE @str VARCHAR(100) = 'val1,val2,val3';

SELECT [key], [value], [type]
FROM OPENJSON('["' + REPLACE(@str, ',', '","') + '"]');
The result
key value type
0 val1 1
1 val2 1
2 val3 1
The documentation tells clearly:
When OPENJSON parses a JSON array, the function returns the indexes of the elements in the JSON text as keys.
For your case this was:
-- Rebuild the sample table so the script can be re-run in the same session.
DROP TABLE IF EXISTS #split;

SELECT 'z_y_x' AS splitIt
INTO #split
UNION ALL
SELECT 'a_b_c';

DECLARE @delimiter CHAR(1) = '_';

-- OPENJSON exposes the array index of every element in [key], so the
-- element at a given position can be filtered directly.
SELECT s.splitIt, j.[key], j.[value], j.[type]
FROM #split AS s
CROSS APPLY OPENJSON('["' + REPLACE(s.splitIt, @delimiter, '","') + '"]') AS j
WHERE j.[key] = 1; --zero based: 1 is the second element
Let's hope, that future versions of STRING_SPLIT() will include this information
UPDATE Performance tests, compare with popular Jeff-Moden-splitter
Try this out:
USE master;
GO
CREATE DATABASE dbTest;
GO
USE dbTest;
GO
--Jeff Moden's splitter
-- Tally-table based string splitter, implemented as an inline table-valued function.
--   @pString    : delimited string to split (VARCHAR(8000))
--   @pDelimiter : single-character delimiter
-- Returns one row per element:
--   ItemNumber  : 1-based position of the element inside @pString
--   Item        : the element text
CREATE FUNCTION [dbo].[DelimitedSplit8K](@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b), --10E+4 or 10,000 rows max
-- one sequential number per byte of the input string
cteTally(N) AS (
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
-- start position of every element: position 1 plus the position after each delimiter
cteStart(N1) AS (
    SELECT 1 UNION ALL
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
-- length of every element; when no further delimiter follows, 8000 means "up to the end"
cteLen(N1,L1) AS (
    SELECT s.N1,
           ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
)
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
       Item       = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
GO
--Avoid first call bias
SELECT * FROM dbo.DelimitedSplit8K('a,b,c',',');
GO
--Table to keep the results
CREATE TABLE Results(ID INT IDENTITY,ResultSource VARCHAR(100),durationMS INT, RowsCount INT);
GO
--Table with strings to split
CREATE TABLE dbo.DelimitedItems(ID INT IDENTITY,DelimitedNString nvarchar(4000),DelimitedString varchar(8000));
GO
--Get rows with randomly mixed strings of 100 items
--Try to play with the count of rows (count behind GO) and the count with TOP
--The list is built once per batch and stored twice (nvarchar and varchar) in
--the same INSERT, so no table-wide UPDATE has to run on every GO iteration.
DECLARE @mixed NVARCHAR(MAX) = STUFF((
        SELECT TOP 100 ',' + REPLACE(v.[name], ',', ';') --keep the delimiter out of the items
        FROM master..spt_values v
        WHERE LEN(v.[name]) > 0
        ORDER BY NEWID()                                  --random pick, random order
        FOR XML PATH('')), 1, 1, '');
INSERT INTO DelimitedItems(DelimitedNString, DelimitedString)
VALUES (@mixed, @mixed);
GO 500 --create 500 differently mixed rows
--The tests
-- Each section splits every row of dbo.DelimitedItems into its own temp table
-- and records the row count and the elapsed time (ms) in Results.
-- @d holds the start time of the section currently being measured.
DECLARE @d DATETIME2;

-- 1) DelimitedSplit8K fed from the NVARCHAR(4000) column
SET @d = SYSUTCDATETIME();
SELECT DI.ID, DS.Item, DS.ItemNumber
INTO #TEMP
FROM dbo.DelimitedItems DI
CROSS APPLY dbo.DelimitedSplit8K(DI.DelimitedNString, ',') DS;
INSERT INTO Results(ResultSource, RowsCount, durationMS)
SELECT 'delimited8K with NVARCHAR(4000)'
      ,(SELECT COUNT(*) FROM #TEMP) AS RowCountInTemp
      ,DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_NV_ms_delimitedSplit8K;

-- 2) DelimitedSplit8K fed from the VARCHAR(8000) column
SET @d = SYSUTCDATETIME();
SELECT DI.ID, DS.Item, DS.ItemNumber
INTO #TEMP2
FROM dbo.DelimitedItems DI
CROSS APPLY dbo.DelimitedSplit8K(DI.DelimitedString, ',') DS;
INSERT INTO Results(ResultSource, RowsCount, durationMS)
SELECT 'delimited8K with VARCHAR(8000)'
      ,(SELECT COUNT(*) FROM #TEMP2) AS RowCountInTemp
      ,DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_V_ms_delimitedSplit8K;

-- 3) OPENJSON fed from the NVARCHAR(4000) column ([Key] is the zero-based index)
SET @d = SYSUTCDATETIME();
SELECT DI.ID, OJ.[Value] AS Item, OJ.[Key] AS ItemNumber
INTO #TEMP3
FROM dbo.DelimitedItems DI
CROSS APPLY OPENJSON('["' + REPLACE(DI.DelimitedNString, ',', '","') + '"]') OJ;
INSERT INTO Results(ResultSource, RowsCount, durationMS)
SELECT 'OPENJSON with NVARCHAR(4000)'
      ,(SELECT COUNT(*) FROM #TEMP3) AS RowCountInTemp
      ,DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_NV_ms_OPENJSON;

-- 4) OPENJSON fed from the VARCHAR(8000) column
SET @d = SYSUTCDATETIME();
SELECT DI.ID, OJ.[Value] AS Item, OJ.[Key] AS ItemNumber
INTO #TEMP4
FROM dbo.DelimitedItems DI
CROSS APPLY OPENJSON('["' + REPLACE(DI.DelimitedString, ',', '","') + '"]') OJ;
INSERT INTO Results(ResultSource, RowsCount, durationMS)
SELECT 'OPENJSON with VARCHAR(8000)'
      ,(SELECT COUNT(*) FROM #TEMP4) AS RowCountInTemp
      ,DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_V_ms_OPENJSON;
GO
SELECT * FROM Results;
GO
--Clean up
DROP TABLE #TEMP;
DROP TABLE #TEMP2;
DROP TABLE #TEMP3;
DROP TABLE #TEMP4;
USE master;
GO
DROP DATABASE dbTest;
Results:
200 items in 500 rows
1220 delimited8K with NVARCHAR(4000)
274 delimited8K with VARCHAR(8000)
417 OPENJSON with NVARCHAR(4000)
443 OPENJSON with VARCHAR(8000)
100 items in 500 rows
421 delimited8K with NVARCHAR(4000)
140 delimited8K with VARCHAR(8000)
213 OPENJSON with NVARCHAR(4000)
212 OPENJSON with VARCHAR(8000)
100 items in 5 rows
10 delimited8K with NVARCHAR(4000)
5 delimited8K with VARCHAR(8000)
3 OPENJSON with NVARCHAR(4000)
4 OPENJSON with VARCHAR(8000)
5 items in 500 rows
32 delimited8K with NVARCHAR(4000)
30 delimited8K with VARCHAR(8000)
28 OPENJSON with NVARCHAR(4000)
24 OPENJSON with VARCHAR(8000)
--unlimited length (only possible with OPENJSON)
--Without a TOP clause while filling
--results in about 500 items in 500 rows
1329 OPENJSON with NVARCHAR(4000)
1117 OPENJSON with VARCHAR(8000)
Conclusion:
the popular splitter function does not like NVARCHAR
the function is limited to strings of at most 8,000 bytes
Only the case with many items and many rows in VARCHAR lets the splitter function be ahead.
In all other cases OPENJSON seems to be more or less faster...
OPENJSON can deal with (almost) unlimited counts
OPENJSON requires SQL Server 2016 or later
Everybody is waiting for STRING_SPLIT with the position
UPDATE Added STRING_SPLIT to the test
In the meantime I re-ran the test with two more test sections using STRING_SPLIT(). As the position I had to return a hardcoded value, because this function does not return the part's index.
In all tested cases OPENJSON was close with STRING_SPLIT and often faster:
5 items in 1000 rows
250 delimited8K with NVARCHAR(4000)
124 delimited8K with VARCHAR(8000) --this function is best with many rows in VARCHAR
203 OPENJSON with NVARCHAR(4000)
204 OPENJSON with VARCHAR(8000)
235 STRING_SPLIT with NVARCHAR(4000)
234 STRING_SPLIT with VARCHAR(8000)
200 items in 30 rows
140 delimited8K with NVARCHAR(4000)
31 delimited8K with VARCHAR(8000)
47 OPENJSON with NVARCHAR(4000)
31 OPENJSON with VARCHAR(8000)
47 STRING_SPLIT with NVARCHAR(4000)
31 STRING_SPLIT with VARCHAR(8000)
100 items in 10.000 rows
8145 delimited8K with NVARCHAR(4000)
2806 delimited8K with VARCHAR(8000) --fast with many rows!
5112 OPENJSON with NVARCHAR(4000)
4501 OPENJSON with VARCHAR(8000)
5028 STRING_SPLIT with NVARCHAR(4000)
5126 STRING_SPLIT with VARCHAR(8000)
The simple answer is, no. Microsoft so far have refused to provide Ordinal position as part of the return dataset in STRING_SPLIT. You'll need to use a different solution I'm afraid. For example Jeff Moden's DelimitedSplit8k.
(Yes, I realise this is more or less a link only answer, however, pasting Jeff's solution here would effectively be plagiarism).
If you were to use Jeff's solution, then you would be able to do something like:
SELECT *
FROM dbo.DelimitedSplit8K('a,b,c,d,e,f,g,h,i,j,k',',') DS
WHERE ItemNumber = 2;
Of course, you'd likely be passing column rather than a literal string.
I just extended @Shnugo's answer to use STRING_ESCAPE, for the case where the text to split contains line breaks, unicode, or other characters that are not valid inside a JSON string.
My Test code with pipe as separator instead comma:
-- JSON-escape both the separator and the text before building the array, so
-- quotes, line breaks and other JSON-special characters survive the split.
-- NVARCHAR variables and N'' literals keep the non-ASCII characters intact
-- regardless of the database code page; the separator variable is wide enough
-- to hold an escaped separator such as \" or \u0001.
DECLARE @Separator NVARCHAR(10) = STRING_ESCAPE(N'|', 'json'); -- here pipe or use any other separator (even ones escaped by json)
DECLARE @LongText NVARCHAR(MAX) = N'Albert says: "baby, listen!"|ve Çağrı söylüyor: "Elma"|1st Line' + NCHAR(13) + NCHAR(10) + N'2nd line';

SELECT [key], [value], [type]
FROM OPENJSON(N'["' + REPLACE(STRING_ESCAPE(@LongText, 'json'), @Separator, N'","') + N'"]'); -- ok
-- SELECT * FROM OPENJSON('["' + REPLACE(@LongText, @Separator ,'","') + '"]'); -- fails with: JSON text is not properly formatted. ...
Updated due to comment from Simon Zeinstra
I didn't want to deal with OPENJSON, but still wanted to get string_split() value by index.
The performance was not an issue in my case.
I used CTE (Common Table Expression)
Assume you have string str = "part1 part2 part3".
-- Number the split parts alphabetically by their value (NOT by their position
-- in the input string) and return the one numbered 2.
SELECT parts.[value], parts.[RowNumber]
FROM (
    SELECT s.[value],
           ROW_NUMBER() OVER (ORDER BY s.[value] ASC) AS [RowNumber]
    FROM STRING_SPLIT('part1 part2 part3', ' ') AS s
) AS parts
WHERE parts.[RowNumber] = 2
BUT: please be aware that the order of 3 parts is changed according to ORDER BY condition!
The output for the second row with "part2" value:
Using STRING_SPLIT:
STRING_SPLIT ( string , separator [ , enable_ordinal ] )
enable_ordinal
An int or bit expression that serves as a flag to enable or disable the ordinal output column. A value of 1 enables the ordinal column. If enable_ordinal is omitted, NULL, or has a value of 0, the ordinal column is disabled.
The enable_ordinal argument and ordinal output column are currently only supported in Azure SQL Database, Azure SQL Managed Instance, and Azure Synapse Analytics (serverless SQL pool only).
Query:
SELECT value FROM STRING_SPLIT('part1_part2_part3', '_', 1) WHERE ordinal = 2;
Here is my workaround. I will follow the Question waiting for a better answer:
UPDATED: Original code did not take into consideration if a word contains another.
UPDATE 2: Performance was horrible in production, so I had to think of another way. You will find it at the end as option 2, the implementation for a table.
UPDATE 3: Added code for UDF in the implementation in a string.
Implementation in a string:
-- List every word of @a together with the character position at which it
-- occurs, so the words can be put back into their original order
-- (STRING_SPLIT itself returns no position).
declare @a as nvarchar(100) = 'Lorem ipsum dolor dol ol sit amet. D Lorem DO ipsum DOL dolor sit amet. DOLORES ipsum';

WITH T AS (
    -- every non-empty word, the position of its FIRST occurrence as a whole
    -- word, and RN numbering the repeated occurrences of the same word
    SELECT T1.value
          ,charindex(' ' + T1.value + ' ',' ' + @a + ' ' ,0) AS INDX
          ,RN = ROW_NUMBER() OVER (PARTITION BY value order BY value)
    FROM STRING_SPLIT(@a, ' ') AS T1
    WHERE T1.value <> ''
),
R (VALUE,INDX,RN) AS (
    -- first occurrence of each word
    SELECT T.value, T.INDX, T.RN
    FROM T
    WHERE T.RN = 1
    UNION ALL
    -- n-th occurrence: continue searching behind the (n-1)-th occurrence
    SELECT T.VALUE
          ,charindex(' ' + T.value + ' ',' ' + @a + ' ',R.INDX + 1) AS INDX
          ,T.RN
    FROM T
    JOIN R
      ON T.value = R.VALUE
     AND T.RN = R.RN + 1
)
SELECT VALUE, INDX, RN FROM R ORDER BY INDX
result:
tableOfResults
UDF:
-- Returns the word at 1-based position @wordNumber of the space-separated
-- @string, or NULL when the string has fewer words.
--   @string     : text to split on single spaces (empty parts are ignored)
--   @wordNumber : 1-based ordinal of the wanted word
CREATE FUNCTION DBO.UDF_get_word(@string nvarchar(100),@wordNumber int)
returns nvarchar(100)
AS
BEGIN
    DECLARE @searchedWord nvarchar(100);

    WITH T AS (
        -- every non-empty word, the position of its first occurrence as a
        -- whole word, and RN numbering repeated occurrences of the same word
        SELECT T1.value
              ,charindex(' ' + T1.value + ' ',' ' + @string + ' ' ,0) AS INDX
              ,RN = ROW_NUMBER() OVER (PARTITION BY value order BY value)
        FROM STRING_SPLIT(@string, ' ') AS T1
        WHERE T1.value <> ''
    ),
    R (VALUE,INDX,RN) AS (
        -- first occurrence of each word
        SELECT T.value, T.INDX, T.RN
        FROM T
        WHERE T.RN = 1
        UNION ALL
        -- n-th occurrence: continue searching behind the (n-1)-th occurrence
        SELECT T.VALUE
              ,charindex(' ' + T.value + ' ',' ' + @string + ' ',R.INDX + 1) AS INDX
              ,T.RN
        FROM T
        JOIN R
          ON T.value = R.VALUE
         AND T.RN = R.RN + 1
    )
    -- ORD turns the character positions into word ordinals
    SELECT @searchedWord = (value)
    FROM ( SELECT *, ORD = ROW_NUMBER() OVER (ORDER BY INDX) FROM R ) AS TBL
    WHERE ORD = @wordNumber;

    RETURN @searchedWord;
END
GO
Modification for a column in a table, OPTION 1:
-- Word positions for a string column of a table (one output row per word).
-- column1 stands for any column whose rows can share the same stringToBeSplit;
-- such columns create duplicates and therefore have to take part in the
-- PARTITION BY and in the recursive join below.
WITH T AS (
    SELECT T1.stringToBeSplit
          ,T1.column1
          ,T1.column2
          ,T1.value
          ,T1.column3
          /*,...any other column*/
          ,charindex(' ' + T1.value + ' ',' ' + T1.stringToBeSplit + ' ' ,0) AS INDX
          ,RN = ROW_NUMBER() OVER (PARTITION BY T1.column1, T1.stringToBeSplit, T1.value
                                   ORDER BY T1.column1, T1.stringToBeSplit, T1.value)
    -- NOTE(review): TOP 10 without ORDER BY returns an arbitrary sample of split rows
    FROM (SELECT TOP 10 * FROM YourTable D CROSS APPLY string_split(D.stringToBeSplit,' ')) AS T1
    WHERE T1.value <> ''
),
R (stringToBeSplit, column1, column2, value, column3, INDX, RN) AS (
    -- first occurrence of each word per (column1, stringToBeSplit)
    SELECT stringToBeSplit, column1, column2, value, column3, INDX, RN
    FROM T
    WHERE T.RN = 1
    UNION ALL
    -- n-th occurrence: continue searching behind the (n-1)-th occurrence
    SELECT T.stringToBeSplit, T.column1, T.column2, T.value, T.column3
          ,charindex(' ' + T.value + ' ',' ' + T.stringToBeSplit + ' ',R.INDX + 1) AS INDX
          ,T.RN
    FROM T
    JOIN R
      ON T.value = R.VALUE
     AND T.COLUMN1 = R.COLUMN1                 --any column that creates duplicates needs to be added here
     AND T.stringToBeSplit = R.stringToBeSplit --stay inside the same source string
     AND T.RN = R.RN + 1
)
SELECT * FROM R ORDER BY column1, stringToBeSplit, INDX
Modification for a column in a table, OPTION 2 (max performance i could get, main action came from removing the join and finding a way of properly execute (and stop) the recursive loop of the CTE, from 1.30 for 1000 lines to 2 sec for 30K lines of strings of similar type and length):
-- Word positions for a string column of a table, without a join in the
-- recursive part: R calls itself and stops as soon as charindex finds no
-- further occurrence of the word.
-- [your table] and [your filter] are placeholders to be replaced.
WITH T AS (
SELECT T1.stringToBeSplit --no extra columns this time
,T1.value
-- position of the first occurrence of the word, matched with surrounding spaces
,charindex(' ' + T1.value + ' ',' ' + T1.stringToBeSplit + ' ' ,0) AS INDX
,RN = ROW_NUMBER() OVER (PARTITION BY T1.stringToBeSplit,T1.value order BY T1.stringToBeSplit,T1.value) --in the FROM clause use DISTINCT and a WHERE filter if possible
FROM (SELECT DISTINCT stringToBeSplit, VALUE FROM [your table] D CROSS APPLY string_split(D.stringToBeSplit,' ') WHERE [your filter]) AS T1
WHERE T1.value <> ''
),
R (stringToBeSplit, value, INDX, RN) AS (
-- anchor: the first occurrence of every distinct word per string
SELECT stringToBeSplit, value, INDX, RN
FROM T
WHERE T.RN = 1
UNION ALL
-- recursive step: look for the next occurrence of the same word behind the previous one
SELECT R.stringToBeSplit, R.value
,charindex(' ' + R.value + ' ',' ' + R.stringToBeSplit + ' ',R.INDX + 1) AS INDX
,R.RN + 1
FROM R
-- stop condition: charindex returns 0 when there is no further occurrence
WHERE charindex(' ' + R.value + ' ',' ' + R.stringToBeSplit + ' ',R.INDX + 1) <> 0
)
SELECT * FROM R ORDER BY stringToBeSplit, INDX
For getting the word ordinal instead of SELECT * FROM R USE:
SELECT stringToBeSplit ,value , ROW_NUMBER() OVER (PARTITION BY stringToBeSplit order BY [indX]) AS ORD FROM R
if instead of having one RW per word you prefer one column:
select * FROM (SELECT [name 1],value , ROW_NUMBER() OVER (PARTITION BY [name 1] order BY [indX]) AS ORD FROM R ) as R2
pivot (MAX(VALUE) FOR ORD in ([1],[2],[3]) ) AS PIV
If you don't want to specify the number of columns, build the column list dynamically with QUOTENAME(), as in this link. In my case I only need the first 4 words; the rest are irrelevant for the moment. Below is the code from the page in case the link fails:
-- Dynamic PIVOT: one output column per category name.
DECLARE
    @columns NVARCHAR(MAX),
    @sql NVARCHAR(MAX);

-- build the comma-separated, quoted list of category names
-- (FOR XML PATH keeps the ORDER BY reliable, which variable concatenation
--  across rows does not guarantee)
SELECT @columns = STUFF((
        SELECT ',' + QUOTENAME(c.category_name)
        FROM production.categories AS c
        ORDER BY c.category_name
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');

-- without any category there is no column to pivot on
IF @columns IS NULL
BEGIN
    RAISERROR('No categories found - nothing to pivot.', 16, 1);
    RETURN;
END;

-- construct dynamic SQL
SET @sql ='
SELECT * FROM
(
SELECT
category_name,
model_year,
product_id
FROM
production.products p
INNER JOIN production.categories c
ON c.category_id = p.category_id
) t
PIVOT(
COUNT(product_id)
FOR category_name IN ('+ @columns +')
) AS pivot_table;';

-- execute the dynamic SQL
EXECUTE sp_executesql @sql;
Last but not least i'm really looking forward to know if there is an easier way with same performance either in SQL server or in C#. i just think everything that does not use external info should stay in the Server and run as query or batch but not sure to be honest as i heard the contrary (specially from people that use panda) but no one have convince me just yet.
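This works (as long as the string has at most four parts and contains no dots, because PARSENAME only handles four-part names):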
Example:
String = "pos1-pos2-pos3"
REVERSE(PARSENAME(REPLACE(REVERSE(String), '-', '.'), 1))
With 1 Returns "pos1"
With 2 will return "pos2"...
I have a table like this
Id Name FromAddress ToAddress
1 Joey ABC JKL
2 Joey DEF MNP
3 Joey GHI OQR
I am looking for the following output with dynamic pivot in SQL Server
Name FromAdrs1 FromAdrs2 FromAdrs3 ToAdrs1 ToAdrs2 ToAdrs3
Joey ABC DEF GHI JKL MNP OQR
Note: The number of rows changes based on the Id value, so I am trying to get the output using dynamic Pivot.
Here is the code that I tried, it's looks like it is correct, but throwing me an error.
-- Temp tables live in tempdb, so that is the database name OBJECT_ID has to
-- be given; otherwise the check never finds the table and a re-run fails on CREATE TABLE.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp
CREATE TABLE #temp
(
Id INT, Name VARCHAR(10), FromAddress VARCHAR(10), ToAddress VARCHAR(10)
)
INSERT INTO #temp VALUES (1, 'Joey', 'ABC', 'JKL'), (2, 'Joey', 'DEF', 'MNP'), (3, 'Joey', 'GHI', 'OQR')
--SELECT * FROM #temp
DECLARE #colsFrom AS NVARCHAR(MAX),
#colsTo AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX);
SET #colsFrom = STUFF((SELECT distinct ',' + QUOTENAME(CONVERT(VARCHAR(2), t.id) + 'From')
FROM #temp t
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
SET #colsTo = STUFF((SELECT distinct ',' + QUOTENAME(CONVERT(VARCHAR(2), t.id) + 'To')
FROM #temp t
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query = 'SELECT *, ' + #colsFrom + ', ' + #colsTo + ' from
(
select *
from #temp
) T
PIVOT
(
max(FromAddress)
for Id in (REPLACE('''+#colsFrom+''',''From'',''''))
) p
PIVOT
(
max(ToAddress)
for Id in (REPLACE('''+#colsTo+''',''To'',''''))
) Q'
execute(#query)
DROP TABLE #temp
Any help is appreciated. Thank you all for your time.
Edit: This is the error
I believe the problem is the IN () expression in the PIVOTs. The column list explicitly has to be a list of fields names, not a function and not a list of varchar literals or function values. You've got a REPLACE() function in there. The engine expects to be looking for a field named [REPLACE] and then gets confused by the open parentheses that shows up.
This is valid (square brackets for emphasis):
SELECT VendorID, Employee, Orders
FROM
(SELECT VendorID, Emp1, Emp2, Emp3, Emp4, Emp5
FROM pvt) p
UNPIVOT
(Orders FOR Employee IN
([Emp1], [Emp2], [Emp3], [Emp4], [Emp5])
)AS unpvt;
This is not:
SELECT VendorID, Employee, Orders
FROM
(SELECT VendorID, Emp1, Emp2, Emp3, Emp4, Emp5
FROM pvt) p
UNPIVOT
(Orders FOR Employee IN
('Emp1', 'Emp2', 'Emp3', 'Emp4', 'Emp5')
)AS unpvt;
And this is not valid:
SELECT VendorID, Employee, Orders
FROM
(SELECT VendorID, Emp1, Emp2, Emp3, Emp4, Emp5
FROM pvt) p
UNPIVOT
(Orders FOR Employee IN
(REPLACE('Emp1','1','A'), REPLACE('Emp2','2','B'))
)AS unpvt;
Replace the execute(#query) with a select #query or print #query to see the query your code generated and troubleshoot the syntax in a query analyzer that way. Then work backwards.
You want to do the REPLACE() at the same level you're building the query. The query that ends up in the #query variable should already have the column names fixed.
Alternately, you could generate #colsFromLabels, #colsToLabels, #colsFrom and #colsTo with the former two have the 'from' and to bits added and the latter two just being column names.
Your desired output is a little gross as far as square bracket escaping, too.
ok, I created a temp table to do some testing on. The solution requires an unpivot first. I recommend running with/without the extra test data to get a sense of some other behaviors that surround this solution -- the weirdness that comes with the MAX aggregation and lack of new rows that you might have expected when changing the value in 'name'.
GL. Hope it helps.
-------------------------
-- Some test data here
CREATE table #addresses ( Id int, Name varchar(5), FromAddress varchar(5), ToAddress varchar(5))
insert into #addresses(id, Name, FromAddress, ToAddress) Values
(1,'Joey', 'ABC', 'JKL')
, (2,'Joey', 'DEF', 'MNO')
, (3,'Joey', 'GHI', 'PQR')
, (4,'Spike', 'XXX', 'YYY')
, (1,'Spike', 'QQQ', 'RRR')
-------------------------
-- Solution starts here. create a temp table and unpivot your data into it.
-- Your initial technique does not work: PIVOT only supports one aggregation
CREATE table #unpvt(RowColCode varchar(20), vals varchar(20))
Insert into #unpvt
SELECT ColCode + '_' + Cast(ID as varchar(2)) as RowColCode, vals
FROM #addresses a
UNPIVOT
(vals for ColCode in (Name,FromAddress,ToAddress)) c
-------------------------
-- Read the temp table for a column list
-- Build the list of pivot columns from the distinct RowColCode values in
-- #unpvt; QUOTENAME brackets each name and escapes any ] inside it.
declare @ColList nvarchar(max)
set @ColList = STUFF((
    SELECT distinct ',' + QUOTENAME(t.RowColCode)
    FROM #unpvt t
    FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)'),1,1,'')
-------------------------
-- 're pivot' the data using your new column list
-- (nvarchar so the column list is not converted to the database code page)
declare @qry nvarchar(max)
set @qry = N'
select *
from
#unpvt
PIVOT(
MAX(vals)
FOR RowColCode in (' + @ColList + N')
) rslt
'
execute(@qry)
I have a number of text files that are in a format similar to what is shown below.
ENTRY,1,000000,Widget 4000,1,,,2,,
FIELD,Type,A
FIELD,Component,Widget 4000
FIELD,Vendor,Acme
ENTRY,2,000000,PRODUCT XYZ,1,,,3,
FIELD,Type,B
FIELD,ItemAssembly,ABCD
FIELD,Component,Product XYZ - 123
FIELD,Description1,Product
FIELD,Description2,XYZ-123
FIELD,Description3,Alternate Part #440
FIELD,Vendor,Contoso
They have been imported into a table with VARCHAR(MAX) as the only field. Each ENTRY is a "new" item, and all the subsequent FIELD rows are properties of that item. The data next to the FIELD is the column name of the property. The data to the right of the property is the data I want to display.
The desired output would be:
ENTRY Type Component Vendor ItemAssembly Description1
1,000000,Widget 4000 A Widget 4000 Acme
2,000000,Product XYZ B Product XYZ-123 Contoso ABCD Product
I've got the column names using the code below (there are several tables that I have UNIONed together to list all the property names).
select #cols =
STUFF (
(select Distinct ', ' + QUOTENAME(ColName) from
(SELECT
SUBSTRING(ltrim(textFileData),CHARINDEX(',', textFileData, 1)+1,CHARINDEX(',', textFileData, CHARINDEX(',', textFileData, 1)+1)- CHARINDEX(',', textFileData, 1)-1) as ColName
FROM [MyDatabase].[dbo].[MyTextFile]
where
(LEFT(textFileData,7) LIKE #c)
UNION
....
) A
FOR XML PATH(''), TYPE).value('.','NVARCHAR(MAX)'),1,1,'')
Is a Pivot table the best way to do this? No aggregation is needed. Is there a better way to accomplish this? I want to list out data next to the FIELD name in a column format.
Thanks!
Here is the solution in SQL fiddle:
http://sqlfiddle.com/#!3/8f0b0/8
Prepare raw data in format (entry, field, value), use dynamic SQL to make pivot on unknown column count.
MAX() for string is enough to simulate "without aggregate" behavior in this case.
create table t(data varchar(max))
insert into t values('ENTRY,1,000000,Widget 4000,1,,,2,,')
insert into t values('FIELD,Type,A')
insert into t values('FIELD,Component,Widget 4000')
insert into t values('FIELD,Vendor,Acme ')
insert into t values('ENTRY,2,000000,PRODUCT XYZ,1,,,3,')
insert into t values('FIELD,Type,B')
insert into t values('FIELD,ItemAssembly,ABCD')
insert into t values('FIELD,Component,Product XYZ - 123')
insert into t values('FIELD,Description1,Product ')
insert into t values('FIELD,Description2,XYZ-123 ')
insert into t values('FIELD,Description3,Alternate Part #440')
insert into t values('FIELD,Vendor,Contoso');
create type preparedtype as table (entry varchar(max), field varchar(max), value varchar(max))
-- Table variable holding the raw data as (entry, field, value) rows.
declare @prepared preparedtype

-- NOTE(review): t has no ordering column; the row numbering below assumes the
-- rows come back in the order they were inserted, which is not guaranteed.
;with identified as
(
    -- number the rows and split off the leading ENTRY / FIELD marker
    select
        row_number ( ) over (order by (select 1)) as id,
        substring(data, 1, charindex(',', data) - 1) as type,
        substring(data, charindex(',', data) + 1, len(data)) as data
    from t
)
, tree as
(
    -- attach every row to the closest preceding ENTRY row
    select
        id,
        (select max(id)
         from identified
         where type = 'ENTRY'
           and id <= i.id) as parentid,
        type,
        data
    from identified as i
)
, pivotsrc as
(
    -- p = ENTRY rows (their own parent), c = the FIELD rows below them
    select
        p.data as entry,
        substring(c.data, 1, charindex(',', c.data) - 1) as field,
        substring(c.data, charindex(',', c.data) + 1, len(c.data)) as value
    from tree as p
    inner join tree as c on c.parentid = p.id
    where p.id = p.parentid
      and c.parentid <> c.id
)
insert into @prepared
select * from pivotsrc

declare @dynamicPivotQuery as nvarchar(max)
declare @columnName as nvarchar(max)

-- comma-separated, quoted list of all distinct field names
select @columnName = ISNULL(@columnName + ',','')
    + QUOTENAME(field)
from (select distinct field from @prepared) AS fields

-- the table variable is handed to the dynamic statement as a read-only
-- table-valued parameter
set @dynamicPivotQuery = N'select * from @prepared
pivot (max(value) for field in (' + @columnName + ')) as result'
exec sp_executesql @dynamicPivotQuery, N'@prepared preparedtype readonly', @prepared
Here you are; this comes back exactly as you need it. I love tricky SQL :-). This is a real ad-hoc, single-statement call.
-- Turn a one-column "ENTRY / FIELD" text import into one row per ENTRY with
-- its FIELD values as columns.
-- NOTE(review): @tbl has no ordering column; inx assumes the rows are read in
-- insertion order, which ROW_NUMBER() over a constant does not guarantee.
-- NOTE(review): the CAST to XML fails on values containing & or < ; escape
-- them first if the data can contain such characters.
DECLARE @tbl TABLE(OneCol VARCHAR(MAX));
INSERT INTO @tbl (OneCol)
VALUES('ENTRY,1,000000,Widget 4000,1,,,2,,')
,('FIELD,Type,A')
,('FIELD,Component,Widget 4000')
,('FIELD,Vendor,Acme ')
,('ENTRY,2,000000,PRODUCT XYZ,1,,,3,')
,('FIELD,Type,B')
,('FIELD,ItemAssembly,ABCD')
,('FIELD,Component,Product XYZ - 123')
,('FIELD,Description1,Product ')
,('FIELD,Description2,XYZ-123 ')
,('FIELD,Description3,Alternate Part #440')
,('FIELD,Vendor,Contoso');

WITH OneColumn AS
(
    -- number the lines and turn each comma-separated line into <root><r>..</r>..</root>
    SELECT ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS inx
          ,CAST('<root><r>' + REPLACE(OneCol,',','</r><r>') + '</r></root>' AS XML) AS Split
    FROM @tbl AS tbl
)
,AsParts AS
(
    -- the first five comma-separated parts of every line
    SELECT inx
          ,Each.part.value('/root[1]/r[1]','varchar(max)') AS Part1
          ,Each.part.value('/root[1]/r[2]','varchar(max)') AS Part2
          ,Each.part.value('/root[1]/r[3]','varchar(max)') AS Part3
          ,Each.part.value('/root[1]/r[4]','varchar(max)') AS Part4
          ,Each.part.value('/root[1]/r[5]','varchar(max)') AS Part5
    FROM OneColumn
    CROSS APPLY Split.nodes('/root') AS Each(part)
)
,TheEntries AS
(
    SELECT DISTINCT inx, Part2, Part3, Part4
    FROM AsParts
    WHERE Part1='ENTRY'
)
SELECT e.Part2 + ',' + e.Part3 + ',' + e.Part4 AS [ENTRY]
      ,MyFields.AsXML.value('(fields[1]/field[Part2="Type"])[1]/Part3[1]','varchar(max)') AS [Type]
      ,MyFields.AsXML.value('(fields[1]/field[Part2="Component"])[1]/Part3[1]','varchar(max)') AS Component
      ,MyFields.AsXML.value('(fields[1]/field[Part2="Vendor"])[1]/Part3[1]','varchar(max)') AS Vendor
      ,MyFields.AsXML.value('(fields[1]/field[Part2="ItemAssembly"])[1]/Part3[1]','varchar(max)') AS ItemAssembly
      ,MyFields.AsXML.value('(fields[1]/field[Part2="Description1"])[1]/Part3[1]','varchar(max)') AS Description1
FROM TheEntries AS e
CROSS APPLY
(
    -- the FIELD lines between this ENTRY and the NEXT one; MIN picks the
    -- nearest following ENTRY, so with three or more entries no fields of
    -- later entries leak into this one
    SELECT ap.Part2, ap.Part3
    FROM AsParts AS ap
    WHERE ap.Part1='FIELD'
      AND ap.inx > e.inx
      AND ap.inx < ISNULL((SELECT MIN(nextEntry.inx)
                           FROM TheEntries AS nextEntry
                           WHERE nextEntry.inx > e.inx),10000000)
    ORDER BY ap.inx
    FOR XML PATH('field'), ROOT('fields'),TYPE
) AS MyFields(AsXML)
-- Pivot table with one row and four columns
SELECT 'Values' tValues,
ID,Name,ValueID,Value FROM (
Select ID,Name,ValueID,Value FROM Table WHERE OptionID = 1000000
) AS SourceTable
PIVOT (
COUNT(tValues)
FOR tValues IN ( ID,Attribute,ValueID,Value )
) AS PivotTable;
I'm going off the example at Microsoft.com: http://msdn.microsoft.com/en-us/library/ms177410.aspx
But there are a few things about Pivot i don't really understand, so don't be surprised when you see it in the code above, such as COUNT(tValues), I have no idea what this is for, by judging from the example on microsoft, it seems to be always some sort of numeric value, so i figured i'd try it to see if it would return something, but all it returns is an error. Anyhow, if someone out there can share why this query doesn't work, and possibly explain what the numeric value above the FOR is used for?
The table contains an arbitrary number of rows with four columns, so it looks like this:
ID | Name | ValueID | Value
100 | Color | 10000 | Black
101 | Size | 10005 | Large
The output should be like this:
Name_100 | Color | Name_101 | Size |
10000 | Black | 10005 | Large |
Something like this maybe.
This will only work if the name column is unique. If not then you might want to append an id on it.
So first some test data:
CREATE TABLE tblValues
(
ID INT,
Name VARCHAR(100),
ValueID INT,
Value VARCHAR(100)
)
INSERT INTO tblValues
VALUES
(100,'Color',10000,'Black'),
(101,'Size',10005,'Large')
Then you need to get the columns to pivot on:
-- Build the pivot column list: for every row first the generated Name_<ID>
-- column, then the column named after the row's Name value.
DECLARE @cols VARCHAR(MAX)
;WITH CTE AS
(
    SELECT
        'Name_'+CAST(tbl.ID AS VARCHAR(100)) AS Name,
        'Name_'+CAST(tbl.ID AS VARCHAR(100)) AS Sort,
        tbl.ID
    FROM
        tblValues AS tbl
    UNION ALL
    SELECT
        tbl.Name,
        'Value_'+CAST(tbl.ID AS VARCHAR(100)) AS Sort,
        tbl.ID
    FROM
        tblValues AS tbl
)
-- FOR XML PATH keeps the ORDER BY reliable; concatenating into a variable
-- across rows does not guarantee the order (or even the complete list)
SELECT
    @cols = STUFF((
        SELECT ',' + QUOTENAME(CTE.Name)
        FROM CTE
        ORDER BY
            CTE.ID,
            CTE.Sort
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)'), 1, 1, '')
Then declaring and executing the dynamic sql like this:
-- Dynamic PIVOT over the columns collected in @cols.
-- NVARCHAR(MAX) so a long column list cannot be truncated silently.
DECLARE @query NVARCHAR(MAX)=
N'SELECT
*
FROM
(
SELECT
''Name_''+CAST(tbl.ID AS VARCHAR(100)) AS pivotName,
CAST(tbl.ValueID AS VARCHAR(100)) AS name
FROM
tblValues AS tbl
UNION ALL
SELECT
tbl.Name AS pivotName,
tbl.Value AS name
FROM
tblValues AS tbl
) AS p
PIVOT
(
MAX(name)
FOR pivotName IN ('+@cols+')
) AS pvt'
EXECUTE(@query)
Then in my case I will drop the table I have created
DROP TABLE tblValues
Edit
Or in you case it should be something like this:
First the columns:
-- Build the pivot column list for the rows of [Table] with OptionID 1000000:
-- for every row first the generated Name_<ID> column, then the column named
-- after the row's Name value.
DECLARE @cols VARCHAR(MAX)
;WITH CTE AS
(
    SELECT
        'Name_'+CAST(tbl.ID AS VARCHAR(100)) AS Name,
        'Name_'+CAST(tbl.ID AS VARCHAR(100)) AS Sort,
        tbl.ID
    FROM
        [Table] AS tbl
    WHERE
        tbl.OptionID = 1000000
    UNION ALL
    SELECT
        tbl.Name,
        'Value_'+CAST(tbl.ID AS VARCHAR(100)) AS Sort,
        tbl.ID
    FROM
        [Table] AS tbl
    WHERE
        tbl.OptionID = 1000000
)
-- FOR XML PATH keeps the ORDER BY reliable; concatenating into a variable
-- across rows does not guarantee the order (or even the complete list)
SELECT
    @cols = STUFF((
        SELECT ',' + QUOTENAME(CTE.Name)
        FROM CTE
        ORDER BY
            CTE.ID,
            CTE.Sort
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)'), 1, 1, '')
Then the dynamic sql.
-- Dynamic PIVOT over the columns collected in @cols.
-- NVARCHAR(MAX) so a long column list cannot be truncated silently.
DECLARE @query NVARCHAR(MAX)=
N'SELECT
*
FROM
(
SELECT
''Name_''+CAST(tbl.ID AS VARCHAR(100)) AS pivotName,
CAST(tbl.ValueID AS VARCHAR(100)) AS name
FROM
[Table] AS tbl
WHERE
tbl.OptionID = 1000000
UNION ALL
SELECT
tbl.Name AS pivotName,
tbl.Value AS name
FROM
[Table] AS tbl
WHERE
tbl.OptionID = 1000000
) AS p
PIVOT
(
MAX(name)
FOR pivotName IN ('+@cols+')
) AS pvt'
EXECUTE(@query)
You do not need to create or drop the table. I only did that because I did not have your table in my database, and so that anyone else who wants to run the example can do so.
If you want to use Pivot tables with a variable number of columns, then I'd suggest using something along the lines of;
-- Dynamic PIVOT with a variable number of columns, one per distinct Name.
-- MAX-sized variables so long column lists are not truncated.
DECLARE @cols NVARCHAR(MAX)
DECLARE @query NVARCHAR(MAX)

-- quoted, comma-separated list of the distinct names
-- (Table is a reserved word and therefore has to be written as [Table])
SELECT @cols = STUFF(( SELECT DISTINCT
                           ',' + QUOTENAME(Name)
                       FROM [Table]
                       ORDER BY ',' + QUOTENAME(Name)
                       FOR XML PATH(''), TYPE
                     ).value('.', 'NVARCHAR(MAX)'), 1, 1, '')

SET @query =
N'SELECT * FROM
(
SELECT col1, col2, col3, whateverColYourInterestedIn, Name, Value
FROM [Table]
)t
PIVOT (MAX(Value) FOR Name
IN ('+@cols+N')) AS pvt'
EXECUTE (@query)
That is probably not quite right, but it should hopefully be a starting point for you.
For more info, check out links such as this or this.
I have a table with data as given below:
DATE Price
---------- ------
31/12/2009 10
31/12/2009 11
31/12/2009 12
30/12/2009 20
30/12/2009 21
30/12/2009 22
29/12/2009 30
29/12/2009 32
29/12/2009 31
I want to convert this data as given below:
31/12/2009 30/12/2009 29/12/2009
---------- ---------- ----------
10 10 10
11 11 11
12 12 12
But the values in the date column is dynamic. So, I dont know how to use this using SQL Server Pivot.
Could you please let me know how to get this data.
Given below is the script to replicate this scenario:
CREATE TABLE TEMP(EffectiveDate DATETIME,Price INT)
INSERT INTO TEMP(EffectiveDate,Price)
SELECT GETDATE(),10
UNION ALL
SELECT GETDATE(),11
UNION ALL
SELECT GETDATE(),12
UNION ALL
SELECT GETDATE()-1,20
UNION ALL
SELECT GETDATE()-1,21
UNION ALL
SELECT GETDATE()-1,22
UNION ALL
SELECT GETDATE()-2,30
UNION ALL
SELECT GETDATE()-2,32
UNION ALL
SELECT GETDATE()-2,31
SELECT CONVERT(VARCHAR,EffectiveDATE,103) AS 'DATE',Price FROM Temp
Thanks in advance,
Mahesh
OK, as I mentioned, your data does not make sense, but maybe this can help.
The only way to create a dynamic pivot, is by creating dynamic sql.
Also, PIVOT requires that you use an Aggregate function (SUM, AVG, COUNT).
Ok, let see if this can help you.
CREATE TABLE #TEMP (EffectiveDate DATETIME,Price INT)
INSERT INTO #TEMP(EffectiveDate,Price)
SELECT GETDATE(),10
UNION ALL
SELECT GETDATE(),11
UNION ALL
SELECT GETDATE(),12
UNION ALL
SELECT GETDATE()-1,20
UNION ALL
SELECT GETDATE()-1,21
UNION ALL
SELECT GETDATE()-1,22
UNION ALL
SELECT GETDATE()-2,30
UNION ALL
SELECT GETDATE()-2,32
UNION ALL
SELECT GETDATE()-2,31
-- One pivot column per distinct day in #TEMP, newest day first.
-- The days are ordered as dates, not as dd/mm/yyyy strings, so the order stays
-- chronological across month and year boundaries.
DECLARE @cols NVARCHAR(MAX)
SELECT @cols = STUFF((
        SELECT ',' + QUOTENAME(CONVERT(VARCHAR(10), d.EffectiveDay, 103))
        FROM (
            SELECT DISTINCT
                CAST(EffectiveDate AS DATE) AS EffectiveDay
            FROM #TEMP
        ) AS d
        ORDER BY d.EffectiveDay DESC
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '')

-- PIVOT needs an aggregate; SUM adds up all prices of a day into one row
DECLARE @query NVARCHAR(MAX)
SET @query = N'SELECT *
FROM
(SELECT CONVERT(VARCHAR,EffectiveDATE,103) AS ''DATE'',Price
FROM #TEMP) p
PIVOT
(
SUM(Price) FOR DATE IN
( '+
@cols +N' )
) AS pvt'
EXECUTE(@query)
DROP TABLE #TEMP