SQL Server: combining multiple rows into one row - sql-server

I have a SQL query like this;
SELECT *
FROM Jira.customfieldvalue
WHERE CUSTOMFIELD = 12534
AND ISSUE = 19602
And that's the results;
What I want is; showing in one row (cell) combined all STRINGVALUE's and they are separated with a comma. Like this;
SELECT --some process with STRINGVALUE--
FROM Jira.customfieldvalue
WHERE CUSTOMFIELD = 12534
AND ISSUE = 19602
Araç Listesi (C2, K1 vb.Belgeler; yoksa Ruhsat Fotokopileri), Min. 5
araç plakası için İnternet Sorgusu, Son 3 Yıla Ait Onaylı Yıl Sonu
Bilanço + Gelir Tablosu, Son Yıl (Yıl Sonuna ait) Detay Mizanı, İçinde
Bulunduğumuz Yıla ait Ara Dönem Geçici Vergi Beyannamesi, Bayi Yorum
E-Maili, Proforma Fatura
How can I do that?

There are several methods.
If you want just the consolidated string value returned, this is a good quick and easy approach
-- Concatenate every stringvalue for one issue / custom field into a single
-- comma-separated string held in a local variable.
-- Local variables take the @ sigil; a # prefix names a temporary table.
DECLARE @combinedString VARCHAR(MAX);

-- On the first row the variable is still NULL, so COALESCE yields '' and the
-- result gets no leading separator.
SELECT @combinedString = COALESCE(@combinedString + ', ', '') + stringvalue
FROM jira.customfieldValue
WHERE customfield = 12534
    AND ISSUE = 19602;

SELECT @combinedString AS StringValue;
Which will return your combined string.
You can also try one of the XML methods e.g.
-- One row per (Issue, Customfield) with all of its StringValue entries
-- concatenated. Every value is followed by ',', so the result ends with a
-- trailing comma.
SELECT DISTINCT
    v1.Issue,
    v1.Customfield,
    D.StringValues
FROM Jira.customfieldvalue AS v1
CROSS APPLY (
    -- The source column is StringValue; StringValues is only the name that
    -- D (StringValues) gives to the concatenated output.
    SELECT v2.StringValue + ','
    FROM Jira.customfieldvalue AS v2
    WHERE v2.Customfield = v1.Customfield
        AND v2.Issue = v1.Issue
    ORDER BY v2.ID
    FOR XML PATH('')
) AS D (StringValues)
WHERE v1.Customfield = 12534
    AND v1.Issue = 19602;

You can achieve this by combining FOR XML PATH and STUFF as follows:
-- Combine all StringValue rows into one ', '-separated string.
SELECT STUFF(
    (
        SELECT ', ' + StringValue
        FROM Jira.customfieldvalue
        WHERE CUSTOMFIELD = 12534
            AND ISSUE = 19602
        -- TYPE + .value() returns the text unescaped; plain FOR XML PATH('')
        -- would turn &, < and > into &amp;, &lt; and &gt;.
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'),
    1, 2, ''  -- strip the leading ', ' (2 characters)
) AS StringValue;

This is an old question, but as of the release of Microsoft SQL Server 2017 you can now use the STRING_AGG() function which is much like the GROUP_CONCAT function in MySQL.
STRING_AGG (Transact-SQL) Documentation
Example
USE AdventureWorks2016
GO
SELECT STRING_AGG (CONVERT(NVARCHAR(max),FirstName), ',') AS csv
FROM Person.Person;
Returns
Syed,Catherine,Kim,Kim,Kim,Hazem

There's a convenient method for this in MySql called GROUP_CONCAT. An equivalent for SQL Server doesn't exist, but you can write your own using the SQLCLR. Luckily someone already did that for you.
Your query then turns into this (which btw is a much nicer syntax):
SELECT CUSTOMFIELD, ISSUE, dbo.GROUP_CONCAT(STRINGVALUE)
FROM Jira.customfieldvalue
WHERE CUSTOMFIELD = 12534 AND ISSUE = 19602
GROUP BY CUSTOMFIELD, ISSUE
But please note that this method is good for at the most 100 rows within a group. Beyond that, you'll have major performance problems. SQLCLR aggregates have to serialize any intermediate results and that quickly piles up to quite a lot of work. Keep this in mind!
Interestingly the FOR XML doesn't suffer from the same problem but instead uses that horrendous syntax.

I believe for databases which support listagg function, you can do:
-- LISTAGG variant: concatenate stringvalue per group, ordered by id.
-- id is deliberately not part of SELECT / GROUP BY: assuming id is unique per
-- row, grouping by it would put every row in its own group and nothing would
-- be combined.
select issue, customfield, parentkey,
       listagg(stringvalue, ',') within group (order by id) as stringvalues
from jira.customfieldvalue
where customfield = 12534 and issue = 19602
group by issue, customfield, parentkey

Using MySQL inbuilt function group_concat() will be a good choice for getting the desired result. The syntax will be -
SELECT group_concat(STRINGVALUE)
FROM Jira.customfieldvalue
WHERE CUSTOMFIELD = 12534
AND ISSUE = 19602
Before you execute the above command, make sure you increase the size of group_concat_max_len; otherwise the whole output may not fit in that cell.
To set the value of group_concat_max_len, execute the below command-
SET group_concat_max_len = 50000;
You can change the value 50000 as needed; increase it if the combined output is longer.

-- Salaried rows with the titles of all their operational units combined into
-- one ' - '-separated column, OperationalUnitTitles.
CREATE VIEW [dbo].[ret_vwSalariedForReport]
AS
WITH temp1 AS (
    -- One row per salaried row and matching operational unit (LEFT JOINs keep
    -- salaried rows that have no unit).
    SELECT
        salaried.*,
        operationalUnits.Title AS OperationalUnitTitle
    FROM ret_vwSalaried salaried
    LEFT JOIN prs_operationalUnitFeatures operationalUnitFeatures
        ON salaried.[Guid] = operationalUnitFeatures.[FeatureGuid]
    LEFT JOIN prs_operationalUnits operationalUnits
        ON operationalUnits.id = operationalUnitFeatures.OperationalUnitID
),
temp2 AS (
    SELECT
        t2.*,
        -- Each title is prefixed with ' - ' (3 characters); remove exactly
        -- that prefix from the front so no leading space is left behind.
        STUFF((
            SELECT ' - ' + t1.OperationalUnitTitle
            FROM temp1 t1
            WHERE t1.[ID] = t2.[ID]
            FOR XML PATH('')
        ), 1, 3, '') AS OperationalUnitTitles
    FROM temp1 t2
)
-- temp2 still has one row per operational unit; the GROUP BY collapses those
-- duplicates into one row per salaried row.
SELECT
    [Guid],
    ID,
    Title,
    PersonnelNo,
    FirstName,
    LastName,
    FullName,
    Active,
    SSN,
    DeathDate,
    SalariedType,
    OperationalUnitTitles
FROM temp2
GROUP BY
    [Guid],
    ID,
    Title,
    PersonnelNo,
    FirstName,
    LastName,
    FullName,
    Active,
    SSN,
    DeathDate,
    SalariedType,
    OperationalUnitTitles

-- Turn the rows of tblExportPartProforma into one row per InvoiceNo with
-- numbered column groups (InvoiceType1, BankRefno1, ...), using dynamic SQL.
-- Local variables take the @ sigil; a # prefix names a temporary table.
DECLARE @maxColumnCount INT = 0;
DECLARE @Query VARCHAR(MAX) = '';
DECLARE @DynamicColumnName NVARCHAR(MAX) = '';

-- Table variable that stores the per-invoice row number of every source row
DECLARE @TotalRows TABLE (row_count INT);

-- NOTE(review): the window is ordered by its own partition column, so the
-- numbering within an invoice is not deterministic.
INSERT INTO @TotalRows (row_count)
SELECT ROW_NUMBER() OVER (PARTITION BY InvoiceNo ORDER BY InvoiceNo DESC) AS row_no
FROM tblExportPartProforma;

-- The largest row number is the number of column groups to generate
SET @maxColumnCount = (SELECT MAX(row_count) FROM @TotalRows);

-- Build one MAX(CASE ...) expression per source column and row number
DECLARE @cnt INT = 1;
WHILE @cnt <= @maxColumnCount
BEGIN
    SET @DynamicColumnName = @DynamicColumnName + ', Max(case when row_no= ' + CAST(@cnt AS VARCHAR) + ' then InvoiceType end )as InvoiceType' + CAST(@cnt AS VARCHAR) + '';
    SET @DynamicColumnName = @DynamicColumnName + ', Max(case when row_no= ' + CAST(@cnt AS VARCHAR) + ' then BankRefno end )as BankRefno' + CAST(@cnt AS VARCHAR) + '';
    SET @DynamicColumnName = @DynamicColumnName + ', Max(case when row_no= ' + CAST(@cnt AS VARCHAR) + ' then AmountReceived end )as AmountReceived' + CAST(@cnt AS VARCHAR) + '';
    SET @DynamicColumnName = @DynamicColumnName + ', Max(case when row_no= ' + CAST(@cnt AS VARCHAR) + ' then AmountReceivedDate end )as AmountReceivedDate' + CAST(@cnt AS VARCHAR) + '';
    SET @cnt = @cnt + 1;
END;

-- Assemble the statement: a CTE that numbers the rows, then the grouped pivot
SET @Query = '
with CTE_tbl as
(
SELECT InvoiceNo,InvoiceType,BankRefno,AmountReceived,AmountReceivedDate,
ROW_NUMBER() OVER(PARTITION BY InvoiceNo order by InvoiceNo Desc) as row_no
FROM tblExportPartProforma
)
select
InvoiceNo
' + @DynamicColumnName + '
FROM CTE_tbl
group By InvoiceNo';

-- Execute the Query
EXECUTE (@Query);

Related

Is it always possible to transform multiple spatial selects with while loop and variables into a single query without using temp tables in sql?

This problem can be solved with temp table, however, I don't want to use Temp table or var table, this question is mostly for my personal educational purposes.
I inherited the following SQL:
-- For each rent id from 993 to 1000: build a geography point from the two
-- comma-separated parts of that row's Address, then compare the row's price
-- per unit of LivingArea with the average over all rows whose STBuffer(500)
-- area intersects that point. Produces one result set per iteration.
-- Local variables take the @ sigil; a # prefix names a temporary table.
DECLARE @i INT = 993;
WHILE @i <= 1000
BEGIN
    -- Text before the first comma
    DECLARE @lat NVARCHAR(20);
    SELECT TOP 1 @lat = SUBSTRING(Address, 0, CHARINDEX(',', Address, 0))
    FROM dbo.rent
    WHERE id = @i;

    -- Text after the first comma
    DECLARE @lon NVARCHAR(20);
    SELECT TOP 1 @lon = SUBSTRING(Address, CHARINDEX(',', Address) + 1, LEN(Address))
    FROM dbo.rent
    WHERE id = @i;

    DECLARE @p GEOGRAPHY = GEOGRAPHY::STGeomFromText('POINT(' + @lat + ' ' + @lon + ')', 4326);

    SELECT price / LivingArea sq_m, (price / LivingArea) / avg_sq_m, *
    FROM (
        SELECT (SUM(price) / SUM(LivingArea)) avg_sq_m, COUNT(1) cnt, @i id
        FROM (
            SELECT *,
                GEOGRAPHY::STGeomFromText('POINT(' +
                    CONVERT(NVARCHAR(20), SUBSTRING(Address, 0, CHARINDEX(',', Address, 0))) + ' ' +
                    CONVERT(NVARCHAR(20), SUBSTRING(Address, CHARINDEX(',', Address) + 1, LEN(Address))) + ')', 4326)
                    .STBuffer(500).STIntersects(@p) AS [Intersects]
            FROM dbo.rent
            WHERE Address IS NOT NULL
        ) s
        WHERE [Intersects] = 1
    ) prox
    INNER JOIN dbo.rent r ON prox.id = r.id;

    SET @i = @i + 1;
END
it is used to analyze property prices per square meter that are in proximity and compare them to see which ones are cheaper...
Problem: the calling mechanism has to be moved from C# to SQL and all queries have to be combined into a single result (at the moment you get one result set per loop iteration), i.e. @i and @p have to go and be replaced by something like id > x and id < y, or somehow be joined in,
the procedure is a cut down version of actual thing but having a solution to the above I will have no problem making the whole thing work...
I am of the opinion that any SQL mechanism with variables and loops can be transformed to a single SQL statement, hence the question.
SqlFiddle
If I understand your question properly (Remove the need for loops and return one data set) then you can use CTE (Common Table Expressions) for the Lats, Lons and Geog variables.
Your SQLFiddle was referencing a database called "webanalyser", so I removed that from the query below.
However, the query will not return anything as the sample data has wrong data for address column.
;WITH cteLatsLongs
AS(
SELECT
lat = SUBSTRING(Address, 0, CHARINDEX(',', Address, 0))
,lon = SUBSTRING(Address, CHARINDEX(',', Address) + 1, LEN(Address))
FROM dbo.rent
)
,cteGeogs
AS(
SELECT
Geog = GEOGRAPHY ::STGeomFromText('POINT(' + LL.lat + ' ' + LL.lon + ')', 4326)
FROM cteLatsLongs LL
),cteIntersects
AS(
SELECT *,
GEOGRAPHY::STGeomFromText('POINT(' + CONVERT(NVARCHAR(20), SUBSTRING(Address, 0, CHARINDEX(',', Address, 0))) + ' ' + CONVERT(NVARCHAR(20), SUBSTRING(Address, CHARINDEX(',', Address) + 1, LEN(Address))) + ')', 4326).STBuffer(500).STIntersects(G.Geog) AS [Intersects]
FROM dbo.rent
CROSS APPLY cteGeogs G
)
SELECT avg_sq_m = (SUM(price) / SUM(LivingArea)), COUNT(1) cnt
FROM
cteIntersects I
WHERE I.[Intersects] = 1
It can be done, in this specific case 'discovery' that was necessary was the ability to perform JOINs on Point e.g ability to join tables on proximity (another a small cheat was to aggregate point-strings to actual points, but it's just an optimization). Once this is done, a query could be rewritten as follows:
SELECT adds.Url,
adds.Price/adds.LivingArea Sqm,
(adds.Price/adds.LivingArea)/k1.sale1Avg ratio,
*
FROM
(SELECT baseid,
count(k1Rent.rentid) rent1kCount,
sum(k1Rent.RperSqM)/(count(k1Rent.rentid)) AS rent1kAvgSqM,
count(around1k.SaleId) sale1kCount,
(sum(k1sale.price)/sum(k1Sale.LivingArea)) sale1Avg,
(sum(k1sale.price)/sum(k1Sale.LivingArea))/((sum(k1Rent.RperSqM)/(count(k1Rent.rentid)))*12) years --*
FROM
(SELECT sa.id baseid,
s.id saleid,
s.RoomCount,
POINT
FROM SpatialAnalysis sa
INNER JOIN Sale s ON s.Id = SaleId
WHERE sa.SalesIn1kRadiusCount IS NULL) AS base
JOIN SpatialAnalysis around1k ON base.Point.STBuffer(1000).STIntersects(around1k.Point) = 1
LEFT OUTER JOIN
(SELECT id rentid,
rc,
Price/avgRoomSize RperSqM
FROM
(SELECT *
FROM
(SELECT rc,
sum(avgArea*c)/sum(c) avgRoomSize
FROM
(SELECT roomcount rc,
avg(livingarea) avgArea,
count(1) c
FROM Rent
WHERE url LIKE '%systemname%'
AND LivingArea IS NOT NULL
GROUP BY RoomCount
UNION
(SELECT roomcount rc,
avg(livingarea) avgArea,
count(1) c
FROM sale
WHERE url LIKE '%systemname%'
AND LivingArea IS NOT NULL
GROUP BY RoomCount))uni
GROUP BY rc) avgRoom) avgrents
JOIN rent r ON r.RoomCount = avgrents.rc) k1Rent ON k1Rent.rentid =around1k.RentId
AND base.RoomCount = k1Rent.rc
LEFT OUTER JOIN Sale k1Sale ON k1Sale.Id = around1k.SaleId
AND base.RoomCount = k1Sale.RoomCount
GROUP BY baseid) k1
left outer join SpatialAnalysis sp on sp.Id = baseid
left outer join Sale adds on adds.Id = sp.SaleId
where adds.Price < 250000
order by years, ratio

return value at a position from STRING_SPLIT in SQL Server 2016

Can I return a value at a particular position with the STRING_SPLIT function in SQL Server 2016 or higher?
I know the order from a select is not guaranteed, but is it with STRING_SPLIT?
DROP TABLE IF EXISTS #split
SELECT 'z_y_x' AS splitIt
INTO #split UNION
SELECT 'a_b_c'
SELECT * FROM #split;
WITH cte
AS (
SELECT ROW_NUMBER() OVER ( PARTITION BY s.splitIt ORDER BY s.splitIt ) AS position,
s.splitIt,
value
FROM #split s
CROSS APPLY STRING_SPLIT(s.splitIt, '_')
)
SELECT * FROM cte WHERE position = 2
Will this always return the value at the 2nd element? b for a_b_c and y for z_y_x?
I don't understand why Microsoft doesn't return a position indicator column alongside the value for this function.
There is - starting with v2016 - a solution via FROM OPENJSON():
-- Split a comma-separated string by rewriting it as a JSON array of strings;
-- OPENJSON then returns each element together with its array index as [key].
-- Local variables take the @ sigil; a # prefix names a temporary table.
DECLARE @str VARCHAR(100) = 'val1,val2,val3';

SELECT *
FROM OPENJSON('["' + REPLACE(@str, ',', '","') + '"]');
The result
key value type
0 val1 1
1 val2 1
2 val3 1
The documentation tells clearly:
When OPENJSON parses a JSON array, the function returns the indexes of the elements in the JSON text as keys.
For your case this was:
-- Sample data in a temporary table (#split really is a temp table)
SELECT 'z_y_x' AS splitIt
INTO #split
UNION
SELECT 'a_b_c';

-- The delimiter is a local variable, so it takes the @ sigil
DECLARE @delimiter CHAR(1) = '_';

-- Rewrite each string as a JSON array and keep only the element at index 1
SELECT *
FROM #split
CROSS APPLY OPENJSON('["' + REPLACE(splitIt, @delimiter, '","') + '"]') AS s
WHERE s.[key] = 1; --zero based
Let's hope, that future versions of STRING_SPLIT() will include this information
UPDATE Performance tests, compare with popular Jeff-Moden-splitter
Try this out:
USE master;
GO
CREATE DATABASE dbTest;
GO
USE dbTest;
GO
--Jeff Moden's splitter
-- Inline table-valued splitter.
--   @pString    : string to split (VARCHAR(8000))
--   @pDelimiter : single-character delimiter
-- Returns one row per item: ItemNumber (1-based position) and Item.
-- Parameters take the @ sigil; a # prefix names a temporary table.
CREATE FUNCTION [dbo].[DelimitedSplit8K](@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
-- One number per byte of the input string
cteTally(N) AS (
    SELECT TOP (ISNULL(DATALENGTH(@pString), 0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
-- Start position of every item: position 1, plus the position after each delimiter
cteStart(N1) AS (
    SELECT 1 UNION ALL
    SELECT t.N + 1 FROM cteTally t WHERE SUBSTRING(@pString, t.N, 1) = @pDelimiter
),
-- Length of every item: distance to the next delimiter, or 8000 when there is none
cteLen(N1, L1) AS (
    SELECT s.N1,
        ISNULL(NULLIF(CHARINDEX(@pDelimiter, @pString, s.N1), 0) - s.N1, 8000)
    FROM cteStart s
)
SELECT ItemNumber = ROW_NUMBER() OVER (ORDER BY l.N1),
    Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
GO
--Avoid first call bias
SELECT * FROM dbo.DelimitedSplit8K('a,b,c',',');
GO
--Table to keep the results
CREATE TABLE Results(ID INT IDENTITY,ResultSource VARCHAR(100),durationMS INT, RowsCount INT);
GO
--Table with strings to split
CREATE TABLE dbo.DelimitedItems(ID INT IDENTITY,DelimitedNString nvarchar(4000),DelimitedString varchar(8000));
GO
--Get rows with randomly mixed strings of 100 items
--Try to play with the count of rows (count behind GO) and the count with TOP
INSERT INTO DelimitedItems(DelimitedNString)
SELECT STUFF((
SELECT TOP 100 ','+REPLACE(v.[name],',',';')
FROM master..spt_values v
WHERE LEN(v.[name])>0
ORDER BY NewID()
FOR XML PATH('')),1,1,'')
--Keep it twice in varchar and nvarchar
UPDATE DelimitedItems SET DelimitedString=DelimitedNString;
GO 500 --create 500 differently mixed rows
--The tests
-- Time four ways of splitting dbo.DelimitedItems and log row count and
-- duration of each run in Results.
-- @d is a local variable (@ sigil); #TEMP .. #TEMP4 are real temp tables.
DECLARE @d DATETIME2;

-- 1) DelimitedSplit8K on the NVARCHAR(4000) column
SET @d = SYSUTCDATETIME();
SELECT DI.ID, DS.Item, DS.ItemNumber
INTO #TEMP
FROM dbo.DelimitedItems DI
CROSS APPLY dbo.DelimitedSplit8K(DI.DelimitedNString, ',') DS;

INSERT INTO Results (ResultSource, RowsCount, durationMS)
SELECT 'delimited8K with NVARCHAR(4000)'
    ,(SELECT COUNT(*) FROM #TEMP) AS RowCountInTemp
    ,DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_NV_ms_delimitedSplit8K;

-- 2) DelimitedSplit8K on the VARCHAR(8000) column
SET @d = SYSUTCDATETIME();
SELECT DI.ID, DS.Item, DS.ItemNumber
INTO #TEMP2
FROM dbo.DelimitedItems DI
CROSS APPLY dbo.DelimitedSplit8K(DI.DelimitedString, ',') DS;

INSERT INTO Results (ResultSource, RowsCount, durationMS)
SELECT 'delimited8K with VARCHAR(8000)'
    ,(SELECT COUNT(*) FROM #TEMP2) AS RowCountInTemp
    ,DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_V_ms_delimitedSplit8K;

-- 3) OPENJSON on the NVARCHAR(4000) column
SET @d = SYSUTCDATETIME();
SELECT DI.ID, OJ.[Value] AS Item, OJ.[Key] AS ItemNumber
INTO #TEMP3
FROM dbo.DelimitedItems DI
CROSS APPLY OPENJSON('["' + REPLACE(DI.DelimitedNString, ',', '","') + '"]') OJ;

INSERT INTO Results (ResultSource, RowsCount, durationMS)
SELECT 'OPENJSON with NVARCHAR(4000)'
    ,(SELECT COUNT(*) FROM #TEMP3) AS RowCountInTemp
    ,DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_NV_ms_OPENJSON;

-- 4) OPENJSON on the VARCHAR(8000) column
SET @d = SYSUTCDATETIME();
SELECT DI.ID, OJ.[Value] AS Item, OJ.[Key] AS ItemNumber
INTO #TEMP4
FROM dbo.DelimitedItems DI
CROSS APPLY OPENJSON('["' + REPLACE(DI.DelimitedString, ',', '","') + '"]') OJ;

INSERT INTO Results (ResultSource, RowsCount, durationMS)
SELECT 'OPENJSON with VARCHAR(8000)'
    ,(SELECT COUNT(*) FROM #TEMP4) AS RowCountInTemp
    ,DATEDIFF(MILLISECOND, @d, SYSUTCDATETIME()) AS Duration_V_ms_OPENJSON;
GO
SELECT * FROM Results;
GO
--Clean up
DROP TABLE #TEMP;
DROP TABLE #TEMP2;
DROP TABLE #TEMP3;
DROP TABLE #TEMP4;
USE master;
GO
DROP DATABASE dbTest;
Results:
200 items in 500 rows
1220 delimited8K with NVARCHAR(4000)
274 delimited8K with VARCHAR(8000)
417 OPENJSON with NVARCHAR(4000)
443 OPENJSON with VARCHAR(8000)
100 items in 500 rows
421 delimited8K with NVARCHAR(4000)
140 delimited8K with VARCHAR(8000)
213 OPENJSON with NVARCHAR(4000)
212 OPENJSON with VARCHAR(8000)
100 items in 5 rows
10 delimited8K with NVARCHAR(4000)
5 delimited8K with VARCHAR(8000)
3 OPENJSON with NVARCHAR(4000)
4 OPENJSON with VARCHAR(8000)
5 items in 500 rows
32 delimited8K with NVARCHAR(4000)
30 delimited8K with VARCHAR(8000)
28 OPENJSON with NVARCHAR(4000)
24 OPENJSON with VARCHAR(8000)
--unlimited length (only possible with OPENJSON)
--Without a TOP clause while filling
--results in about 500 items in 500 rows
1329 OPENJSON with NVARCHAR(4000)
1117 OPENJSON with VARCHAR(8000)
Conclusion:
the popular splitter function does not like NVARCHAR
the function is limited to strings of at most 8,000 bytes
Only the case with many items and many rows in VARCHAR lets the splitter function be ahead.
In all other cases OPENJSON seems to be more or less faster...
OPENJSON can deal with (almost) unlimited counts
OPENJSON demands for v2016
Everybody is waiting for STRING_SPLIT with the position
UPDATE Added STRING_SPLIT to the test
In the meanwhile I re-run the test with two more test sections using STRING_SPLIT(). As position I had to return a hardcoded value as this function does not return the part's index.
In all tested cases OPENJSON was close with STRING_SPLIT and often faster:
5 items in 1000 rows
250 delimited8K with NVARCHAR(4000)
124 delimited8K with VARCHAR(8000) --this function is best with many rows in VARCHAR
203 OPENJSON with NVARCHAR(4000)
204 OPENJSON with VARCHAR(8000)
235 STRING_SPLIT with NVARCHAR(4000)
234 STRING_SPLIT with VARCHAR(8000)
200 items in 30 rows
140 delimited8K with NVARCHAR(4000)
31 delimited8K with VARCHAR(8000)
47 OPENJSON with NVARCHAR(4000)
31 OPENJSON with VARCHAR(8000)
47 STRING_SPLIT with NVARCHAR(4000)
31 STRING_SPLIT with VARCHAR(8000)
100 items in 10.000 rows
8145 delimited8K with NVARCHAR(4000)
2806 delimited8K with VARCHAR(8000) --fast with many rows!
5112 OPENJSON with NVARCHAR(4000)
4501 OPENJSON with VARCHAR(8000)
5028 STRING_SPLIT with NVARCHAR(4000)
5126 STRING_SPLIT with VARCHAR(8000)
The simple answer is, no. Microsoft so far have refused to provide Ordinal position as part of the return dataset in STRING_SPLIT. You'll need to use a different solution I'm afraid. For example Jeff Moden's DelimitedSplit8k.
(Yes, I realise this is more or less a link only answer, however, pasting Jeff's solution here would effectively be plagiarism).
If you were to use Jeff's solution, then you would be able to do something like:
SELECT *
FROM dbo.DelimitedSplit8K('a,b,c,d,e,f,g,h,i,j,k',',') DS
WHERE ItemNumber = 2;
Of course, you'd likely be passing column rather than a literal string.
I just extended @Shnugo's answer for the case where the text to split contains line breaks, Unicode, or other characters that are not JSON-compatible, by using
STRING_ESCAPE
My Test code with pipe as separator instead comma:
-- Split by position with OPENJSON while JSON-escaping the text first, so
-- quotes and line breaks inside the items do not break the JSON array.
-- The separator is escaped the same way so it still matches after the text
-- has been escaped.
-- Local variables take the @ sigil; a # prefix names a temporary table.
DECLARE @Separator VARCHAR(5) = STRING_ESCAPE('|', 'json'); -- here pipe or use any other separator (even ones escaped by json)
DECLARE @LongText VARCHAR(MAX) = 'Albert says: "baby, listen!"|ve Çağrı söylüyor: "Elma"|1st Line' + CHAR(13) + CHAR(10) + '2nd line';

SELECT * FROM OPENJSON('["' + REPLACE(STRING_ESCAPE(@LongText, 'json'), @Separator, '","') + '"]'); -- ok
-- SELECT * FROM OPENJSON('["' + REPLACE(@LongText, @Separator ,'","') + '"]'); -- fails with: JSON text is not properly formatted. ...
Updated due to comment from Simon Zeinstra
I didn't want to deal with OPENJSON, but still wanted to get string_split() value by index.
The performance was not an issue in my case.
I used CTE (Common Table Expression)
Assume you have string str = "part1 part2 part3".
WITH split_res_list as
(
SELECT value FROM STRING_SPLIT('part1 part2 part3', ' ')
),
split_res_list_with_index as
(
SELECT [value],
ROW_NUMBER() OVER (ORDER BY [value] ASC) as [RowNumber]
FROM split_res_list
)
SELECT * FROM split_res_list_with_index WHERE RowNumber = 2
BUT: please be aware that the order of 3 parts is changed according to ORDER BY condition!
The output for the second row with "part2" value:
Using STRING_SPLIT:
STRING_SPLIT ( string , separator [ , enable_ordinal ] )
enable_ordinal
An int or bit expression that serves as a flag to enable or disable the ordinal output column. A value of 1 enables the ordinal column. If enable_ordinal is omitted, NULL, or has a value of 0, the ordinal column is disabled.
The enable_ordinal argument and ordinal output column are currently only supported in Azure SQL Database, Azure SQL Managed Instance, and Azure Synapse Analytics (serverless SQL pool only).
Query:
SELECT value FROM STRING_SPLIT('part1_part2_part3', '_', 1) WHERE ordinal = 2;
Here is my workaround. I will follow the Question waiting for a better answer:
UPDATED: Original code did not take into consideration if a word contains another.
UPDATE 2: Performance was horrible in production so i have to think another way. you have it at the end as option 2, implementation for table.
UPDATE 3: Added code for UDF in the implementation in a string.
Implementation in a string:
-- List every word of @a with the character position where it occurs, so the
-- words can be ordered as they appear in the string.
-- Local variables take the @ sigil; a # prefix names a temporary table.
DECLARE @a AS NVARCHAR(100) = 'Lorem ipsum dolor dol ol sit amet. D Lorem DO ipsum DOL dolor sit amet. DOLORES ipsum';

WITH T AS (
    -- Every word with the position of its first occurrence (padded with
    -- spaces so a word is not matched inside a longer one); RN numbers the
    -- repeated occurrences of the same word.
    SELECT T1.value
        ,CHARINDEX(' ' + T1.value + ' ', ' ' + @a + ' ', 0) AS INDX
        ,RN = ROW_NUMBER() OVER (PARTITION BY value ORDER BY value)
    FROM STRING_SPLIT(@a, ' ') AS T1
    WHERE T1.value <> ''
),
R (VALUE, INDX, RN) AS (
    -- Anchor: first occurrence of each word
    SELECT *
    FROM T
    WHERE T.RN = 1
    UNION ALL
    -- Recursion: the next occurrence is searched after the previous position
    SELECT T.VALUE
        ,CHARINDEX(' ' + T.value + ' ', ' ' + @a + ' ', R.INDX + 1) AS INDX
        ,T.RN
    FROM T
    JOIN R
        ON T.value = R.VALUE
        AND T.RN = R.RN + 1
)
SELECT * FROM R ORDER BY INDX
result:
tableOfResults
UDF:
-- Returns the word at position @wordNumber (1-based) of a space-separated
-- string.
--   @string     : text to split on single spaces
--   @wordNumber : ordinal of the wanted word
-- The variable stays NULL, and NULL is returned, when no word has that ordinal.
-- Parameters and variables take the @ sigil; # names a temporary table.
CREATE FUNCTION DBO.UDF_get_word(@string NVARCHAR(100), @wordNumber INT)
RETURNS NVARCHAR(100)
AS
BEGIN
    DECLARE @searchedWord NVARCHAR(100);

    WITH T AS (
        -- Every word with the position of its first occurrence; RN numbers
        -- repeated occurrences of the same word.
        SELECT T1.value
            ,CHARINDEX(' ' + T1.value + ' ', ' ' + @string + ' ', 0) AS INDX
            ,RN = ROW_NUMBER() OVER (PARTITION BY value ORDER BY value)
        FROM STRING_SPLIT(@string, ' ') AS T1
        WHERE T1.value <> ''
    ),
    R (VALUE, INDX, RN) AS (
        SELECT *
        FROM T
        WHERE T.RN = 1
        UNION ALL
        -- Next occurrence of a repeated word, searched after the previous one
        SELECT T.VALUE
            ,CHARINDEX(' ' + T.value + ' ', ' ' + @string + ' ', R.INDX + 1) AS INDX
            ,T.RN
        FROM T
        JOIN R
            ON T.value = R.VALUE
            AND T.RN = R.RN + 1
    )
    -- Order the words by position and pick the requested ordinal
    SELECT @searchedWord = (value)
    FROM (SELECT *, ORD = ROW_NUMBER() OVER (ORDER BY INDX) FROM R) AS TBL
    WHERE ORD = @wordNumber;

    RETURN @searchedWord;
END
GO
Modification for a column in a table, OPTION 1:
-- Word positions for a string column of a table: one row per word occurrence
-- with INDX = character position inside stringToBeSplit.
WITH T AS (
    SELECT T1.stringToBeSplit
        ,T1.column1 --column1 is an example of a column where stringToBeSplit is the same for more than one record; better avoided, but if you need it, follow column1 through the code
        ,T1.column2
        ,T1.value
        ,T1.column3
        /*,...any other column*/
        ,CHARINDEX(' ' + T1.value + ' ', ' ' + T1.stringToBeSplit + ' ', 0) AS INDX
        ,RN = ROW_NUMBER() OVER (PARTITION BY T1.column1, T1.stringToBeSplit, T1.value ORDER BY T1.column1, T1.stringToBeSplit, T1.value) --any column that creates duplicates needs to be added here; column1 is the example
    FROM (SELECT TOP 10 * FROM YourTable D CROSS APPLY string_split(D.stringToBeSplit, ' ')) AS T1
    WHERE T1.value <> ''
),
R (stringToBeSplit, column1, column2, value, column3, INDX, RN) AS (
    -- Anchor: first occurrence of each word per (column1, stringToBeSplit)
    SELECT stringToBeSplit, column1, column2, value, column3, INDX, RN
    FROM T
    WHERE T.RN = 1
    UNION ALL
    -- Recursion: next occurrence, searched after the previous position.
    -- Every column is qualified with T because R exposes the same names.
    SELECT T.stringToBeSplit, T.column1, T.column2, T.value, T.column3
        ,CHARINDEX(' ' + T.value + ' ', ' ' + T.stringToBeSplit + ' ', R.INDX + 1) AS INDX
        ,T.RN
    FROM T
    JOIN R
        ON T.value = R.VALUE
        AND T.COLUMN1 = R.COLUMN1 --any column that creates duplicates needs to be added here; column1 is the example
        -- RN is numbered per stringToBeSplit, so the previous occurrence must
        -- come from the same string as well
        AND T.stringToBeSplit = R.stringToBeSplit
        AND T.RN = R.RN + 1
)
SELECT * FROM R ORDER BY column1, stringToBeSplit, INDX
Modification for a column in a table, OPTION 2 (max performance i could get, main action came from removing the join and finding a way of properly execute (and stop) the recursive loop of the CTE, from 1.30 for 1000 lines to 2 sec for 30K lines of strings of similar type and length):
WITH T AS (
SELECT T1.stringToBeSplit --no extracolumns this time
,T1.value
,charindex(' ' + T1.value + ' ',' ' + T1.stringToBeSplit + ' ' ,0) AS INDX
,RN = ROW_NUMBER() OVER (PARTITION BY T1.stringToBeSplit,T1.value order BY T1.stringToBeSplit,T1.value) --from clause use distinct and where if possible
FROM (SELECT DISTINCT stringToBeSplit, VALUE FROM [your table] D CROSS APPLY string_split(D.stringToBeSplit,' ') WHERE [your filter]) AS T1
WHERE T1.value <> ''
),
R (stringToBeSplit, value, INDX, RN) AS (
SELECT stringToBeSplit, value, INDX, RN
FROM T
WHERE T.RN = 1
UNION ALL
SELECT R.stringToBeSplit, R.value
,charindex(' ' + R.value + ' ',' ' + R.stringToBeSplit + ' ',R.INDX + 1) AS INDX
,R.RN + 1
FROM R
WHERE charindex(' ' + R.value + ' ',' ' + R.stringToBeSplit + ' ',R.INDX + 1) <> 0
)
SELECT * FROM R ORDER BY stringToBeSplit, INDX
For getting the word ordinal instead of SELECT * FROM R USE:
SELECT stringToBeSplit ,value , ROW_NUMBER() OVER (PARTITION BY stringToBeSplit order BY [indX]) AS ORD FROM R
if instead of having one RW per word you prefer one column:
select * FROM (SELECT [name 1],value , ROW_NUMBER() OVER (PARTITION BY [name 1] order BY [indX]) AS ORD FROM R ) as R2
pivot (MAX(VALUE) FOR ORD in ([1],[2],[3]) ) AS PIV
If you don't want to specify the number of columns, use QUOTENAME() to build the column list dynamically, as in this link. In my case I only need the first 4 words; the rest are irrelevant for the moment. Below is the code from the page in case the link fails:
-- Dynamic PIVOT: build the pivot column list from production.categories and
-- run the statement through sp_executesql.
-- Local variables take the @ sigil; a # prefix names a temporary table.
DECLARE
    @columns NVARCHAR(MAX) = '',
    @sql NVARCHAR(MAX) = '';

-- select the category names, each quoted as an identifier and followed by ','
SELECT
    @columns += QUOTENAME(category_name) + ','
FROM
    production.categories
ORDER BY
    category_name;

-- remove the last comma
SET @columns = LEFT(@columns, LEN(@columns) - 1);

-- construct dynamic SQL
SET @sql = '
SELECT * FROM
(
SELECT
category_name,
model_year,
product_id
FROM
production.products p
INNER JOIN production.categories c
ON c.category_id = p.category_id
) t
PIVOT(
COUNT(product_id)
FOR category_name IN (' + @columns + ')
) AS pivot_table;';

-- execute the dynamic SQL
EXECUTE sp_executesql @sql;
Last but not least i'm really looking forward to know if there is an easier way with same performance either in SQL server or in C#. i just think everything that does not use external info should stay in the Server and run as query or batch but not sure to be honest as i heard the contrary (specially from people that use panda) but no one have convince me just yet.
This works
Example:
String = "pos1-pos2-pos3"
REVERSE(PARSENAME(REPLACE(REVERSE(String), '-', '.'), 1))
With 1 Returns "pos1"
With 2 will return "pos2"...

What is SQL Query (T-SQL) which counts transaction status for particular Name against all categorical statuses available?

I am trying to get a table that looks like:
Columns : [District] [Name] [Status1] [Status2] [Status3]
Data: DistrictA MrChan 1 1 1
Data: DistrictB MrFoo 1 0 2
Data: DistrictB MsLucy 0 1 0
(sorry the table turns out unexpected after posting)
select StatusID, StatusCode from
BookingStatus retrieves all categorical statuses Status1, Status2, Status3
select userid, DistrictA, StatusID from
UnitBooking retrieve multiple rows which represent booking transactions.
In example above Ms Lucy has done 1 booking she would have 1 row in UnitBooking. Mr Foo have 3 rows and Mr Chan also have 3 rows.
Example Data:
select [userid], [username], [District], [StatusID] from UnitBooking
[1],[MrChan],[DistrictA],[1]
[1],[MrChan],[DistrictA],[2]
[1],[MrChan],[DistrictA],[3]
[2],[MrFoo],[DistrictB],[1]
[2],[MrFoo],[DistrictB],[3]
[2],[MrFoo],[DistrictB],[3]
[3],[MsLucy],[DistrictB],[2]
select [StatusID], [StatusCode] from BookingStatus
[1],[Status1]
[2],[Status2]
[3],[Status3]
What is the T-SQL that produces the result set?
Thanks a lot
-- One row per district / user with the number of bookings in each status.
select
    ub.district,
    ub.username,
    sum(case when bs.statusid = 1 then 1 else 0 end) as status1,
    sum(case when bs.statusid = 2 then 1 else 0 end) as status2,
    sum(case when bs.statusid = 3 then 1 else 0 end) as status3  -- alias was misspelled 'status13'
from unitbooking ub
join BookingStatus bs
    on bs.statusid = ub.statusid
group by
    ub.district,
    ub.username
I have come up with an SQL that works for my report but I am not good at what the STUFF function and XML PATH part do, except it works. Here goes:
-- Build one SUM(CASE ...) column per row of customtable_BookingStatus and run
-- the resulting per-user count query as dynamic SQL.
-- Local variables take the @ sigil; a # prefix names a temporary table.
DECLARE @cols AS NVARCHAR(MAX), @query AS NVARCHAR(MAX);

-- FOR XML PATH('') concatenates the generated expressions into one string;
-- TYPE + .value() returns it unescaped, and STUFF(..., 1, 1, '') drops the
-- leading comma.
SELECT @cols = STUFF((
        SELECT ', SUM(CASE WHEN ub.BookingStatus='
            + CAST(ItemID AS NVARCHAR(128))
            + ' THEN 1 ELSE 0 END) '
            + QUOTENAME(StatusCode)
        FROM customtable_BookingStatus
        --WHERE ItemID Not In (1)
        GROUP BY StatusCode
            ,ItemID
        ORDER BY ItemID
        FOR XML PATH('')
            ,TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');

-- A space now precedes "from" so the keyword does not run into the last
-- generated column alias.
-- NOTE(review): the inner query groups by ub.BookingStatus as well as UserId,
-- which presumably yields one row per user and status - confirm this is intended.
SET @query = 'SELECT usr.FullName, usi.District, '
    + 'cnt.[OPEN] as [DUALLANGUAGE], cnt.BOOKED, cnt.CONFIRMED, cnt.SOLD from customtable__User usr
left join customtable_Public_User_Info usi on usi.UserID = usr.UserID
inner join '
    + '(SELECT ub.UserID,' + @cols
    + ' from customtable_UnitBooking ub
join customtable_BookingStatus bs on bs.ItemID = ub.BookingStatus
group by ub.UserId, ub.BookingStatus) '
    + 'cnt on cnt.UserID = usr.UserID';
--Print @query
EXEC sp_executesql @query;
Here's some value added conveniences:
-Supports new entries in the BookingStatus table dynamically. You still need to state the name of each new column explicitly in the final @query.
-Technically the names of new column could also be dynamic inside #query by adding an additional STUFF function with similar signature that retrieves plain comma separated columns. Here's the snippet:
-- Comma-separated list of "cnt.[StatusCode]" column references, one per row
-- of customtable_BookingStatus, ordered by ItemID.
-- Local variables take the @ sigil; a # prefix names a temporary table.
DECLARE @colsForQuery AS NVARCHAR(MAX);

SELECT @colsForQuery = STUFF((
        SELECT ',cnt.' + QUOTENAME(StatusCode)
        FROM customtable_BookingStatus
        --WHERE ItemID Not In (1)
        GROUP BY StatusCode
            ,ItemID
        ORDER BY ItemID
        FOR XML PATH('')
            ,TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');  -- drop the leading comma
produces this:
cnt.[OPEN],cnt.[BOOKED],cnt.[SOLD],cnt.[CONFIRMED]
-My report has dual language but not easy if all columns are entirely dynamic
-Omit columns using 'WHERE ItemID Not In (1)' or remove it in #query
-Watch out for columns that clashes with T-SQL keyword, example I must encase [OPEN] because its keyword
Overall I think TheGameiswar is correct answer. I am just extending his solution based on the URL suggested inside comments.

SQL Server Stored Procedure to loop through comma separated list

Is it possible to create a stored procedure that will split a comma separated list and then loop through the list and perform update statements?
This is just 3 updates of what runs into several hundred update statements that is in the region of 120,000 chars long, that are executed concurrently:
UPDATE OPERATION
SET START = '20151012', FINISH = '20151012'
WHERE REF = '912^0^15';
UPDATE OPERATION
SET START = '20151012', FINISH = '20151013'
WHERE REF = '913^0^15';
UPDATE OPERATION
SET START = '20151013', FINISH = '20151014'
WHERE REF = '872^0^15';
What I am thinking is instead, passing a list to a procedure and then have it create the update statements, thereby reducing the length of each update from 111 chars per record to around 30 chars.
I don't know how to write stored procedures in SQL Server, but in javascript I would do it something like this:
Pseudo code:
list = "'20151012','20151012','912^0^15'|'20151012', '20151013','913^0^15'|'20151013','20151014','872^0^15'"
for each list.split('|') as row
cols = row.split(',')
UPDATE OPERATION
SET START = 'cols[0]', FINISH = 'cols[1]'
WHERE REF = 'cols[2]';
Is it possible to create a stored procedure that can do this?
I don't like your idea at all. But to prove that it is possible I've prepared small demo:
LiveDemo
-- Demo: apply many (start, finish, ref) updates passed as one string.
-- Rows are separated by '|' and columns by ','.
-- @string is a local variable (@ sigil); #OPERATION is a real temp table.
DECLARE @string NVARCHAR(MAX) =
    '20151012,20151012,912^0^15|20151012, 20151013,913^0^15|20151013,20151014,872^0^15';

CREATE TABLE #OPERATION (START NVARCHAR(100), FINISH NVARCHAR(100), REF NVARCHAR(100));

INSERT INTO #OPERATION (START, FINISH, REF)
VALUES ('', '', '912^0^15'),
       ('', '', '913^0^15'),
       ('', '', '872^0^15');

WITH cte AS (
    -- Rewrite the list as XML: one <d> element per row and one <e> element
    -- per column inside it
    SELECT
        xml_data = CAST(REPLACE(REPLACE('<d>' + REPLACE(REPLACE(@string, '|', '</d><d>'), ',', '</e><e>') + '</d>', '</d>', '</e></d>'), '<d>', '<d><e>') AS XML)
),
cte2 AS (
    -- Shred the XML back into start / finish / ref columns
    SELECT record.r.value('e[1]', 'NVARCHAR(100)') AS [start],
           record.r.value('e[2]', 'NVARCHAR(100)') AS [finish],
           record.r.value('e[3]', 'NVARCHAR(100)') AS [ref]
    FROM cte
    CROSS APPLY xml_data.nodes('//d') AS record(r)
)
UPDATE o
SET Start = c.[start]
    ,finish = c.[finish]
FROM #OPERATION o
JOIN cte2 c
    ON c.ref = o.ref;

SELECT *
FROM #Operation;
You can wrap it with stored procedure where #string is parameter.
StoredProcedureDemo
-- Updates OPERATION.Start / finish for every row listed in @string.
--   @string : rows separated by '|', columns (start, finish, ref) by ','
-- Rows are matched to OPERATION on ref.
-- Parameters take the @ sigil; a # prefix names a temporary table.
CREATE PROCEDURE dbo.my_custom_update
    @string NVARCHAR(MAX)
AS
BEGIN
    SET NOCOUNT ON;

    WITH cte AS (
        -- Rewrite the list as XML: one <d> element per row and one <e>
        -- element per column inside it
        SELECT
            xml_data = CAST(REPLACE(REPLACE('<d>' + REPLACE(REPLACE(@string, '|', '</d><d>'), ',', '</e><e>') + '</d>', '</d>', '</e></d>'), '<d>', '<d><e>') AS XML)
    ),
    cte2 AS (
        -- Shred the XML back into start / finish / ref columns
        SELECT record.r.value('e[1]', 'NVARCHAR(100)') AS [start],
               record.r.value('e[2]', 'NVARCHAR(100)') AS [finish],
               record.r.value('e[3]', 'NVARCHAR(100)') AS [ref]
        FROM cte
        CROSS APPLY xml_data.nodes('//d') AS record(r)
    )
    UPDATE o
    SET Start = c.[start]
        ,finish = c.[finish]
    FROM OPERATION o
    JOIN cte2 c
        ON c.ref = o.ref;
END;
GO
Try using XML like this
-- Split a '|'-separated list of quoted, comma-separated triples into one result
-- row per list entry with the columns rowno, start, finish and ref.
DECLARE @list VARCHAR(MAX) = '''20151012'',''20151012'',''912^0^15''|''20151012'', ''20151013'',''913^0^15''|''20151013'',''20151014'',''872^0^15''';

WITH list_rows AS
(
    -- First split on '|': one <i> element per list entry. Each entry is then
    -- re-wrapped as its own XML document with one <i> element per column.
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS rowno
         , CONVERT(XML, '<i>' + REPLACE(b.x.value('(./text())[1]', 'VARCHAR(MAX)'), ',', '</i><i>') + '</i>') AS cols
    FROM (
        SELECT CONVERT(XML, '<i>' + REPLACE(@list, '|', '</i><i>') + '</i>')
    ) AS a(x)
    CROSS APPLY a.x.nodes('i') AS b(x)
)
-- Columns are addressed by their position in the XML (i[1], i[2], i[3]), so the
-- result does not depend on the order in which a row-numbering function happens
-- to visit the nodes. LTRIM drops blanks after a comma; REPLACE strips the quotes.
SELECT r.rowno
     , REPLACE(LTRIM(r.cols.value('(i[1]/text())[1]', 'VARCHAR(MAX)')), '''', '') AS start
     , REPLACE(LTRIM(r.cols.value('(i[2]/text())[1]', 'VARCHAR(MAX)')), '''', '') AS finish
     , REPLACE(LTRIM(r.cols.value('(i[3]/text())[1]', 'VARCHAR(MAX)')), '''', '') AS ref
FROM list_rows AS r
WHERE r.cols IS NOT NULL  -- an empty list entry produces no output row
ORDER BY r.rowno;

SQL Pivot table without aggregate

I have a number of text files that are in a format similar to what is shown below.
ENTRY,1,000000,Widget 4000,1,,,2,,
FIELD,Type,A
FIELD,Component,Widget 4000
FIELD,Vendor,Acme
ENTRY,2,000000,PRODUCT XYZ,1,,,3,
FIELD,Type,B
FIELD,ItemAssembly,ABCD
FIELD,Component,Product XYZ - 123
FIELD,Description1,Product
FIELD,Description2,XYZ-123
FIELD,Description3,Alternate Part #440
FIELD,Vendor,Contoso
They have been imported into a table with VARCHAR(MAX) as the only field. Each ENTRY is a "new" item, and all the subsequent FIELD rows are properties of that item. The data next to the FIELD is the column name of the property. The data to the right of the property is the data I want to display.
The desired output would be:
ENTRY Type Component Vendor ItemAssembly Description1
1,000000,Widget 4000 A Widget 4000 Acme
2,000000,Product XYZ B Product XYZ-123 Contoso ABCD Product
I've got the column names using the code below (there are several tables that I have UNIONed together to list all the property names).
-- Builds in @cols a comma-separated list of bracket-quoted property names. A
-- property name is the text between the first and the second comma of each
-- line that matches the pattern in @c.
-- NOTE(review): SUBSTRING reads from ltrim(textFileData) while the CHARINDEX
-- offsets are computed on the untrimmed value; the two disagree if a line has
-- leading blanks - confirm the imported data never does.
select @cols =
STUFF (
(select Distinct ', ' + QUOTENAME(ColName) from
(SELECT
SUBSTRING(ltrim(textFileData),CHARINDEX(',', textFileData, 1)+1,CHARINDEX(',', textFileData, CHARINDEX(',', textFileData, 1)+1)- CHARINDEX(',', textFileData, 1)-1) as ColName
FROM [MyDatabase].[dbo].[MyTextFile]
where
(LEFT(textFileData,7) LIKE @c)
UNION
....
) A
-- The separator ', ' is two characters long, so two characters are removed
-- from the front of the concatenated list.
FOR XML PATH(''), TYPE).value('.','NVARCHAR(MAX)'),1,2,'')
Is a Pivot table the best way to do this? No aggregation is needed. Is there a better way to accomplish this? I want to list out data next to the FIELD name in a column format.
Thanks!
Here is the solution in SQL fiddle:
http://sqlfiddle.com/#!3/8f0b0/8
Prepare the raw data in the format (entry, field, value), then use dynamic SQL to pivot on the unknown number of columns.
Using MAX() on the string value is enough to simulate "without aggregate" behavior in this case, because each entry has at most one value per field.
-- Sample data: every raw line of the text file is stored as one row in a
-- single varchar(max) column.
create table t
(
    data varchar(max)
);

insert into t (data)
values
    ('ENTRY,1,000000,Widget 4000,1,,,2,,'),
    ('FIELD,Type,A'),
    ('FIELD,Component,Widget 4000'),
    ('FIELD,Vendor,Acme '),
    ('ENTRY,2,000000,PRODUCT XYZ,1,,,3,'),
    ('FIELD,Type,B'),
    ('FIELD,ItemAssembly,ABCD'),
    ('FIELD,Component,Product XYZ - 123'),
    ('FIELD,Description1,Product '),
    ('FIELD,Description2,XYZ-123 '),
    ('FIELD,Description3,Alternate Part #440'),
    ('FIELD,Vendor,Contoso');
-- Table type for the normalised (entry, field, value) triples. A type is used
-- so that the table variable can be passed to sp_executesql as a read-only
-- parameter.
create type preparedtype as table (entry varchar(max), field varchar(max), value varchar(max))
GO
-- The type is created in its own batch: it has to exist before a batch that
-- declares a variable of that type is compiled.

declare @prepared preparedtype

;with identified as
(
    -- Number the raw lines and split off the leading record type (ENTRY / FIELD).
    -- NOTE(review): t has no ordering column, so "order by (select 1)" only
    -- reflects the order in which the engine happens to return the rows; a
    -- line-number column filled during the import would make this reliable.
    select
        row_number() over (order by (select 1)) as id,
        substring(data, 1, charindex(',', data) - 1) as type,
        substring(data, charindex(',', data) + 1, len(data)) as data
    from t
)
, tree as
(
    -- Attach to every line the id of the closest preceding ENTRY line
    -- (an ENTRY line is its own parent).
    select
        i.id,
        (select max(e.id)
         from identified as e
         where e.type = 'ENTRY'
           and e.id <= i.id) as parentid,
        i.type,
        i.data
    from identified as i
)
, pivotsrc as
(
    -- Pair each ENTRY line (p) with its FIELD lines (c) and split the FIELD
    -- payload into property name and property value.
    select
        p.data as entry,
        substring(c.data, 1, charindex(',', c.data) - 1) as field,
        substring(c.data, charindex(',', c.data) + 1, len(c.data)) as value
    from tree as p
    inner join tree as c on c.parentid = p.id
    where p.id = p.parentid
      and c.parentid <> c.id
)
insert into @prepared (entry, field, value)
select entry, field, value
from pivotsrc

declare @dynamicPivotQuery as nvarchar(max)
declare @columnName as nvarchar(max)

-- Comma-separated, bracket-quoted list of the distinct property names. It is
-- built with FOR XML PATH rather than "select @v = @v + ..." because the result
-- of that variable-concatenation form is not guaranteed for multi-row input.
select @columnName = stuff((
        select ',' + quotename(fields.field)
        from (select distinct field from @prepared) as fields
        order by fields.field
        for xml path(''), type
    ).value('.', 'nvarchar(max)'), 1, 1, '')

-- Without any FIELD line there is nothing to pivot on (the IN list would be empty).
if @columnName is not null
begin
    -- max(value) only satisfies the PIVOT syntax: the aggregate just picks the
    -- single value when an entry has one value per field.
    set @dynamicPivotQuery = N'select entry, ' + @columnName + N'
from @prepared
pivot (max(value) for field in (' + @columnName + N')) as result'

    exec sp_executesql @dynamicPivotQuery, N'@prepared preparedtype readonly', @prepared
end
Here you are; this comes back exactly as you need it. I love tricky SQL :-). This is a real ad-hoc single-statement call.
-- Ad-hoc pivot of ENTRY / FIELD lines: one result row per ENTRY line, with the
-- values of its FIELD lines (Type, Component, Vendor, ItemAssembly,
-- Description1) as columns.
DECLARE @tbl TABLE(OneCol VARCHAR(MAX));
INSERT INTO @tbl (OneCol)
VALUES('ENTRY,1,000000,Widget 4000,1,,,2,,')
,('FIELD,Type,A')
,('FIELD,Component,Widget 4000')
,('FIELD,Vendor,Acme ')
,('ENTRY,2,000000,PRODUCT XYZ,1,,,3,')
,('FIELD,Type,B')
,('FIELD,ItemAssembly,ABCD')
,('FIELD,Component,Product XYZ - 123')
,('FIELD,Description1,Product ')
,('FIELD,Description2,XYZ-123 ')
,('FIELD,Description3,Alternate Part #440')
,('FIELD,Vendor,Contoso');

WITH OneColumn AS
(
    -- Number the lines and turn each one into <root><r>token</r>...</root>.
    -- '&', '<' and '>' are escaped first so such characters in a line cannot
    -- make the CAST to XML fail.
    -- NOTE(review): the table has no ordering column, so ORDER BY (SELECT 1)
    -- only reflects the order in which the engine happens to return the rows.
    SELECT ROW_NUMBER() OVER(ORDER BY (SELECT 1)) AS inx
          ,CAST('<root><r>'
                + REPLACE(REPLACE(REPLACE(REPLACE(OneCol,'&','&amp;'),'<','&lt;'),'>','&gt;'),',','</r><r>')
                + '</r></root>' AS XML) AS Split
    FROM @tbl AS tbl
)
,AsParts AS
(
    -- The first five comma-separated tokens of every line.
    SELECT inx
          ,Each.part.value('/root[1]/r[1]','varchar(max)') AS Part1
          ,Each.part.value('/root[1]/r[2]','varchar(max)') AS Part2
          ,Each.part.value('/root[1]/r[3]','varchar(max)') AS Part3
          ,Each.part.value('/root[1]/r[4]','varchar(max)') AS Part4
          ,Each.part.value('/root[1]/r[5]','varchar(max)') AS Part5
    FROM OneColumn
    CROSS APPLY Split.nodes('/root') AS Each(part)
)
,TheEntries AS
(
    -- The ENTRY lines; inx is unique per line, so no DISTINCT is required.
    SELECT inx, Part2, Part3, Part4
    FROM AsParts
    WHERE Part1='ENTRY'
)
SELECT TheEntries.Part2 + ',' + TheEntries.Part3 + ',' + TheEntries.Part4 AS [ENTRY]
      ,MyFields.AsXML.value('(fields[1]/field[Part2="Type"])[1]/Part3[1]','varchar(max)') AS [Type]
      ,MyFields.AsXML.value('(fields[1]/field[Part2="Component"])[1]/Part3[1]','varchar(max)') AS Component
      ,MyFields.AsXML.value('(fields[1]/field[Part2="Vendor"])[1]/Part3[1]','varchar(max)') AS Vendor
      ,MyFields.AsXML.value('(fields[1]/field[Part2="ItemAssembly"])[1]/Part3[1]','varchar(max)') AS ItemAssembly
      ,MyFields.AsXML.value('(fields[1]/field[Part2="Description1"])[1]/Part3[1]','varchar(max)') AS Description1
FROM TheEntries
CROSS APPLY
(
    -- FIELD lines that belong to the current entry: those after it and before
    -- the NEXT entry, i.e. the smallest later ENTRY inx. Taking the largest
    -- later inx instead would let an entry swallow the fields of every entry
    -- between it and the last one. When there is no later entry the bound
    -- falls back to ap.inx + 1, which never filters anything out.
    SELECT ap.Part2, ap.Part3
    FROM AsParts AS ap
    WHERE ap.Part1='FIELD' AND ap.inx>TheEntries.inx
      AND ap.inx < ISNULL((SELECT MIN(nextEntry.inx)
                           FROM TheEntries AS nextEntry
                           WHERE nextEntry.inx>TheEntries.inx), ap.inx + 1)
    ORDER BY ap.inx
    FOR XML PATH('field'), ROOT('fields'),TYPE
) AS MyFields(AsXML)
ORDER BY TheEntries.inx;

Resources