Related
I have been looking into this for a while now and I cannot find a way to remove duplicate values from a string that is delimited by both commas and pipes in SQL Server.
Given the string
test1,test2,test1|test2,test3|test4,test4|test4
does anyone know how would you return test1,test2,test3,test4?
Approach
The following approach can be used to de-duplicate a delimited list of values.
Use the REPLACE() function to convert different delimiters into the same delimiter.
Use the REPLACE() function to inject XML closing and opening tags to create an XML fragment
Use the CAST(expr AS XML) function to convert the above fragment into the XML data type
Use OUTER APPLY to apply the table-valued function nodes() to split the XML fragment into its constituent XML tags. This returns each XML tag on a separate row.
Extract just the value from each XML tag using the value() function, which returns the value as the specified data type.
Append a comma after the above-mentioned value.
Note that these values are returned on separate rows. The usage of the DISTINCT keyword now removes duplicate rows (i.e. values).
Use the FOR XML PATH('') clause to concatenate the values across multiple rows into a single row.
Query
Putting the above approach in query form:
-- De-duplicates a comma/pipe delimited list and returns the distinct values
-- concatenated into a single comma-terminated string.
SELECT DISTINCT Piece.Node.value('.', 'nvarchar(max)') + ','
FROM (
    -- Normalise commas to pipes, then turn every pipe into "</tag><tag>", so the
    -- DataXml column holds the XML fragment:
    -- <tag>test1</tag><tag>test2</tag><tag>test1</tag><tag>test2</tag><tag>test3</tag><tag>test4</tag><tag>test4</tag><tag>test4</tag>
    SELECT
        Src.DataColumn AS DataRaw
        , CAST(
            '<tag>'
            + REPLACE(REPLACE(Src.DataColumn, ',', '|'), '|', '</tag><tag>')
            + '</tag>'
          AS XML) AS DataXml
    FROM (SELECT 'test1,test2,test1|test2,test3|test4,test4|test4' AS DataColumn) AS Src
) AS Fragment
-- nodes() yields one row per <tag> element; DISTINCT above drops the repeats.
OUTER APPLY Fragment.DataXml.nodes('tag') AS Piece(Node)
-- Without the FOR XML clause each distinct value comes back on its own row.
-- With it the rows are concatenated into a single value, i.e. it returns:
-- test1,test2,test3,test4,
FOR XML PATH('')
Input & Result
Given the input:
test1,test2,test1|test2,test3|test4,test4|test4
The above query will return the result:
test1,test2,test3,test4,
Notice the trailing comma at the end. I'll leave it as an exercise to you to remove that.
EDIT: Count of Duplicates
OP requested in a comment "how do i get t5he count of duplicates as well? in a seperate column".
The simplest way would be to use the above query but remove the last line, FOR XML PATH(''). Then count all values and the distinct values returned by the SELECT expression in the above query (i.e. PivotedTable.PivotedColumn.value('.','nvarchar(max)')). The difference between the count of all values and the count of distinct values is the count of duplicate values.
-- Counts all values, distinct values, and duplicates in a comma/pipe delimited list.
SELECT
    COUNT(Vals.Item) AS CountOfAllValues
    , COUNT(DISTINCT Vals.Item) AS CountOfUniqueValues
    -- The difference of the previous two counts is the number of duplicate values
    , COUNT(Vals.Item) - COUNT(DISTINCT Vals.Item) AS CountOfDuplicateValues
FROM (
    -- One row per list element, extracted once so the counts above can share it.
    SELECT Piece.Node.value('.', 'nvarchar(max)') AS Item
    FROM (
        -- Normalise commas to pipes, then turn every pipe into "</tag><tag>", so the
        -- DataXml column holds the XML fragment:
        -- <tag>test1</tag><tag>test2</tag><tag>test1</tag><tag>test2</tag><tag>test3</tag><tag>test4</tag><tag>test4</tag><tag>test4</tag>
        SELECT
            Src.DataColumn AS DataRaw
            , CAST(
                '<tag>'
                + REPLACE(REPLACE(Src.DataColumn, ',', '|'), '|', '</tag><tag>')
                + '</tag>'
              AS XML) AS DataXml
        FROM (SELECT 'test1,test2,test1|test2,test3|test4,test4|test4' AS DataColumn) AS Src
    ) AS Fragment
    OUTER APPLY Fragment.DataXml.nodes('tag') AS Piece(Node)
) AS Vals
For the same input shown above, the output of this query is:
CountOfAllValues CountOfUniqueValues CountOfDuplicateValues
---------------- ------------------- ----------------------
8 4 4
Solution to your problem is as given below :
-- De-duplicates a comma/pipe delimited string and returns the distinct values
-- as one comma-separated string (no trailing comma).
DECLARE @Data_String AS VARCHAR(1000), @Result AS VARCHAR(1000) = ''
SET @Data_String = 'test1,test2,test1|test2,test3|test4,test4|test4'

-- Collapse both delimiters into a single one (comma).
SET @Data_String = REPLACE(@Data_String, '|', ',')

-- Split the list via XML (<A>value</A> per element), keep the distinct values,
-- and append each one plus a comma to @Result.
SELECT @Result = @Result + col + ','
FROM (
    SELECT DISTINCT t.c.value('.', 'varchar(100)') AS col
    FROM (
        SELECT CAST('<A>' + REPLACE(@Data_String, ',', '</A><A>') + '</A>' AS XML) AS col1
    ) AS data
    CROSS APPLY col1.nodes('/A') AS t(c)
) AS Data

-- Strip the trailing comma added by the concatenation above.
SELECT LEFT(@Result, LEN(@Result) - 1)
Result
test1,test2,test3,test4
-- De-duplicates a comma/pipe delimited string by splitting it into a table
-- variable, deleting the repeated rows, and re-joining what is left.
DECLARE @string AS VARCHAR(1000)
SET @string = 'test1,test2,test1|test2,test3|test4,test4|test4'

-- Collapse both delimiters into a single one (comma).
SET @string = REPLACE(@string, '|', ',')

DECLARE @t TABLE (val VARCHAR(MAX))
DECLARE @xml XML

-- Turn the list into <root><r>value</r>...</root> so nodes() can split it.
SET @xml = N'<root><r>' + REPLACE(@string, ',', '</r><r>') + '</r></root>'

INSERT INTO @t (val)
SELECT r.value('.', 'VARCHAR(MAX)') AS Item
FROM @xml.nodes('//root/r') AS RECORDS(r)

-- Number the rows within each value and delete every row after the first,
-- leaving exactly one row per distinct value in @t.
;WITH cte AS (
    SELECT ROW_NUMBER() OVER (PARTITION BY val ORDER BY val DESC) AS RN
    FROM @t
)
DELETE FROM cte
WHERE RN > 1

-- Return the de-duplicated values as a single comma-separated string.
SELECT STUFF((
           SELECT ',' + val
           FROM @t
           ORDER BY val
           FOR XML PATH(''), TYPE
       ).value('.', 'VARCHAR(MAX)'), 1, 1, '') AS Result
Try Following SQL Script :
-- De-duplicates a comma/pipe delimited string: split via XML into a table
-- variable, copy the distinct values into a temp table, then re-join them.
DECLARE @List NVARCHAR(MAX) = 'test1,test2,test1|test2,test3|test4,test4|test4';
DECLARE @Delimiter CHAR(1) = ','
DECLARE @XML AS XML
DECLARE @result VARCHAR(MAX)

-- Collapse both delimiters into a single one (comma).
SET @List = REPLACE(@List, '|', ',')

-- Turn the list into <X>value</X> elements so nodes() can split it.
SET @XML = CAST(('<X>' + REPLACE(@List, @Delimiter, '</X><X>') + '</X>') AS XML)

-- @temp (table variable) holds every element, duplicates included.
DECLARE @temp TABLE (Data NVARCHAR(100))
INSERT INTO @temp (Data)
SELECT N.value('.', 'nvarchar(100)') AS Data
FROM @XML.nodes('X') AS T(N)

-- #temp (temp table) holds the distinct elements; drop any leftover copy first.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
SELECT DISTINCT Data INTO #temp FROM @temp

SET @result = ''
SELECT @result = @result + Data + ', ' FROM #temp

-- Drops the trailing ", ": LEN() ignores the trailing space, and SUBSTRING
-- starting at position 0 returns one character fewer than the given length.
SELECT SUBSTRING(@result, 0, LEN(@result))
I just tried following script working perfectly :
-- De-duplicates a comma/pipe delimited string by walking it with CHARINDEX,
-- storing each non-empty item, then re-joining the distinct items.
DECLARE @List VARCHAR(MAX) = 'test1,test2,test1|test2,test3|test4,test4|test4'
DECLARE @Delim CHAR(1) = ','
DECLARE @ParsedList TABLE
(
    Item VARCHAR(MAX)
)
DECLARE @list1 VARCHAR(MAX), @Pos INT, @rList VARCHAR(MAX)

-- Collapse both delimiters into a single one (comma).
SET @List = REPLACE(@List, '|', ',')

-- Append a final delimiter so the last item is terminated like all the others.
SET @List = LTRIM(RTRIM(@List)) + @Delim
SET @Pos = CHARINDEX(@Delim, @List, 1)

WHILE @Pos > 0
BEGIN
    -- Take the text before the next delimiter; skip empty items.
    SET @list1 = LTRIM(RTRIM(LEFT(@List, @Pos - 1)))
    IF @list1 <> ''
        INSERT INTO @ParsedList VALUES (CAST(@list1 AS VARCHAR(MAX)))

    -- Drop the consumed item and its delimiter, then look for the next one.
    SET @List = SUBSTRING(@List, @Pos + 1, LEN(@List))
    SET @Pos = CHARINDEX(@Delim, @List, 1)
END

-- @rList starts as NULL, so COALESCE emits no leading comma for the first item.
SELECT @rList = COALESCE(@rList + ',', '') + Item
FROM (SELECT DISTINCT Item FROM @ParsedList) t

SELECT @rList
Honestly, for several days I have been trying to learn about pivot table behavior. Right now, I am able to display the sum of each row and column in a pivot table. Here is the code that I am using:
-- Builds a per-day pivot of product amounts for one month, with a Total column
-- and a "Total" row produced by ROLLUP.
DECLARE @cols AS NVARCHAR(MAX)            -- pivot column list: [1],[2],...
DECLARE @colswithNoNulls AS NVARCHAR(MAX) -- same columns wrapped in ISNULL(..., '0')
DECLARE @query AS NVARCHAR(MAX)           -- dynamic PIVOT statement
DECLARE @tanggal_awal DATE                -- first day of the reporting window
DECLARE @tanggal_akhir DATE               -- last day of the reporting window

CREATE TABLE #datatable
(
    product_id int,
    product_date date,
    product_ammount int
)

SET @tanggal_awal = convert(DATE,'02-01-2017')
-- One month after the start date, minus one day.
SET @tanggal_akhir = convert(DATE,DATEADD(dd,-1,(DATEADD(mm,1,@tanggal_awal))))

INSERT INTO #datatable (product_id,product_date,product_ammount) VALUES
(1,GETDATE(),100),
(1,GETDATE(),900),
(2,DATEADD(DD,-1,GETDATE()),400),
(3,DATEADD(DD,4,GETDATE()),300),
(1,DATEADD(DD,4,GETDATE()),200),
(2,DATEADD(DD,2,GETDATE()),700),
(4,DATEADD(DD,-3,GETDATE()),1000),
(4,DATEADD(MM,1,GETDATE()),200),
(4,GETDATE(),750)

-- Generate the list of day numbers used as pivot columns and store it in #temp.
-- NOTE(review): the recursive member applies DATEPART(day, ...) to the INT
-- column datelist, so it relies on an implicit int-to-datetime conversion to
-- step to the next day number -- confirm this is intended.
;WITH CTE (datelist,maxdate) AS
(
    SELECT CONVERT(INT,(MIN(DATEPART(day,@tanggal_awal)))) datelist, CONVERT(INT,MAX(DATEPART(day,product_date))) maxdate
    FROM #datatable
    UNION ALL
    SELECT CONVERT(INT,(DATEPART(day,datelist))), CONVERT(INT,(DATEPART(day,@tanggal_akhir)))
    FROM cte
    WHERE datelist < maxdate
) SELECT c.datelist
INTO #temp
FROM cte c
ORDER BY c.datelist
OPTION (maxrecursion 0)

-- Comma-separated, bracket-quoted day numbers for the PIVOT ... IN (...) list.
SELECT @cols = STUFF((SELECT ',' + QUOTENAME(CONVERT(int, datelist))
                      FROM #temp
                      GROUP BY datelist
                      ORDER BY CONVERT(int, datelist)
                      FOR XML PATH(''), TYPE
                     ).value('.', 'NVARCHAR(MAX)')
                     ,1,1,''
                    )

-- The same columns, each wrapped in ISNULL so empty cells are shown as 0.
SELECT @colswithNoNulls = STUFF((SELECT ',ISNULL(' + QUOTENAME(CONVERT(int, datelist)) +',''0'') '+ QUOTENAME(CONVERT(int, datelist))
                                 FROM #temp
                                 GROUP BY datelist
                                 ORDER BY CONVERT(int, datelist)
                                 FOR XML PATH(''), TYPE
                                ).value('.', 'NVARCHAR(MAX)')
                                ,1,1,'')

-- NOTE(review): WITH ROLLUP over all three grouping columns also emits
-- sub-total rows, not only the grand total.
SET @query =
'SELECT product_id, '+ @colswithNoNulls+', Total FROM
(
select
ISNULL((CAST(b.product_id as nvarchar(30))), ''Total'') product_id,
coalesce(b.product_ammount,0) as product_ammount,
DATEPART(dd,(convert(CHAR(10), product_date, 120))) PivotDate,
SUM(product_ammount) over (partition by b.product_id) as Total
FROM #datatable b
WHERE product_date between @tanggal_awal and @tanggal_akhir
GROUP BY product_ammount,product_date,product_id
WITH ROllup
) x
pivot
(
sum(product_ammount)
for PivotDate in (' +@cols+ ')
) p
ORDER BY CASE when (product_id = ''Total'') then 1 else 0 end, product_id'

-- The date window is passed as real parameters rather than concatenated in.
EXECUTE sp_executesql @query ,N'@tanggal_awal DATE, @tanggal_akhir DATE', @tanggal_awal,@tanggal_akhir

-- Clean up the temp tables created above.
IF(OBJECT_ID('tempdb.dbo.#temp','U') IS NOT NULL)
BEGIN
    TRUNCATE TABLE #temp
    TRUNCATE TABLE #datatable
    DROP TABLE #temp
    DROP TABLE #datatable
END
ELSE
BEGIN
    SELECT '#temp is not created in this script' AS MESSAGE
END
As you can see, the result is shown on the display. However, the total value at the very bottom right is strange, since it is exactly double the real total, as in this picture:
How to resolve this issue btw? since it was bit confusing for me. thank you for your help :)
Generally, I am not fully familiar with the ROLLUP functionality. In your PIVOT query, I found that some empty rows are produced (basically the subtotal rows from the "WITH ROLLUP" option), so I modified the GROUP BY clause slightly to achieve the expected result.
-- Replacement for the inner query of the PIVOT: ROLLUP is applied to
-- product_id only instead of to all three grouping columns.
-- When pasting this into the dynamic SQL string, double the quotes: ''Total''.
SELECT
    ISNULL((CAST(b.product_id AS nvarchar(30))), 'Total') product_id,
    COALESCE(b.product_ammount, 0) AS product_ammount,
    DATEPART(dd, (CONVERT(CHAR(10), product_date, 120))) PivotDate,
    SUM(product_ammount) OVER (PARTITION BY b.product_id) AS Total
FROM #datatable b
WHERE product_date BETWEEN @tanggal_awal AND @tanggal_akhir
GROUP BY product_ammount, product_date, ROLLUP(product_id)
Kindly replace this query in PIVOT, then you will get the desired output.
Note: Sorry I am not fully aware of RollUp functionality, so I'm unable to give the right explanation.
I am trying to create a function in SQL SERVER which I can use to compare two tables, to check if they are identical. I do that with two excepts.
The tables are supposed to be exactly the same, with the same data types and column names, and with all values identical in both tables. This will be a manual check, so if there are differences, a thrown error is not a problem. The aim is just to see whether two approaches to creating the tables lead to the same result.
I am really new to functions in SQL, so I am not sure how to solve the problem.
I want to pass both tables as parameters to the function, to get something like this:
-- Sketch of the desired interface only: this does not compile, because the
-- parameters are untyped and a parameter cannot be used as a table name in FROM.
-- Intended result: rows found in only one of the two tables, each labelled with
-- the side it came from.
CREATE FUNCTION DIFFERING_ROWS
(@TABLE1, @TABLE2)
RETURNS TABLE
AS
RETURN (
    -- Rows in table 1 that are missing from table 2.
    (
        SELECT *, 'A_not_B' AS [Difference] FROM @TABLE1
        EXCEPT
        SELECT *, 'A_not_B' AS [Difference] FROM @TABLE2
    )
    UNION ALL
    -- Rows in table 2 that are missing from table 1.
    (
        SELECT *, 'B_not_A' AS [Difference] FROM @TABLE2
        EXCEPT
        SELECT *, 'B_not_A' AS [Difference] FROM @TABLE1
    )
)
How is this implemented correctly?
Can anybody help me?
You cannot do this in a function. The only way you can pass table names as parameters is to use Dynamic SQL, and Dynamic SQL is not allowed in functions. You CAN do it with a stored procedure.
You can create this stored procedure that counts if the tables have the same column_names:
-- Prints whether two tables expose the same set of column names.
-- Only column names are compared (not data types or row contents).
--   @table1, @table2: table names as they appear in information_schema.COLUMNS.table_name
CREATE PROCEDURE checkEqualTables
    @table1 varchar(100),
    @table2 varchar(100)
AS
BEGIN
    DECLARE @xCount int;

    -- Count columns of either table that have no same-named column in the
    -- other table, so a column missing on either side is detected.
    SELECT @xCount = COUNT(*)
    FROM information_schema.COLUMNS AS c
    WHERE c.table_name IN (@table1, @table2)
      AND NOT EXISTS (
          SELECT 1
          FROM information_schema.COLUMNS AS o
          WHERE o.column_name = c.column_name
            AND o.table_name = CASE WHEN c.table_name = @table1 THEN @table2 ELSE @table1 END
      );

    IF (@xCount <= 0)
        PRINT 'Tables are equal!';
    ELSE
        PRINT 'Tables are not equal!'
END
Ok I took the information from the answers and comments and researched about how to put this into procedures, and this is what I built:
I think this does what I want:
-- Returns the rows that exist in only one of the two given tables
-- (rows of @table1 missing from @table2, followed by rows of @table2 missing
-- from @table1). An empty result means the tables hold the same rows.
--   @table1, @table2: names of objects in the current database
--                     (optionally schema-qualified).
-- Raises an error if either name cannot be resolved.
CREATE PROCEDURE checkEqualTables
    @table1 nvarchar(100),
    @table2 nvarchar(100)
AS
BEGIN
    -- Resolve the names to object ids and rebuild them as quoted
    -- schema.object identifiers, so that arbitrary text in the parameters
    -- can never be executed as SQL.
    DECLARE @id1 int = OBJECT_ID(@table1);
    DECLARE @id2 int = OBJECT_ID(@table2);
    DECLARE @name1 nvarchar(517) = QUOTENAME(OBJECT_SCHEMA_NAME(@id1)) + N'.' + QUOTENAME(OBJECT_NAME(@id1));
    DECLARE @name2 nvarchar(517) = QUOTENAME(OBJECT_SCHEMA_NAME(@id2)) + N'.' + QUOTENAME(OBJECT_NAME(@id2));

    IF @name1 IS NULL OR @name2 IS NULL
    BEGIN
        RAISERROR(N'checkEqualTables: both @table1 and @table2 must name existing objects in the current database.', 16, 1);
        RETURN;
    END

    -- Each EXCEPT is parenthesised: without the parentheses the set operators
    -- are evaluated left to right and only the second difference survives.
    DECLARE @SQL nvarchar(max);
    SET @SQL = N'(SELECT * FROM ' + @name1 + N' EXCEPT SELECT * FROM ' + @name2 + N')'
             + N' UNION ALL '
             + N'(SELECT * FROM ' + @name2 + N' EXCEPT SELECT * FROM ' + @name1 + N')';

    EXECUTE sp_executesql @SQL;
END
I've applied a "borrowed" solution to splitting a delimited string into rows (I'm working in MSSQL 2008 R2), but the solution was specific to CTE/recursive queries. This works great, but only transforms one row of data. How would I adjust this to return all rows in my table (or, better yet, be able to include a where clause)?
I have struck out on researching this for two days, and as I'm not too experienced with CTE/recursive queries or XML...any expertise would be welcome! Thanks!!
-- Splits the ';'-delimited ObjectName of ONE row of Objects into rows.
-- The variables are assigned from a SELECT over the whole table, so they end
-- up holding the values of a single row only; this is the limitation the
-- question asks about.
DECLARE @RowData varchar(2000)
DECLARE @SplitOn varchar(1)
DECLARE @ObjectID int

SELECT
    @ObjectID = ObjectID, @RowData = ObjectName, @SplitOn = ';'
FROM Objects

-- Turn the delimited text into <t><r>value</r>...</t> so nodes() can split it.
DECLARE @XML AS xml
SET @XML = '<t><r>' + REPLACE(@RowData, @SplitOn, '</r><r>') + '</r></t>'

SELECT @ObjectID AS objectid, RTRIM(LTRIM(t.r.value('.', 'VARCHAR(8000)'))) AS splitvalue
FROM @XML.nodes('/t/r') AS t(r)
I would start by creating a table valued function that does the string splitting like the one found here: http://ole.michelsen.dk/blog/split-string-to-table-using-transact-sql.html
Then use an OUTER APPLY to use these against rows in a table. It is a query, so you can apply a where clause. Here is an example of the create for the function, a temp table that I filled with some test data and the select statement.
-- Splits @String on @Delimiter and returns one row per item.
--   @String:    the delimited text to split
--   @Delimiter: the single-character separator
-- Returns: Id   - 1-based position of the item within @String
--          Data - the item text
-- The recursive CTE produces one row per item, so a statement that splits
-- more than 100 items must raise the recursion limit itself with
-- OPTION (MAXRECURSION n); that hint cannot be placed inside the function.
CREATE FUNCTION [dbo].[Split]
(
    @String NVARCHAR(4000),
    @Delimiter NCHAR(1)
)
RETURNS TABLE
AS
RETURN
(
    -- stpos/endpos: start position of an item and position of the delimiter
    -- that ends it (endpos = 0 for the last item).
    WITH Split (stpos, endpos) AS
    (
        SELECT 0 AS stpos, CHARINDEX(@Delimiter, @String) AS endpos
        UNION ALL
        SELECT endpos + 1, CHARINDEX(@Delimiter, @String, endpos + 1)
        FROM Split
        WHERE endpos > 0
    )
    SELECT
        -- Number the items by their start position so Id follows the order in
        -- which the items appear in @String.
        ROW_NUMBER() OVER (ORDER BY stpos) AS Id,
        -- For the last item (endpos = 0) read up to the end of the string.
        SUBSTRING(@String, stpos, COALESCE(NULLIF(endpos, 0), LEN(@String) + 1) - stpos) AS Data
    FROM Split
)
GO
-- Demo data: drop any leftover copy of the temp table so the script can be re-run.
-- Temp tables live in tempdb, so the existence check must look there.
IF ( OBJECT_ID(N'tempdb..#Object') IS NOT NULL ) DROP TABLE [#Object];

CREATE TABLE [#Object]
(
    [ObjectId] INT NOT NULL IDENTITY(1, 1)
  , [Object] VARCHAR(1000) NOT NULL
);

INSERT INTO #Object
        ( [Object] )
VALUES  ( 'brad;bill;jerry' ), ( 'Scott;MATT;DEAN' ), ( 'larry;bob;john' )
GO

-- Split every selected row's list into one output row per item.
-- The WHERE clause filters the source rows as in any other query.
SELECT [tt].[ObjectId]
     , [s].[Data]
FROM #Object AS [tt]
OUTER APPLY dbo.Split([tt].[Object], ';') AS s
WHERE tt.[ObjectId] < 3
Just replace the temp table stuff with your table(s). Hope that helps!
The reason you are not getting more than one row is that you are thinking about sets in a programmatic way instead of in a set-centric fashion.
SELECT @ObjectID .... gets one row ... the last row.
I suggest you use CROSS APPLY:
-- Sample data: two rows, each holding a ';'-delimited list
CREATE TABLE #Objects ( ObjectID INTEGER, ObjectName VARCHAR(2000) )

INSERT INTO #Objects ( ObjectID, ObjectName )
VALUES ( 1, 'foo;bar' ),
       ( 2, 'biz;baz;buz' )

-- The split itself: build one XML document per source row, then shred it
-- with nodes(). A WHERE clause can be added to the inner or the outer query.
SELECT
    src.ObjectID,
    RTRIM( LTRIM( piece.node.value( '.', 'VARCHAR(8000)' ) ) ) AS splitvalue
FROM
(
    SELECT
        o.ObjectID,
        CONVERT( XML, '<t><r>' + REPLACE( o.ObjectName, ';', '</r><r>' ) + '</r></t>' ) AS xml_part
    FROM #Objects AS o
) AS src
CROSS APPLY src.xml_part.nodes('/t/r') AS piece(node)

-- Remove the sample data
DROP TABLE #Objects
Let me know if this helps.
Cheers!
I have a tables as follows. Table #temp
Product Date 1st Pass Count 2nd Pass Count 3rd Pass Count
A 06-07-2015 2 4 5
A 06-07-2015 3 2 1
B 06-07-2015 1 1 1
Now I want a view as follows;
Product 06-07-2015 07-07-2015 08-07-2015
A 17 0 0
B 3 0 0
The date column is a sum of the 1st, 2nd and 3rd pass.
I have tried the query below . 2 problems I need help with.
Problem 1 - More than one row for Product A.
Problem 2 - I can't seem to add the 1st, 2nd and 3rd pass counts together in the SQL query with PIVOT. I tried sum([1st pass]+[2nd pass]+[3rd pass]) and it gave a syntax error.
Current code that works before I try things to correct the 2 problems above.
-- Asker's dynamic pivot with the transcription/syntax errors repaired.
-- The two issues described in the question are intentionally still present:
--   1. [2nd Pass] and [3rd Pass] remain in the pivot source, so PIVOT groups
--      by them too and a product can come back on more than one row;
--   2. only [1st Pass] is summed.
DECLARE @cols AS NVARCHAR(MAX)
DECLARE @query AS NVARCHAR(MAX)

-- Bracket-quoted, comma-separated list of the distinct dates (pivot columns).
SELECT @cols = STUFF((SELECT ',' + QUOTENAME([DATE])
                      FROM #TEMP
                      GROUP BY [DATE]
                      ORDER BY [DATE]
                      FOR XML PATH(''), TYPE
                     ).value('.', 'NVARCHAR(MAX)'), 1, 1, '')

SET @query = 'SELECT [PRODUCT],' + @cols
           + ' from (Select [PRODUCT],[DATE],[1st Pass],[2nd Pass],[3rd Pass] from #TEMP) x'
           + ' Pivot (sum([1st Pass]) FOR [DATE] in (' + @cols + ')) p'

EXECUTE (@query)
Is there something obvious I am missing here in terms of solving these last 2 problems ?
We can get the above result set by using PIVOT and CROSS APPLY.
Normal Pivot
-- Static pivot: total of the three pass counts per product, one column per date.
DECLARE @t TABLE (Product Varchar(5), dated varchar(10), firstcount int, secondcount int, thirdcpount int)

INSERT INTO @t (Product, dated, firstcount, secondcount, thirdcpount) VALUES
('A','06-07-2015',2,4,5),
('A','06-07-2015',3,2,1),
('B','06-07-2015',1,1,1)

SELECT
    Product,
    SUM(ISNULL([06-07-2015],0)) AS [06-07-2015],
    SUM(ISNULL([07-07-2015],0)) AS [07-07-2015],
    SUM(ISNULL([08-07-2015],0)) AS [08-07-2015]
FROM (
    -- Unpivot the three count columns into (COL, val) rows so that a single
    -- SUM(val) adds all three passes together.
    SELECT Product, dated, COL, val
    FROM @t
    CROSS APPLY (VALUES ('firstcount', firstcount),
                        ('secondcount', secondcount),
                        ('thirdcpount', thirdcpount)) CS (COL, val)
) TT
PIVOT (SUM(VAL) FOR Dated IN ([06-07-2015],[07-07-2015],[08-07-2015])) T
-- COL is still part of the pivot source, so PIVOT returns one row per
-- Product/COL pair; the outer GROUP BY folds those into one row per product.
GROUP BY Product
And
by using Dynamic Query Pivot
-- Dynamic pivot: total of the three pass counts per product, with one output
-- column per distinct date found in #t.
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t
GO

CREATE TABLE #t (Product Varchar(5), dated varchar(10), firstcount int, secondcount int, thirdcpount int)

INSERT INTO #t (Product, dated, firstcount, secondcount, thirdcpount) VALUES
('A','06-07-2015',2,4,5),
('A','06-07-2015',3,2,1),
('B','06-07-2015',1,1,1),
('A','07-07-2015',2,11,5),
('A','07-07-2015',3,2,1),
('B','07-07-2015',1,1,1),
('A','08-07-2015',3,11,6),
('A','08-07-2015',1,6,1),
('B','08-07-2015',11,1,6)

DECLARE @statement NVARCHAR(MAX)
      , @columns NVARCHAR(MAX)      -- [date1], [date2], ... for PIVOT ... IN (...)
      , @select_list NVARCHAR(MAX)  -- SUM(ISNULL([date1],0)) AS [date1], ...

-- Both lists are built from the dates actually present in #t, so the output
-- columns always match the pivoted columns.
SELECT @columns = STUFF((
           SELECT N', ' + QUOTENAME(dated)
           FROM #t
           GROUP BY dated
           ORDER BY dated
           FOR XML PATH(''), TYPE
       ).value('.', 'NVARCHAR(MAX)'), 1, 2, N'')

SELECT @select_list = STUFF((
           SELECT N', SUM(ISNULL(' + QUOTENAME(dated) + N',0)) AS ' + QUOTENAME(dated)
           FROM #t
           GROUP BY dated
           ORDER BY dated
           FOR XML PATH(''), TYPE
       ).value('.', 'NVARCHAR(MAX)'), 1, 2, N'')

-- The inner query unpivots the three count columns into (COL, val) rows so a
-- single SUM(val) adds all passes; the outer GROUP BY folds the per-COL rows
-- returned by PIVOT into one row per product.
SELECT @statement = N'SELECT Product, ' + @select_list + N'
FROM (
    SELECT Product, dated, COL, val
    FROM #t
    CROSS APPLY (VALUES (''firstcount'', firstcount),
                        (''secondcount'', secondcount),
                        (''thirdcpount'', thirdcpount)) AS CS (COL, val)
) AS TT
PIVOT (SUM(val) FOR dated IN (' + @columns + N')) AS pvt
GROUP BY Product'

EXEC sp_executesql @statement