Select distinct values across columns - sql-server

I'm trying to create a query that returns the distinct values across 4 columns in a single row. My where clause will limit the depth of the results to just one row but keep the data separated into columns. For example
SELECT color1, color2, color3, color4 FROM art WHERE id = 1
would return (in columns)
|Red |Blue |Blue |Green |
But I want only the distinct values
|Red |Blue |Green | |
Is there such a thing as using an unpivot and then pivot back to columns?

One option is to UNPIVOT your data and then PIVOT via a dense_rank()
Example
Declare #YourTable Table ([ID] varchar(50),[Color1] varchar(50),[Color2] varchar(50),[Color3] varchar(50),[Color4] varchar(50))
Insert Into #YourTable Values
(1,'Red','Blue','Blue','Green')
Select *
From (
Select A.ID
,B.Item
,Col = concat('Color',dense_rank() over (partition by ID order by Item) )
from #YourTable A
Cross Apply ( values (color1)
,(color2)
,(color3)
,(color4)
) B(Item)
Where ID=1
) src
Pivot (max(item) for Col in (Color1,Color2,Color3,Color4) ) pvt
Returns
ID Color1 Color2 Color3 Color4
1 Blue Green Red NULL
Note: The Where ID=1 is optional

Related

Split a string in SQL by hyphen in 2012 version

I have multiple string in a column where I have get last string after column
Below are three example like same I have different number hyphen that can occur in a string but desired result is I have string before last hyphen
1. abc-def-Opto
2. abc-def-ijk-5C-hello-Opto
3. abc-def-ijk-4C-hi-Build
4. abc-def-ijk-4C-123-suppymanagement
Desired result set is
def
hello
hi
123
How to do this in SQL query to get this result set. I have MSSQL 2012 version
Require a generic sql which can get the result set
There are many ways to split/parse a string. ParseName() would fail because you may have more than 4 positions.
One option (just for fun), is to use a little XML.
We reverse the string
Convert into XML
Grab the second node
Reverse the desired value for the final presentation
Example
Declare #YourTable Table ([SomeCol] varchar(50))
Insert Into #YourTable Values
('abc-def-Opto')
,('abc-def-ijk-5C-hello-Opto')
,('abc-def-ijk-4C-hi-Build')
,('abc-def-ijk-4C-123-suppymanagement')
Select *
,Value = reverse(convert(xml,'<x>'+replace(reverse(SomeCol),'-','</x><x>')+'</x>').value('x[2]','varchar(150)'))
from #YourTable
Returns
SomeCol Value
abc-def-Opto def
abc-def-ijk-5C-hello-Opto hello
abc-def-ijk-4C-hi-Build hi
abc-def-ijk-4C-123-suppymanagement 123
Without getting into XML stuff, simply using string functions of sql server.
Declare #YourTable Table ([SomeCol] varchar(50))
Insert Into #YourTable Values
('abc-def-Opto')
,('abc-def-ijk-5C-hello-Opto')
,('abc-def-ijk-4C-hi-Build')
,('abc-def-ijk-4C-123-suppymanagement');
SELECT *
,RTRIM(LTRIM(REVERSE(
SUBSTRING(
SUBSTRING(REVERSE([SomeCol]) , CHARINDEX('-', REVERSE([SomeCol])) +1 , LEN([SomeCol]) )
, 1 , CHARINDEX('-', SUBSTRING(REVERSE([SomeCol]) , CHARINDEX('-', REVERSE([SomeCol])) +1 , LEN([SomeCol]) ) ) -1
)
)))
FROM #YourTable
i am not sure this script will exactly useful to your requirement but i am just trying to give an idea how to split the data
IF OBJECT_ID('tempdb..#Temp')IS NOT NULL
DROP TABLE #Temp
;WITH CTE(Id,data)
AS
(
SELECT 1,'abc-def-Opto' UNION ALL
SELECT 2,'abc-def-ijk-5C-hello-Opto' UNION ALL
SELECT 3,'abc-def-ijk-4C-hi-Build' UNION ALL
SELECT 4,'abc-def-ijk-4C-123-suppymanagement'
)
,Cte2
AS
(
SELECT Id, CASE WHEN Id=1 AND Setdata=1 THEN data
WHEN Id=2 AND Setdata=2 THEN data
WHEN Id=3 AND Setdata=3 THEN data
WHEN Id=4 AND Setdata=4 THEN data
ELSE NULL
END AS Data
FROM
(
SELECT Id,
Split.a.value('.','nvarchar(1000)') AS Data,
ROW_NUMBER()OVER(PARTITION BY id ORDER BY id) AS Setdata
FROM(
SELECT Id,
CAST('<S>'+REPLACE(data ,'-','</S><S>')+'</S>' AS XML) AS data
FROM CTE
) AS A
CROSS APPLY data.nodes('S') AS Split(a)
)dt
)
SELECT * INTO #Temp FROM Cte2
SELECT STUFF((SELECT DISTINCT ', '+ 'Set_'+CAST(Id AS VARCHAR(10))+':'+Data
FROM #Temp WHERE ISNULL(Data,'')<>'' FOR XML PATH ('')),1,1,'')
Result
Set_1:abc, Set_2:def, Set_3:ijk, Set_4:4C
You can do like
WITH CTE AS
(
SELECT 1 ID,'abc-def-Opto' Str
UNION
SELECT 2, 'abc-def-ijk-5C-hello-Opto'
UNION
SELECT 3, 'abc-def-ijk-4C-hi-Build'
UNION
SELECT 4, 'abc-def-ijk-4C-123-suppymanagement'
)
SELECT ID,
REVERSE(LEFT(REPLACE(P2, P1, ''), CHARINDEX('-', REPLACE(P2, P1, ''))-1)) Result
FROM (
SELECT LEFT(REVERSE(Str), CHARINDEX('-', REVERSE(Str))) P1,
REVERSE(Str) P2,
ID
FROM CTE
) T;
Returns:
+----+--------+
| ID | Result |
+----+--------+
| 1 | def |
| 2 | hello |
| 3 | hi |
| 4 | 123 |
+----+--------+
Demo

How can I ignore duplicate rows where columns do not contain data

I have a table with duplicate rows, however, some of the duplicate rows have columns does not contain data for the same column. How can I remove/ignore only those row where columns are blank? In some instances:
Name Employee# Location City
-----------------------------------------
BowerT 48999 NJ Foods
BowerT 48999 NJ Foods Pearl
BowerT 48999 NJ Foods Johns
BowerT 48999 NJ Foods Johns
I'm using with CTE to delete duplicate, however, if 2nd, 3rd, or 4th row has the data I need for that column, I lose it because these are greater than row 1.
;With hrEmployee as
(
Select
*,
Row_Number () Over (Partition BY Employee_Number order by Employee_Number) As RowNumber
From
[dbo].[hrEmployee]
Where
Employee_Number = '48999'
)
Delete hrEmployees
where RowNumber > 1
What am I missing?
Here is an entire example the relevant code change is:
ROW_NUMBER() OVER (PARTITION BY Employee_Number ORDER BY
CASE WHEN ISNULL(City,'') = '' THEN 1 ELSE 0 END
) as RowNumber
What that does is simply ORDER your results of what you want to keep by saying if the City is null or '' (blank) make it last. You can rank your results anyway you want by specifying different order in your ORDER BY.
DECLARE #Table AS TABLE (Name VARCHAR(10), Employee_Number INT, Location VARCHAR(20), City VARCHAR(20))
INSERT INTO #Table VALUES ('BowerT',48999,'NJ Foods',NULL)
,('BowerT',48999,'NJ Foods','Pearl')
,('BowerT',48999,'NJ Foods','Johns')
,('BowerT',48999,'NJ Foods','Johns')
SELECT *
FROM
#Table
;WITH hrEmployee AS (
SELECT
*
,ROW_NUMBER() OVER (PARTITION BY Employee_Number ORDER BY
CASE WHEN ISNULL(City,'') = '' THEN 1 ELSE 0 END
) as RowNumber
FROM
#Table
where Employee_Number = '48999'
)
DELETE
FROM
hrEmployee
WHERE
RowNumber > 1
SELECT *
FROM
#Table

need to update first non null field_x in non-normalised table

I have the following table that I have to work with.
SQL Fiddle
Basically, it is a product that stores up to 10 barcodes for a product code (simplified example). At any time, any number of those 10 barcode fields might have a value.
I have another table that has a list of product code and barcode, and need to add these to the product barcode table.
I need to perform an update so that any of the barcodes in barcodes_to_import are appended to the product_barcode table, into the first non null barcode column.
table product_barcodes
product_Code barcode_1 barcode_2 barcode_3 barcode_4 barcode_5
ABC 1 2 3
BCD 4
table barcodes_to_import
product_code barcode
ABC 7
BCD 8
Expected output:
product_Code barcode_1 barcode_2 barcode_3 barcode_4 barcode_5
ABC 1 2 3 7
BCD 4 8
create table product_barcodes(product_Code varchar(10),barcode_1 int,barcode_2 int,barcode_3 int
,barcode_4 int,barcode_5 int,barcode_6 int,barcode_7 int,barcode_8 int,barcode_9 int,barcode_10 int)
create table barcodes_to_import(product_code varchar(10),barcode int)
--Inserted Sample values as below
SELECT * FROM product_barcodes
SELECT * FROM barcodes_to_import
--Output Query
;with cte
as
(
select product_code,data,col_name
from product_barcodes
unpivot
(
data for col_name in (
barcode_1,barcode_2,barcode_3,barcode_4,barcode_5
,barcode_6,barcode_7,barcode_8,barcode_9,barcode_10
)
) upvt
)
,cte1
as
(
select *,ROW_NUMBER() OVER(PARTITION BY product_code ORDER BY col_name) as rn
from
(
select product_code, data,col_name from cte
union all
select product_code,barcode,'barcode_z' as col_name from barcodes_to_import
) t
)
select
product_Code
,SUM(1) as barcode_1
,SUM([2]) as barcode_2
,SUM([3]) as barcode_3
,SUM([4]) as barcode_4
,SUM([5]) as barcode_5
,SUM([6]) as barcode_6
,SUM([7]) as barcode_7
,SUM([8]) as barcode_8
,SUM([9]) as barcode_9
,SUM([10]) as barcode_10
from cte1
PIVOT
(
AVG(data) for rn in (1,[2],[3],[4],[5],[6],[7],[8],[9],[10])
) pvt
GROUP BY product_Code

Query to find the record with most matching columns, where the number of columns and names of columns is unknown?

I have two tables, X and Y, with identical schema but different records. Given a record from X, I need a query to find the closest matching record in Y that contains NULL values for non-matching columns. Identity columns should be excluded from the comparison. For example, if my record looked like this:
------------------------
id | col1 | col2 | col3
------------------------
0 |'abc' |'def' | 'ghi'
And table Y looked like this:
------------------------
id | col1 | col2 | col3
------------------------
6 |'abc' |'def' | 'zzz'
8 | NULL |'def' | NULL
Then the closest match would be record 8, since where the columns don't match, there are NULL values. 6 WOULD have been the closest match, but the 'zzz' disqualified it.
What's unique about this problem is that the schema of the tables is unknown besides the id column and the data types. There could be 4 columns, or there could be 7 columns. We just don't know - it's dynamic. All we know is that there is going to be an 'id' column and that the columns will be strings, either varchar or nvarchar.
What is the best query in this case to pick the closest matching record out of Y, given a record from X? I'm actually writing a function. The input is an integer (the id of a record in X) and the output is an integer (the id of a record in Y, or NULL). I'm an SQL novice, so a brief explanation of what's happening in your solution would help me greatly.
There could be 4 columns, or there could be 7 columns.... I'm actually writing a function.
This is an impossible task. Because functions are deterministic, so you cannot have a function that will work on an arbitrary table structure, using dynamic SQL. A stored procedure, sure, but not a function.
However, the below shows you a way using FOR XML and some decomposing of the XML to unpivot rows into column names and values which can then be compared. The technique used here and the queries can be incorporated into a stored procedure.
MS SQL Server 2008 Schema Setup:
-- this is the data table to match against
create table t1 (
id int,
col1 varchar(10),
col2 varchar(20),
col3 nvarchar(40));
insert t1
select 6, 'abc', 'def', 'zzz' union all
select 8, null , 'def', null;
-- this is the data with the row you want to match
create table t2 (
id int,
col1 varchar(10),
col2 varchar(20),
col3 nvarchar(40));
insert t2
select 0, 'abc', 'def', 'ghi';
GO
Query 1:
;with unpivoted1 as (
select n.n.value('local-name(.)','nvarchar(max)') colname,
n.n.value('.','nvarchar(max)') value
from (select (select * from t2 where id=0 for xml path(''), type)) x(xml)
cross apply x.xml.nodes('//*[local-name()!="id"]') n(n)
), unpivoted2 as (
select x.id,
n.n.value('local-name(.)','nvarchar(max)') colname,
n.n.value('.','nvarchar(max)') value
from (select id,(select * from t1 where id=outr.id for xml path(''), type) from t1 outr) x(id,xml)
cross apply x.xml.nodes('//*[local-name()!="id"]') n(n)
)
select TOP(1) WITH TIES
B.id,
sum(case when A.value=B.value then 1 else 0 end) matches
from unpivoted1 A
join unpivoted2 B on A.colname = B.colname
group by B.id
having max(case when A.value <> B.value then 1 end) is null
ORDER BY matches;
Results:
| ID | MATCHES |
----------------
| 8 | 1 |

Getting filtered results with subquery

I have a table with something like the following:
ID Name Color
------------
1 Bob Blue
2 John Yellow
1 Bob Green
3 Sara Red
3 Sara Green
What I would like to do is return a filtered list of results whereby the following data is returned:
ID Name Color
------------
1 Bob Blue
2 John Yellow
3 Sara Red
i.e. I would like to return 1 row per user. (I do not mind which row is returned for the particular user - I just need that the [ID] is unique.) I have something already that works but is really slow where I create a temp table adding all the ID's and then using a "OUTER APPLY" selecting the top 1 from the same table, i.e.
CREATE TABLE #tb
(
[ID] [int]
)
INSERT INTO #tb
select distinct [ID] from MyTable
select
T1.[ID],
T2.[Name],
T2.Color
from
#tb T1
OUTER APPLY
(
SELECT TOP 1 * FROM MyTable T2 WHERE T2.[ID] = T1.[ID]
) AS V2
DROP TABLE #tb
Can somebody suggest how I may improve it?
Thanks
Try:
WITH CTE AS
(
SELECT ROW_NUMBER() OVER (PARTITION BY ID ORDER BY ID) AS 'RowNo',
ID, Name, Color
FROM table
)
SELECT ID,Name,color
FROM CTE
WHERE RowNo = 1
or
select
*
from
(
Select
ID, Name, Color,
rank() over (partition by Id order by sum(Name) desc) as Rank
from
table
group by
ID
)
HRRanks
where
rank = 1
If you're using SQL Server 2005 or higher, you could use the Ranking functions and just grab the first one in the list.
http://msdn.microsoft.com/en-us/library/ms189798.aspx

Resources