Select Distinct on one column and eliminate nulls in Select Distinct? - sql-server

follow this question
I have...
ID SKU PRODUCT
=======================
1 FOO-23 Orange
2 BAR-23 Orange
3 FOO-24 Apple
4 FOO-25 Orange
5 FOO-25 null
6 FOO-25 null
expected result:
1 FOO-23 Orange
3 FOO-24 Apple
5 FOO-25 null
6 FOO-25 null
This query isn't getting me there. How can I SELECT DISTINCT on just one column and eliminate null in SELECT DISTINCT?
SELECT *
FROM (SELECT ID, SKU, Product,
ROW_NUMBER() OVER (PARTITION BY PRODUCT ORDER BY ID) AS RowNumber
FROM MyTable
WHERE SKU LIKE 'FOO%') AS a
WHERE a.RowNumber = 1

Perhaps one approach is using the WITH TIES in concert with a conditional PARTITION
Example
Declare #YourTable Table ([ID] int,[SKU] varchar(50),[PRODUCT] varchar(50))
Insert Into #YourTable Values
(1,'FOO-23','Orange')
,(2,'BAR-23','Orange')
,(3,'FOO-24','Apple')
,(4,'FOO-25','Orange')
,(5,'FOO-25',NULL)
,(6,'FOO-25',NULL)
Select top 1 with ties *
From #YourTable
Where SKU Like 'FOO%'
Order By Row_Number() over (Partition By IsNull(Product,NewID()) Order By ID)
Returns
ID SKU PRODUCT
6 FOO-25 NULL
5 FOO-25 NULL
3 FOO-24 Apple
1 FOO-23 Orange

Using John Cappelletti's sample data here is another approach. All you really needed was to add the OR predicate to your where clause.
Declare #YourTable Table ([ID] int,[SKU] varchar(50),[PRODUCT] varchar(50))
Insert Into #YourTable Values
(1,'FOO-23','Orange')
,(2,'BAR-23','Orange')
,(3,'FOO-24','Apple')
,(4,'FOO-25','Orange')
,(5,'FOO-25',NULL)
,(6,'FOO-25',NULL)
SELECT *
FROM
(
SELECT ID
, SKU
, Product
, ROW_NUMBER() OVER (PARTITION BY PRODUCT ORDER BY ID) AS RowNumber
FROM #YourTable
WHERE SKU LIKE 'FOO%'
) AS a
WHERE a.RowNumber = 1
OR a.PRODUCT IS NULL --This was the only part you were missing

I changed your row_number to dense rank:
Declare #YourTable Table ([ID] int,[SKU] varchar(50),[PRODUCT] varchar(50))
Insert Into #YourTable Values
(1,'FOO-23','Orange')
,(2,'BAR-23','Orange')
,(3,'FOO-24','Apple')
,(4,'FOO-25','Orange')
,(5,'FOO-25',NULL)
,(6,'FOO-25',NULL)
SELECT *
FROM (SELECT ID, SKU, Product,
Dense_RANK() OVER (PARTITION BY SKU ORDER BY Product) AS RowNumber
FROM #YourTable
WHERE left(SKU,3) = 'FOO') AS a
WHERE a.RowNumber = 1
Results:
ID SKU Product RowNumber
1 FOO-23 Orange 1
3 FOO-24 Apple 1
5 FOO-25 NULL 1
6 FOO-25 NULL 1

Related

Get sum up of every 2nd day data between a selected date range

I having table like below in Sql Server. I need to get data within in a date range, for example -: StartDate = '2020-09-01' and EndDate = '2020-09-11'. Its quite simple to get data between a date range but complicated part is that,i need to Sum up data in every 2nd day in the selected date range.
For Example -:
As in the above image, i need to Sum up of SKU in every 2nd day in single column. Could anyone help me out with the query for this result output.
CREATE TABLE #Temp
(
Sku Nvarchar(50),
OrderDate DateTime,
Quantity Int,
)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-01 00:00:00.000',2)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-02 00:00:00.000',1)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-03 00:00:00.000',3)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-04 00:00:00.000',4)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-05 00:00:00.000',5)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-06 00:00:00.000',6)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-07 00:00:00.000',2)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-08 00:00:00.000',1)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-09 00:00:00.000',3)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-10 00:00:00.000',1)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#xyz','2020-09-11 00:00:00.000',10)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#abc','2020-09-01 00:00:00.000',1)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#abc','2020-09-02 00:00:00.000',10)
INSERT INTO #Temp(Sku,OrderDate,Quantity)Values('#abc','2020-09-03 00:00:00.000',10)
select * from #Temp
Use row_number() window function to generate a sequence number per Sku. Do a GROUP BY (rn - 1) / 2. HAVING COUNT(*) = 2 is to only consider those with 2 rows
; with
cte as
(
select *, rn = row_number() over (partition by Sku order by OrderDate)
from #Temp
)
select Sku, sum(Quantity)
from cte
group by Sku, (rn - 1) / 2
having count(*) = 2
order by Sku , (rn - 1) / 2
Use STRING_AGG if you want the result in CSV.
With ROW_NUMBER() and LAG() window functions:
select Sku, Quantity
from (
select Sku,
row_number() over (partition by Sku order by OrderDate) rn,
Quantity + lag(Quantity) over (partition by Sku order by OrderDate) Quantity
from #Temp
where OrderDate between '20200901' and '20200911'
) t
where rn % 2 = 0
order by Sku, rn;
See the demo.
Results:
> Sku | Quantity
> :--- | -------:
> #abc | 11
> #xyz | 3
> #xyz | 7
> #xyz | 11
> #xyz | 3
> #xyz | 4
Something like this
;with
string_cte(Sku, OrderDate, Quantity, rn_grp) as(
select *, (row_number() over (partition by Sku order by OrderDate)+1)/2
from #Temp),
sum_cte(Sku, rn_grp, sum_quantity) as (
select Sku, rn_grp, sum(quantity)
from string_cte
group by Sku, rn_grp
having count(*)>1)
select
Sku, string_agg(sum_quantity, ',') within group (order by rn_grp) SecondDaySumUp
from sum_cte
group by Sku
order by 1 desc;
Output
Sku SecondDaySumUp
#xyz 3,7,11,3,4
#abc 11

Select ID for corresponding max date using GROUP BY

My table structure as below
Category Sex Last Modified Date Id
7 2 2015-01-16 87603
7 1 2014-11-27 87729
7 2 2018-09-06 87135
7 1 2017-12-27 87568
My sql query as below
SELECT
MAX(Id) AS Id
FROM
Table
GROUP BY
Category, Sex
Result as below
87603
87729
But I would like to get Id as Max Last Modified Date. Correct result should be as below
87135
87568
You can use ROW_NUMBER() to find most recent row per group:
SELECT Id, LastModifiedDate
FROM (
SELECT Id, LastModifiedDate, ROW_NUMBER() OVER (PARTITION BY Category, Sex ORDER BY LastModifiedDate DESC) AS rnk
FROM t
) AS cte
WHERE rnk = 1
Use RANK() if you're interested in finding all rows with ties for LastModifiedDate.
You can also get it as
SELECT T.*
FROM
(
SELECT Sex,
MAX([Last Modified Date]) [Last Modified Date],
Category
FROM T
GROUP BY Sex,
Category
) TT INNER JOIN T ON T.[Last Modified Date] = TT.[Last Modified Date]
WHERE T.Sex = TT.Sex
AND
T.Category = TT.Category;
Returns:
+----------+-----+---------------------+-------+
| Category | Sex | Last Modified Date | Id |
+----------+-----+---------------------+-------+
| 7 | 2 | 06/09/2018 00:00:00 | 87135 |
| 7 | 1 | 27/12/2017 00:00:00 | 87568 |
+----------+-----+---------------------+-------+
We can get the solution by joining the same table with its grouped set:
SELECT MIN(T.Id)
FROM Table T
INNER JOIN (SELECT Category,
Sex,
MAX(LastModifiedDate) AS LastModifiedDate
FROM Table
GROUP BY Category, Sex) GT
ON GT.Category = T.Category
AND GT.Sex = T.Sex
AND GT.LastModifiedDate = T.LastModifiedDate
GROUP BY T.Category, T.Sex
Other option is to use correlated subquery :
select t.*
from table t
where t.LastModifiedDate = (select max(t1.LastModifiedDate)
from table t1
where t1.Category = t.Category and t1.Sex = t.Sex
);
Here are a few different approaches... (in no particular order)
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
DROP TABLE #TestData;
GO
CREATE TABLE #TestData (
Category TINYINT NOT NULL,
Sex TINYINT NOT NULL,
LastModifiedDate DATE NOT NULL,
Id INT NOT NULL
);
GO
INSERT #TestData(Category, Sex, LastModifiedDate, Id) VALUES
(7, 2, '2015-01-16', 87603),
(7, 1, '2014-11-27', 87729),
(7, 2, '2018-09-06', 87135),
(7, 1, '2017-12-27', 87568);
GO
/* nonclustered index to support the query. */
CREATE UNIQUE NONCLUSTERED INDEX ix_TestData_Category_Sex_LastModifiedDate
ON #TestData (Category ASC, Sex ASC, LastModifiedDate DESC)
INCLUDE (Id);
GO
--====================================================
-- option 1: TOP(n) WITH TIES...
SELECT TOP (1) WITH TIES
td.Id
FROM
#TestData td
ORDER BY
ROW_NUMBER() OVER (PARTITION BY td.Category, td.Sex ORDER BY td.LastModifiedDate DESC);
GO
-----------------------------------------------------
-- option 2: Filter on ROW_NUMBER()...
WITH
cte_AddRN AS (
SELECT
td.Id,
rn = ROW_NUMBER() OVER (PARTITION BY td.Category, td.Sex ORDER BY td.LastModifiedDate DESC)
FROM
#TestData td
)
SELECT
arn.Id
FROM
cte_AddRN arn
WHERE
arn.rn = 1;
GO
-----------------------------------------------------
-- option 3: binary concatination...
SELECT
Id = CONVERT(INT, SUBSTRING(MAX(bv.bin_val), 4, 4))
FROM
#TestData td
CROSS APPLY ( VALUES (CONVERT(BINARY(3), td.LastModifiedDate) + CONVERT(BINARY(4), td.Id)) ) bv (bin_val)
GROUP BY
td.Category,
td.Sex;
GO
--====================================================

SQL combine multiple rows into 1 row

I am trying to take rows with the same ID and return them on the same row. My data looks like the follow:
ID Fruit
1 Banana
1 Apple
1 Grapefruit
2 Cherry
2 Blueberry
3 Lime
3 Pear
And I would like it to look like this:
ID Fruit Fruit1 Fruit2
1 Banana Apple Grapefruit
2 Cherry Blueberry NULL
I have tried this as a query, but I don't seem to be having much luck:
SELECT a.[ID],a.[Fruit],b.[Fruit]
FROM [test].[dbo].[Fruit] a
JOIN [test].[dbo].[Fruit] b
ON a.ID = b.ID
WHERE a.FRUIT <> b.FRUIT
Can anybody help with this?
Thanks!
If the fruit count is not fixed, you can you use dynamic script:
IF OBJECT_ID('tempdb..#t') IS NOT NULL DROP TABLE #t
CREATE TABLE #t(ID INT,Fruit VARCHAR(100))
INSERT INTO #t(ID,Fruit)
SELECT 1,'Banana' UNION
SELECT 1,'Apple' UNION
SELECT 1,'Grapefruit' UNION
SELECT 2,'Cherry' UNION
SELECT 2,'Blueberry' UNION
SELECT 3,'Lime' UNION
SELECT 3,'Pear'
DECLARE #sql NVARCHAR(max),#cols VARCHAR(max)
SELECT #cols=ISNULL(#cols+',','')+t.col FROM (
SELECT *,'Fruit'+LTRIM(ROW_NUMBER()OVER(PARTITION BY ID ORDER BY(SELECT 1) )) AS col FROM #t AS t
) AS t GROUP BY t.col
SET #sql='
SELECT * FROM (
SELECT *,''Fruit''+LTRIM(ROW_NUMBER()OVER(PARTITION BY ID ORDER BY(SELECT 1) )) AS col FROM #t AS t
) AS t PIVOT(MAX(Fruit) FOR col in ('+#cols+')) p
'
PRINT #sql
EXEC(#sql)
IF OBJECT_ID('tempdb..#t') IS NOT NULL DROP TABLE #t
ID Fruit1 Fruit2 Fruit3
----------- ---------- ---------- ----------
1 Apple Banana Grapefruit
2 Blueberry Cherry NULL
3 Lime Pear NULL
You can use combination of a windowing function like row_number and then some conditional aggregation using a CASE expression with MAX() to get the result that you want:
select
Id,
Fruit = max(case when rn = 1 then Fruit end),
Fruit1 = max(case when rn = 2 then Fruit end),
Fruit2 = max(case when rn = 3 then Fruit end)
from
(
select
Id,
Fruit,
rn = row_number() over(partition by Id order by Id)
from [test].[dbo].[Fruit]
) d
group by Id;
See a Demo. The row_number() function creates a unique number for each id, then using this number along with CASE and MAX you will convert your rows of data into columns.
you can use pivot to do this as below:
Select Id, [0] as Fruit, [1] as [Fruit1], [2] as [Fruit2] from (
Select *, RowN = Row_Number() over (partition by Id order by Fruit) - 1 from yourtable )
pivot ( max(Fruit) for RowN in ([0], [1],[2]) ) p

I need a more reliable sort for CTE hierarchy query

Example table:
CREATE TABLE Fruit (
ID int identity(1,1) NOT NULL,
ParentID int NULL,
Name varchar(255)
);
I want to sort parent and child records from the same table in alphabetical order (more than one level deep):
Apples
--Green
----Just Sour
----Really Sour
--Red
----Big
----Small
Bananas
--etc.
I attempted this:
;WITH CTE(ID, ParentID, Name, Sort) AS
(
SELECT
ID
,ParentID
,Name
,cast('\' + Name as nvarchar(255)) AS Sort
FROM Fruit
WHERE ParentID IS NULL
UNION ALL
SELECT
a.ID
,a.ParentID
,a.Name
,cast(b.Sort + '\' + a.Name as nvarchar(255)) AS Sort
FROM Fruit a
INNER JOIN CTE b ON a.ParentID = b.ID
)
SELECT * FROM CTE Order by Sort
This produces results for the sort like:
\Apples
\Apples\Green
\Apples\Green\Just Sour
\etc.
Just when I thought things were good, it isn't reliable. For example, if an item has more than one word. Like:
\Apples
\Apples A <-- culprit
\Apples\Green
If I can expand my question while I'm at it, I'd like to show actual hyphens or something in the results:
Parent
- Child
--Grandchild
The cruddy way I quickly did this was by adding a prefix column in the table with the value of - for all records. Then I could do this:
;WITH CTE(ID, ParentID, Name, Sort, Prefix) AS
(
SELECT
ID
,ParentID
,Name
,cast('\' + Name as nvarchar(255)) AS Sort
,Prefix
FROM Fruit
WHERE ParentID IS NULL
UNION ALL
SELECT
a.ID
,a.ParentID
,a.Name
,cast(b.Sort + '\' + a.Name as nvarchar(255)) AS Sort
,cast(b.Prefix + a.Prefix as nvarchar(10)) AS Prefix
FROM Fruit a
INNER JOIN CTE b ON a.ParentID = b.ID
)
SELECT * FROM CTE Order by Sort
But that seems incorrect or not optimal.
These hierarchical queries still give me a headache, so perhaps I'm just not seeing the obvious.
I tend to use row_number() ordered by Name in this case
Example
Declare #YourTable table (id int,ParentId int,Name varchar(50))
Insert into #YourTable values
( 1, NULL,'Apples')
,( 2, 1 ,'Green')
,( 3, 2 ,'Just Sour')
,( 4, 2 ,'Really Sour')
,( 5, 1 ,'Red')
,( 6, 5 ,'Big')
,( 7, 5 ,'Small')
,( 8, NULL,'Bananas')
Declare #Top int = null --<< Sets top of Hier Try 5
Declare #Nest varchar(25) = '|-----' --<< Optional: Added for readability
;with cteP as (
Select Seq = cast(1000+Row_Number() over (Order by Name) as varchar(500))
,ID
,ParentId
,Lvl=1
,Name
From #YourTable
Where IsNull(#Top,-1) = case when #Top is null then isnull(ParentId ,-1) else ID end
Union All
Select Seq = cast(concat(p.Seq,'.',1000+Row_Number() over (Order by r.Name)) as varchar(500))
,r.ID
,r.ParentId
,p.Lvl+1
,r.Name
From #YourTable r
Join cteP p on r.ParentId = p.ID)
Select A.ID
,A.ParentId
,A.Lvl
,Name = Replicate(#Nest,A.Lvl-1) + A.Name
,Seq --<< Can be removed
From cteP A
Order By Seq
Returns
ID ParentId Lvl Name Seq
1 NULL 1 Apples 1001
2 1 2 |-----Green 1001.1001
3 2 3 |-----|-----Just Sour 1001.1001.1001
4 2 3 |-----|-----Really Sour 1001.1001.1002
5 1 2 |-----Red 1001.1002
6 5 3 |-----|-----Big 1001.1002.1001
7 5 3 |-----|-----Small 1001.1002.1002
8 NULL 1 Bananas 1002
I'm going to guess you want this result
\Apples
\Apples\Green
\Apples A
Maybe try something like this:
SELECT *
FROM CTE
ORDER BY replace(Sort, ' ', '~')
'~' is ascii 126, You can also check using excel sorting.

Select top 2 distinct for each id and date

I have a table like this :
Table1:
[Id] [TDate] [Score]
1 1.1.00 50
1 1.1.00 60
2 1.1.01 50
2 1.1.01 70
2 1.3.01 40
3 1.1.00 80
3 1.1.00 30
3 1.2.00 40
My desired output should be like this:
[ID] [TDate] [Score]
1 1.1.00 60
2 1.1.01 70
2 1.3.01 40
3 1.1.00 80
3 1.2.00 40
So fare, I have written this:
SELECT DISTINCT TOP 2 Id, TDate, Score
FROM
( SELECT Id, TDate, Score, ROW_NUMBER() over(partition by TDate order by Score) Od
FROM Table1
) A
WHERE A.Od = 1
ORDER BY Score
But it gives me :
[ID] [TDate] [Score]
2 1.1.01 70
3 1.1.00 80
of course I can do this:
"select top 2 ...where ID = 1"
and then:
union
`"Select top 2 ... where ID = 2"`
etc..
but I have a 100,000 of this..
Any way to generalize it to any Id?
Thank you.
WITH TOPTWO AS (
SELECT Id, TDate, Score, ROW_NUMBER()
over (
PARTITION BY TDate
order by SCORE
) AS RowNo
FROM [table_name]
)
SELECT * FROM TOPTWO WHERE RowNo <= 2
Your output doesn't make sense. Let me assume you want two rows per id. Then the query would look like:
SELECT TOP 2 Id, TDate, Score
FROM (SELECT Id, TDate, Score,
ROW_NUMBER() over (partition by id order by Score DESC) as seqnum
FROM Table1
) t
WHERE seqnum <= 2
ORDER BY Score;
Notes:
This assumes that you want two rows per id. Hence, id is in the PARTITION BY.
The WHERE now selects two rows per group in the PARTITION BY.
There is no need for SELECT DISTINCT in the outer query -- at least for this question.
Try this : Make partition by ID and TDate and sort by score in descending order
ROW_NUMBER() over(partition by ID,TDate order by Score DESC) Od
Complete script
WITH CTE AS(
SELECT *,
ROW_NUMBER() over(partition by ID,TDate order by Score DESC) RN
FROM TableName
)
SELECT *
FROM CTE
WHERE RN = 1
Unless I am missing something this can be done with a simple group by
First I prepare a temp table for testing :
declare #table table (ID int, TDate varchar(10), Score int)
insert into #Table values(1, '1.1.00', 50)
insert into #Table values(1, '1.1.00', 60)
insert into #Table values(2, '1.1.01', 50)
insert into #Table values(2, '1.1.01', 70)
insert into #Table values(2, '1.3.01', 40)
insert into #Table values(3, '1.1.00', 80)
insert into #Table values(3, '1.1.00', 30)
insert into #Table values(3, '1.2.00', 40)
Now lets do a select on this table
select ID, TDate, max(Score) as Score
from #table
group by ID, TDate
order by ID, TDate
The result is this :
ID TDate Score
1 1.1.00 60
2 1.1.01 70
2 1.3.01 40
3 1.1.00 80
3 1.2.00 40
So all you need to do is change #table to your table name and you are done

Resources