SQL Server - Select top 2 rows - sql-server

I'm attempting to write a query that will return
The most recent AccountDate with a record of 0 per locationID
Then the second most recent AccountDate per locationID. The record can be either 1 or 0.
If there are two AccountDates with the same date then return the most recent AccountDate based on DateAccountLoaded
How ever my solution doesn't look very elegant. Has anyone got a better way of achieving this.
Please see below my solution
CREATE TABLE [dbo].[TopTwoKeyed](
ID INT IDENTITY(1,1) PRIMARY KEY(ID),
[LocationID] [int] NULL,
[AccountDate] [date] NULL,
[Record] [tinyint] NULL,
[DateAccountLoaded] [date] NULL
)
INSERT INTO [dbo].[TopTwoKeyed] (
[LocationID],
AccountDate,
Record,
DateAccountLoaded
)
VALUES(1,'2009-10-31',0,'2011-03-23'),
(1,'2008-10-31',1,'2011-03-23'),
(1,'2008-10-31',0,'2010-03-22'),
(1,'2008-10-31',1,'2009-03-23'),
(1,'2011-10-31',1,'2010-03-22'),
(1,'2009-10-31',0,'2010-03-23'),
(2,'2011-10-31',0,'2010-03-23'),
(2,'2010-10-31',0,'2010-03-23'),
(2,'2010-10-31',1,'2010-03-23'),
(2,'2010-10-31',1,'2009-03-23'),
(3,'2010-10-31',0,'2010-03-23'),
(3,'2009-10-31',0,'2010-03-23'),
(3,'2008-10-31',1,'2010-03-23')
-- Get the most recent Account Date per locationID which has a record type of 0
SELECT f.LocationID
,f.AccountDate
,f.DateAccountLoaded
FROM (
SELECT ROW_NUMBER() OVER (PARTITION BY LocationID ORDER BY AccountDate DESC,DateAccountLoaded DESC) AS RowNumber
,LocationID AS LocationID
,AccountDate AS AccountDate
,DateAccountLoaded AS DateAccountLoaded
FROM [dbo].[TopTwoKeyed]
WHERE Record = 0
) f
WHERE f.RowNumber = 1
UNION ALL
SELECT ff.LocationID
,ff.AccountDate
,ff.DateAccountLoaded
FROM (
-- Get the SECOND most recent AccountDate. Can be either Record 0 or 1.
SELECT ROW_NUMBER() OVER (PARTITION BY LocationID ORDER BY AccountDate DESC,DateAccountLoaded DESC) AS RowNumber
,LocationID AS LocationID
,AccountDate AS AccountDate
,DateAccountLoaded 'DateAccountLoaded'
FROM [dbo].[TopTwoKeyed] tt
WHERE EXISTS
(
-- Same query as top of UNION. Get the most recent Account Date per locationID which has a record type of 0
SELECT 1
FROM (
SELECT ROW_NUMBER() OVER (PARTITION BY LocationID ORDER BY AccountDate DESC,DateAccountLoaded DESC) AS RowNumber
,LocationID AS LocationID
,AccountDate AS AccountDate
FROM [dbo].[TopTwoKeyed]
WHERE Record = 0
) f
WHERE f.RowNumber = 1
AND tt.LocationID = f.LocationID
AND tt.AccountDate < f.AccountDate
)
) ff
WHERE ff.RowNumber = 1
-- DROP TABLE [dbo].[TopTwoKeyed]

You could use a row_number subquery to find the most recent account date. Then you can outer apply to search for the next most recent account date:
select MostRecent.LocationID
, MostRecent.AccountDate
, SecondRecent.AccountDate
from (
select row_number() over (partition by LocationID order by
AccountDate desc, DateAccountLoaded desc) as rn
, *
from TopTwoKeyed
where Record = 0
) MostRecent
outer apply
(
select top 1 *
from TopTwoKeyed
where Record in (0,1)
and LocationID = MostRecent.LocationID
and AccountDate < MostRecent.AccountDate
order by
AccountDate desc
, DateAccountLoaded desc
) SecondRecent
where MostRecent.rn = 1
EDIT: To place the rows below eachother, you probably have to use a union. A single row_number can't work because the second row has different criterium for the Record column.
; with Rec0 as
(
select ROW_NUMBER() over (partition by LocationID
order by AccountDate desc, DateAccountLoaded desc) as rn
, *
from TopTwoKeyed
where Record = 0
)
, Rec01 as
(
select ROW_NUMBER() over (partition by LocationID
order by AccountDate desc, DateAccountLoaded desc) as rn
, *
from TopTwoKeyed t1
where Record in (0,1)
and not exists
(
select *
from Rec0 t2
where t2.rn = 1
and t1.LocationID = t2.LocationID
and t2.AccountDate < t1.AccountDate
)
)
select *
from Rec0
where rn = 1
union all
select *
from Rec01
where rn = 1

Related

Select ID for corresponding max date using GROUP BY

My table structure as below
Category Sex Last Modified Date Id
7 2 2015-01-16 87603
7 1 2014-11-27 87729
7 2 2018-09-06 87135
7 1 2017-12-27 87568
My sql query as below
SELECT
MAX(Id) AS Id
FROM
Table
GROUP BY
Category, Sex
Result as below
87603
87729
But I would like to get Id as Max Last Modified Date. Correct result should be as below
87135
87568
You can use ROW_NUMBER() to find most recent row per group:
SELECT Id, LastModifiedDate
FROM (
SELECT Id, LastModifiedDate, ROW_NUMBER() OVER (PARTITION BY Category, Sex ORDER BY LastModifiedDate DESC) AS rnk
FROM t
) AS cte
WHERE rnk = 1
Use RANK() if you're interested in finding all rows with ties for LastModifiedDate.
You can also get it as
SELECT T.*
FROM
(
SELECT Sex,
MAX([Last Modified Date]) [Last Modified Date],
Category
FROM T
GROUP BY Sex,
Category
) TT INNER JOIN T ON T.[Last Modified Date] = TT.[Last Modified Date]
WHERE T.Sex = TT.Sex
AND
T.Category = TT.Category;
Returns:
+----------+-----+---------------------+-------+
| Category | Sex | Last Modified Date | Id |
+----------+-----+---------------------+-------+
| 7 | 2 | 06/09/2018 00:00:00 | 87135 |
| 7 | 1 | 27/12/2017 00:00:00 | 87568 |
+----------+-----+---------------------+-------+
We can get the solution by joining the same table with its grouped set:
SELECT MIN(T.Id)
FROM Table T
INNER JOIN (SELECT Category,
Sex,
MAX(LastModifiedDate) AS LastModifiedDate
FROM Table
GROUP BY Category, Sex) GT
ON GT.Category = T.Category
AND GT.Sex = T.Sex
AND GT.LastModifiedDate = T.LastModifiedDate
GROUP BY T.Category, T.Sex
Other option is to use correlated subquery :
select t.*
from table t
where t.LastModifiedDate = (select max(t1.LastModifiedDate)
from table t1
where t1.Category = t.Category and t1.Sex = t.Sex
);
Here are a few different approaches... (in no particular order)
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
DROP TABLE #TestData;
GO
CREATE TABLE #TestData (
Category TINYINT NOT NULL,
Sex TINYINT NOT NULL,
LastModifiedDate DATE NOT NULL,
Id INT NOT NULL
);
GO
INSERT #TestData(Category, Sex, LastModifiedDate, Id) VALUES
(7, 2, '2015-01-16', 87603),
(7, 1, '2014-11-27', 87729),
(7, 2, '2018-09-06', 87135),
(7, 1, '2017-12-27', 87568);
GO
/* nonclustered index to support the query. */
CREATE UNIQUE NONCLUSTERED INDEX ix_TestData_Category_Sex_LastModifiedDate
ON #TestData (Category ASC, Sex ASC, LastModifiedDate DESC)
INCLUDE (Id);
GO
--====================================================
-- option 1: TOP(n) WITH TIES...
SELECT TOP (1) WITH TIES
td.Id
FROM
#TestData td
ORDER BY
ROW_NUMBER() OVER (PARTITION BY td.Category, td.Sex ORDER BY td.LastModifiedDate DESC);
GO
-----------------------------------------------------
-- option 2: Filter on ROW_NUMBER()...
WITH
cte_AddRN AS (
SELECT
td.Id,
rn = ROW_NUMBER() OVER (PARTITION BY td.Category, td.Sex ORDER BY td.LastModifiedDate DESC)
FROM
#TestData td
)
SELECT
arn.Id
FROM
cte_AddRN arn
WHERE
arn.rn = 1;
GO
-----------------------------------------------------
-- option 3: binary concatination...
SELECT
Id = CONVERT(INT, SUBSTRING(MAX(bv.bin_val), 4, 4))
FROM
#TestData td
CROSS APPLY ( VALUES (CONVERT(BINARY(3), td.LastModifiedDate) + CONVERT(BINARY(4), td.Id)) ) bv (bin_val)
GROUP BY
td.Category,
td.Sex;
GO
--====================================================

Update null values by value in same column

I have a table in MS SQL Server, where are some null values in column "value"
Group ID Value
A 1 10
A 2
A 3
A 4 40
B 1
B 2 20
B 3 30
B 4
I want to update null values by not null in the same group with with the first higher ID, or if there is not any higher in same group, first lower. So the result should look like this.
Group ID Value
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
Thanks!
You can use windowed version of SUM function in order to determine islands of NULL valued records along with the record having the higher ID in the same group:
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp
FROM mytable
Output:
Group ID Value grp
-----------------------
A 4 40 1
A 3 30 2
A 2 NULL 2
A 1 NULL 2
B 4 40 1
B 3 NULL 1
B 2 20 2
B 1 10 3
You can now wrap the above query in a CTE and use another CTE to do the update:
;WITH CTE AS (
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp
FROM mytable
), ToUpdate AS (
SELECT [Group], ID, Value,
MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value
FROM CTE
)
UPDATE ToUpdate
SET Value = group_value
WHERE Value IS NULL
Demo here
Edit:
The above query doesn't handle the edge case where the very last record within a Group slice is NULL. To handle this case as well you can use the following query:
;WITH CTE AS (
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID) AS grp2
FROM mytable
), ToUpdate AS (
SELECT [Group], ID, Value,
MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value,
MAX(Value) OVER (PARTITION BY [Group], grp2) AS group_value2
FROM CTE
)
UPDATE ToUpdate
SET Value = COALESCE(group_value, group_value2)
WHERE Value IS NULL
Demo here
Please try this-
DATA GENERATION
DECLARE #T TABLE
(
GroupCd CHAR(1),
Id INT,
Value INT
)
INSERT INTO #T
VALUES('A',1,10),
('A',2,NULL),
('A',3,NULL),
('A',4,40),
('B',1,NULL),
('B',2,20),
('B',3,30),
('B',4,NULL)
SOLUTION
UPDATE a
SET a.Value = b.Value
FROM #T a
INNER JOIN
(
SELECT a.GroupCd,a.Id,Coalesce(a.Value,z.Value,z1.Value) Value
FROM #T a
OUTER APPLY
(
SELECT TOP 1 Value
FROM #T b
WHERE a.GroupCd = b.GroupCd
AND b.Value IS NOT NULL AND a.Id < b.Id
ORDER BY Id
)z
OUTER APPLY
(
SELECT TOP 1 Value
FROM #T b
WHERE a.GroupCd = b.GroupCd
AND b.Value IS NOT NULL AND a.Id > b.Id
ORDER BY Id DESC
)z1
)b ON a.GroupCd = b.GroupCd AND a.Id = b.Id
SELECT * FROM #T
OUTPUT
GroupCd Id Value
------- ----------- -----------
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
(8 rows affected)
You Can try This simple Method
DECLARE #T TABLE
(
GroupCd CHAR(1),
Id INT,
Value INT
)
INSERT INTO #T
VALUES('A',1,NULL),
('A',2,NULL),
('A',3,30),
('A',4,40),
('B',1,10),
('B',2,20),
('B',3,NULL),
('B',4,40)
SELECT
*,
NewVal = COALESCE(Value,(SELECT TOP 1 Value FROM #T WHERE GroupCd = T.GroupCd AND Id > T.Id AND Value IS NOT NULL ORDER BY Id ASC))
FROM #T T
My Result
update MY_TABLE set [value] = [newValue] from (
select [Group] [newGroup],
[Value] [newValue]
from (
select [Group], [Value],
row_number() over (partition by [group] order by [Id] desc) [rn]
from MY_TABLE
where [Value] is not null
) [a] where [rn] = 1
) where [Group] = [newGroup] and [Value] is null

SQL LAG Days since last order

Hi I am trying to create a windowed query in SQL that shows me the days since last order for each customer.
It now shows me the days in between each order.
What do I need to change in my query to have it only show the days since the last and the previous order per customer? Now it shows it for every order the customer made.
Query:
SELECT klantnr,besteldatum,
DATEDIFF(DAY,LAG(besteldatum) OVER(PARTITION BY klantnr ORDER BY besteldatum),besteldatum) AS DaysSinceLastOrder
FROM bestelling
GROUP BY klantnr,besteldatum;
You can use row_number() to order the rows by besteldatum for each klantnr, and return the latest two using a derived table (subquery) or common table expression.
derived table version:
select klantnr, besteldatum, DaysSinceLastOrder
from (
select klantnr, besteldatum
, DaysSinceLastOrder = datediff(day,lag(besteldatum) over (partition by klantnr order by besteldatum),besteldatum)
, rn = row_number() over (partition by klantnr order by besteldatum desc)
from bestelling
group by klantnr, besteldatum
) t
where rn = 1
common table expression version:
;with cte as (
select klantnr, besteldatum
, DaysSinceLastOrder = datediff(day,lag(besteldatum) over (partition by klantnr order by besteldatum),besteldatum)
, rn = row_number() over (partition by klantnr order by besteldatum desc)
from bestelling
group by klantnr, besteldatum
)
select klantnr, besteldatum, DaysSinceLastOrder
from cte
where rn = 1
If you want one row per customer, rn = 1 is the proper filter. If you want n number of latest rows, use rn < n+1.

SQL Server 2008 R2 GROUP BY or OVER

I have this table:
ID COLOR TYPE DATE
-------------------------------
1 blue A 2012.02.05
2 white V 2010.10.23
3 white V 2014.03.05
4 black S 2013.02.14
I'd like to select only the ID, but in case of 2nd and 3rd rows I want to select the 3rd row because of its latest DATE value.
I have tried this query but it gives back all the two rows:
SELECT
ID, MAX(DATE) OVER(PARTITION BY COLOR, TYPE)
FROM
TABLE
WHERE
...
How can I select just one column value while I group the rows by other columns, please?
;WITH CTE AS
(
SELECT * , ROW_NUMBER() OVER (PARTITION BY COLOR,[TYPE] ORDER BY [DATE] DESC) rn
FROM TableName
)
SELECT ID
,COLOR
,[TYPE]
,[DATE]
FROM CTE
WHERE rn = 1
OR
SELECT ID
,COLOR
,[TYPE]
,[DATE]
FROM
(
SELECT * , ROW_NUMBER() OVER (PARTITION BY COLOR,[TYPE] ORDER BY [DATE] DESC) rn
FROM TableName
) A
WHERE rn = 1

T-SQL - aggregate in all rows using aggregate from correlated sub-query

store item datekey onhand salesunits
--------------------------------------------
001 A 50 65 2
001 A 51 8 4
001 A 52 0 8
--------------------------------------------
What I need to accomplish: to get the latest onhand greater than zero minus the total units sold, by store and item. So in the example above it would be 8-14=-6.
I am using a correlated sub-query to determine the latest datekey and then joining back to the main query. But obviously by doing so I lose the data related to the other rows necessary to sum the salesunits:
This is what I have and it's wrong:
select s1.Store, s1.Item, s1.OnHand, sum(salesunit)
from sales s1
join (select top 1 store,item, max(DateKey) as datekey
from sales
where isnull(onhand,0) > 0
and DateKey in (50,51,52)
group by store, item) s2 on s2.store=s1.store and s2.item=s1.item and s2.datekey=s1.datekey
group by s1.Store, s1.Item, s1.OnHand
Thanks for your help!
;
WITH totals AS (
SELECT
*,
totalsalesunits = SUM(salesunits) OVER (PARTITION BY store, item),
rnk = ROW_NUMBER() OVER (PARTITION BY store, item
ORDER BY SIGN(onhand) DESC, datekey DESC)
FROM sales
)
SELECT
store,
item,
onhand,
totalsalesunits
FROM totals
WHERE rnk = 1
I would do it something like this:
First some test data:
DECLARE #tbl TABLE
(
store VARCHAR(4),
item VARCHAR(2),
datekey INT,
onhand INT,
salesUnits INT
)
INSERT INTO #tbl
VALUES
('001','A',50,65,2),
('001','A',51,8,4),
('001','A',52,0,8)
The the query like this:
;WITH cteTotalSales AS
(
SELECT
SUM(tbl.salesUnits) OVER(PARTITION BY 1) AS TotalSalesUnit,
tbl.store,
tbl.item,
ISNULL(tbl.onhand,0) AS onhand,
tbl.salesUnits,
tbl.datekey
FROM
#tbl AS tbl
), cteLatest AS
(
SELECT
RANK() OVER
(
PARTITION BY cteTotalSales.store,cteTotalSales.item
ORDER BY cteTotalSales.datekey DESC
) AS iRank,
cteTotalSales.store,
cteTotalSales.item,
cteTotalSales.onhand,
cteTotalSales.salesUnits,
cteTotalSales.datekey
FROM
cteTotalSales
WHERE
(cteTotalSales.onhand-cteTotalSales.TotalSalesUnit)>0
)
SELECT
*
FROM
cteLatest
WHERE
iRank=1
;with a as
(
select rn = row_number() over (partition by store, item order by case when onhand = 0 then -1 else datekey end desc),
Store, Item, OnHand, salesunit
from sales
)
select store, item, sum(case when rn = 1 then onhand end)-sum(salesunit) OnHand, sum(salesunit) sumsalesunit from a
group by store, item

Resources