Getting the last row from a ROW_NUMBER using SQL - sql-server

I am thinking there is a better way to grab the last row from a row_number instead of doing multiple nesting using T-SQL.
I need the total number of orders and the last ordered date. Say I have the following:
-- Sample data: one row per order; a person can have several orders.
-- A temp table (not a table variable) is created so that the queries below,
-- which read FROM #T, resolve against it.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;
CREATE TABLE #T (PERSON_ID INT, ORDER_DATE DATE);
INSERT INTO #T (PERSON_ID, ORDER_DATE)
VALUES
    (1, '2016/01/01'),
    (1, '2016/01/02'),
    (1, '2016/01/03'),
    (2, '2016/01/01'),
    (2, '2016/01/02'),
    (3, '2016/01/01'),
    (3, '2016/01/02'),
    (3, '2016/01/03'),
    (3, '2016/01/04');
What I want is:
PERSON_ID ORDER_DATE ORDER_CNT
1 2016-01-03 3
2 2016-01-02 2
3 2016-01-04 4
Is there a better way to do this besides the following:
-- Innermost level numbers each person's orders oldest-to-newest (ORDER_CNT);
-- the middle level ranks those numbers highest-first (LAST_ROW);
-- the outer level keeps the top-ranked row per person.
SELECT ranked.*
FROM (
    SELECT
        numbered.*,
        ROW_NUMBER() OVER (
            PARTITION BY numbered.PERSON_ID
            ORDER BY numbered.ORDER_CNT DESC
        ) AS LAST_ROW
    FROM (
        SELECT
            src.*,
            ROW_NUMBER() OVER (
                PARTITION BY src.PERSON_ID
                ORDER BY src.ORDER_DATE
            ) AS ORDER_CNT
        FROM #T AS src
    ) AS numbered
) AS ranked
WHERE ranked.LAST_ROW = 1

Yes, you can use this:
-- One row per person: the latest order date and the number of order rows.
SELECT
    t.PERSON_ID,
    MAX(t.ORDER_DATE) AS ORDER_DATE,
    COUNT(*) AS ORDER_CNT
FROM #T AS t
GROUP BY t.PERSON_ID

-- Window-function variant: rank each person's orders newest-first and attach
-- the per-person count of non-NULL order dates, then keep the newest row.
;WITH ranked_orders AS (
    SELECT
        t.PERSON_ID,
        t.ORDER_DATE,
        ROW_NUMBER() OVER (PARTITION BY t.PERSON_ID ORDER BY t.ORDER_DATE DESC) AS rn,
        COUNT(t.ORDER_DATE) OVER (PARTITION BY t.PERSON_ID) AS ORDER_CNT
    FROM #T AS t
)
SELECT
    a.PERSON_ID,
    a.ORDER_DATE,
    a.ORDER_CNT
FROM ranked_orders AS a
WHERE a.rn = 1
ORDER BY a.PERSON_ID;

Related

How to reference the current column you are defining using lag?

I have a salary table like this:
-- Sample salary history: one row per employee and effective date.
-- Table variables are declared with @ (DECLARE #t TABLE is not valid T-SQL).
declare @t table (OrderedID int, EmpID int, EffDate date, Salary money)
insert into @t (OrderedID, EmpID, EffDate, Salary)
values
(1,1234,'20150101',100)
,(2,1234,'20160101',100)
,(3,1234,'20170101',100)
,(4,1234,'20180101',300)
,(1,2351,'20150101',100)
I am trying to get an initial effective date on each row:
First 3 rows have 1/1/2015
4th row has new value 1/1/2018
Here is what I tried with a CASE and a LAG, but I can't figure out how to reference the prior value of the column I am creating.
case when OrderedID = 1 then EFFDaTe
when Salary != LAG(Salary,1) then EFFDaTe
else lag(SalaryEFFDT,1) over (order by 1)
end as SalaryEFFDT
Thanks for your help.
As you haven't provided the expected output, I think this is what you want:
-- Sample data, including a row (OrderedID 5) where the salary returns to an earlier value.
-- Table variables are declared with @ (DECLARE #t TABLE is not valid T-SQL).
declare @t table (OrderedID int, EmpID int, EffDate date, Salary money)
insert into @t (OrderedID, EmpID, EffDate, Salary)
values
(1,1234,'20150101',100)
,(2,1234,'20160101',100)
,(3,1234,'20170101',100)
,(4,1234,'20180101',300)
,(1,2351,'20150101',100)
,(5,1234,'20190101',100)
-- cte: grp = OrderedID minus the row's position within its (EmpID, Salary) partition.
-- Rows with consecutive OrderedIDs and the same salary get the same grp value.
;with cte as
(Select *, OrderedId - Row_Number() over (partition by EmpId,Salary order by OrderedID) as grp
from @t)
-- cte1: earliest effective date per (EmpID, grp).
, cte1 as
(Select EmpID, grp, min(effDate) as effDate from cte c group by EmpID, grp)
-- Attach that earliest date to every row sharing the same EmpID and grp.
Select OrderedID, t.EmpID, t.EffDate, t.Salary, c.effDate as computeddate
from cte t inner join cte1 c on t.EmpID = c.EmpID and t.grp = c.grp
order by OrderedID
So you are trying to get the first effective date for each EmpID? The code below should do that. If that is not your desired output, can you show what the output should look like?
-- Sample data. A temp table is used so that the follow-up query, which also
-- reads FROM #t, resolves against it.
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t;
CREATE TABLE #t (OrderedID int, EmpID int, EffDate date, Salary money);
insert into #t (OrderedID, EmpID, EffDate, Salary)
values
(1,1234,'20150101',100)
,(2,1234,'20160101',100)
,(3,1234,'20170101',100)
,(4,1234,'20180101',300)
,(1,2351,'20140101',100)
,(2,2351,'20150101',100);
-- Every row, plus the earliest effective date recorded for that employee.
Select
T.*,FE.FirstEff
From #t T
inner join (Select EmpID,MIN(EffDate) as FirstEff from #t group by EmpID) FE on FE.EmpID = T.EmpID
The second set is if you need the first time they have that salary, however you will have issues if someone gets a raise and then a demotion.
-- Every row, plus the earliest effective date on which that employee had that salary.
SELECT
    T.*,
    FE.FirstEff
FROM #t AS T
INNER JOIN (
    SELECT
        EmpID,
        Salary,
        MIN(EffDate) AS FirstEff
    FROM #t
    GROUP BY EmpID, Salary
) AS FE
    ON FE.EmpID = T.EmpID
    AND FE.Salary = T.Salary

SQL Select Only the Most Common Results

I have a table with IDs and Items where sometimes the associated Item has a variation from the other Items associated with the same ID. I need a query that selects the most common Item and assigns it to that ID.
The below query works, but I'm hoping to optimize it to avoid having to join two separate CTEs at the end, and rather have one slick SELECT statement:
-- Rebuild the sample table on every run.
IF OBJECT_ID('tempdb..#Test') IS NOT NULL
    DROP TABLE #Test;

CREATE TABLE #Test
(
    [ID] INT,
    [Item] VARCHAR(20)
);

INSERT #Test
VALUES
    (100, 'Apple'),
    (100, 'Apple'),
    (100, 'Apples'),
    (200, 'Orange'),
    (200, 'Orange'),
    (200, 'Orange'),
    (200, 'Oranges'),
    (300, 'Grape');

-- cteOne: occurrences of each (ID, Item) pair.
-- cteTwo: the highest occurrence count per ID.
-- Final SELECT: the item(s) whose count equals that per-ID maximum.
WITH cteOne AS (
    SELECT
        [ID],
        [Item],
        COUNT(*) AS [Count]
    FROM #Test
    GROUP BY [ID], [Item]
),
cteTwo AS (
    SELECT
        [ID],
        MAX([Count]) AS [Max]
    FROM cteOne
    GROUP BY [ID]
)
SELECT
    C1.[ID],
    C1.[Item]
FROM cteOne AS C1
INNER JOIN cteTwo AS C2
    ON C2.[ID] = C1.[ID]
    AND C2.[Max] = C1.[Count]
ORDER BY C1.[ID]
Any help is appreciated!
You can try top 1 with ties with row_number
-- Count each (ID, Item) pair, number the pairs within each ID by descending count,
-- and let TOP 1 WITH TIES return every row whose row number ties for first place.
SELECT TOP 1 WITH TIES
    t.[ID],
    t.[Item]
FROM (
    SELECT
        [ID],
        [Item],
        COUNT(*) AS [Count]
    FROM #Test
    GROUP BY [ID], [Item]
) AS t
ORDER BY ROW_NUMBER() OVER (PARTITION BY t.[ID] ORDER BY t.[Count] DESC)
This is even better:
-- cteOne: occurrences of each (ID, Item) pair.
;WITH
cteOne AS (
    SELECT [ID], [Item], COUNT(*) AS [Count]
    FROM #Test
    GROUP BY [ID], [Item]
),
-- cteTwoo: number the items within each ID from most to least frequent.
-- The count must be sorted DESC; ascending order would put the LEAST common
-- item at idx = 1. [Item] breaks ties so the pick is deterministic.
cteTwoo AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY [ID] ORDER BY [Count] DESC, [Item]) AS idx
    FROM cteOne
)
SELECT [ID], [Item]
FROM cteTwoo
WHERE idx = 1

sql query that gets the difference between 2 recent rows for every row item that occurs more than once in a table

Sql query that gets the difference between 2 recent rows for every value that occurs more than once in a table.
for example
book value date
A 4 2017-07-17 09:16:44.480
A 2 2017-08-15 10:05:58.273
B 3 2017-04-15 10:05:58.273
C 2 2017-08-15 10:05:58.273
B 3 2017-04-13 10:05:58.273
B 3 2017-04-12 10:05:58.273
should return
A 2
B 0
Here is a solution:
-- Keep the two most recent rows per book, then report max - min of their values.
-- HAVING drops books that have only one row.
SELECT
    a.book,
    MAX(a.value) - MIN(a.value) AS difference
FROM (
    SELECT
        book,
        value,
        ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS rownum
    FROM t
) AS a
WHERE a.rownum <= 2
GROUP BY a.book
HAVING MAX(a.rownum) >= 2
And here it is in SQLFiddle
-- Template (bracketed names are placeholders): groups whose compared fields occur more than once.
SELECT id_pk FROM [table] GROUP BY [fields you want to compare by] HAVING COUNT(*) > 1
This select returns the list of primary keys of the elements that are repeated.
So, in another select, you might wrap it in an outer query like:
-- Template (bracketed names are placeholders): rows whose key is returned by the duplicate-finding subquery.
Select * from [table] where id_pk in (
SELECT id_pk FROM [table] GROUP BY [fields you want to compare by] HAVING COUNT(*) > 1) limit 2
This is functional, but not necessarily good, as I have not analysed its complexity.
Add a rownumber before calculating:
-- Sample data: one row per (book, value, date) observation.
create table #test ([book] char(1), [value] int, [date] datetime)
insert into #test values ('A', 4, '2017-07-17 09:16:44.480')
insert into #test values ('A', 2, '2017-08-15 10:05:58.273')
insert into #test values ('B', 3, '2017-04-15 10:05:58.273')
insert into #test values ('C', 2, '2017-08-15 10:05:58.273')
insert into #test values ('B', 3, '2017-04-13 10:05:58.273')
insert into #test values ('B', 3, '2017-04-12 10:05:58.273')
-- cte: number all rows in (book, date) order, so within a book a higher
-- rownumber means a later date.
;with cte as(
Select ROW_NUMBER () OVER (order by [book], [date] ) as rownumber, *
from #test)
-- Outer first_value: the value of the highest-numbered [1] row per book among the joined rows.
-- abs(...) makes the reported difference non-negative; distinct collapses the duplicate join rows.
select distinct [1].book, abs(first_value([1].[Value]) over (partition by [1].book order by [1].rownumber desc) - [2].val2) as [Difference]
from cte [1]
inner join
-- [2]: every row carries val2, the value of its book's highest-numbered (latest) row.
(select rownumber, book, first_value([Value]) over (partition by book order by rownumber desc) as val2
-- The join pairs each row with the later rows of the same book, so a book with a single row yields no output.
from cte) [2] on [1].book = [2].book and [1].rownumber < [2].rownumber
I would use analytic functions:
-- For each row, fetch the previous value of the same book (by date) and number
-- the rows newest-first; the newest row then yields the latest change.
;WITH book_history AS (
    SELECT
        book,
        value,
        LAG(value) OVER (PARTITION BY book ORDER BY date) AS prev_value,
        ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS rn
    FROM MyTable
)
SELECT
    h.book,
    h.value - h.prev_value AS value_change
FROM book_history AS h
WHERE h.prev_value IS NOT NULL  -- skip books with a single row
    AND h.rn = 1
LAG() was added in SQL Server 2012, but even if you're on a higher version, your database must have the compatibility version set to 110 or higher for them to be available. Here's an alternative that should work on SQL Server 2005 or higher, or a database compatibility 90 or higher.
-- Variant without LAG(): number each book's rows newest-first, then join
-- row 1 (latest) to row 2 (second latest) of the same book.
;with CTE as (
SELECT book
,value
,ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) rn
FROM MyTable
)
SELECT c1.book
,c1.value - c2.value as value_change -- the comma before this column was missing
FROM CTE c1
INNER JOIN CTE c2
ON c1.book = c2.book
WHERE c1.rn = 1
AND c2.rn = 2

SQL - Filter on dates X number of days apart from the previous

I have a table containing orders. I would like to select those orders that are a certain number of days apart for a specific client. For example, in the table below I would like to select all of the orders for CustomerID = 10 that are at least 30 days apart from the previous instance. With the starting point to be the first occurrence (07/05/2014 in this data).
OrderID | CustomerID | OrderDate
==========================================
1 10 07/05/2014
2 10 07/15/2014
3 11 07/20/2014
4 11 08/20/2014
5 11 09/21/2014
6 10 09/23/2014
7 10 10/15/2014
8 10 10/30/2014
I would want to select OrderIDs (1,6,8) since they are 30 days apart from each other and all from CustomerID = 10. OrderIDs 2 and 7 would not be included as they are within 30 days of the previous order for that customer.
What confuses me is how to set the "checkpoint" to the last valid date. Here is a little "pseudo" SQL.
-- Pseudo-SQL: LastValidOrderDate is a placeholder for the date of the last order already kept.
SELECT OrderID
FROM Orders
WHERE CustomerID = 10
AND OrderDate > LastValidOrderDate + 30
I came here and saw that @SveinFidjestøl had already posted an answer, but after trying for so long I could not resist posting mine.
With the help of LAG and LEAD we can compare values of the same column across rows.
As per your question, you are looking for orders 1, 6 and 8; this might be helpful.
SQL SERVER 2012 and after
-- Sample orders. A temp table is used (DECLARE #temp TABLE is not valid T-SQL)
-- so that the SQL Server 2005 variant, which also reads FROM #temp, resolves.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp;
CREATE TABLE #temp
(orderid int,
customerid int,
orderDate date
);
insert into #temp (orderid, customerid, orderDate)
values
(1, 10, '07/05/2014'),
(2, 10, '07/15/2014'),
(3, 11, '07/20/2014'),
(4, 11, '08/20/2014'),
(5, 11, '09/21/2014'),
(6, 10, '09/23/2014'),
(7, 10, '10/15/2014'),
(8, 10, '10/30/2014');
-- cte: customer 10's orders with the dates of the neighbouring orders (by orderid).
with cte as
(SELECT orderid,customerid,orderDate,
LAG(orderDate) OVER (ORDER BY orderid ) PreviousValue,
LEAD(orderDate) OVER (ORDER BY orderid) NextValue
FROM #temp
WHERE customerid = 10)
-- Keep orders more than 30 days after the immediately preceding order.
-- "PreviousValue is null" keeps the first order and "NextValue is null" keeps the
-- last order regardless of its gap.
-- NOTE(review): the gap is measured to the previous order, not to the last kept
-- order, so this may not match the "checkpoint" rule on other data - confirm.
select orderid,customerid,orderDate from cte
where DATEDIFF ( day , PreviousValue , orderDate) > 30
or PreviousValue is null or NextValue is null
SQL SERVER 2005 and after
-- Variant without LAG/LEAD: number customer 10's orders by orderid and self-join
-- on rownum - 1 / rownum + 1 to reach the previous and next order.
WITH CTE AS (
SELECT
rownum = ROW_NUMBER() OVER (ORDER BY p.orderid),
p.orderid,
p.customerid,
p.orderDate
FROM #temp p
where p.customerid = 10)
SELECT CTE.orderid,CTE.customerid,CTE.orderDate,
prev.orderDate PreviousValue,
nex.orderDate NextValue
FROM CTE
LEFT JOIN CTE prev ON prev.rownum = CTE.rownum - 1
LEFT JOIN CTE nex ON nex.rownum = CTE.rownum + 1
-- AND binds tighter than OR, so the OR branches are parenthesised to keep the
-- customer filter applied to all of them.
where CTE.customerid = 10
and (
DATEDIFF ( day , prev.orderDate , CTE.orderDate) > 30
or prev.orderDate is null or nex.orderDate is null
)
GO
You can use the LAG() function, available in SQL Server 2012, together with a Common Table Expression. You calculate the days between the customer's current order and the customer's previous order and then query the Common Table Expression using the filter >= 30
-- For every order, compute the number of days since the same customer's previous
-- order (NULL for a customer's first order), then keep gaps of 30+ days or NULL.
WITH order_gaps AS (
    SELECT
        o.OrderId,
        o.CustomerId,
        DATEDIFF(
            day,
            LAG(o.orderdate) OVER (PARTITION BY o.CustomerId ORDER BY o.OrderDate),
            o.OrderDate
        ) AS DaysSinceLastOrder
    FROM Orders AS o
)
SELECT
    g.OrderId,
    g.CustomerId,
    g.DaysSinceLastOrder
FROM order_gaps AS g
WHERE g.DaysSinceLastOrder IS NULL
    OR g.DaysSinceLastOrder >= 30
Results:
OrderId CustomerId DaysSinceLastOrder
1 10 NULL
6 10 70
3 11 NULL
4 11 31
5 11 32
(Note that 1970-01-01 is chosen arbitrarily, you may choose any date)
Update
A slightly more reliable way of doing it involves a temporary table, while the original table tbl is left unchanged. See here:
-- Iteratively collect qualifying orders for customer 10: start from a seed order
-- and keep adding the earliest order dated more than 30 days after the latest
-- order collected so far.
CREATE TABLE #tmp (id int); -- set-up temp table
INSERT INTO #tmp (id) VALUES (1); -- plant "seed": first oid
-- @@ROWCOUNT holds the row count of the previous statement; the loop stops once
-- an INSERT adds nothing.
WHILE (@@ROWCOUNT > 0)
    INSERT INTO #tmp (id)
    SELECT TOP 1 OrderId FROM tbl
    WHERE OrderId>0 AND CustomerId=10
    -- DATEADD rather than "date + 30": integer addition is not defined for the DATE type.
    AND OrderDate > (SELECT DATEADD(day, 30, MAX(OrderDate)) FROM tbl INNER JOIN #tmp ON id=OrderId)
    ORDER BY OrderDate;
-- now list all found entries of tbl:
SELECT * FROM tbl WHERE EXISTS (SELECT 1 FROM #tmp WHERE id=OrderId)
@tinka shows how to use CTEs to do the trick, and the new windowed functions (for 2012 and later) are probably the best answer. There is also the option, assuming you do not have a very large data set, to use a recursive CTE.
Example:
-- Customer to analyse. Variables and table variables are declared with @.
declare @customerid int = 10;
declare @temp table
(orderid int,
customerid int,
orderDate date
);
insert into @temp (orderid, customerid, orderDate)
values
(1, 10, '07/05/2014'),
(2, 10, '07/15/2014'),
(3, 11, '07/20/2014'),
(4, 11, '08/20/2014'),
(5, 11, '09/21/2014'),
(6, 10, '09/23/2014'),
(7, 10, '10/15/2014'),
(8, 10, '10/30/2014');
-- datefilter: the customer's orders numbered by date, each with FilterDate = OrderDate + 30 days.
with datefilter AS
(
SELECT row_number() OVER(PARTITION BY CustomerId ORDER BY OrderDate) as RowId,
OrderId,
CustomerId,
OrderDate,
DATEADD(day, 30, OrderDate) as FilterDate
from @temp
WHERE CustomerId = @customerid
)
-- firstdate (recursive): starts at the customer's first order; each step adds the
-- earliest order dated after the previous row's FilterDate.
, firstdate as
(
SELECT RowId, OrderId, CustomerId, OrderDate, FilterDate
FROM datefilter
WHERE rowId = 1
union all
SELECT datefilter.RowId, datefilter.OrderId, datefilter.CustomerId,
datefilter.OrderDate, datefilter.FilterDate
FROM datefilter
join firstdate
on datefilter.CustomerId = firstdate.CustomerId
and datefilter.OrderDate > firstdate.FilterDate
-- Reject a candidate if another order past the FilterDate precedes it, so only
-- the earliest qualifying order is taken at each step.
WHERE NOT EXISTS
(
SELECT 1 FROM datefilter betweens
WHERE betweens.CustomerId = firstdate.CustomerId
AND betweens.orderdate > firstdate.FilterDate
AND datefilter.orderdate > betweens.orderdate
)
)
SELECT * FROM firstdate

How to use Row_Number to group a resultset

I'm stuck with a query, and I don't want to use a while loop or another nasty method to do this.
Here's the situation:
I've got a query that gets some data, and i need to calculate a column based on 2 other columns.
My results are as follow:
Type | Customer | Cycle | Amount | Expiration | Row_Number (Partition By Customer, Cycle)
So, my row_number column needs to "group" customers and cycles, here's a Fiddle to better understand it
Here's an example:
As you can see, the iteration column is applied correctly, as far as I understand what ROW_NUMBER does.
But i need to do this:
Is there a way to do this with Row_Number ?
or should i need store the data in a temp table, loop through it and update this ITERATION column?
Maybe a CTE?
Any help on this will be highly appreciated. Thanks!
just run this as new query, replace what you need in your query...
-- Demo: compare RANK, ROW_NUMBER and DENSE_RANK on four inline sample rows.
WITH T (StyleID, ID) AS (
    SELECT 1, 1
    UNION ALL SELECT 1, 1
    UNION ALL SELECT 1, 1
    UNION ALL SELECT 1, 2
)
SELECT
    T.*,
    RANK()       OVER (PARTITION BY T.StyleID ORDER BY T.ID) AS [RANK],
    ROW_NUMBER() OVER (PARTITION BY T.StyleID ORDER BY T.ID) AS [ROW_NUMBER],
    DENSE_RANK() OVER (PARTITION BY T.StyleID ORDER BY T.ID) AS [DENSE_RANK]
FROM T
regards,
Valentin
You could use DENSE_RANK function instead of ROW_NUMBER.
-- Sample data: customers with repeated cycles.
-- Table variables are declared with @ (DECLARE #MyTable TABLE is not valid T-SQL).
DECLARE @MyTable TABLE
(
    Customer NVARCHAR(100) NOT NULL,
    [Cycle] SMALLINT NOT NULL
);
INSERT @MyTable (Customer, [Cycle])
VALUES
    ('C1', 2010),
    ('C1', 2010),
    ('C1', 2011),
    ('C1', 2012),
    ('C1', 2012),
    ('C1', 2012),
    ('C2', 2010),
    ('C2', 2010);
-- DENSE_RANK gives rows with the same Cycle the same number within a customer
-- and leaves no gaps between consecutive cycles.
SELECT t.Customer, t.[Cycle],
    DENSE_RANK() OVER(PARTITION BY t.Customer ORDER BY t.[Cycle]) AS Rnk
FROM @MyTable t
ORDER BY Customer, [Cycle];
Results:
Customer Cycle Rnk
-------- ------ ---
C1 2010 1
C1 2010 1
C1 2011 2
C1 2012 3
C1 2012 3
C1 2012 3
C2 2010 1
C2 2010 1
SQL Fiddle

Resources