SQL - How to only show the row with the greatest date value based on ID?

SQL - How to only show the row with the greatest date value based on ID? - sql-server

I have SQL statements in SQL Server 2008 R2 based on a few joined tables that utilizes all the information I need in my program:
-- Lists laptops together with their loan rows: borrower name and loan date.
SELECT
Laptops.Laptop_ID,
Laptops.Model_Name,
...
-- First and last name are concatenated with no separator between them.
Users.Firstname + Users.Lastname AS Name,
Loans.Date_Loaned
FROM Users
INNER JOIN Loans ON Users.User_ID = Loans.User_ID
-- RIGHT OUTER JOIN keeps every Laptops row, even when no loan matches it;
-- a laptop with several loans appears once per loan.
RIGHT OUTER JOIN Laptops ON Loans.Laptop_ID = Laptops.Laptop_ID
This brings up a table similar to:
ID Model_Name ... Name Date_Loaned
1 ... ... Kris 18-08-11
2 ... ... Jo 20-08-11
2 ... ... Bert 18-08-11
4 ... ... Sam 19-08-11
What I'm trying to do is where there would be repeated ID, I want to only show the row with the highest date, like this:
ID Model_Name ... Name Date_Loaned
1 ... ... Kris 18-08-11
2 ... ... Jo 20-08-11
4 ... ... Sam 19-08-11
I'm having problems figuring out how to do this with the SQL statement that I already have. Help!

Use window functions:
-- Number each laptop's loans from newest to oldest, then keep only the newest one.
;WITH ranked_loans AS (
    SELECT
        Laptops.Laptop_ID,
        Laptops.Model_Name,
        ...
        Users.Firstname + Users.Lastname AS Name,
        Loans.Date_Loaned,
        ROW_NUMBER() OVER (
            PARTITION BY Laptops.Laptop_ID
            ORDER BY Loans.Date_Loaned DESC
        ) AS rn
    FROM Users
    INNER JOIN Loans
        ON Users.User_ID = Loans.User_ID
    RIGHT OUTER JOIN Laptops
        ON Loans.Laptop_ID = Laptops.Laptop_ID
)
SELECT *
FROM ranked_loans
WHERE rn = 1

Try this
-- Latest loan date per laptop and borrower.
-- NOTE: the borrower name is part of the GROUP BY, so a laptop that was loaned
-- to several users still yields one row per user, not one row per laptop.
SELECT
    Laptops.Laptop_ID,
    Laptops.Model_Name,
    ...
    Users.Firstname + Users.Lastname AS Name,
    MAX(Loans.Date_Loaned) AS date
FROM Users
INNER JOIN Loans ON Users.User_ID = Loans.User_ID
RIGHT OUTER JOIN Laptops ON Loans.Laptop_ID = Laptops.Laptop_ID
GROUP BY
    Laptops.Laptop_ID,
    Laptops.Model_Name,
    ...
    -- A column alias is not allowed in GROUP BY; repeat the expression itself.
    Users.Firstname + Users.Lastname

Glad you got your answer, but I wanted to mention that you might get better performance by selecting from Laptops and LEFT OUTER JOINing to a Users/Loans subquery. It might also be a little easier for the next person to decipher, since RIGHT OUTER JOIN is not used very often. Using aliases also helps eliminate some typing.
-- One row per laptop with the borrower and date of its most recent loan;
-- Name and Date_Loaned are NULL for laptops without any loan.
SELECT
    lap.Laptop_ID,
    lap.Model_Name,
    ul.Name,
    ul.Date_Loaned
FROM Laptops AS lap
LEFT OUTER JOIN (
    -- Rank each laptop's loans, newest first.
    SELECT
        ln.Laptop_ID,
        usr.Firstname + usr.Lastname AS Name,
        ln.Date_Loaned,
        ROW_NUMBER() OVER (PARTITION BY ln.Laptop_ID ORDER BY ln.Date_Loaned DESC) AS Rn
    FROM Loans AS ln
    INNER JOIN Users AS usr
        ON usr.User_ID = ln.User_ID
) AS ul
    ON ul.Laptop_ID = lap.Laptop_ID
    AND ul.Rn = 1

Related

Using the results of WITH clause IN where STATEMENT of main query

I am relatively new at SQL so I apologise if this is obvious but I cannot work out how to use the results of the WITH clause query in the where statement of my main query.
My with query pulls the first record for each customer and gives the sale date for that record:
-- Number each customer's sales by sale date, earliest first.
WITH summary AS(
SELECT ed2.customer,ed2.saledate,
ROW_NUMBER()OVER(PARTITION BY ed2.customer
ORDER BY ed2.saledate)AS rk
FROM Filteredxportdocument ed2)
-- rk = 1 keeps only the earliest sale of each customer.
SELECT s.*
FROM summary s
WHERE s.rk=1
I need to use the date in the above query as the starting point and pull all records for each customer for their first 12 months i.e. where the sale date is between ed2.saledate AND ed2.saledate+12 months.
My main query is:
-- Sales with statecode 0, joined to the account of the customer they belong to.
SELECT ed.totalamountincvat, ed.saledate, ed.name AS SaleRef,
ed.customer, ed.customername, comp.numberofemployees,
comp.companyuid
FROM exportdocument AS ed INNER JOIN
FilteredAccount AS comp ON ed.customer = comp.accountid
WHERE (ed.statecode = 0) AND
-- ed2 is not introduced anywhere in this statement's FROM clause (only ed and comp are).
ed.saledate BETWEEN ed2.saledate AND DATEADD(M,12,ed2.saledate)
I am sure that I need to add the main query into the WITH clause but I cant work out where. Is anyone able to help please

Does this help?
-- Each customer's statecode-0 sales dated from their first sale through 12 months later.
;WITH summary AS (
    SELECT
        ed2.customer,
        ed2.saledate,
        ROW_NUMBER() OVER (PARTITION BY ed2.customer ORDER BY ed2.saledate) AS rk
    FROM Filteredxportdocument ed2
)
SELECT
    ed.totalamountincvat,
    ed.saledate,
    ed.name AS SaleRef,
    ed.customer,
    ed.customername,
    comp.numberofemployees,
    comp.companyuid
FROM exportdocument AS ed
INNER JOIN FilteredAccount AS comp
    ON ed.customer = comp.accountid
-- Only the rk = 1 row (the customer's earliest sale) supplies the window start.
INNER JOIN summary AS first_sale
    ON first_sale.customer = ed.Customer
    AND first_sale.rk = 1
WHERE ed.statecode = 0
    AND ed.saledate BETWEEN first_sale.saledate AND DATEADD(M, 12, first_sale.saledate)
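A CTE's results are not cached or stored; the CTE exists only for the single statement that immediately follows its definition, so you can't reuse it in a later query.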
EDIT:
Based upon your requirement that all the records from CTE should be in final result, this is a new query:
-- Every customer's first sale (rk = 1 in the CTE) paired with that customer's
-- statecode-0 sales from that date through 12 months later. Customers with no
-- qualifying sale still appear once, with NULL in every output column.
;WITH summary AS (
    SELECT
        ed2.customer,
        ed2.saledate,
        ROW_NUMBER() OVER (PARTITION BY ed2.customer ORDER BY ed2.saledate) AS rk
    FROM Filteredxportdocument ed2
)
SELECT
    sale.totalamountincvat,
    sale.saledate,
    sale.SaleRef,
    sale.customer,
    sale.customername,
    sale.numberofemployees,
    sale.companyuid
FROM summary ed2
-- The sale/account join is done inside a derived table so that it cannot
-- discard the CTE rows preserved by this LEFT JOIN (an INNER JOIN on
-- ed.customer placed after the LEFT JOIN rejects the NULL-extended rows).
LEFT JOIN (
    SELECT
        ed.totalamountincvat,
        ed.saledate,
        ed.name AS SaleRef,
        ed.customer,
        ed.customername,
        comp.numberofemployees,
        comp.companyuid
    FROM exportdocument ed
    INNER JOIN FilteredAccount comp
        ON ed.customer = comp.accountid
    WHERE ed.statecode = 0
) sale
    ON sale.customer = ed2.customer
    AND sale.saledate BETWEEN ed2.saledate AND DATEADD(M, 12, ed2.saledate)
WHERE
    -- The CTE is aliased ed2 in this query; no alias named s exists here.
    ed2.rk = 1

You can only use summary in the single statement that follows its definition. An alternative solution is to store the summary rows in a temp table, which you can then use as many times as you want.
Something like: Select * into #temp from Summary s where s.rk=1

How can I nest a query as a variable in SQL?

Obviously, SQL isn't my first language, so I need help with something that is probably trivial.
I have the following query:
-- Departure statistics per IATA code for flights matching AnalysisId = 2.
SELECT Airports.IATA_Code,
COUNT(*) AS Departures,
-- Uncorrelated subquery: it counts arrivals at the hard-coded airport 63384,
-- so every output row receives the same Arrivals value.
(SELECT COUNT(*) FROM Flights WHERE DestinationAirportId = 63384) AS Arrivals,
SUM(Flights.Tickets) AS Tickets,
SUM(Flights.Fare * Flights.Tickets) As Revenue,
AVG(Flights.Demand) AS Demand
FROM Flights
LEFT JOIN Airports
ON Flights.OriginAirportId = Airports.Id
WHERE AnalysisId = 2
GROUP BY IATA_Code
ORDER BY Tickets DESC
This query works fine, but I need to replace the hard-coded id of 63384 with the actual Airport Id. This would be Airports.Id but when I try that, I get the following error:
Column 'Airports.Id' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
Solved!
Just needed to group by the Airport Id as well:
-- Departure statistics per origin airport for flights matching AnalysisId = 2,
-- plus the number of flights arriving at that same airport.
SELECT Airports.IATA_Code,
COUNT(*) AS Departures,
-- Correlated on Airports.Id, which is legal because Airports.Id is in the GROUP BY.
(SELECT COUNT(*) FROM Flights WHERE DestinationAirportId = Airports.Id) AS Arrivals,
SUM(Flights.Tickets) AS Tickets,
SUM(Flights.Fare * Flights.Tickets) As Revenue,
AVG(Flights.Demand) AS Demand
FROM Flights
LEFT JOIN Airports
ON Flights.OriginAirportId = Airports.Id
WHERE AnalysisId = 2
GROUP BY IATA_Code, Airports.Id -- <--- Airports.Id added here; this is the fix
ORDER BY Tickets DESC

Just guessing here... there should be a FK on OriginAirportId referencing Airports.Id. If that's the case, you can do an inner join instead of left join.
Also, try using CROSS APPLY if that's an option for you.
-- Departure statistics per origin airport plus the number of flights arriving there.
SELECT
    a.IATA_Code,
    COUNT(*) AS Departures,
    t.Arrivals,
    SUM(f.Tickets) AS Tickets,
    SUM(f.Fare * f.Tickets) AS Revenue,
    AVG(f.Demand) AS Demand
FROM Flights f
INNER JOIN Airports a
    ON f.OriginAirportId = a.Id
-- Evaluated per joined row: counts the flights whose destination is that airport.
CROSS APPLY (
    SELECT COUNT(*) AS Arrivals
    FROM Flights f1
    WHERE f1.DestinationAirportId = a.Id
) t
WHERE AnalysisId = 2
-- t.Arrivals is selected without an aggregate, so it has to be grouped too.
-- It depends only on a.Id, so adding it does not split any group.
GROUP BY a.IATA_Code, a.Id, t.Arrivals
ORDER BY Tickets DESC
I didn't test this code so please just use it as reference only please.
Or you can even try this...
-- Departure and arrival figures for every airport, including airports with no flights.
-- NOTE(review): no AnalysisId filter is applied in either CTE — confirm whether that is intended.
;WITH AirportDepartureCount AS (
    -- Per-origin totals. Flights must carry the alias f because the
    -- aggregate expressions below reference f.Tickets, f.Fare and f.Demand.
    SELECT
        f.OriginAirportId AS AirportId,
        COUNT(*) AS DepartCount,
        SUM(f.Tickets) AS Tickets,
        SUM(f.Fare * f.Tickets) AS Revenue,
        AVG(f.Demand) AS Demand
    FROM Flights f
    GROUP BY f.OriginAirportId
), AirportArrivalCount AS (
    -- Number of flights arriving at each destination airport.
    SELECT DestinationAirportId AS AirportId, COUNT(*) AS ArrivalCount
    FROM Flights
    GROUP BY DestinationAirportId
)
SELECT
    a.Id,
    a.IATA_Code,
    -- COALESCE turns the NULLs produced by the LEFT JOINs into 0.
    COALESCE(depart.DepartCount, 0) AS DepartCount,
    COALESCE(arrival.ArrivalCount, 0) AS ArrivalCount,
    COALESCE(depart.Tickets, 0) AS Tickets,
    COALESCE(depart.Revenue, 0) AS Revenue,
    COALESCE(depart.Demand, 0) AS Demand
FROM Airports a
LEFT JOIN AirportDepartureCount depart
    ON a.Id = depart.AirportId
LEFT JOIN AirportArrivalCount arrival
    ON a.Id = arrival.AirportId
ORDER BY COALESCE(depart.Tickets, 0) DESC
Main difference here is that this code accounts for all airports (even those that did not have any flights). In your solution, you're ignoring any airports that did not have any departing flights. Perhaps that's by design but thought I'd throw this out there for completeness sake... ;)

Join subquery with min

I'm pulling my hair out over a subquery that I'm using to avoid about 100 duplicates (out of about 40k records). The records that are duplicated are showing up because they have 2 dates in h2.datecreated for a valid reason, so I can't just scrub the data.
I'm trying to get only the earliest date to return. The first subquery (the one that starts with "select distinct address_id", with the MIN) works fine on its own: no duplicates are returned. So it would seem that the left join (or just a plain join; I've tried that too) couldn't possibly see the second h2.datecreated, since it doesn't even show up in the subquery. But when I run the whole query, it returns 2 values for some ipc.mfgid values, one with the h2.datecreated that I want, and the other one that I don't want.
I know it's got to be something really simple, or something that just isn't possible. It really seems like it should work! This is MSSQL. Thanks!
-- Lists equipment (ipc) with address, ONT label, account and install date.
select distinct ipc.mfgid as IPC, h2.datecreated,
-- No ELSE branch: Address is NULL whenever ad.Address is not null.
-- NOTE(review): possibly meant to fall back to ad.buildingname only when Address is missing — confirm.
case when ad.Address is null
then ad.buildingname end as Address, cast(trace.name as varchar)
+ '-' + cast(trace.Number as varchar) as ONT,
c.ACCOUNT_Id,
-- Install prefers the 'add' history date and falls back to the h2 date.
case when h.datecreated is not null then h.datecreated
else h2.datecreated end as Install
from equipmentjoin as ipc
left join historyjoin as h on ipc.id = h.EQUIPMENT_Id
and h.type like 'add'
left join circuitjoin as c on ipc.ADDRESS_Id = c.ADDRESS_Id
and c.GRADE_Code like '%hpna%'
-- h2 produces one row per (equipment_id, address_id, comment) but is joined on
-- address_id alone, so an address with several such combinations matches more
-- than once. Being an inner join on c.address_id, it also drops rows where the
-- left join to c found no match.
join (select distinct address_id, equipment_id,
min(datecreated) as datecreated, comment
from history where comment like 'MAC: 5%' group by equipment_id, address_id, comment)
as h2 on c.address_id = h2.address_id
-- trace pairs a 'lead%' port on a GRADE_Id 29 circuit with an '%olt%' port on the
-- circuit whose PCIRCUITGROUP_Id equals the first circuit's CCIRCUITGROUP_Id.
left join (select car.id, infport.name, carport.number, car.PCIRCUITGROUP_Id
from circuit as car (NOLOCK)
join port as carport (NOLOCK) on car.id = carport.CIRCUIT_Id
and carport.name like 'lead%'
and car.GRADE_Id = 29
join circuit as inf (NOLOCK) on car.CCIRCUITGROUP_Id = inf.PCIRCUITGROUP_Id
join port as infport (NOLOCK) on inf.id = infport.CIRCUIT_Id
and infport.name like '%olt%' )
as trace on c.ccircuitgroup_id = trace.pcircuitgroup_id
join addressjoin as ad (NOLOCK) on ipc.address_id = ad.id

The typical approach to only getting the lowest row is one of the following. You didn't bother to specify what version of SQL Server you're using, what you want to do with ties, and I have little interest to try to work this into your complex query, so I'll show you an abstract simplification for different versions.
SQL Server 2000
-- Rows of dbo.foo that carry the smallest min_column within their grouping_column
-- (ties on the minimum are all returned).
SELECT src.grouping_column, src.min_column, src.other_columns ...
FROM dbo.foo AS src
INNER JOIN
(
    -- Smallest min_column per group.
    SELECT grouping_column, MIN(min_column) AS min_column
    FROM dbo.foo
    GROUP BY grouping_column
) AS lowest
    ON lowest.grouping_column = src.grouping_column
    AND lowest.min_column = src.min_column;
SQL Server 2005+
-- Exactly one row per grouping_column: the one with the smallest min_column.
;WITH x AS
(
    SELECT grouping_column, min_column, other_columns,
        -- PARTITION BY restarts the numbering for every group; without it the
        -- whole table is numbered once and only a single row gets rn = 1.
        rn = ROW_NUMBER() OVER (PARTITION BY grouping_column ORDER BY min_column)
    FROM dbo.foo
)
SELECT grouping_column, min_column, other_columns
FROM x
WHERE rn = 1;

This subquery:
-- One output row per distinct (equipment_id, address_id, comment); MIN(datecreated)
-- is computed within each of those groups, not per address.
select distinct address_id, equipment_id,
min(datecreated) as datecreated, comment
from history where comment like 'MAC: 5%' group by equipment_id, address_id, comment
will probably return multiple rows per address, because comment is part of the GROUP BY and is not guaranteed to be the same for every row.
Try this instead:
-- For each row of C, pick the earliest 'MAC: 5%' history entry at the same address.
CROSS APPLY (
    SELECT TOP (1) hist.DateCreated, hist.Comment -- Equipment_id is omitted: it wasn't used
    FROM History AS hist
    WHERE hist.Address_ID = C.Address_ID
        AND hist.Comment LIKE 'MAC: 5%'
    ORDER BY hist.DateCreated
) H2
Switch that to OUTER APPLY in case you want rows that don't have a matching desired history entry.

How to SELECT DISTINCT Info with TOP 1 Info and an Order By FROM the Top 1 Info

I have 2 tables, that look like:
CustomerInfo(CustomerID, CustomerName)
CustomerReviews(ReviewID, CustomerID, Review, Score)
I want to search reviews for a string and return CustomerInfo.CustomerID and CustomerInfo.CustomerName. However, I only want to show distinct CustomerID and CustomerName along with just one of their CustomerReviews.Reviews and CustomerReviews.Score. I also want to order by the CustomerReviews.Score.
I can't figure out how to do this, since a customer can leave multiple reviews, but I only want a list of customers with their highest scored review.
Any ideas?

This is the greatest-n-per-group problem that has come up dozens of times on Stack Overflow.
Here's a solution that works with a window function:
-- Highest-scored review matching the search term, one per customer.
;WITH CustomerCTE AS (
    -- Columns are listed explicitly: i.* together with r.* would expose
    -- CustomerID twice, and a CTE may not contain duplicate column names.
    SELECT
        i.CustomerID,
        i.CustomerName,
        r.ReviewID,
        r.Review,
        r.Score,
        -- Qualified with i./r. because both tables have a CustomerID column.
        ROW_NUMBER() OVER (PARTITION BY i.CustomerID ORDER BY r.Score DESC) AS RN
    FROM CustomerInfo i
    INNER JOIN CustomerReviews r ON i.CustomerID = r.CustomerID
    WHERE CONTAINS(r.Review, '"search"')
)
SELECT CustomerID, CustomerName, ReviewID, Review, Score
FROM CustomerCTE
WHERE RN = 1
ORDER BY Score;
And here's a solution that works more broadly with RDBMS brands that don't support window functions:
-- Highest-scored matching review per customer, without window functions:
-- r1 is kept only when no matching review r2 of the same customer beats it.
SELECT i.*, r1.*
FROM CustomerInfo i
INNER JOIN CustomerReviews r1 ON i.CustomerID = r1.CustomerID
    AND CONTAINS(r1.Review, '"search"')
LEFT OUTER JOIN CustomerReviews r2 ON i.CustomerID = r2.CustomerID
    -- The search filter must apply to r2 itself; otherwise a higher-scored
    -- review that does not match the search would eliminate r1.
    AND CONTAINS(r2.Review, '"search"')
    -- r2 beats r1 on a higher score, or on a higher ReviewID when scores tie.
    AND (r1.Score < r2.Score OR r1.Score = r2.Score AND r1.ReviewID < r2.ReviewID)
-- No better r2 was found, so r1 is this customer's top matching review.
WHERE r2.CustomerID IS NULL
ORDER BY r1.Score;
I'm showing the CONTAINS() function because you should be using the fulltext search facility in SQL Server, not using LIKE with wildcards.

I voted for Bill Karwin's answer, but I thought I'd throw out another option.
It uses a correlated subquery, which can often incur performance problems with large data sets, so use with caution. I think the only upside is that the query is easier to immediately understand.
-- One top-scored review per customer, listed from highest score to lowest.
SELECT *
FROM [CustomerReviews] AS review
WHERE review.[ReviewID] = (
    -- Correlated lookup: id of the best-scored review for this row's customer.
    SELECT TOP (1) best.[ReviewID]
    FROM [CustomerReviews] AS best
    WHERE best.CustomerID = review.CustomerID
    ORDER BY best.Score DESC
)
ORDER BY review.Score DESC
I didn't add the string search filter, but that can be easily added.

I think this should do it
-- Each customer with their highest-scored review that contains the search text.
SELECT ci.CustomerID, ci.CustomerName, cr.Review, cr.Score
FROM CustomerInfo AS ci
-- CROSS APPLY evaluates TOP (1) once per customer. An uncorrelated derived
-- table with TOP 1 would yield a single review for the entire table.
CROSS APPLY (
    SELECT TOP (1) r.Review, r.Score
    FROM CustomerReviews AS r
    WHERE r.CustomerID = ci.CustomerID
        AND r.Review LIKE '%search%'
    ORDER BY r.Score DESC
) AS cr
ORDER BY cr.Score

Sql Server double subquery

I have a table which is kinda like an historic table... so I have data like this
idA numberMov FinalDate
1 10 20090209
2 14 20090304
1 12 20090304
3 54 20080508
4 42 20090510
... ... ....
I need to retrieve the numberMov based on the newest finalDate from each idA so I use this
-- numberMov of the row(s) holding the newest finalDate for each idA.
SELECT cur.numberMov
FROM table1 AS cur
INNER JOIN (
    -- Newest finalDate per idA.
    SELECT idA, MAX(finalDate) AS maxDate
    FROM table1
    GROUP BY idA
) AS newest
    ON newest.idA = cur.idA
    AND newest.maxDate = cur.finalDate
Now I have another query like this
select m fields from n tables where n5.numberMov in ("insert first query here")
I feel like there is a better solution but can't think of any, I really dont like having two subqueries in there.
Any suggestions?

Not enough information to test it myself but something like this might work.
-- "m fields" and "n tables" are placeholders carried over from the question.
select m fields
from a inner join
    -- The newest FinalDate has to be found per idA; grouping by numberMov
    -- would return the latest date of every numberMov value instead.
    (select idA,
        max(FinalDate) as maxDate
    from a
    group by idA) b
    on a.idA = b.idA
    and a.FinalDate = b.maxDate inner join
    n tables on a.numberMov = n.numberMov

You don't say which edition of SQL server, but this will work in SQL 2005+
-- "m fields" and "n tables" stand for the caller's own select list and joins.
;WITH rankedMov AS (
    -- Number the rows of each idA from newest FinalDate to oldest.
    SELECT
        idA,
        numberMov,
        ROW_NUMBER() OVER (PARTITION BY idA ORDER BY FinalDate DESC) AS rn
    FROM table1
)
SELECT m fields
FROM n tables
WHERE n5.numberMov IN (
    -- rn = 1 is the newest row of each idA.
    SELECT numberMov
    FROM rankedMov
    WHERE rn = 1
)