Left outer join with CASE condition on most recent date - sql-server

I have two tables:
dbo.Order
PK_Order FK_Customer OrderDate Total
1 1 2020-01-20 150.00
2 1 2020-01-25 200.00
dbo.Customer:
PK_Customer Name Age
1 John Miller 25
2 Max Monroe 28
I would like to join these two tables BUT when a customer has more than one order, only the one with the most recent date should be joined. This would be the initial code to join the two:
-- Baseline join: every customer with all of their orders (no "latest only"
-- filtering yet). ORDER is a reserved keyword in T-SQL, so the table name
-- must be delimited as [Order].
SELECT *
FROM dbo.Customer AS Customer
LEFT OUTER JOIN dbo.[Order]
    ON Customer.PK_Customer = dbo.[Order].FK_Customer
I have never worked with case conditions in queries. Could anybody give me a hint?

I like using TOP 1 WITH TIES for problems like this:
-- One row per customer, carrying only that customer's most recent order
-- (customers without orders are kept with NULL order columns).
-- ROW_NUMBER() is 1 for each customer's newest order; TOP 1 WITH TIES keeps
-- every row that ties on the lowest ORDER BY value, i.e. all the "1" rows.
SELECT TOP 1 WITH TIES *
FROM dbo.Customer c
LEFT OUTER JOIN dbo.[Order] o
    ON c.PK_Customer = o.FK_Customer
ORDER BY
    ROW_NUMBER() OVER (PARTITION BY c.PK_Customer ORDER BY o.OrderDate DESC);

You can LEFT JOIN only record with the latest date:
--CREATE TABLE [Order]
--(
-- PK_Order int,
-- FK_Customer int,
-- OrderDate date,
-- Total decimal(10,2)
--)
--INSERT [Order] VALUES
--(1,1,'2020-01-20',150),
--(2,1,'2020-01-25',200)
--CREATE TABLE Customer
--(
-- PK_Customer int,
-- Name nvarchar(20),
-- Age int
--)
--INSERT [Customer] VALUES
--(1,'John Miller',25),
--(2,'Max Monroe',28)
-- Join each customer to the order row(s) whose OrderDate equals the latest
-- OrderDate recorded for that customer; ties on that date are all returned.
SELECT *
FROM dbo.Customer AS C
LEFT JOIN dbo.[Order] AS O
    ON O.FK_Customer = C.PK_Customer
    AND O.OrderDate = (
        SELECT MAX(latest.OrderDate)
        FROM [Order] AS latest
        WHERE latest.FK_Customer = O.FK_Customer
    )
Note 1: A customer can have several orders on their most recent order date; this query keeps all of them.
Note 2: Storing age is not a good idea, because it has to be updated every year. Store the date of birth instead.

This is similar to Tim's answer; the difference is that the PARTITION BY is applied inside a subquery on the orders table, and the join keeps only the row numbered 1 for each customer.
-- Number each customer's orders from newest to oldest inside the derived
-- table, then join only the row numbered 1 to the customer.
SELECT *
FROM #Customer AS c
LEFT JOIN (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY FK_Customer ORDER BY OrderDate DESC) AS order_NUM,
        PK_Order,
        FK_Customer,
        OrderDate,
        Total
    FROM #Order
) AS o
    ON o.FK_Customer = c.PK_Customer
    AND o.order_NUM = 1
ORDER BY c.PK_Customer, o.OrderDate DESC

Related

Count top 5 persons that were most together

I have a check-in table that consists of the following columns:
PK CheckInID int
PersonID int
CheckInDate smalldatetime
I'm trying to create a query that gives me a top 3 of persons who most frequently were checked-in together for a specific person.
For example:
personID 1 was
18 times together with personID 3
13 times together with personID 9
11 times together with personID 4
Implementing this in C# is not really a problem for me, but I want to create a stored procedure, and T-SQL is not really my strong suit.
Assuming that two people were checked in together when their rows have the same CheckInDate:
-- Top 3 people sharing the most check-in dates with one given person.
-- The first `?` (inside the subquery) and the second `?` are both that
-- person's PersonId.
SELECT TOP 3
    others.PersonId,
    COUNT(*) AS cnt
FROM your_table AS others
WHERE others.CheckInDate IN (
          SELECT mine.CheckInDate
          FROM your_table AS mine
          WHERE mine.PersonId = ?
      )
  AND others.PersonId <> ? -- do not count the same person
GROUP BY others.PersonId
ORDER BY cnt DESC;
A faster way (no subquery and no "IN" statement) is :
-- Top 3 people who were checked in at the same time as person XXX.
-- T1 holds XXX's check-ins and T2 holds everyone else's. The rows are matched
-- on CheckInDate: matching on the primary key (CheckInID) would pair each row
-- only with itself, and the PersonId filters would then exclude every row.
SELECT TOP 3 T2.PersonId
     , COUNT(*) AS NB_TIME_CHECKED_IN_WITH_XXX
FROM your_table AS T1
INNER JOIN your_table AS T2
    ON T1.CheckInDate = T2.CheckInDate
    AND T2.PersonId <> XXX
WHERE T1.PersonId = XXX
GROUP BY T2.PersonId -- qualified: PersonId exists in both T1 and T2
ORDER BY NB_TIME_CHECKED_IN_WITH_XXX DESC;

how to use sum and max in one query

I have a table
tblPay
(
CID bigint,
PartyIdID bigint,
PartyName varchar(50),
AgentName varchar(50),
Agent bigint,
Amount decimal(18,2),
RecAmount decimal(18,2),
OutStanding decimal(18,2)
)
I want to select the sums of Amount and RecAmount grouped by PartyID, and I also want to select the last OutStanding entry for each PartyID. For this I use the following query.
SELECT DISTINCT CID,
Party.AccLedger_ID PartyID,
Party.AccLedger_Name PartyName,
Agent.AccLedger_Name AgentName,
Agent.AccLedger_ID AgentID,
sum(S.Amount) Amount,
Sum(S.RecAmount) RecAmount,
S.OutStanding
Group by PartyID,
Cid,
Party.AccLedger_ID,
Party.AccLedger_Name,
Agent.AccLedger_Name,
Agent.AccLedger_ID,
S.OutStanding
But I am unable to get both the sums of Amount and RecAmount and the last OutStanding record for each party. Can someone help me here?
This is the Answer i got.
Assuming your records are inserted in tblPay.CID order, use a subquery
-- Per-party totals plus the OutStanding value of the party's row with the
-- highest CID, fetched through a correlated scalar subquery.
SELECT
    pay.PartyID,
    pay.PartyName,
    SUM(pay.Amount) AS PartyAmount,
    SUM(pay.RecAmount) AS PartyRecAmount,
    (
        SELECT TOP 1 latest.OutStanding
        FROM tblPay AS latest
        WHERE latest.PartyId = pay.PartyID
        ORDER BY latest.CID DESC
    ) AS LastOutStanding
FROM tblPay AS pay
GROUP BY pay.PartyID, pay.PartyName
or an OUTER APPLY:
-- Per-party totals plus the OutStanding value of the party's row with the
-- highest CID, fetched through OUTER APPLY.
SELECT p.PartyID,
       p.PartyName,
       SUM(p.Amount) AS PartyAmount,
       SUM(p.RecAmount) AS PartyRecAmount,
       lastRecord.OutStanding AS LastOutStanding
FROM tblPay p
OUTER APPLY
(
    SELECT TOP 1 OutStanding
    FROM tblPay p2
    WHERE p.PartyID = p2.PartyId
    ORDER BY CID DESC
) lastRecord
-- lastRecord.OutStanding must appear in the GROUP BY because it is selected
-- without an aggregate. It is the same for every row of a party, so it does
-- not split the groups.
GROUP BY p.PartyID, p.PartyName, lastRecord.OutStanding
or, as JamieD77 suggests in his answer, use a CTE.
use a cte to get your tblPay information and join that to your other tables on the last record
-- cte: one row per tblPay record carrying the party-wide sums as window
-- aggregates, plus Rn = 1 on the party's last record (highest CID).
WITH cte AS (
    SELECT
        CID,
        PartyIdID,   -- exposed so the outer query can join on it
        PartyName,
        Agent,       -- exposed so the outer query can join the Agent table
        SUM(Amount) OVER (PARTITION BY PartyIdID) AS Amount,
        SUM(RecAmount) OVER (PARTITION BY PartyIdID) AS RecAmount,
        OutStanding,
        -- only assuming your CID determines order since you have no date?
        ROW_NUMBER() OVER (PARTITION BY PartyIdID ORDER BY CID DESC) AS Rn
    FROM tblPay
)
SELECT Party.*,
       Agent.*,
       p.CID,
       p.PartyName,
       p.Amount,
       p.RecAmount,
       p.OutStanding
FROM Party
JOIN cte p
    ON p.PartyIdID = Party.AccLedger_ID
    AND p.Rn = 1
-- NOTE(review): assumes tblPay.Agent stores the agent's AccLedger_ID; confirm.
JOIN Agent
    ON Agent.AccLedger_ID = p.Agent

Left join with Sum Clause with more than 1 table gives incorrect Sum

I am trying to get the sum of rows while left-joining more than one table. The joins seem to produce a cross product of the matching rows, which makes the SUM results wrong.
Example:
First Table: Customer
Second Table: TotalAssets
Third Table: TotalLiability
Table Structure:
Customer
CustID(int) CustomerName(varchar)
1 Abc
2 Def
3 Ghi
TotalAssets
CustID Amount
1 2000
1 1000
2 600
TotalLiability
CustID Amount
1 1000
1 1000
2 800
Output Expected
CustID TotalAssets TotalLiability
1 3000 2000
2 600 800
Current Query
Select c.CustID , Sum(a.Amount) , Sum(l.Amount) From Customer c
left join TotalAssests a on a.CustID = c.CustID
left join TotalLiability l on l.CustID = c.CustID
Group by c.CustID
The problem with the current query is that the sums are not correct; I think the first left join creates a result set with multiple records per customer, and the second join is then applied to each of those records.
Any help is appreciated
UPDATE:
I had some luck with the following method, but it seems like a bad/hacky option: in my case I have 7-8 columns in the GROUP BY, and adding more left joins makes the query difficult to manage.
New query, which gives the correct result but looks very hard to maintain:
-- Sum assets per customer first (Set1), then join liabilities and sum them,
-- so the two detail tables never multiply each other's rows.
SELECT Set1.CustID,
       Set1.TotalAssets,
       SUM(l.Amount) AS TotalLiability
FROM (
    SELECT c.CustID,
           SUM(a.Amount) AS TotalAssets
    FROM Customer c
    LEFT JOIN TotalAssets a ON a.CustID = c.CustID
    GROUP BY c.CustID
) Set1
LEFT JOIN TotalLiability l ON l.CustID = Set1.CustID
GROUP BY Set1.CustID, Set1.TotalAssets
I think this gets you what you want with minimum complexity:
-- Aggregate each detail table on its own, then left-join the one-row-per-
-- customer results; customers with no rows in a table report 0 for it.
select c.CustId, isnull(a.Amount, 0) as TotalAssets, isnull(l.Amount, 0) as TotalLiability
from Customer c
left join (
    select CustId, sum(Amount) as Amount from TotalAssets group by CustId
) a on a.CustId = c.CustId
left join (
    select CustId, sum(Amount) as Amount from TotalLiability group by CustId
) l on l.CustId = c.CustId
You need to group/sum the two tables separately, since the data in them is independent. Left-joining both to the customers table ensures that customers with no entries in either/both tables are still reported.
This should work:
-- One correlated subquery per detail table; each sum is computed
-- independently, so neither table multiplies the other's rows.
-- A customer with no rows in a table gets NULL for that sum.
SELECT c.CustID
     , (SELECT SUM(a.Amount) FROM TotalAssets a WHERE a.CustID = c.CustID) AS SumAsset
     , (SELECT SUM(l.Amount) FROM TotalLiability l WHERE l.CustID = c.CustID) AS SumLiability
FROM Customer c
Hope the below works with less maintenance,
-- Self-contained demo using table variables (table variables are declared
-- with @; the # prefix is for temp tables created via CREATE TABLE).
DECLARE @Customer TABLE (CustID int, CustomerName varchar(50));
DECLARE @TotalAssets TABLE (CustID int, Amount int);
DECLARE @TotalLiability TABLE (CustID int, Amount int);

INSERT INTO @Customer (CustID, CustomerName)
SELECT 1, 'ABC'
UNION ALL
SELECT 2, 'DEF'
UNION ALL
SELECT 3, 'GHI';

INSERT INTO @TotalAssets (CustID, Amount)
SELECT 1, 2000
UNION ALL
SELECT 1, 1000
UNION ALL
SELECT 2, 600;

-- UNION ALL is required here: plain UNION removes duplicates and would
-- collapse the two identical (1, 1000) liability rows into one.
INSERT INTO @TotalLiability (CustID, Amount)
SELECT 1, 1000
UNION ALL
SELECT 1, 1000
UNION ALL
SELECT 2, 800;

SELECT *
FROM @Customer;

-- Each detail table is summed per customer BEFORE joining. Joining the raw
-- tables and summing afterwards pairs every asset row with every liability
-- row of the same customer and inflates both totals.
-- Inner joins: only customers present in both tables are returned.
SELECT C.CustID,
       C.CustomerName,
       A.TotalAssets,
       L.TotalLiability
FROM @Customer C
JOIN (
    SELECT CustID, SUM(Amount) AS TotalAssets
    FROM @TotalAssets
    GROUP BY CustID
) A ON C.CustID = A.CustID
JOIN (
    SELECT CustID, SUM(Amount) AS TotalLiability
    FROM @TotalLiability
    GROUP BY CustID
) L ON C.CustID = L.CustID;

Joining on unique ID and date range - must return 1 row

In my calculated data layer, I am attempting to populate a Customer's postcode at the time of the order, a sub sample of the table being populated is as follows:
CustomerOrders
(
CustomerID varchar(20),
...
OrderDate date,
...
CustomerPostcodeAtTimeOfOrder varchar(10)
)
This table is a join of the Customers table, the Orders table and the CustomerAddress table which looks like follows:
CustomerAddress
(
CustomerID varchar(20),
AddressType varchar(10),
/*
AddressDetails
*/
StartDate date,
EndDate date,
AddressRank int
)
It is quite conceivable that a customer may have recorded addresses of various types for a single date so the intention when populating the CustomerOrders table is to join as below:
-- For every customer/order pair, pick the single address valid on the order
-- date with the lowest AddressRank. OUTER APPLY keeps orders that have no
-- matching address (Postcode is NULL for them).
SELECT *
FROM Customers c
LEFT JOIN Orders o
    ON o.CustomerID = c.CustomerID
OUTER APPLY
(
    SELECT TOP 1 Postcode
    FROM CustomerAddress ca
    WHERE ca.CustomerID = c.CustomerID
      AND o.OrderDate BETWEEN ca.StartDate AND ca.EndDate
    ORDER BY AddressRank
) addr -- a derived table used with APPLY must have an alias
However, the performance hit I am getting by adding this join to the query means that returning 1000 rows goes from taking 4 seconds to taking 106 seconds.
Just to note, I have added a non-clustered index on the Address table too. The definition of which is as below:
-- The index name is written without parentheses; only the key and INCLUDE
-- column lists are parenthesized.
-- NOTE(review): the lookup filters on CustomerID with an equality, yet
-- CustomerID is only an included column here; leading the key with
-- CustomerID would probably let the lookup seek. Confirm with the plan.
CREATE NONCLUSTERED INDEX IX_CustomerAddress
ON CustomerAddress (StartDate, EndDate)
INCLUDE (AddressRank, CustomerID, Postcode)
I'm looking for any suggestions on the best way to tackle this issue please?
I'm not completely sure if this will return results faster, but you can rewrite your query like this:
-- OrderAddress: every order paired with the addresses valid on its order
-- date, numbered so that RN = 1 is the address with the lowest AddressRank
-- (the same choice as TOP 1 ... ORDER BY AddressRank).
;WITH OrderAddress AS
(
    SELECT o.*,
           ca.Postcode,
           -- NOTE(review): Orders' key column is not shown in the question.
           -- If a customer can place several orders on one date, add that
           -- key to PARTITION BY so each order keeps its own RN = 1 row.
           RN = ROW_NUMBER() OVER(PARTITION BY o.CustomerID, o.OrderDate
                                  ORDER BY ca.AddressRank)
    FROM Orders o
    -- LEFT JOIN keeps orders with no address valid on the order date,
    -- as the OUTER APPLY version does.
    LEFT JOIN CustomerAddress ca
        ON ca.CustomerID = o.CustomerID
        AND o.OrderDate BETWEEN ca.StartDate AND ca.EndDate
)
SELECT *
FROM Customers c
LEFT JOIN ( SELECT *
            FROM OrderAddress
            WHERE RN = 1) o
    ON o.CustomerID = c.CustomerID;
You should also post the index definition on the Address table.

Join two tables with conditions depending on multiples columns

In SQL Server 2008, I want to join two tables on a key that might have duplicates, but the match is unique when information from other columns is taken into account.
For a simplified purchase record example,
Table A:
UserId PayDate Amount
1 2015 100
1 2010 200
2 2014 150
Table B:
UserId OrderDate Count
1 2009 4
1 2014 2
2 2013 5
Desired Result:
UserId OrderDate PayDate Amount Count
1 2009 2010 200 4
1 2014 2015 100 2
2 2013 2014 150 5
It's guaranteed that:
Table A and Table B have same number of rows, and UserId in both table are same set of numbers.
For any UserId, PayDate is always later than OrderDate
Rows with same UserId are matched by sorted sequence of Date. For example, Row 1 in Table A should match Row 2 in Table B
My idea is, for both tables, to first sort by date, then add another Id column, and then join on this Id column. But I am not authorized to write anything into the database. How can I do this task?
Row_Number() will be your friend here. It allows you to add a virtual sequencing to your resultset.
Run this and study the output:
-- Number each user's orders by OrderDate; the counter restarts at 1 for
-- every UserID.
SELECT
    UserID,
    OrderDate,
    "Count" AS do_not_use_reserved_words_for_column_names,
    ROW_NUMBER() OVER (PARTITION BY UserID ORDER BY OrderDate) AS sequence
FROM table_b
The PARTITION BY determines when the counter should be "reset" i.e. it should restart after a change of UserID
The ORDER BY, well, you've guessed it - determines the order of the sequence!
Pull this all together:
-- Pair each user's n-th order with that user's n-th payment.
-- payments: table_a rows (PayDate, Amount) numbered per user by PayDate.
-- orders:   table_b rows (OrderDate, Count) numbered per user by OrderDate.
; WITH payments AS (
  SELECT UserID
       , PayDate
       , Amount
       , Row_Number() OVER (PARTITION BY UserID ORDER BY PayDate) As sequence
  FROM   table_a -- PayDate and Amount live in table A, not table B
)
, orders AS (
  SELECT UserID
       , OrderDate
       , "Count" As do_not_use_reserved_words_for_column_names
       , Row_Number() OVER (PARTITION BY UserID ORDER BY OrderDate) As sequence
  FROM   table_b
)
SELECT orders.UserID
     , orders.OrderDate
     , orders.do_not_use_reserved_words_for_column_names
     , payments.PayDate
     , payments.Amount
FROM   orders
 LEFT
  JOIN payments
    ON payments.UserID   = orders.UserID
   AND payments.sequence = orders.sequence
P.S. I've opted for an outer join because I assumed that there's not always going to be a payment for every order.
Try:
-- Number payments (TableA) and orders (TableB) per user by date, then match
-- rows that share the same user and the same position in the sequence.
;WITH pay AS
(
    SELECT
        UserId,
        PayDate,
        Amount,
        ROW_NUMBER() OVER (PARTITION BY UserId ORDER BY PayDate) AS RN
    FROM TableA
),
ord AS
(
    SELECT
        UserId,
        OrderDate,
        [Count],
        ROW_NUMBER() OVER (PARTITION BY UserId ORDER BY OrderDate) AS RN
    FROM TableB
)
SELECT
    pay.UserId,
    ord.OrderDate,
    pay.PayDate,
    pay.Amount,
    ord.[Count]
FROM pay
INNER JOIN ord
    ON ord.UserId = pay.UserId
    AND ord.RN = pay.RN

Resources