Join two tables with conditions depending on multiple columns - sql-server

In SQL Server 2008, I want to join two tables on a key that might have duplicates, but the match becomes unique once the information from other columns is taken into account.
For a simplified purchase record example,
Table A:
UserId PayDate Amount
1 2015 100
1 2010 200
2 2014 150
Table B:
UserId OrderDate Count
1 2009 4
1 2014 2
2 2013 5
Desired Result:
UserId OrderDate PayDate Amount Count
1 2009 2010 200 4
1 2014 2015 100 2
2 2013 2014 150 5
It's guaranteed that:
Table A and Table B have same number of rows, and UserId in both table are same set of numbers.
For any UserId, PayDate is always later than OrderDate
Rows with same UserId are matched by sorted sequence of Date. For example, Row 1 in Table A should match Row 2 in Table B
My idea is, for both tables, to first sort by date, then add another Id column, and then join on this Id column. But I am not authorized to write anything into the database. How can I do this task?

Row_Number() will be your friend here. It allows you to add a virtual sequencing to your resultset.
Run this and study the output:
-- Demo: number each user's orders from the earliest OrderDate onwards.
-- The counter restarts at 1 for every UserID.
SELECT
    UserID,
    OrderDate,
    "Count" AS do_not_use_reserved_words_for_column_names,
    ROW_NUMBER() OVER (
        PARTITION BY UserID
        ORDER BY OrderDate
    ) AS sequence
FROM table_b
The PARTITION BY determines when the counter should be "reset" i.e. it should restart after a change of UserID
The ORDER BY, well, you've guessed it - determines the order of the sequence!
Pull this all together:
-- Pair every order with its payment without writing anything to the database:
-- number the rows of each table per user in date order, then join on
-- (UserID, sequence).
; WITH payments AS (
    -- Payments (PayDate, Amount) are stored in table A, not table B.
    SELECT UserID
         , PayDate
         , Amount
         , Row_Number() OVER (PARTITION BY UserID ORDER BY PayDate) As sequence
    FROM   table_a
)
, orders AS (
    SELECT UserID
         , OrderDate
         , "Count" As do_not_use_reserved_words_for_column_names
         , Row_Number() OVER (PARTITION BY UserID ORDER BY OrderDate) As sequence
    FROM   table_b
)
SELECT orders.UserID
     , orders.OrderDate
     , orders.do_not_use_reserved_words_for_column_names
     , payments.PayDate
     , payments.Amount
FROM   orders
 -- Outer join: an order with no matching payment is still returned,
 -- with NULL PayDate and Amount.
 LEFT
  JOIN payments
    ON payments.UserID   = orders.UserID
   AND payments.sequence = orders.sequence
P.S. I've opted for an outer join because I assumed that there's not always going to be a payment for every order.

Try:
-- Rank payments and orders per user by date, then match rows that share
-- the same user and the same rank.
;WITH payments_ranked AS
(
    SELECT
        a.UserId,
        a.PayDate,
        a.Amount,
        ROW_NUMBER() OVER (PARTITION BY a.UserId ORDER BY a.PayDate) AS RN
    FROM TableA AS a
),
orders_ranked AS
(
    SELECT
        b.UserId,
        b.OrderDate,
        b.[Count],
        ROW_NUMBER() OVER (PARTITION BY b.UserId ORDER BY b.OrderDate) AS RN
    FROM TableB AS b
)
SELECT
    p.UserId,
    o.OrderDate,
    p.PayDate,
    p.Amount,
    o.[Count]
FROM payments_ranked AS p
INNER JOIN orders_ranked AS o
    ON  o.UserId = p.UserId
    AND o.RN = p.RN

Related

Left outer join with CASE condition on most recent date

I have two tables:
dbo.Order
PK_Order FK_Customer OrderDate Total
1 1 2020-01-20 150.00
2 1 2020-01-25 200.00
dbo.Customer:
PK_Customer Name Age
1 John Miller 25
2 Max Monroe 28
I would like to join these two tables BUT when a customer has more than one order, only the one with the most recent date should be joined. This would be the initial code to join the two:
-- Starting point: every customer with all of their orders (no "latest only"
-- filtering yet). ORDER is a reserved keyword, so the table name must be
-- bracketed.
SELECT *
FROM dbo.Customer as Customer
LEFT OUTER JOIN dbo.[Order]
ON Customer.PK_Customer = dbo.[Order].FK_Customer
I have never worked with case conditions in queries. Could anybody give me a hint?
I like using TOP 1 WITH TIES for problems like this:
-- TOP 1 WITH TIES keeps every row tied for the first ORDER BY value.
-- ROW_NUMBER() is 1 for each customer's most recent order (and for the single
-- NULL-extended row of a customer without orders), so exactly one row per
-- customer is returned.
SELECT TOP 1 WITH TIES *
FROM dbo.Customer c
LEFT OUTER JOIN dbo.[Order] o
    ON c.PK_Customer = o.FK_Customer
ORDER BY
    ROW_NUMBER() OVER (PARTITION BY c.PK_Customer ORDER BY o.OrderDate DESC);
You can LEFT JOIN only record with the latest date:
--CREATE TABLE [Order]
--(
-- PK_Order int,
-- FK_Customer int,
-- OrderDate date,
-- Total decimal(10,2)
--)
--INSERT [Order] VALUES
--(1,1,'2020-01-20',150),
--(2,1,'2020-01-25',200)
--CREATE TABLE Customer
--(
-- PK_Customer int,
-- Name nvarchar(20),
-- Age int
--)
--INSERT [Customer] VALUES
--(1,'John Miller',25),
--(2,'Max Monroe',28)
-- Join each customer only to the order(s) placed on that customer's most
-- recent OrderDate; customers without orders are kept with NULL order columns.
SELECT *
FROM dbo.Customer AS C
LEFT OUTER JOIN dbo.[Order] AS O
    ON  O.FK_Customer = C.PK_Customer
    AND O.OrderDate = (
            SELECT MAX(latest.OrderDate)
            FROM [Order] AS latest
            WHERE latest.FK_Customer = O.FK_Customer
        )
Note 1: Since there can be many orders in recent date, I preserve all.
Note 2: It's not a good idea to keep age - it must be updated every year. Keep date of birth.
A similar way to Tim's answer but the difference is that the Partition by is within orders table and joining on Row =1 for each customer.
-- Number each customer's orders from newest to oldest inside the derived
-- table, then attach only order number 1 to the customer.
SELECT *
FROM #Customer AS c
LEFT JOIN (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY FK_Customer ORDER BY OrderDate DESC) AS order_NUM,
        PK_Order,
        FK_Customer,
        OrderDate,
        Total
    FROM #Order
) AS o
    ON  o.FK_Customer = c.PK_Customer
    AND o.order_NUM = 1
ORDER BY
    c.PK_Customer,
    o.OrderDate DESC

Count top 5 persons that were most together

I have a check-in table that consists of the following columns:
PK CheckInID int
PersonID int
CheckInDate smalldatetime
I'm trying to create a query that gives me a top 3 of persons who most frequently were checked-in together for a specific person.
For example:
personID 1 was
18 times together with personID 3
13 times together with personID 9
11 times together with personID 4
Implementing this in C# is not really a problem for me but I want to create a stored procedure and TSQL is not really my strong side.
Assuming that date is designator:
-- Count, per other person, the check-ins whose CheckInDate matches one of the
-- given person's check-in dates, and keep the three highest counts.
-- Both ? placeholders take the same person id.
SELECT TOP 3
    others.PersonId,
    COUNT(*) AS cnt
FROM your_table AS others
WHERE others.CheckInDate IN (
        SELECT mine.CheckInDate
        FROM your_table AS mine
        WHERE mine.PersonId = ?
    )
  AND others.PersonId <> ? -- do not count the same person
GROUP BY others.PersonId
ORDER BY cnt DESC;
A faster way (no subquery and no "IN" statement) is :
-- Top 3 people most often checked in at the same time as person XXX.
-- T1 holds XXX's check-ins; T2 holds everybody else's check-ins that share
-- the same CheckInDate. (Joining on the CheckInID primary key would only
-- pair a row with itself and therefore never find another person.)
SELECT TOP 3 T2.PersonId
     , SUM(1) AS NB_TIME_CHECKED_IN_WITH_XXX
FROM your_table AS T1
INNER JOIN your_table AS T2
    ON  T1.CheckInDate = T2.CheckInDate
    AND T2.PersonId <> XXX
WHERE T1.PersonId = XXX
-- PersonId exists in both T1 and T2, so it must be qualified here.
GROUP BY T2.PersonId
ORDER BY NB_TIME_CHECKED_IN_WITH_XXX DESC;

SQL Server - Select most recent records with condition

I have a table like this.
Table :
ID EnrollDate ExitDate
1 4/1/16 8/30/16
2 1/1/16 null
2 1/1/16 7/3/16
3 2/1/16 8/1/16
3 2/1/16 9/1/16
4 1/1/16 12/12/16
4 1/1/16 12/12/16
4 1/1/16 12/12/16
4 1/1/16 null
5 5/1/16 11/12/16
5 5/1/16 11/12/16
5 5/1/16 11/12/16
Need to select the most recent records with these conditions.
One and only one record has the most recent enroll date - select that
Two or more share same most recent enroll date and one and only one record has either a NULL Exit Date or the most recent Exit Date - Select the record with null. If no null record pick the record with recent exit date
Two or more with same enroll and Exit Date - If this case exists, don't select those record
So the expected result for the above table should be :
ID EnrollDate ExitDate
1 4/1/16 8/30/16
2 1/1/16 null
3 2/1/16 9/1/16
4 1/1/16 null
I wrote the query with group by. I am not sure how to select with the conditions 2 and 3.
-- Asker's starting point: keeps every row whose EnrollDate equals the latest
-- EnrollDate for its Id; rows tied on that EnrollDate are all returned.
select t1.* from table t1
INNER JOIN(SELECT Id,MAX(EnrollDate) maxentrydate
FROM table
GROUP BY Id)t2 ON EnrollDate = t2.maxentrydate and t1.Id=t2.Id
Please let me know what is the best way to do this.
Using the rank() window function, I think it's possible.
This is untested, but it should work:
-- Rank rows per ID: latest EnrollDate first, NULL ExitDate ahead of any
-- non-NULL ExitDate, then latest ExitDate. Rows tied on all three share a
-- rank, so HAVING COUNT(*) = 1 drops IDs whose top row is duplicated.
SELECT
    ranked.ID,
    ranked.EnrollDate,
    ranked.ExitDate
FROM (
    SELECT
        src.*,
        RANK() OVER (
            PARTITION BY ID
            ORDER BY
                EnrollDate DESC,
                CASE WHEN ExitDate IS NULL THEN 1 ELSE 2 END,
                ExitDate DESC
        ) AS rnk
    FROM tbl AS src
) AS ranked
WHERE ranked.rnk = 1
GROUP BY
    ranked.ID,
    ranked.EnrollDate,
    ranked.ExitDate
HAVING COUNT(*) = 1
The basic idea is that the rank() window function will rank the most "recent" rows with a value of 1, which we filter on in the outer query's where clause.
If more than one row have the same "most recent" data, they will all share the same rank of 1, but will get filtered out by the having count(*) = 1 clause.
Use ROW_NUMBER coupled with CASE expression to achieve the desired result:
-- Pick one row per ID: latest EnrollDate first, a NULL ExitDate ahead of any
-- non-NULL ExitDate, then latest ExitDate.
WITH Cte AS(
SELECT t.*,
ROW_NUMBER() OVER(
PARTITION BY t.ID
ORDER BY
t.EnrollDate DESC,
CASE WHEN t.ExitDate IS NULL THEN 0 ELSE 1 END,
t.ExitDate DESC
) AS rn
FROM Tbl t
-- Per-ID statistics used to drop IDs whose rows are all identical duplicates.
-- NOTE(review): CHECKSUM is a hash, so two different (EnrollDate, ExitDate)
-- pairs could in principle produce the same value and be counted as one
-- combination -- confirm that is acceptable for this data.
INNER JOIN (
SELECT
ID,
COUNT(DISTINCT CHECKSUM(EnrollDate, ExitDate)) AS DistinctCnt, -- Count distinct combination of EnrollDate and ExitDate per ID
COUNT(*) AS RowCnt -- Count number of rows per ID
FROM Tbl
GROUP BY ID
) a
ON t.ID = a.ID
-- Keep an ID when it has exactly one row, or when it has more than one
-- distinct (EnrollDate, ExitDate) combination.
WHERE
(a.DistinctCnt = 1 AND a.RowCnt = 1)
OR a.DistinctCnt > 1
)
SELECT
ID, EnrollDate, ExitDate
FROM Cte c
WHERE Rn = 1
The ORDER BY clause in the ROW_NUMBER takes care of conditions 2 and 3.
The INNER JOIN and the WHERE clause take care of 1 and 4.
ONLINE DEMO
-- Rank each id's rows: latest enrolldate first, NULL exitdate ahead of any
-- non-NULL exitdate, then latest exitdate.
with B as (
select id, enrolldate ,
exitdate,
row_number() over (partition by id order by enrolldate desc, case when exitdate is null then 0 else 1 end, exitdate desc) rn
from ab )
-- Keep the top-ranked row (rn = 1) only when the row ranked directly after it
-- (rn = 2) is not an exact copy of it.
select b1.id, b1.enrolldate, b1.exitdate from b b1
left join b b2
on b1.rn = b2.rn -1 and
b1.id = b2.id and
-- NOTE(review): this equality is never true when both exitdate values are
-- NULL, so two top rows that both have a NULL exitdate are not treated as
-- duplicates -- confirm this is intended.
b1.exitdate = b2.exitdate and
b1.enrolldate = b2.enrolldate
where b1.rn = 1 and
b2.id is nULL
The left join is used to fulfill requirement 3): when it finds a matching record, we don't want that row.

SQL sum of all records and latest record of each user

I have a table like this:
id memberId memberType points date
---- ------------ ------------- ----------- ------------
1 1001 type1 5.5 01/01/2015
2 1002 type2 4.2 01/02/2015
3 1002 type2 2.1 01/15/2015
4 1001 type2 1.5 01/15/2015
5 1002 type1 3.6 01/17/2015
I need to make an SQL view that will show the sum of points for each memberId and their latest memberType like below:
memberId Type totalPoints
----------- -------------- -----------
1001 type2 7.0
1002 type1 9.9
I tried the query below:
-- Asker's attempt: totals the points per member, but MAX(memberType) returns
-- the highest-sorting type, not the type recorded on the latest date.
SELECT memberId, MAX(memberType) as Type, SUM(points) as totalPoints
FROM dbo.PointsEarning
GROUP BY memberId
But of course this only works if the latest type is the max type. Plus some of my membertypes are purely alpha. I would appreciate if anyone can provide the most efficient way of doing this since I will be using it for a table with almost 30M records.
You can use sub-query with OVER clauses in following:
-- For every row compute the member's total points and the row's recency rank;
-- the rank-1 row carries the latest memberType together with the total.
SELECT
    latest.memberId,
    latest.memberType AS Type,
    latest.points     AS totalPoints
FROM (
    SELECT
        memberId,
        memberType,
        date,
        SUM(points)  OVER (PARTITION BY memberId)                    AS points,
        ROW_NUMBER() OVER (PARTITION BY memberId ORDER BY date DESC) AS rn
    FROM dbo.PointsEarning
) AS latest
WHERE latest.rn = 1
You can test It at SQL FIDDLE
-- Annotate every row with its member's total points and a newest-first row
-- number, then return only each member's newest row (all columns).
;WITH ranked AS
(
    SELECT
        SUM(points)  OVER (PARTITION BY memberId)                      AS totalPoints,
        ROW_NUMBER() OVER (PARTITION BY memberId ORDER BY [date] DESC) AS rn,
        id,
        memberId,
        memberType,
        points,
        [date]
    FROM yourtable
)
SELECT *
FROM ranked
WHERE rn = 1
Try this:
-- Total points per member; the scalar subquery fetches the memberType of the
-- member's row for which no later-dated row exists.
SELECT
    totals.memberId,
    (
        SELECT latest.memberType
        FROM PointsEarning AS latest
        WHERE latest.memberid = totals.memberid
          AND NOT EXISTS (
                SELECT 'NEXT'
                FROM PointsEarning AS newer
                WHERE newer.memberid = latest.memberid
                  AND newer.date > latest.date
              )
    ) AS Type,
    SUM(totals.points) AS totalPoints
FROM dbo.PointsEarning AS totals
GROUP BY totals.memberId
I assume that for every group you want to select the memberType of the last date in that group. You could use a subselect for this:
-- Total points per member; the TOP 1 subselect picks the memberType from the
-- member's row with the greatest date.
SELECT
    totals.memberId,
    (
        SELECT TOP 1 latest.memberType
        FROM PointsEarning AS latest
        WHERE latest.memberId = totals.memberId
        ORDER BY latest.date DESC
    ) AS Type,
    SUM(totals.points) AS totalPoints
FROM dbo.PointsEarning AS totals
GROUP BY totals.memberId
Update
After our conversation in the comments: assuming nobody ever inserts values manually into your id column (using SET IDENTITY_INSERT), you can use it to find the last record. So instead of max(date), just use max(id):
-- Per member: the id of the last inserted row and the sum of all points.
;WITH cte AS
(
    SELECT memberId, MAX(id) as LastId, SUM(points) as totalPoints
    FROM dbo.PointsEarning
    GROUP BY memberId
)
-- Look up the memberType of that last row. The CTE exposes the id as LastId,
-- so the join must use cte.LastId (there is no cte.Id column).
SELECT cte.memberId, p.memberType, cte.totalPoints
FROM dbo.PointsEarning p
INNER JOIN cte ON(p.Id = cte.LastId)
Note: This should provide a single record for each member, and should probably work faster than the first version (if id is actually the clustered index of the table).
First version
One way to do it is to use a common table expression:
-- Per member: the latest date and the sum of all points.
;WITH cte AS
(
    SELECT memberId, MAX(date) as LastDate, SUM(points) as totalPoints
    FROM dbo.PointsEarning
    GROUP BY memberId
)
-- Look up the memberType of the row(s) on that latest date. memberId exists in
-- both p and cte, so every selected column is qualified to avoid an
-- ambiguous-column error. A member with several rows on the latest date
-- yields one result row per such row.
SELECT cte.memberId, p.memberType, cte.totalPoints
FROM dbo.PointsEarning p
INNER JOIN cte ON(p.memberId = cte.memberId AND p.date = cte.LastDate)
Note: The code was written directly here and not tested. There might be some mistakes.

Subtract top two rows from one column using one id

does anyone know how can I subtract top two rows from one column only using one id? Here's my sample query:
-- Asker's attempt: for the two newest rows of customer '5', subtract the
-- consumption of the row whose id is a.id + 1; when no such row exists,
-- COALESCE falls back to a.consumption itself.
SELECT top 2 a.consumption,
coalesce(a.consumption -
(SELECT b.consumption
FROM tbl_t_billing b
WHERE b.id = a.id + 1), a.consumption) AS diff
FROM tbl_t_billing a
WHERE a.customerId = '5'
ORDER BY a.dateCreated DESC
I want to know how to get the difference between the top 2 rows using one id from the consumption column using the customerId #5. I've tried but I can't get the right query for that. Can somebody help me please? Thanks!
try this:
-- Number customer 5's billing rows from newest (rn = 1) to oldest.
;with cte as
(
    select consumption, customerId,
           row_number() over (partition by customerid order by datecreated desc) rn
    from tbl_t_billing where customerId = '5'
)
-- Newest consumption minus the second-newest one; when there is no second
-- row, b.consumption is NULL and COALESCE returns the newest consumption.
select a.customerId, a.consumption,
       coalesce((a.consumption - b.consumption), a.consumption) consumption_diff
from cte a left outer join cte b on a.rn + 1 = b.rn
where a.rn = 1
-- Self-contained demo. A table variable must be declared with the @ prefix
-- (a # name is only valid for temp tables created with CREATE TABLE).
declare @tbl_t_billing table(consumption int, customerId int, datecreated datetime)
insert into @tbl_t_billing
values
(10,5,'20100101'),
(7,5,'20000101'),
(9,4,'20100101'),
(5,4,'20000101'),
(8,3,'20100101'),
(3,3,'20000101'),
(7,2,'20100101'),
(3,2,'20000101'),
(4,1,'20100101'),
(2,1,'20000101')
-- get the difference between the last two consumption values for each customerId
select
    customerId,
    sum(consumption) diff
from(
    select
        customerId,
        -- newest row counts as +consumption, second-newest as -consumption;
        -- older rows get a NULL factor and are ignored by SUM
        consumption *
        case row_number() over(partition by customerId order by datecreated desc)
            when 1 then 1 when 2 then -1
        end consumption
    from @tbl_t_billing
) t
group by customerId

Resources