Select all records within the last month - sql-server

Using SQL Server T-SQL syntax how can you find all records within the last/max transaction month for a specific customer?
Given the following records:
CUSTOMER_ID | TRANSACTION_DATE
------------------------------
00001 | 04/21/2013
00001 | 05/01/2013
00001 | 05/14/2013
00002 | 06/08/2013
00002 | 07/01/2013
00002 | 07/28/2013
The output of the query should look like:
CUSTOMER_ID | TRANSACTION_DATE
------------------------------
00001 | 05/01/2013
00001 | 05/14/2013
00002 | 07/01/2013
00002 | 07/28/2013
The best I've come up with is this query (not tested), which looks horribly inefficient.
select customer_id, transaction_date
from customer_table outer
where concat(month(transaction_date), year(transaction_date)) = (
select concat(month(max(transaction_date)), year(max(transaction_date)))
from customer_table inner
where outer.customer_id = inner.customer_id
)

;with CTE as (
select
customer_id, transaction_date,
rank() over(
partition by customer_id
order by year(transaction_date) desc, month(transaction_date) desc
) as row_num
from customer_table
)
select *
from CTE
where row_num = 1
SQL FIDDLE EXAMPLE
another way to do it:
;with CTE as (
select
customer_id, dateadd(month, datediff(month, 0, max(transaction_date)), 0) as date
from #customer_table
group by customer_id
)
select ct.*
from CTE as c
inner join #customer_table as ct on
ct.transaction_date >= c.date and ct.customer_id = c.customer_id
SQL FIDDLE EXAMPLE

SELECT T1.*
FROM Table1 T1
JOIN
(
SELECT CUSTOMER_ID,
MAX(TRANSACTION_DATE) AS TRANSACTION_DATE
FROM Table1
GROUP BY CUSTOMER_ID
) T2
ON T1.CUSTOMER_ID = T2.CUSTOMER_ID
WHERE MONTH(T1.TRANSACTION_DATE) = MONTH(T2.TRANSACTION_DATE)
AND YEAR(T1.TRANSACTION_DATE) = YEAR(T2.TRANSACTION_DATE)
I am leaving the above for reference.
I have come to the following:
WITH MyCTE AS
(
SELECT [CUSTOMER_ID],
MAX(DATEADD(month, DATEDIFF(month, 0, [TRANSACTION_DATE]), 0)) AS StartOfMonth
FROM Table1
GROUP BY [CUSTOMER_ID]
)
SELECT T2.*
FROM MyCTE M
Join Table1 T2
ON DATEADD(month, DATEDIFF(month, 0, [TRANSACTION_DATE]), 0) = StartOfMonth
Which is very similar to the Roman's. The difference is that i have an equality rather than greater than. The execution plan seems better, and that is why i post it.
I have here the fiddle of all, but still Roman's first seems to be the best.

Related

return last non-null value in left join

I have a query that goes like this
select
a.date_field,
b.date_field,
b.interested_data
from tbl_dates a
left join tbl_data b
on a.date_field = b.date_field
this gives me a resultset like:
a_date_field | b_date_field | b_interested_data
2022-01-01 | 2022-01-01 | data_1
2022-01-02 | 2022-01-02 | data_2
2022-01-03 | null | null
is it possible to return the last non-null value for b_date_field and b_interested_data in row 3? Ideally the result should be
a_date_field | b_date_field | b_interested_data
2022-01-01 | 2022-01-01 | data_1
2022-01-02 | 2022-01-02 | data_2
2022-01-03 | 2022-01-02 | data_2
I know that b_date_field would seem like an error but I am only interested in b_interested_data.
Basically I think there are two approaches. One is to use apply, one is to use last_value. I have prepared a small insert with two selects that return the same table, so pick the one you like most (EDIT: Pick the one that fits your solution and has the best performance in your case).
select
*
into #dates
from (values (1,cast('20220101' as date)),(2,cast('20220102' as date)),(3,cast('20220103' as date))) a(date_id,[date])
;
select
*
into #data
from (values ('data_1',cast('20220101' as date)),('data_2',cast('20220102' as date))) b(interested_data,[date])
;
--Solution 1
select distinct
last_value(a.date) over (partition by a.date order by b.date rows between current row and unbounded following)
, last_value(b.date) over (partition by a.date order by b.date rows between current row and unbounded following)
, last_value(b.interested_data) over (partition by a.date order by b.date rows between current row and unbounded following)
from #dates a
left join #data b on
b.date <= a.date
;
--Solution 2
select
a.date
, sub.date as b_date
, sub.interested_data
from #dates a
outer apply (
select top 1
b.date as date
, b.interested_data as interested_data
from #data b
where b.date <= a.date
order by b.date desc
) as sub
perhaps you can do it using a APPLY operator. Get the "last" row from table b based on date_field
select
a.date_field,
b.date_field,
b.interested_data
from tbl_dates a
cross apply
(
select TOP (1) b.date_field, b.interested_data
from tbl_data b
where a.date_field >= b.date_field
order by b.date_field desc
) b
Using lag() and coalese() we can do it: assuming a_date_Field is the order we can use to determine the "Prior" value.
WITH CTE AS (SELECT '2022-01-01' a_date_field, '2022-01-01' b_date_field, 'data_1' b_interested_data UNION ALL
SELECT '2022-01-02', '2022-01-02', 'data_2' UNION ALL
SELECT '2022-01-03', null, null)
SELECT a_date_Field,
coalesce(B_Date_Field,lag(B_date_Field) over (order by a_date_Field)),
coalesce(B_Interested_Data,lag(B_Interested_Data) over (order by a_date_Field)) FROM CTE
Giving us:
+--------------+------------------+------------------+
| a_date_Field | (No column name) | (No column name) |
+--------------+------------------+------------------+
| 2022-01-01 | 2022-01-01 | data_1 |
| 2022-01-02 | 2022-01-02 | data_2 |
| 2022-01-03 | 2022-01-02 | data_2 |
+--------------+------------------+------------------+
Thanks everyone. I realized from your answers that the join condition can also be >= or <= and not only =. This is the solution I came up with:
drop table if exists #dates;
select
*
into #dates
from (values (1,cast('20220101' as date)),(2,cast('20220102' as date)),(3,cast('20220103' as date))) a(date_id,[date])
;
drop table if exists #data;
select
*
into #data
from (values ('data_1',cast('20220101' as date)),('data_2',cast('20220102' as date))) b(interested_data,[date])
;
select
ab.a_date,
ab.b_date,
ab.interested_data
from (
select
a.date a_date,
b.date b_date,
b.interested_data,
row_number() over (
partition by
a.date
order by
a.date,
b.date desc
) rn
from #dates a
left join #data b
on a.date >= b.date
) ab
where
ab.rn = 1
;

Referencing the current row outer apply column within separate outer join

Recently I've been tasked with creating a report that outputs sales information by Date of Business and Hour of the Day.
Here is the query I have currently written.
WITH CTE AS
(
SELECT 0 AS Count
UNION ALL
SELECT Count + 1
FROM CTE
WHERE Count + 1 <= 23
),
ALLDATES AS
(
SELECT CONVERT(datetime, #startDate) AS [DOB]
UNION ALL
SELECT DATEADD(DAY, 1, [DOB])
FROM AllDates
WHERE [DOB] < #endDate
)
SELECT D.DOB, A.Count AS [Hour], CONCAT(A.Count, ':00') AS [DisplayHour]
, B.OrderModeName, COALESCE(B.Sales_Total, 0) AS [Sales]
, COALESCE(B.Comps, 0) AS Comps, COALESCE(B.Promos, 0) AS Promos
FROM CTE AS A
OUTER APPLY (SELECT DOB FROM ALLDATES) D
LEFT OUTER JOIN (
SELECT DATEPART(HH, ItemDetail.TransactionTime) AS [Hour]
, OrderMode.OrderModeName, SUM(ItemDetail.GrossPrice) Sales_Total
, SUM(CompAmount) AS Comps, SUM(PromoAmount) AS Promos
FROM ItemDetail
INNER JOIN OrderMode ON OrderMode.OrderModeID = ItemDetail.OrderModeID
WHERE ItemDetail.DOB = D.DOB /*NEED HELP HERE*/ AND LocationID IN (
SELECT LocationID
FROM LocationGroupMember
WHERE LocationGroupID = '#locationGroupID'
)
GROUP BY ItemDetail.DOB, DATEPART(HH, ItemDetail.TransactionTime), OrderMode.OrderModeName
) AS B
ON A.Count = B.Hour
ORDER BY D.DOB, A.Count
Where I am struggling is being able to reference the current row's DOB column that is coming from the OUTER APPLY.
I have tried WHERE ItemDetail.DOB = D.DOB, however I receive an error that the identifier can't be bound. Am I correct that in understanding that the outer applied data is not visible to the subquery within the join?
Here is an example of the output I'm expecting:
DOB | Hour | Display Hour | OrderModeName | Sales | Comps | Promos
1/8/2020 | 17 | 17:00 | Order | 163.17 | 0 | 0 <-- Sales for Hour and Order Mode present
1/8/2020 | 23 | 23:00 | | 0 | 0 | 0 <-- No sales at all for a given hour
Thanks in advance for any direction and advice!
The basic pattern here is to CROSS JOIN to define the result "grain" and then LEFT JOIN the fact table to populate the rows for which data exists. EG
WITH CTE AS
(
SELECT 0 AS Count
UNION ALL
SELECT Count + 1
FROM CTE
WHERE Count + 1 <= 23
),
ALLDATES AS
(
SELECT CONVERT(datetime, #startDate) AS [DOB]
UNION ALL
SELECT DATEADD(DAY, 1, [DOB])
FROM AllDates
WHERE [DOB] < #endDate
),
ALLHOURS as
(
SELECT D.DOB, A.Count AS [Hour], CONCAT(A.Count, ':00') AS [DisplayHour]
FROM CTE AS A
CROSS JOIN ALLDATES D
),
ITEM_SUMMARY as
(
SELECT DOB, DATEPART(HH, ItemDetail.TransactionTime) AS [Hour], OrderMode.OrderModeName, SUM(ItemDetail.GrossPrice) Sales_Total, SUM(CompAmount) AS Comps, SUM(PromoAmount) AS Promos
FROM ItemDetail
INNER JOIN OrderMode ON OrderMode.OrderModeID = ItemDetail.OrderModeID
AND LocationID IN (SELECT LocationID FROM LocationGroupMember WHERE LocationGroupID = #locationGroupID)
where DOB >= #startDate
and DOB < #endDate
GROUP BY ItemDetail.DOB, DATEPART(HH, ItemDetail.TransactionTime), OrderMode.OrderModeName
)
select ALLHOURS.DOB,
ALLHOURS.Count AS [Hour],
CONCAT(ALLHOURS.Count, ':00') AS [DisplayHour],
ITEM_SUMMARY.OrderModeName,
COALESCE(ITEM_SUMMARY.Sales_Total, 0) AS [Sales],
COALESCE(ITEM_SUMMARY.Comps, 0) AS Comps,
COALESCE(ITEM_SUMMARY.Promos, 0) AS Promos
from ALLHOURS
LEFT OUTER JOIN ITEM_SUMMARY
on ITEM_SUMMARY.DOB = ALLHOURS.DOB
and ITEM_SUMMARY.Hour = ALLHOURS.Hour

SQL Server how to sum max for specific category?

Got a problem when constructing a analysis SQL using SQL Server
The raw data as below
GameID | UsrRegID | Score_User
281 | 1 | 1
281 | 1 | 2
281 | 1 | 3
282 | 1 | 0
282 | 1 | 0
282 | 1 | 1
283 | 1 | 2
283 | 1 | 3
Below is the expect output result:
Distinct_Count_GameID | UsrRegID | Score_User
3 | 1 | 7
The logic for calculating the Score_user as below:
Sum(Max(Score_user) for each GemeID)
So the result need to be 3+1+3=7.
Can using the pure SQL to get the above expecting output?
I think we need to aggregate twice here. One option uses ROW_NUMBER:
WITH cte AS (
SELECT GameID, UsrRegID, Score_User,
ROW_NUMBER() OVER (PARTITION BY GameID, UsrRegID ORDER BY Score_User DESC) rn
FROM yourTable
)
SELECT
UsrRegID,
COUNT(DISTINCT GameID) AS Distinct_Count_GameID,
SUM(Score_User) AS Score_User
FROM cte
WHERE rn = 1
GROUP BY
UsrRegID;
You can't do an aggregate of an aggregate on the same SELECT, you can chain them together with CTE or subqueries.
;WITH Maxs AS
(
SELECT
T.GameID,
T.UsrRegID,
MaxScore = MAX(T.Score_User)
FROM
YourTable AS T
GROUP BY
T.GameID,
T.UsrRegID
)
SELECT
M.UsrRegID,
Distinct_Count_GameID = COUNT(DISTINCT(M.GameID)),
Score_User = SUM(M.MaxScore)
FROM
Maxs AS M
GROUP BY
M.UsrRegID
You can also try like following.
SELECT Count(DISTINCT [rgameid]) Distinct_Count_GameID,
Count(DISTINCT [usrregid]) UsrRegID,
(SELECT Sum(M)
FROM (SELECT Max([score_user]) M
FROM [TableName]
GROUP BY [rgameid])t) AS Score_User
FROM [TableName]
DEMO
First find maximum value of score for each GameId and UsrRegID and then find SUM() for the column, Score_User and group it by the columns, GameID and UsrRegID using GROUP BY clause.
Query
select count(distinct [t].[GameID]) as [GameID], [t].[UsrRegID],
sum([t].[Score_User]) as [Score_User] from(
select [GameID], [UsrRegID], max([Score_User]) as [Score_User]
from [your_table_name]
group by [GameID], [UsrRegID]
) as [t]
group by [t].[UsrRegID];
Or, give a row number based on the descending order of score value and group by GameID and UsrRegID. Then find the count of distinct GameId and sum of maximum score.
Query
;with cte as(
select [rn] = row_number() over(
partition by [GameID], [UsrRegID]
order by [Score_User] desc
), *
from [your_table_name]
)
select count(distinct [GameID]) as [GameID], [UsrRegID],
sum([Score_User]) as [Score_User] from cte
where [rn] = 1
group by [UsrRegID];
Aggregates and a COUNT(Distinct GameID):
declare #raw as table (GameID int, UsrRegID int, Score_user int)
insert into #raw values (281, 1, 1)
,(281, 1, 2)
,(281, 1, 3)
,(282, 1, 0)
,(282, 1, 0)
,(282, 1, 1)
,(283, 1, 2)
,(283, 1, 3)
select count(distinct GameID) as Distinct_Count_GameID, UsrRegID, sum(max_score_user)
from
(
select GameID
, UsrRegID
, max(score_user) as max_score_user
from #raw
group by GameID, UsrRegID
) a
group by a.UsrRegID

How to get number of employees per year

I have a table called Mst_Employee. The fields are:
Emp_No | Emp_Name | Emp_JoiningDate | Emp_ResignedDate | Emp_Status
How do I get the No. of Employees by year for each year somebody joined or resigned? (Joined and Resigned includes by year)
E.g. result should look like this:
Year No. of Employees.
------------------------
2011 125
2012 130
2013 100
One way to solve it is with a recursive cte and group by:
DECLARE #FromYear int, #ToYear int
SELECT #FromYear = YEAR(MIN(Emp_JoiningDate)),
#ToYear = YEAR(GETDATE())
FROM Mst_Employee
;WITH CTE AS
(
SELECT #FromYear As TheYear
UNION ALL
SELECT TheYear + 1
FROM CTE
WHERE TheYear < #ToYear
)
SELECT TheYear as [Year],
COUNT
(
CASE WHEN TheYear <= YEAR(COALESCE(Emp_ResignedDate, GETDATE())) THEN
1
END
) As [No. of Employees.]
FROM CTE
INNER JOIN Mst_Employee ON(TheYear >= YEAR(Emp_JoiningDate))
GROUP BY TheYear
See fiddle here
You can achieve this with:
select y as [Year], count(*) as [No. of Employees.]
from(select Emp_No, YEAR(Emp_JoiningDate) as y from Mst_Employee
union
select Emp_No, YEAR(Emp_ResignedDate) from Mst_Employee
where Emp_ResignedDate is not null)t
group by y

SQL multiple start dates to end date

I have a table with the following format (which I cannot change)
ClientID | RefAd1 | Cluster Start Date | Cluster End Date
100001 | R1234 | 2014-11-01 |
100001 | R1234 | 2014-11-10 |
100001 | R1234 | 2014-11-20 |
What I would like to come out with is:
ClientID | RefAd1 | Cluster Start Date | Cluster End Date
100001 | R1234 | 2014-11-01 | 2014-11-10
100001 | R1234 | 2014-11-10 | 2014-11-20
100001 | R1234 | 2014-11-20 | NULL
I've searched on here, and had many attempts myself, but just can't get it working.
I can't update the source table (or add another table into the database) so I'm going to do this in a view (which I can save)
Any help would be gratefully appreciated, been going round in circles with this for a day and a bit now!
Use Self join to get next record
;WITH CTE AS
(
SELECT ROW_NUMBER() OVER(ORDER BY [Cluster Start Date])RNO,*
FROM YOURTABLE
)
SELECT C1.ClientID,C1.RefAd1,C1.[Cluster Start Date],C2.[Cluster Start Date] [Cluster End Date]
FROM CTE C1
LEFT JOIN CTE C2 ON C1.RNO=C2.RNO-1
Click here to view result
EDIT :
To update the table, you can use the below query
;WITH CTE AS
(
SELECT ROW_NUMBER() OVER(ORDER BY [Cluster Start Date])RNO,*
FROM #TEMP
)
UPDATE #TEMP SET [Cluster End Date] = TAB.[Cluster End Date]
FROM
(
SELECT C1.ClientID,C1.RefAd1,C1.[Cluster Start Date],C2.[Cluster Start Date] [Cluster End Date]
FROM CTE C1
LEFT JOIN CTE C2 ON C1.RNO=C2.RNO-1
)TAB
WHERE TAB.[Cluster Start Date]=#TEMP.[Cluster Start Date]
Click here to view result
EDIT 2 :
If you want this to be done for ClientId and RefAd1.
;WITH CTE AS
(
-- Get current date and next date for each type of ClientId and RefAd1
SELECT ROW_NUMBER() OVER(PARTITION BY ClientID,RefAd1 ORDER BY [Cluster Start Date])RNO,*
FROM #TEMP
)
UPDATE #TEMP SET [Cluster End Date] = TAB.[Cluster End Date]
FROM
(
SELECT C1.ClientID,C1.RefAd1,C1.[Cluster Start Date],C2.[Cluster Start Date] [Cluster End Date]
FROM CTE C1
LEFT JOIN CTE C2 ON C1.RNO=C2.RNO-1 AND C1.ClientID=C2.ClientID AND C1.RefAd1=C2.RefAd1
)TAB
WHERE TAB.[Cluster Start Date]=#TEMP.[Cluster Start Date] AND TAB.ClientID=#TEMP.ClientID AND TAB.RefAd1=#TEMP.RefAd1
Click here to view result
If you want to do it only for ClientId, remove the conditions for RefAd1
Here is the script if you just want the view you described:
CREATE VIEW v_name as
SELECT
ClientId,
RefAd1,
[Cluster Start Date],
( SELECT
min([Cluster Start Date])
FROM yourTable
WHERE
t.[Cluster Start Date] < [Cluster Start Date]
) as [Cluster End Date]
FROM yourtable t

Resources