SQL multiple start dates to end date - sql-server

I have a table with the following format (which I cannot change)
ClientID | RefAd1 | Cluster Start Date | Cluster End Date
100001 | R1234 | 2014-11-01 |
100001 | R1234 | 2014-11-10 |
100001 | R1234 | 2014-11-20 |
What I would like to come out with is:
ClientID | RefAd1 | Cluster Start Date | Cluster End Date
100001 | R1234 | 2014-11-01 | 2014-11-10
100001 | R1234 | 2014-11-10 | 2014-11-20
100001 | R1234 | 2014-11-20 | NULL
I've searched on here, and had many attempts myself, but just can't get it working.
I can't update the source table (or add another table into the database) so I'm going to do this in a view (which I can save)
Any help would be gratefully appreciated, been going round in circles with this for a day and a bit now!

Use Self join to get next record
-- Number every row by start date, then pair each row with the row numbered
-- immediately after it; that neighbour's start date becomes this row's end date.
;WITH numbered AS
(
    SELECT
        src.*,
        ROW_NUMBER() OVER (ORDER BY src.[Cluster Start Date]) AS RNO
    FROM YOURTABLE AS src
)
SELECT
    cur.ClientID,
    cur.RefAd1,
    cur.[Cluster Start Date],
    nxt.[Cluster Start Date] AS [Cluster End Date]  -- NULL on the last row: nothing follows it
FROM numbered AS cur
LEFT JOIN numbered AS nxt
    ON nxt.RNO = cur.RNO + 1
Click here to view result
EDIT :
To update the table, you can use the below query
-- Write each row's end date back into #TEMP: the end date is the start date of
-- the row that follows it when all rows are ordered by start date.
;WITH numbered AS
(
    SELECT
        src.[Cluster Start Date],
        ROW_NUMBER() OVER (ORDER BY src.[Cluster Start Date]) AS RNO
    FROM #TEMP AS src
)
UPDATE tgt
SET [Cluster End Date] = nxt.[Cluster Start Date]
FROM #TEMP AS tgt
INNER JOIN numbered AS cur
    ON cur.[Cluster Start Date] = tgt.[Cluster Start Date]  -- target rows are matched on start date only
LEFT JOIN numbered AS nxt
    ON nxt.RNO = cur.RNO + 1
Click here to view result
EDIT 2 :
If you want this to be done for ClientId and RefAd1.
-- Rows are numbered separately inside each (ClientID, RefAd1) group, so a row's
-- end date can only come from the next row of the same group.
;WITH numbered AS
(
    SELECT
        src.ClientID,
        src.RefAd1,
        src.[Cluster Start Date],
        ROW_NUMBER() OVER (
            PARTITION BY src.ClientID, src.RefAd1
            ORDER BY src.[Cluster Start Date]
        ) AS RNO
    FROM #TEMP AS src
)
UPDATE tgt
SET [Cluster End Date] = nxt.[Cluster Start Date]
FROM #TEMP AS tgt
INNER JOIN numbered AS cur
    ON  cur.ClientID = tgt.ClientID
    AND cur.RefAd1 = tgt.RefAd1
    AND cur.[Cluster Start Date] = tgt.[Cluster Start Date]
LEFT JOIN numbered AS nxt
    ON  nxt.ClientID = cur.ClientID
    AND nxt.RefAd1 = cur.RefAd1
    AND nxt.RNO = cur.RNO + 1
Click here to view result
If you want to do it only for ClientId, remove the conditions for RefAd1

Here is the script if you just want the view you described:
-- View exposing each cluster row together with a derived end date.
-- The end date is the earliest later start date belonging to the SAME
-- ClientId / RefAd1 pair; it is NULL when no later row exists for that pair.
CREATE VIEW v_name AS
SELECT
    t.ClientId,
    t.RefAd1,
    t.[Cluster Start Date],
    (
        SELECT MIN(nxt.[Cluster Start Date])
        FROM yourTable AS nxt
        -- Restrict the lookup to the same client/reference; without this the
        -- end date could be taken from an unrelated client's row.
        WHERE nxt.ClientId = t.ClientId
          AND nxt.RefAd1 = t.RefAd1
          AND nxt.[Cluster Start Date] > t.[Cluster Start Date]
    ) AS [Cluster End Date]
FROM yourTable AS t

Related

return last non-null value in left join

I have a query that goes like this
-- Every row of tbl_dates, with the tbl_data row for that exact date when one
-- exists (the b.* columns are NULL otherwise).
SELECT
    a.date_field,
    b.date_field,
    b.interested_data
FROM tbl_dates AS a
LEFT JOIN tbl_data AS b
    ON b.date_field = a.date_field
this gives me a resultset like:
a_date_field | b_date_field | b_interested_data
2022-01-01 | 2022-01-01 | data_1
2022-01-02 | 2022-01-02 | data_2
2022-01-03 | null | null
is it possible to return the last non-null value for b_date_field and b_interested_data in row 3? Ideally the result should be
a_date_field | b_date_field | b_interested_data
2022-01-01 | 2022-01-01 | data_1
2022-01-02 | 2022-01-02 | data_2
2022-01-03 | 2022-01-02 | data_2
I know that b_date_field would seem like an error but I am only interested in b_interested_data.
Basically I think there are two approaches. One is to use apply, one is to use last_value. I have prepared a small insert with two selects that return the same table, so pick the one you like most (EDIT: Pick the one that fits your solution and has the best performance in your case).
-- Sample data: three consecutive dates in #dates, data rows for the first two in #data.
-- Dropping first makes the script re-runnable in the same session.
drop table if exists #dates;
select
    a.date_id,
    a.[date]
into #dates
from (values
    (1, cast('20220101' as date)),
    (2, cast('20220102' as date)),
    (3, cast('20220103' as date))
) as a(date_id, [date])
;
drop table if exists #data;
select
    b.interested_data,
    b.[date]
into #data
from (values
    ('data_1', cast('20220101' as date)),
    ('data_2', cast('20220102' as date))
) as b(interested_data, [date])
;
--Solution 1
-- Join every date to all data rows dated on or before it, then keep only the
-- newest data row per date: within each a.[date] partition, last_value over the
-- frame "current row .. end" (ordered by b.[date]) is the same for every row,
-- so DISTINCT collapses the partition to a single row.
-- Output columns are named so that both solutions return the same shape.
select distinct
    a.[date]
    , last_value(b.[date]) over (
        partition by a.[date]
        order by b.[date]
        rows between current row and unbounded following
      ) as b_date
    , last_value(b.interested_data) over (
        partition by a.[date]
        order by b.[date]
        rows between current row and unbounded following
      ) as interested_data
from #dates a
left join #data b on
    b.[date] <= a.[date]
;
--Solution 2
-- For each row of #dates, fetch the single newest #data row dated on or before
-- it; OUTER APPLY keeps the date even when no such data row exists.
select
    a.[date]
    , latest.[date] as b_date
    , latest.interested_data
from #dates as a
outer apply (
    select top (1)
        b.[date]
        , b.interested_data
    from #data as b
    where b.[date] <= a.[date]
    order by b.[date] desc
) as latest
Perhaps you can do it using an APPLY operator: get the "last" row from table b based on date_field.
-- For every row of tbl_dates, pick the most recent tbl_data row dated on or
-- before it.
-- OUTER APPLY (rather than CROSS APPLY) keeps dates that have no earlier data
-- row, so the result has one row per tbl_dates row like the original LEFT JOIN.
select
    a.date_field,
    b.date_field,
    b.interested_data
from tbl_dates a
outer apply
(
    select top (1) d.date_field, d.interested_data
    from tbl_data d
    where d.date_field <= a.date_field
    order by d.date_field desc
) b
Using lag() and coalesce() we can do it, assuming a_date_Field is the order we can use to determine the "prior" value.
-- Inline copy of the three joined rows from the question.
WITH CTE (a_date_field, b_date_field, b_interested_data) AS (
    SELECT v.a_date_field, v.b_date_field, v.b_interested_data
    FROM (VALUES
        ('2022-01-01', '2022-01-01', 'data_1'),
        ('2022-01-02', '2022-01-02', 'data_2'),
        ('2022-01-03', NULL, NULL)
    ) AS v (a_date_field, b_date_field, b_interested_data)
)
-- When a b_* value is NULL, fall back to the value of the previous row
-- (previous in a_date_Field order).
SELECT
    a_date_Field,
    COALESCE(B_Date_Field, LAG(B_date_Field) OVER (ORDER BY a_date_Field)),
    COALESCE(B_Interested_Data, LAG(B_Interested_Data) OVER (ORDER BY a_date_Field))
FROM CTE
Giving us:
+--------------+------------------+------------------+
| a_date_Field | (No column name) | (No column name) |
+--------------+------------------+------------------+
| 2022-01-01 | 2022-01-01 | data_1 |
| 2022-01-02 | 2022-01-02 | data_2 |
| 2022-01-03 | 2022-01-02 | data_2 |
+--------------+------------------+------------------+
Thanks everyone. I realized from your answers that the join condition can also be >= or <= and not only =. This is the solution I came up with:
-- Rebuild the two sample temp tables from inline VALUES lists.
drop table if exists #dates;
select
    a.date_id,
    a.[date]
into #dates
from (values
    (1, cast('20220101' as date)),
    (2, cast('20220102' as date)),
    (3, cast('20220103' as date))
) as a(date_id, [date]);

drop table if exists #data;
select
    b.interested_data,
    b.[date]
into #data
from (values
    ('data_1', cast('20220101' as date)),
    ('data_2', cast('20220102' as date))
) as b(interested_data, [date]);
-- Pair every date with all data rows dated on or before it, rank those data
-- rows newest-first per date, and keep only the newest one (rn = 1).
with ranked as (
    select
        a.[date] as a_date,
        b.[date] as b_date,
        b.interested_data,
        -- a.[date] is constant inside its own partition, so ordering by
        -- b.[date] alone yields the same numbering
        row_number() over (
            partition by a.[date]
            order by b.[date] desc
        ) as rn
    from #dates as a
    left join #data as b
        on b.[date] <= a.[date]
)
select
    ranked.a_date,
    ranked.b_date,
    ranked.interested_data
from ranked
where ranked.rn = 1
;

UNION Returns NULL When First SELECT Returns Nothing

Hi I have a table: T1 that contains two columns Date and Price
T1
---------------------------
DATE | PRICE |
---------------------------
2018-07-25 |2.00 |
---------------------------
2018-06-20 |3.00 |
---------------------------
2017-05-10 |3.00 |
---------------------------
Here are my requirements:
If a user enters a date that is not in the DB I need to return the last price and date in the table T1.
If a user enters a date that is superior or inferior to one of the dates in the table T1 -- for example if a user enters '2017-05-09' which is not in the table; I have to return the next date above the given date. In this case'2017-05-10'
I am using UNION in my script but it returns empty when one of the SELECT statements returns empty.
I am using a CTE table:
-- Date supplied by the user. T-SQL local variables must start with '@'.
DECLARE @DateEntered DATE;
-- Three groups of T1 rows combined with UNION: rows on the entered date, rows
-- before it, and rows after it. RowNumber is computed per (Date, Price) pair.
-- Only the CTE definition is shown; the statement that reads from it is not
-- part of this snippet.
WITH HistoricalCTE (Date, Price, RowNumber) AS (
SELECT R.Date,
R.Price,
ROW_NUMBER() OVER (PARTITION BY R.Date, R.Price ORDER BY Date DESC)
FROM T1 R
WHERE Date = @DateEntered
UNION
SELECT R.Date,
R.Price,
ROW_NUMBER() OVER (PARTITION BY R.Date, R.Price ORDER BY Date DESC)
FROM T1 R
WHERE Date < @DateEntered
UNION
SELECT R.Date,
R.Price,
ROW_NUMBER() OVER (PARTITION BY R.Date, R.Price ORDER BY Date DESC)
FROM T1 R
WHERE Date > @DateEntered
)
The issue is that when I enter a date later than all the dates in table T1, I get an empty result because the first SELECT returns nothing. Any idea how I could solve this?
You might be overcomplicating this. If I read your question correctly, we can just take the smallest value greater than the input, or if that doesn't exist, then just take the max of the table.
-- @DateEntered is the user-supplied DATE and must be declared before this
-- statement (T-SQL variables start with '@').
-- cte: rows strictly later than the input, numbered from the earliest, so
-- rn = 1 is the first date after the input.
;WITH cte AS (
    SELECT *,
           ROW_NUMBER() OVER (ORDER BY Date) rn
    FROM T1
    WHERE Date > @DateEntered
)
-- Return the next date/price after the input when one exists; otherwise fall
-- back to the latest date in T1 and its price.
SELECT
    CASE WHEN EXISTS (SELECT 1 FROM cte WHERE rn = 1)
         THEN (SELECT Date FROM cte WHERE rn = 1)
         ELSE (SELECT MAX(Date) FROM T1) END AS Date,
    CASE WHEN EXISTS (SELECT 1 FROM cte WHERE rn = 1)
         THEN (SELECT Price FROM cte WHERE rn = 1)
         ELSE (SELECT Price FROM T1 WHERE Date = (SELECT MAX(Date) FROM T1)) END AS Price;
Demo
All the edge cases seem to be working in the above demo, and you may test any input date against your sample data.

Merge rows based on the same date?

I have a table that looks like the below
Date | ID | Period | ArchivedBy | ArchivedFlag | Value
2018-01-20 12:23 |23344 | Q1 | NULL | NULL | 200
2018-01-20 12:20 |23344 | NULL | P.Tills | 1 | NULL
2018-01-20 12:19 |23344 | NULL | NULL | 1 | NULL
This table represents all edits made to an agreement (each new edit gets its own row). If a value hasn't been changed by an edit, it will say NULL.
so ideally the above would look like the following
Date | ID | Period | ArchivedBy | ArchivedFlag | Value
2018-01-20 |23344 | Q1 | P.Tills | 1 | 200
This returned row should show the latest state of the agreement based on the date. So for the date in my example (2018-01-20) this one row would be returned, combining all changes that were made throughout the day into 1 row which shows how it looks following all the changes throughout the day.
I hope this makes sense?
Thank you!
Here is one way using Row_Number and Group by
-- Collapse all edits of one agreement (ID) on one calendar day into a single row.
-- Every output column is named so the result matches the requested layout.
-- NOTE(review): MAX returns the largest non-NULL value of the day, not the most
-- recently edited one — confirm that is acceptable for Period / ArchivedBy /
-- ArchivedFlag when a field is changed more than once per day.
SELECT [Date]       = Cast([Date] AS DATE),
       ID,
       Period       = Max(Period),
       ArchivedBy   = Max(ArchivedBy),
       ArchivedFlag = Max(ArchivedFlag),
       [Value]      = Max(CASE WHEN rn = 1 THEN [Value] END)
FROM   (SELECT *,
               -- Rows with a non-NULL Value sort first, newest first, so rn = 1
               -- is the latest edit that actually set Value. Ordering by [Date]
               -- alone would return NULL whenever the day's last edit left
               -- Value untouched.
               Rn = Row_number() OVER (
                        PARTITION BY Cast([Date] AS DATE), ID
                        ORDER BY CASE WHEN [Value] IS NULL THEN 1 ELSE 0 END,
                                 [Date] DESC)
        FROM   Yourtable) a
GROUP  BY Cast([Date] AS DATE),
          ID
I would propose 2 solutions.
Simple
For each day select top 1 NOT NULL value:
-- One row per (ID, calendar day); each column is the most recent non-NULL
-- value recorded for that ID on that day.
-- Each lookup is correlated on ID as well as on the day, so values are never
-- taken from another agreement edited on the same day.
-- OUTER APPLY keeps the (ID, day) row even when a column was never set that day.
SELECT
    G.ID,
    G.GD AS [Date],
    P.Period,
    AB.ArchivedBy,
    AF.ArchivedFlag,
    V.Value
FROM (SELECT DISTINCT ID, CAST([Date] AS date) AS GD FROM T) AS G
OUTER APPLY (
    SELECT TOP (1) s.Period
    FROM T AS s
    WHERE s.Period IS NOT NULL AND s.ID = G.ID AND CAST(s.[Date] AS date) = G.GD
    ORDER BY s.[Date] DESC
) AS P
OUTER APPLY (
    SELECT TOP (1) s.ArchivedBy
    FROM T AS s
    WHERE s.ArchivedBy IS NOT NULL AND s.ID = G.ID AND CAST(s.[Date] AS date) = G.GD
    ORDER BY s.[Date] DESC
) AS AB
OUTER APPLY (
    SELECT TOP (1) s.ArchivedFlag
    FROM T AS s
    WHERE s.ArchivedFlag IS NOT NULL AND s.ID = G.ID AND CAST(s.[Date] AS date) = G.GD
    ORDER BY s.[Date] DESC
) AS AF
OUTER APPLY (
    SELECT TOP (1) s.Value
    FROM T AS s
    WHERE s.Value IS NOT NULL AND s.ID = G.ID AND CAST(s.[Date] AS date) = G.GD
    ORDER BY s.[Date] DESC
) AS V
Optimized (intuitively, not tested*)
Use varbinary sorting rules and aggregation, manually order NULLs:
-- Collapses each (ID, calendar day) group to one row by "boxing" ArchivedBy
-- behind a sortable binary prefix, taking MAX of the boxed value, and then
-- stripping the prefix again. The author marks this variant as untested.
SELECT CAST(Date AS Date), ID,
-- MAX(Arch) is the boxed value that sorts highest in the group; SUBSTRING from
-- byte 9 drops the prefix (assumes the row-number prefix occupies 8 bytes — TODO confirm).
CAST(SUBSTRING(MAX(Arch),9, LEN(MAX(Arch))) AS varchar(10)) ArchivedBy --unbox
--other columns
FROM
(
SELECT Date, ID,
-- Prefix = row number within the calendar day ordered by Date (0 when
-- ArchivedBy is NULL), followed by the ArchivedBy bytes.
-- NOTE(review): the partition is by day only, not by ID, and concatenating a
-- NULL ArchivedBy presumably yields NULL rather than the 0 prefix — verify.
CAST(CASE WHEN ArchivedBy IS NOT NULL THEN ROW_NUMBER() OVER (PARTITION BY CAST(Date AS Date) ORDER BY Date) ELSE 0 END AS varbinary(MAX))+CAST(ArchivedBy AS varbinary(MAX)) Arch --box
--other columns
FROM T
) Tab
GROUP BY ID, CAST(Date AS Date)

SQL Find pairs of data in rows and convert to columns

I'm trying to setup a query to pull employee tenure reports. I have an employee status table that tracks information for each employee (e.g. -Hire Date, Term Date, Salary Change, etc.) The table looks like this:
EmployeeID | Date | Event
1 | 1/1/99 | 1
2 | 1/2/99 | 1
1 | 1/3/99 | 2
1 | 1/4/99 | 1
I used a pivot table to move the table from a vertical layout to a horizontal layout
-- One row per employee: the latest event-1 date as the hire date and the latest
-- event-2 date as the term date (one day after the current time when there is none).
SELECT
    [FK_EmployeeID],
    MAX([1]) AS [Hire Date],
    ISNULL(MAX([2]), DATEADD(day, 1, GETDATE())) AS [Term Date]
FROM DT_EmployeeStatusEvents
PIVOT (
    MAX([Date])
    FOR [EventType] IN ([1], [2])
) AS T
GROUP BY [FK_EmployeeID]
I get a result like this:
EmployeeID | 1 | 2
1 | 1/4/99 | 1/3/99
2 | 1/2/99 | *null*
However, the problem I run into is that I need both sets of values for each employee. (We hire a lot of recurring seasonals) What I would like is a way to convert the columns to rows selecting the hire date (1) and the very next term date (2) for each employee like this:
EmployeeID | 1 | 2
1 | 1/1/99 | 1/3/99
2 | 1/2/99 | *null*
1 | 1/4/99 | *null*
Is this possible? I've looked at a lot of the PIVOT examples and they all show an aggregate function.
The problem is that you are attempting to pivot a datetime value so you are limited to using either max or min as the aggregate function. When you use those you will only return one row for each employeeid.
In order to get past this you will need to have some value that will be used during the grouping of your data - I would suggest using a windowing function like row_number(). You can make your subquery:
-- Number each employee's events of the same type in date order, so the n-th
-- hire and the n-th termination of an employee share the same seq.
SELECT
    employeeid,
    date,
    event,
    ROW_NUMBER() OVER (PARTITION BY employeeid, event ORDER BY date) AS seq
FROM DT_EmployeeStatusEvents
See SQL Fiddle with Demo. This creates a unique value for each employeeId and event combination. This new number will then be grouped on so you can return multiple rows. Your full query will be:
-- seq is not selected, but because it stays in the pivot source it acts as an
-- extra grouping column: each (employeeid, seq) pair yields its own output row.
SELECT
    piv.employeeid,
    piv.[1],
    piv.[2]
FROM
(
    SELECT
        employeeid,
        date,
        event,
        ROW_NUMBER() OVER (PARTITION BY employeeid, event ORDER BY date) AS seq
    FROM DT_EmployeeStatusEvents
) AS d
PIVOT
(
    MAX(date)
    FOR event IN ([1], [2])
) AS piv
ORDER BY piv.employeeid;
See SQL Fiddle with Demo
This should get you started...
-- Self-contained demo using a table variable (table variables are named with
-- '@'; '#EMP' is not valid in a DECLARE ... TABLE statement).
DECLARE @EMP TABLE (EMPID INT, dDATE DATETIME, EVENTTYPE INT);

INSERT INTO @EMP (EMPID, dDATE, EVENTTYPE)
SELECT 1,'1/1/99',1 UNION ALL
SELECT 2,'1/2/99',1 UNION ALL
SELECT 1,'1/3/99',2 UNION ALL
SELECT 1,'1/4/99',1;

-- Label event type 1 as HIRE and type 2 as TERM, number each employee's events
-- of the same type by date (INSTANCE), then pivot the labels into columns.
-- INSTANCE is not selected but keeps the n-th hire and n-th term on one row.
SELECT EMPID, HIRE, TERM
FROM (
    SELECT EMPID, dDATE, 'HIRE' AS X,
           ROW_NUMBER() OVER (PARTITION BY EMPID, EVENTTYPE ORDER BY dDATE) AS INSTANCE
    FROM @EMP
    WHERE EVENTTYPE = 1
    UNION ALL
    SELECT EMPID, dDATE, 'TERM' AS X,
           ROW_NUMBER() OVER (PARTITION BY EMPID, EVENTTYPE ORDER BY dDATE) AS INSTANCE
    FROM @EMP
    WHERE EVENTTYPE = 2
) DATATABLE
PIVOT (MIN([dDATE])
FOR X IN ([HIRE],[TERM])) PIVOTTABLE;

Select all records within the last month

Using SQL Server T-SQL syntax how can you find all records within the last/max transaction month for a specific customer?
Given the following records:
CUSTOMER_ID | TRANSACTION_DATE
------------------------------
00001 | 04/21/2013
00001 | 05/01/2013
00001 | 05/14/2013
00002 | 06/08/2013
00002 | 07/01/2013
00002 | 07/28/2013
The output of the query should look like:
CUSTOMER_ID | TRANSACTION_DATE
------------------------------
00001 | 05/01/2013
00001 | 05/14/2013
00002 | 07/01/2013
00002 | 07/28/2013
The best I've come up with is this query (not tested), which looks horribly inefficient.
-- Keep a customer's rows whose month+year equals the month+year of that
-- customer's latest transaction.
-- Aliases are o / i because OUTER and INNER are reserved keywords in T-SQL and
-- cannot be used as unquoted table aliases.
select o.customer_id, o.transaction_date
from customer_table as o
where concat(month(o.transaction_date), year(o.transaction_date)) = (
    select concat(month(max(i.transaction_date)), year(max(i.transaction_date)))
    from customer_table as i
    where i.customer_id = o.customer_id
)
-- Rank each customer's rows by calendar month, newest month first. All rows in
-- the same month tie, so rank 1 is every row of the customer's latest month.
select
    ranked.customer_id,
    ranked.transaction_date,
    ranked.row_num
from (
    select
        customer_id,
        transaction_date,
        rank() over (
            partition by customer_id
            order by year(transaction_date) desc, month(transaction_date) desc
        ) as row_num
    from customer_table
) as ranked
where ranked.row_num = 1
SQL FIDDLE EXAMPLE
another way to do it:
-- latest_month: first day of the month of each customer's most recent
-- transaction. Since that month contains the customer's maximum date, every
-- row on or after its first day belongs to that month.
;with latest_month as (
    select
        customer_id,
        dateadd(month, datediff(month, 0, max(transaction_date)), 0) as month_start
    from #customer_table
    group by customer_id
)
select ct.*
from #customer_table as ct
inner join latest_month as lm
    on  lm.customer_id = ct.customer_id
    and ct.transaction_date >= lm.month_start
SQL FIDDLE EXAMPLE
-- Join every row to its customer's latest transaction date and keep the rows
-- that fall in the same calendar month and year as that date.
SELECT cur.*
FROM Table1 AS cur
INNER JOIN
(
    SELECT
        CUSTOMER_ID,
        MAX(TRANSACTION_DATE) AS TRANSACTION_DATE
    FROM Table1
    GROUP BY CUSTOMER_ID
) AS latest
    ON  latest.CUSTOMER_ID = cur.CUSTOMER_ID
    AND YEAR(latest.TRANSACTION_DATE) = YEAR(cur.TRANSACTION_DATE)
    AND MONTH(latest.TRANSACTION_DATE) = MONTH(cur.TRANSACTION_DATE)
I am leaving the above for reference.
I have come to the following:
-- MyCTE: for each customer, the first day of the month of their latest transaction.
WITH MyCTE AS
(
    SELECT [CUSTOMER_ID],
           MAX(DATEADD(month, DATEDIFF(month, 0, [TRANSACTION_DATE]), 0)) AS StartOfMonth
    FROM Table1
    GROUP BY [CUSTOMER_ID]
)
-- Return every transaction that falls in its own customer's latest month.
-- The CUSTOMER_ID predicate is required: matching on the month alone would also
-- return (and duplicate) rows of other customers that fall in the same month.
SELECT T2.*
FROM MyCTE M
INNER JOIN Table1 T2
    ON  T2.[CUSTOMER_ID] = M.[CUSTOMER_ID]
    AND DATEADD(month, DATEDIFF(month, 0, T2.[TRANSACTION_DATE]), 0) = M.StartOfMonth
This is very similar to Roman's second query. The difference is that I use an equality rather than a greater-than-or-equal comparison. The execution plan seems better, which is why I am posting it.
I have here the fiddle of all, but still Roman's first seems to be the best.

Resources