Backfill dates using CTE in SQL Server

Backfill dates using CTE in SQL Server - sql-server

I have a table with IDs and with null startdate. I am trying to back fill them using CTE.
If the EndDate is '2011-12-31' for Record#1, the StartDate for Record#2 is EndDate from Record#1 + 1 day i.e. '2012-01-01'
-- Sample data: two ids with five period end dates each.
-- startDate is left NULL on every row; it is the value to be back-filled.
CREATE TABLE dbo.input
(
    inputid   int           NULL,
    startDate smalldatetime NULL,
    endDate   smalldatetime NULL
);

INSERT INTO dbo.input (inputid, startDate, endDate)
VALUES
    (111, NULL, '2011-05-31'),
    (111, NULL, '2012-05-31'),
    (111, NULL, '2013-05-31'),
    (111, NULL, '2014-05-31'),
    (111, NULL, '2015-05-31'),
    (222, NULL, '2010-06-30'),
    (222, NULL, '2011-06-30'),
    (222, NULL, '2012-06-30'),
    (222, NULL, '2013-06-30'),
    (222, NULL, '2014-06-30');
This is the expected output.
-- Expected result set, stored as a table so it can be compared with a query's output.
CREATE TABLE dbo.output
(
    outputid  int           NULL,
    startDate smalldatetime NULL,
    endDate   smalldatetime NULL
);

-- The original statements omitted the VALUES keyword and did not parse.
-- NOTE(review): only the second 111 row is "previous endDate + 1 day"; every
-- other non-NULL startDate below repeats the previous endDate unchanged.
-- The values are kept exactly as posted.
INSERT INTO dbo.output (outputid, startDate, endDate)
VALUES
    (111, NULL,         '2011-05-31'),
    (111, '2011-06-01', '2012-05-31'),
    (111, '2012-05-31', '2013-05-31'),
    (111, '2013-05-31', '2014-05-31'),
    (111, '2014-05-31', '2015-05-31'),
    (222, NULL,         '2010-06-30'),
    (222, '2010-06-30', '2011-06-30'),
    (222, '2011-06-30', '2012-06-30'),
    (222, '2012-06-30', '2013-06-30'),
    (222, '2013-06-30', '2014-06-30');
This is what I tried
-- Attempted query: number the rows of each inputid by endDate, then look up
-- the neighbouring rows through their row numbers.
WITH CTE AS (
SELECT
-- rn restarts at 1 for every inputid (PARTITION BY p.inputid)
rn = ROW_NUMBER() OVER (partition by p.inputid ORDER BY p.inputid,endDate),
p.inputid
,EndDate
FROM dbo.input p
)
SELECT distinct
DATEADD(day,1,prev.enddate) as startd,cte.inputid,cte.endDate
FROM CTE
-- NOTE(review): both joins compare rn only, so a row is paired with the
-- rn - 1 / rn + 1 rows of every inputid, not just its own.
LEFT JOIN CTE prev ON prev.rn = CTE.rn - 1
-- nex is joined, but none of its columns are selected.
LEFT JOIN CTE nex ON nex.rn = CTE.rn + 1
order by cte.inputid,startd
It is not grouping by inputid properly and I do not know how to fix this?
Any help is appreciated.
Thanks
MR

I adjusted your existing query, but the results are not exactly the same as your expected output. Should StartDate be the day after the previous EndDate (rather than the same day as the previous EndDate)?
-- Back-fill startDate as "endDate of the previous row of the same inputid + 1 day".
;WITH numbered AS (
    SELECT
        inputid,
        endDate,
        -- position of the row inside its inputid, earliest endDate first
        ROW_NUMBER() OVER (PARTITION BY inputid ORDER BY endDate) AS rn
    FROM #input
)
SELECT
    cur.inputid,
    -- the first row of each inputid has no predecessor, so startDate stays NULL
    DATEADD(DAY, 1, prev.endDate) AS startDate,
    cur.endDate
FROM numbered AS cur
LEFT JOIN numbered AS prev
    ON prev.inputid = cur.inputid
   AND prev.rn = cur.rn - 1
-- Sort by endDate. The original sorted by a.startDate, which resolved to the
-- stored startDate column pulled in by SELECT * (NULL in the sample data),
-- not to the computed startDate.
ORDER BY cur.inputid, cur.endDate;

As long as you're using SQL Server 2012 or later, you can use the following...
-- Requires SQL Server 2012+ (LAG).
-- startDate = endDate of the previous row of the same inputid + 1 day.
-- LAG returns NULL for the first row of each inputid, so startDate is NULL there.
-- The original added the day only when an unpartitioned ROW_NUMBER() equalled 2,
-- i.e. for a single row of the whole result set.
SELECT
    i.inputid,
    startDate = DATEADD(
        DAY,
        1,
        LAG(i.endDate, 1) OVER (PARTITION BY i.inputid ORDER BY i.endDate)
    ),
    i.endDate
FROM #input AS i
ORDER BY i.inputid, i.endDate;

Related

Multi - Columns OVERLAPPING DATES

-- Attempted query: chain ranges of one domain whose End_Date equals the
-- Starting_Date of another row, then keep the widest range per start date.
-- NOTE(review): the anchor member below selects every row of Que_Date, so a
-- chain is also started from rows in the middle of a sequence; the final
-- GROUP BY Domain_Id, Starting_Date keeps those as separate output rows.
;with cte as (
select Domain_Id, Starting_Date, End_Date
from Que_Date
union all
-- recursive member: keep the chain's Starting_Date, advance to the next End_Date
select t.Domain_Id, cte.Starting_Date, t.End_Date
from cte
join Que_Date t on cte.Domain_Id = t.Domain_Id and cte.End_Date = t.Starting_Date),
cte2 as (
-- rn = 1 keeps a single row per (Domain_Id, End_Date)
select *, rn = row_number() over (partition by Domain_Id, End_Date order by Domain_Id)
from cte
)
-- DISTINCT adds nothing here: GROUP BY already yields one row per group
select DISTINCT Domain_Id, Starting_Date, max(End_Date) enddate
from cte2
where rn=1
group by Domain_Id, Starting_Date
order by Domain_Id, Starting_Date;
-- raw input, for comparison
select * from Que_Date
This is the code that I have written, but I am getting an extra row: the 2nd row is extra. The expected output should contain only the 1st, 3rd and 4th rows, so please help me with it.
I have attached an image showing the input, the expected output, and the output that I am getting.

Your first CTE returns too many rows. It contains the consecutive (chained) domain rows mixed with all the others, so you cannot filter the domains based on it, and your query returns unnecessary rows.
Try this solution. The CTE ConsistentDomains contains only the consecutive domain rows, so based on this CTE we can get the non-overlapping results.
Create and fill data:
-- Fixture: domain 1 has three ranges that chain end-to-start;
-- domain 2 has two ranges that do not touch.
CREATE TABLE FooTable
(
    Domain_ID     INT,
    Starting_Date DATE,
    End_Date      DATE
);

-- Style 103 reads the literals as day-month-year.
INSERT INTO dbo.FooTable (Domain_ID, Starting_Date, End_Date)
VALUES
    (1, CONVERT(datetime, '01-01-2011', 103), CONVERT(datetime, '05-01-2011', 103)),
    (1, CONVERT(datetime, '05-01-2011', 103), CONVERT(datetime, '07-01-2011', 103)),
    (1, CONVERT(datetime, '07-01-2011', 103), CONVERT(datetime, '15-01-2011', 103)),
    (2, CONVERT(datetime, '11-05-2011', 103), CONVERT(datetime, '12-05-2011', 103)),
    (2, CONVERT(datetime, '13-05-2011', 103), CONVERT(datetime, '14-05-2011', 103));
Query to find not overlapping results:
-- Starting date of the chain to follow. Declared as a variable (@) of type
-- DATE so it compares with Starting_Date without a string conversion.
DECLARE @startDate DATE = '2011-01-01';

WITH ConsistentDomains AS
(
    -- Anchor: rows that begin on the requested date.
    SELECT
        f.Domain_ID,
        f.Starting_Date,
        f.End_Date
    FROM FooTable AS f
    WHERE f.Starting_Date = @startDate

    UNION ALL

    -- Recursive step: rows of the same domain that start exactly where the
    -- previously found row ended.
    SELECT
        s.Domain_ID,
        s.Starting_Date,
        s.End_Date
    FROM FooTable AS s
    INNER JOIN ConsistentDomains AS cd
        ON s.Domain_ID = cd.Domain_ID
       AND s.Starting_Date = cd.End_Date
),
ConsistentDomainsRownumber AS
(
    -- Number the chained rows of each domain, earliest first.
    SELECT
        cd.Domain_ID,
        cd.Starting_Date,
        cd.End_Date,
        ROW_NUMBER() OVER (
            PARTITION BY cd.Domain_ID
            ORDER BY cd.Starting_Date, cd.End_Date
        ) AS RN
    FROM ConsistentDomains AS cd
)
-- First chained row of each domain (style 105 formats as dd-mm-yyyy) ...
SELECT
    cd.Domain_ID,
    CONVERT(varchar(10), cd.Starting_Date, 105) AS Starting_Date,
    CONVERT(varchar(10), cd.End_Date, 105) AS End_Date
FROM ConsistentDomainsRownumber AS cd
WHERE cd.RN = 1

UNION ALL

-- ... plus every row of the domains that are not part of any chain.
-- NOT EXISTS instead of NOT IN: Domain_ID is nullable, and a single NULL in
-- the subquery would make NOT IN return no rows at all.
SELECT
    ft.Domain_ID,
    CONVERT(varchar(10), ft.Starting_Date, 105) AS Starting_Date,
    CONVERT(varchar(10), ft.End_Date, 105) AS End_Date
FROM dbo.FooTable AS ft
WHERE NOT EXISTS (
    SELECT 1
    FROM ConsistentDomainsRownumber AS cd
    WHERE cd.Domain_ID = ft.Domain_ID
);
Output:

I used the same table-creation script as provided by @stepup, but you can also get your outcome in this way.
-- Fixture: three chained ranges for domain 1, two separate ranges for domain 2.
CREATE TABLE testtbl
(
    Domain_ID     INT,
    Starting_Date DATE,
    End_Date      DATE
);

-- Style 103 reads the literals as day-month-year.
INSERT INTO testtbl (Domain_ID, Starting_Date, End_Date)
VALUES
    (1, CONVERT(date, '01-01-2011', 103), CONVERT(date, '05-01-2011', 103)),
    (1, CONVERT(date, '05-01-2011', 103), CONVERT(date, '07-01-2011', 103)),
    (1, CONVERT(date, '07-01-2011', 103), CONVERT(date, '15-01-2011', 103)),
    (2, CONVERT(date, '11-05-2011', 103), CONVERT(date, '12-05-2011', 103)),
    (2, CONVERT(date, '13-05-2011', 103), CONVERT(date, '14-05-2011', 103));
You can make use of a self join together with FIRST_VALUE and LAST_VALUE within the group to make sure that you are comparing within the same ID and overlapping dates.
-- t  = the current range
-- t1 = the range of the same domain that starts exactly where t ends (if any)
SELECT DISTINCT
    t.Domain_ID,
    -- If the previous row of the domain had a follow-on range, report the
    -- domain's earliest start; otherwise the row's own start.
    CASE
        WHEN LAG(t1.Starting_Date) OVER (PARTITION BY t.Domain_ID ORDER BY t.Starting_Date) IS NOT NULL
            THEN FIRST_VALUE(t.Starting_Date) OVER (PARTITION BY t.Domain_ID ORDER BY t.Starting_Date)
        ELSE t.Starting_Date
    END AS StartingDate,
    -- If the row has a follow-on range and a next row in the domain, report
    -- the domain's last End_Date; otherwise the row's own end.
    CASE
        WHEN LEAD(t.Domain_ID) OVER (PARTITION BY t.Domain_ID ORDER BY t.Starting_Date) = t1.Domain_ID
            THEN ISNULL(
                     LAST_VALUE(t.End_Date) OVER (
                         PARTITION BY t.Domain_ID
                         ORDER BY t.End_Date
                         ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
                     ),
                     t.End_Date
                 )
        ELSE t.End_Date
    END AS end_date
FROM testtbl AS t
LEFT JOIN testtbl AS t1
    ON t.Domain_ID = t1.Domain_ID
   AND t.End_Date = t1.Starting_Date
   AND t.Starting_Date < t1.Starting_Date
Output:
Domain_ID StartingDate end_date
1 2011-01-01 2011-01-15
2 2011-05-11 2011-05-12
2 2011-05-13 2011-05-14

T SQL - Count People with Visits in 3 consecutive months

With the following data:
-- Sample visits. "Declare #t Table" is not valid T-SQL; a session temp table
-- named #t is created instead, so the separate query snippets that read
-- FROM #t can see it (a table variable would be limited to its own batch).
CREATE TABLE #t
(
    Name      VARCHAR(1),
    VisitDate DATE
);

INSERT INTO #t (Name, VisitDate)
VALUES
    ('A', '2017-01-05'),
    ('A', '2017-03-05'),
    ('A', '2017-04-05'),
    ('A', '2017-05-05'),
    ('A', '2017-08-05'),
    ('B', '2017-03-05'),
    ('C', '2017-01-05'),
    ('C', '2017-02-05'),
    ('C', '2017-04-05'),
    ('D', '2017-01-05'),
    ('D', '2017-02-05'),
    ('D', '2017-03-05'),
    ('D', '2017-06-05'),
    ('B', '2018-01-05'),
    ('B', '2018-02-05'),
    ('B', '2018-03-05'),
    ('E', '2018-01-05'),
    ('E', '2018-02-05'),
    ('E', '2018-03-05'),
    ('E', '2018-06-05');
I need to write a query that will return the Year & Names that have VisitDates in any three consecutive months in any year.
Based on the data, I expect to see:
2017 A
2017 D
2018 B
2018 E
To be honest, I don't know where to start with this using SQL.
I would appreciate any help I can get.
Thanks!!

You can avoid the joins, or parsing the whole data set multiple times, by using the same method as used in gaps-and-islands.
http://rextester.com/SYHJ40676
WITH
-- one row per name / year / month, however many visits fall in that month
visit_months AS
(
    SELECT
        Name,
        YEAR(VisitDate)  AS VisitYear,
        MONTH(VisitDate) AS VisitMonth
    FROM #t
    GROUP BY
        Name,
        YEAR(VisitDate),
        MONTH(VisitDate)
),
-- running position of each visited month within its name and year
sequenced AS
(
    SELECT
        Name,
        VisitYear,
        VisitMonth,
        ROW_NUMBER() OVER (PARTITION BY Name, VisitYear ORDER BY VisitMonth) AS MonthSequenceID
    FROM visit_months
)
-- Consecutive months share the same (VisitMonth - MonthSequenceID) value, so
-- each group below is one unbroken run; keep runs of three months or more.
SELECT DISTINCT
    Name,
    VisitYear
FROM sequenced
GROUP BY
    Name,
    VisitYear,
    VisitMonth - MonthSequenceID
HAVING COUNT(*) >= 3

just join the two following months to the data and see where it goes:
-- m1 = first month of a run, m2 / m3 = visits one and two months later.
-- The year must match as well: comparing month numbers alone would pair
-- visits from different years. DISTINCT collapses the duplicates produced
-- when a name has several matching visits.
SELECT DISTINCT
    DATEPART(year, m1.VisitDate) AS VisitYear,
    m1.Name
FROM #t AS m1
INNER JOIN #t AS m2
    ON m2.Name = m1.Name
   AND DATEPART(year, m2.VisitDate) = DATEPART(year, m1.VisitDate)
   AND DATEPART(month, m2.VisitDate) = DATEPART(month, m1.VisitDate) + 1
INNER JOIN #t AS m3
    ON m3.Name = m1.Name
   AND DATEPART(year, m3.VisitDate) = DATEPART(year, m1.VisitDate)
   AND DATEPART(month, m3.VisitDate) = DATEPART(month, m1.VisitDate) + 2
since it was asked in the comment, how to solve this problem with a year overlap, this should work:
-- Year-overlap variant: EOMONTH(date, n) is the last day of the month n months
-- after date, so equal month-ends mean "exactly n calendar months later",
-- including across a year boundary. The reported year is that of the first visit.
-- DISTINCT removes the repeated rows produced when a month holds several visits.
SELECT DISTINCT
    DATEPART(year, m1.VisitDate) AS VisitYear,
    m1.Name
FROM #t AS m1
INNER JOIN #t AS m2
    ON m2.Name = m1.Name
   AND EOMONTH(m1.VisitDate, 1) = EOMONTH(m2.VisitDate)
INNER JOIN #t AS m3
    ON m3.Name = m1.Name
   AND EOMONTH(m1.VisitDate, 2) = EOMONTH(m3.VisitDate)
doc on EOMONTH: https://learn.microsoft.com/en-us/sql/t-sql/functions/eomonth-transact-sql?view=sql-server-2017
edit: my answer is just a quick hack; it performs poorly and produces errors when there are multiple visits per month.
I suggest using this answer: https://stackoverflow.com/a/52669713/4903754

I wrote my code using PostgreSQL 9.5.0 syntax.
First I create a flag for consecutive months, and then I use that flag to retrieve the required data.
To check whether the dates fall in consecutive months, I use the lag() and lead() functions.
-- PostgreSQL.
with temp as (
    -- check1 = previous visit of the same name (or the next one when there is no previous)
    -- check2 = next visit of the same name (or the previous one when there is no next)
    select
        name,
        visitdate,
        coalesce(lag(visitdate) over w, lead(visitdate) over w) as check1,
        coalesce(lead(visitdate) over w, lag(visitdate) over w) as check2
    from TT
    window w as (partition by name order by visitdate)
    order by 1
),
t2 as (
    -- flag = 1 when the visit is exactly one calendar month after check1
    -- or exactly one calendar month before check2
    select
        name,
        case
            when (date_part('year', visitdate::date) - date_part('year', check1::date)) * 12
                 + (date_part('month', visitdate::date) - date_part('month', check1::date)) = 1
              or (date_part('year', check2::date) - date_part('year', visitdate::date)) * 12
                 + (date_part('month', check2::date) - date_part('month', visitdate::date)) = 1
            then 1
            else 0
        end as flag
    from temp
)
-- names with at least three flagged visits
-- NOTE(review): three flagged visits need not form one run of three
-- consecutive months - confirm against the requirement.
select name, count(1)
from t2
where flag = 1
group by name
having count(1) >= 3

How to reference the current column you are defining using lag?

I have a salary table like this:
-- Salary history fixture. Table variables are named with @ ("declare #t table"
-- does not parse).
declare @t table (OrderedID int, EmpID int, EffDate date, Salary money);

insert into @t (OrderedID, EmpID, EffDate, Salary)
values
    (1, 1234, '20150101', 100),
    (2, 1234, '20160101', 100),
    (3, 1234, '20170101', 100),
    (4, 1234, '20180101', 300),
    (1, 2351, '20150101', 100);
I am trying to get an initial effective date on each row:
First 3 rows have 1/1/2015
4th row has new value 1/1/2018
Here is what I tried with a case and a lag but i can't figure out how to reference the prior value of the column I am creating.
-- Fragment of the attempted SELECT list (not a complete statement).
case when OrderedID = 1 then EFFDaTe
-- NOTE(review): this LAG call has no OVER clause
when Salary != LAG(Salary,1) then EFFDaTe
-- refers to SalaryEFFDT, the alias that this same expression is defining
else lag(SalaryEFFDT,1) over (order by 1)
end as SalaryEFFDT
Thanks for your help.

As you haven't provided the expected output, I think this is what you want:
-- Table variables are named with @ ("declare #t table" does not parse).
declare @t table (OrderedID int, EmpID int, EffDate date, Salary money);

insert into @t (OrderedID, EmpID, EffDate, Salary)
values
    (1, 1234, '20150101', 100),
    (2, 1234, '20160101', 100),
    (3, 1234, '20170101', 100),
    (4, 1234, '20180101', 300),
    (1, 2351, '20150101', 100),
    (5, 1234, '20190101', 100);

;with grouped as
(
    -- Rows of one employee with the same salary and consecutive OrderedID
    -- values get the same grp (OrderedID minus its rank within the salary).
    select
        OrderedID,
        EmpID,
        EffDate,
        Salary,
        OrderedID - row_number() over (partition by EmpID, Salary order by OrderedID) as grp
    from @t
),
first_dates as
(
    -- grp is only meaningful together with Salary: two different salaries of
    -- one employee can produce the same grp value, so Salary is part of the key.
    select EmpID, Salary, grp, min(EffDate) as effDate
    from grouped
    group by EmpID, Salary, grp
)
select
    g.OrderedID,
    g.EmpID,
    g.EffDate,
    g.Salary,
    f.effDate as computeddate
from grouped as g
inner join first_dates as f
    on f.EmpID = g.EmpID
   and f.Salary = g.Salary
   and f.grp = g.grp
order by g.OrderedID, g.EmpID;

So you are trying to get the first effective date for each EmpID? the code below should do that. If that is not your desired output can you put what the output should look like?
-- "declare #t table" does not parse. A temp table named #t is created so that
-- this query and the second query further down can both read FROM #t.
CREATE TABLE #t (OrderedID int, EmpID int, EffDate date, Salary money);

INSERT INTO #t (OrderedID, EmpID, EffDate, Salary)
VALUES
    (1, 1234, '20150101', 100),
    (2, 1234, '20160101', 100),
    (3, 1234, '20170101', 100),
    (4, 1234, '20180101', 300),
    (1, 2351, '20140101', 100),
    (2, 2351, '20150101', 100);

-- Every row together with the earliest EffDate of its employee.
-- The posted statement was cut off after "group by"; the grouping column and
-- the join condition are completed here.
SELECT
    T.*,
    FE.FirstEff
FROM #t AS T
INNER JOIN (
    SELECT EmpID, MIN(EffDate) AS FirstEff
    FROM #t
    GROUP BY EmpID
) AS FE
    ON FE.EmpID = T.EmpID;
The second set is if you need the first time they have that salary, however you will have issues if someone gets a raise and then a demotion.
-- Every row together with the earliest EffDate on which the same employee
-- had the same salary.
SELECT
    T.*,
    FE.FirstEff
FROM #t AS T
INNER JOIN (
    SELECT
        EmpID,
        Salary,
        MIN(EffDate) AS FirstEff
    FROM #t
    GROUP BY EmpID, Salary
) AS FE
    ON FE.EmpID = T.EmpID
   AND FE.Salary = T.Salary

Trying to get DateDiff Based on One Field and Update Another Field

I am trying to update DaysInPeriod with the DateDiff function, based on the change in EFFECTIVESTARTDATE field.
Here is my DDL:
-- Re-runnable setup: drop the table only when it exists (an unconditional
-- DROP fails on the first run). OBJECT_ID is used because the question
-- targets SQL Server 2008.
IF OBJECT_ID('Reporting_Table', 'U') IS NOT NULL
    DROP TABLE Reporting_Table;

CREATE TABLE Reporting_Table (
    Credit_Line_NO     VARCHAR(10),
    CURRENCY           VARCHAR(3),
    AMOUNT             INT,
    StartDate          DATE,
    EFFECTIVESTARTDATE DATE,
    EXPIRY_DATE        DATE,
    FREQUENCY          INT,
    CO_CODE            VARCHAR(10),
    AsOfDate           DATE,
    SOURCEID_REVISED   VARCHAR(255),
    PID                VARCHAR(5),
    DaysInPeriod       INT
);

-- Numbers are written as numeric literals and dates as 'YYYYMMDD', which is
-- read the same way under every language / DATEFORMAT setting.
-- DaysInPeriod is 0: the original passed '' which converts to 0 for an INT column.
INSERT INTO Reporting_Table
    (Credit_Line_NO, CURRENCY, AMOUNT, StartDate, EFFECTIVESTARTDATE, EXPIRY_DATE,
     FREQUENCY, CO_CODE, AsOfDate, SOURCEID_REVISED, PID, DaysInPeriod)
VALUES
    ('1026321', 'USD', 16875, '20170930', '20170930', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026321', 'USD', 16875, '20170930', '20171231', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026321', 'USD', 16875, '20170930', '20180331', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026321', 'USD', 16875, '20170930', '20180630', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026321', 'USD', 16875, '20170930', '20180930', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026321', 'USD', 16875, '20170930', '20181231', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026321', 'USD', 16875, '20170930', '20190331', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026321', 'USD', 16875, '20170930', '20190630', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026329', 'USD', 16875, '20170930', '20170930', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026329', 'USD', 16875, '20170930', '20171231', '20190930', 8, 'US0010001', '20170731', '', '', 0),
    ('1026329', 'USD', 16875, '20170930', '20180331', '20190930', 8, 'US0010001', '20170731', '', '', 0);

-- Show the loaded rows (the original repeated this statement twice).
SELECT *
FROM Reporting_Table;
I have this SQL:
-- Number the rows of each (Credit_Line_NO, ASOFDATE) group.
with cte as
(
-- NOTE(review): the ORDER BY column (ASOFDATE) is also a PARTITION BY column,
-- so it is constant inside every partition and does not determine the order
-- in which rn is assigned.
select *, rn = row_number() over (partition by Credit_Line_NO,ASOFDATE order by ASOFDATE)
from Reporting_Table
)
Select *
From cte
Basically, when rn=1, DaysInPeriod = 90, and then it should increment by DateDiff(days,rn-1,rn) for every next rn. It should reset based on the change in Credit_Line_NO & ASOFDATE, so I am using:
partition by Credit_Line_NO,ASOFDATE
Here is a sample of what I want to achieve.
I am using SQL Server 2008, so I can't use the Lead/Lag functions. I put together the SQL below, but it doesn't execute.
-- Attempted query: for every row, find the next later EFFECTIVESTARTDATE of
-- the same credit line (Date2) and the number of days up to it (DaysDiff).
SELECT T1.CREDIT_LINE_NO,
T1.CURRENCY,
T1.AMOUNT,
T1.STARTDATE,
T1.EFFECTIVESTARTDATE,
T1.EXPIRY_DATE,
T1.FREQUENCY,
T1.CO_CODE,
T1.AsOfDate -- NOTE(review): no comma before the next select-list item
MIN(T2.EFFECTIVESTARTDATE) AS Date2,
DATEDIFF("D", T1.EFFECTIVESTARTDATE, MIN(T2.EFFECTIVESTARTDATE)) AS DaysDiff
FROM Reporting_Table T1
-- each T1 row is joined to every later row of the same credit line;
-- MIN() above then picks the earliest of them
LEFT JOIN Reporting_Table T2
ON T1.CREDIT_LINE_NO = T2.CREDIT_LINE_NO
AND T2.EFFECTIVESTARTDATE > T1.EFFECTIVESTARTDATE
GROUP BY T1.CREDIT_LINE_NO,
T1.CURRENCY,
T1.AMOUNT,
T1.STARTDATE,
T1.EFFECTIVESTARTDATE,
T1.EXPIRY_DATE,
T1.FREQUENCY,
T1.CO_CODE,
T1.AsOfDate
Finally, I want to run an UPDATE query, or SELECT * INTO NEW_TABLE query.

Your query fails because line 9 T1.AsOfDate is missing a comma. Joining on AND T2.EFFECTIVESTARTDATE > T1.EFFECTIVESTARTDATE creates a 1 to many join which is not necessary. We can imitate a LAG function by applying row_number in a CTE then joining on T1.rn = T2.rn +1.
Edit: I updated your ROW_NUMBER to order by EFFECTIVESTARTDATE since ASOFDATE is a partition column and will always be the same within a window.
Here is the SQL fiddle for this solution.
You can SELECT INTO this result set into a new table or UPDATE an existing table.
-- Emulates LAG on SQL Server 2008: number the rows of each
-- (Credit_Line_NO, AsOfDate) group by EFFECTIVESTARTDATE, then join every row
-- to the row numbered one lower in the same group.
WITH cte AS (
    SELECT
        Credit_Line_NO,
        CURRENCY,
        AMOUNT,
        StartDate,
        EFFECTIVESTARTDATE,
        EXPIRY_DATE,
        FREQUENCY,
        CO_CODE,
        AsOfDate,
        SOURCEID_REVISED,
        PID,
        ROW_NUMBER() OVER (
            PARTITION BY Credit_Line_NO, AsOfDate
            ORDER BY EFFECTIVESTARTDATE
        ) AS rn
    FROM Reporting_Table
)
SELECT
    t1.Credit_Line_NO,
    t1.CURRENCY,
    t1.AMOUNT,
    t1.StartDate,
    t1.EFFECTIVESTARTDATE,
    t1.EXPIRY_DATE,
    t1.FREQUENCY,
    t1.CO_CODE,
    t1.AsOfDate,
    t1.SOURCEID_REVISED,
    t1.PID,
    -- the first row of a group has no predecessor and gets the fixed 90 days;
    -- later rows get the days elapsed since the previous EFFECTIVESTARTDATE
    CASE
        WHEN t1.rn = 1 THEN 90
        ELSE DATEDIFF(DAY, t2.EFFECTIVESTARTDATE, t1.EFFECTIVESTARTDATE)
    END AS DaysInPeriod,
    t1.rn
FROM cte AS t1
LEFT JOIN cte AS t2
    ON t2.Credit_Line_NO = t1.Credit_Line_NO
   -- rn restarts per (Credit_Line_NO, AsOfDate), so AsOfDate must match too;
   -- otherwise a row pairs with predecessors from other AsOfDate groups
   AND t2.AsOfDate = t1.AsOfDate
   AND t2.rn = t1.rn - 1

Concatenate date ranges in SQL (T/SQL preferred)

I need to concatenate rows with a date and a code into a date range
Table with two columns that are a composite primary key (date and a code )
Date Code
1/1/2011 A
1/2/2011 A
1/3/2011 A
1/1/2011 B
1/2/2011 B
2/1/2011 A
2/2/2011 A
2/27/2011 A
2/28/2011 A
3/1/2011 A
3/2/2011 A
3/3/2011 A
3/4/2011 A
Needs to be converted to
Start Date End Date Code
1/1/2011 1/3/2011 A
2/1/2011 2/2/2011 A
1/1/2011 1/2/2011 B
2/27/2011 3/4/2011 A
Is there any other way or is a cursor loop the only way?

-- Table variables are named with @ ("declare #T table" does not parse).
declare @T table
(
    [Date] date,
    Code char(1)
);

insert into @T ([Date], Code) values
('1/1/2011','A'),
('1/2/2011','A'),
('1/3/2011','A'),
('1/1/2011','B'),
('1/2/2011','B'),
('3/1/2011','A'),
('3/2/2011','A'),
('3/3/2011','A'),
('3/4/2011','A');

;with C as
(
    -- Day number minus the row's rank within its Code stays constant across a
    -- run of consecutive days, so island_id identifies one unbroken date range.
    select
        [Date],
        Code,
        datediff(day, 0, [Date])
            - row_number() over (partition by Code order by [Date]) as island_id
    from @T
)
select
    min([Date]) as StartDate,
    max([Date]) as EndDate,
    Code
from C
group by Code, island_id;

SQL Server 2000 has its limitations. I rewrote the solution to make it more readable.
-- Table variables are named with @ ("declare #t table" does not parse); the
-- same name is now used for the declaration, the insert and the query.
declare @t table
(
    [Date] datetime,
    Code char(1)
);

insert into @t ([Date], Code) values
('1/1/2011','A'),
('1/2/2011','A'),
('1/3/2011','A'),
('1/1/2011','B'),
('1/2/2011','B'),
('3/1/2011','A'),
('3/2/2011','A'),
('3/3/2011','A'),
('3/4/2011','A');

-- a = first day of every range (no row for the same code on the day before)
-- b = last day of every range  (no row for the same code on the day after)
-- The earliest range end on or after a range start is that range's end.
select
    a.Code,
    a.[Date] as StartDate,
    min(b.[Date]) as EndDate
from
(
    select t.[Date], t.Code
    from @t as t
    where not exists (
        select 1
        from @t as p
        where p.Code = t.Code
          and p.[Date] = dateadd(day, -1, t.[Date])
    )
) as a
inner join
(
    select t.[Date], t.Code
    from @t as t
    where not exists (
        select 1
        from @t as n
        where n.Code = t.Code
          and n.[Date] = dateadd(day, 1, t.[Date])
    )
) as b
    on a.Code = b.Code
   and a.[Date] <= b.[Date]
group by a.Code, a.[Date];

Using a DatePart function for month will get you the "groups" you want
-- Grouping by DATEPART(month) alone does not find runs of consecutive days:
-- it splits a run that crosses a month boundary and merges separate runs that
-- fall in the same month. Group by "consecutive-day island" instead: the day
-- number minus the row's rank within its Code is constant across a run.
WITH islands AS (
    SELECT
        [Date],
        Code,
        DATEDIFF(DAY, 0, [Date])
            - ROW_NUMBER() OVER (PARTITION BY Code ORDER BY [Date]) AS island_id
    FROM ThisTable
)
SELECT
    MIN([Date]) AS StartDate,
    MAX([Date]) AS EndDate,
    Code
FROM islands
GROUP BY Code, island_id;

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Backfill dates using CTE in SQL Server - sql-server

As long as you're using SQL Server 2012 or later, you can use the following... SELECT i.inputid, startDate = DATEADD(DAY, 1, LAG(i.endDate, 1) OVER (PARTITION BY i.inputid ORDER BY i.endDate)), i.endDate FROM #input AS i ORDER BY i.inputid, i.endDate;

Related

Multi - Columns OVERLAPPING DATES

T SQL - Count People with Visits in 3 consecutive months

How to reference the current column you are defining using lag?

Trying to get DateDiff Based on One Field and Update Another Field

Concatenate date ranges in SQL (T/SQL preferred)

Categories

Resources