Collapsing records with adjacent start and end dates - sql-server

I have the following data in a table in SQL Server 2008 R2:
ID Code StartDate EndDate
10001 3 2014-07-25 2014-07-28
10001 3 2014-07-29 2014-10-06
10001 3 2014-10-07 2014-10-10
10001 1 2014-10-11 2014-10-31
10001 1 2014-11-01 2014-11-15
10001 3 2014-11-16 2014-11-25
10001 3 2014-11-26 NULL
20002 3 2014-07-25 2014-07-28
20002 3 2014-07-29 2014-10-06
20002 3 2014-10-07 NULL
30003 3 2014-07-25 2014-11-13
30003 3 2014-11-14 2014-11-24
30003 2 2014-11-25 NULL
I want to "collapse" any records with the same Code, and adjacent EndDate and StartDate. The results should be:
ID Code StartDate EndDate
10001 3 2014-07-25 2014-10-10
10001 1 2014-10-11 2014-11-15
10001 3 2014-11-16 NULL
20002 3 2014-07-25 NULL
30003 3 2014-07-25 2014-11-24
30003 2 2014-11-25 NULL
I've been trying to use various sub-queries and the ROW_NUMBER() function, but just can't get it to work. I suspect this would be easily done with a CTE, but I haven't been able to wrap my head around how those work in order to try it here. Any ideas?

Since your ranges are continuous, the problem essentially becomes a gaps-and-islands one.
© Andriy M
Thanks to Steve Ford for the table:
-- Sample data for the "collapse adjacent date ranges" question.
-- A name that starts with # is a local temporary table, and temporary tables are
-- created with CREATE TABLE; DECLARE ... TABLE only accepts @-prefixed table variables.
create table #EventLog
(
    Id        int,
    Code      tinyint,
    StartDate date,
    EndDate   date null   -- NULL marks a range that has not ended yet
);

insert into #EventLog (Id, Code, StartDate, EndDate)
values
    (10001, 3, '2014-07-25', '2014-07-28'),
    (10001, 3, '2014-07-29', '2014-10-06'),
    (10001, 3, '2014-10-07', '2014-10-10'),
    (10001, 1, '2014-10-11', '2014-10-31'),
    (10001, 1, '2014-11-01', '2014-11-15'),
    (10001, 3, '2014-11-16', '2014-11-25'),
    (10001, 3, '2014-11-26', null),
    (20002, 3, '2014-07-25', '2014-07-28'),
    (20002, 3, '2014-07-29', '2014-10-06'),
    (20002, 3, '2014-10-07', null),
    (30003, 3, '2014-07-25', '2014-11-13'),
    (30003, 3, '2014-11-14', '2014-11-24'),
    (30003, 2, '2014-11-25', null);
Thanks Andriy M for the solution:
-- Collapses consecutive rows that share Id and Code into one row per run
-- (gaps-and-islands). Adjacency of the dates is not checked; the query relies
-- on the ranges of one Id being continuous.
--
-- @MaxDate is a sentinel that stands in for an open-ended (NULL) EndDate, so that
-- MAX() does not skip it; NULLIF turns the sentinel back into NULL afterwards.
declare @MaxDate date = '9999-12-31';

with cte as
(
    select
        Id,
        Code,
        StartDate,
        EndDate,
        -- Position within the Id minus position within (Id, Code): the difference
        -- stays constant for as long as consecutive rows keep the same Code, so
        -- (Id, Code, g) identifies one run.
        g = row_number() over (partition by Id order by StartDate)
          - row_number() over (partition by Id, Code order by StartDate)
    from #EventLog
)
select
    Id,
    Code,
    StartDate = min(StartDate),
    EndDate   = nullif(max(isnull(EndDate, @MaxDate)), @MaxDate)
from cte
group by
    Id, Code, g;

Try this,
-- Temporary table holding the question's sample rows.
-- ENDDATE is NULL for a range that is still open.
CREATE TABLE #TEMP
(
    ID        INT,
    CODE      INT,
    STARTDATE DATE,
    ENDDATE   DATE
)

INSERT INTO #TEMP (ID, CODE, STARTDATE, ENDDATE)
VALUES
    (10001, 3, '2014-07-25', '2014-07-28'),
    (10001, 3, '2014-07-29', '2014-10-06'),
    (10001, 3, '2014-10-07', '2014-10-10'),
    (10001, 1, '2014-10-11', '2014-10-31'),
    (10001, 1, '2014-11-01', '2014-11-15'),
    (10001, 3, '2014-11-16', '2014-11-25'),
    (10001, 3, '2014-11-26', NULL),
    (20002, 3, '2014-07-25', '2014-07-28'),
    (20002, 3, '2014-07-29', '2014-10-06'),
    (20002, 3, '2014-10-07', NULL),
    (30003, 3, '2014-07-25', '2014-11-13'),
    (30003, 3, '2014-11-14', '2014-11-24'),
    (30003, 2, '2014-11-25', NULL)
-- Collapses consecutive rows of #TEMP that share ID and CODE into one row per run.
--
-- The earlier form grouped on CODE - LAG(CODE, 2) computed over a window that was
-- not partitioned by ID. That value is not a run identifier: two separate runs of
-- the same CODE can produce the same difference and get merged, and the lag reads
-- rows belonging to the previous ID. It also needs LAG (SQL Server 2012+).
-- The row-number difference below is a true run identifier and works on 2008 R2.
;WITH ISLANDS AS
(
    SELECT ID,
           CODE,
           STARTDATE,
           ENDDATE,
           -- Constant while consecutive rows of one ID keep the same CODE.
           ROW_NUMBER() OVER (PARTITION BY ID ORDER BY STARTDATE)
         - ROW_NUMBER() OVER (PARTITION BY ID, CODE ORDER BY STARTDATE) AS GRP
    FROM #TEMP
)
SELECT ID,
       CODE,
       MIN(STARTDATE) AS STARTDATE,
       -- COUNT(ENDDATE) skips NULLs, so the counts differ exactly when the run
       -- contains an open-ended row; the collapsed range is then open-ended too.
       CASE WHEN COUNT(*) = COUNT(ENDDATE) THEN MAX(ENDDATE) END AS ENDDATE
FROM ISLANDS
GROUP BY ID, CODE, GRP
ORDER BY ID, MIN(STARTDATE)

Try this (I'm replicating SQL 2012 Lead And Lag functionality using Row_Number() in 2008):
SQL Fiddle
MS SQL Server 2008 Schema Setup:
-- Schema and sample rows for the SQL Fiddle demo.
-- EndDate is nullable: NULL marks a range that is still open.
CREATE TABLE EventLog
(
    ID        int,
    Code      tinyint,
    StartDate date,
    EndDate   date NULL
)

INSERT INTO EventLog (ID, Code, StartDate, EndDate)
VALUES
    (10001, 3, '2014-07-25', '2014-07-28'),
    (10001, 3, '2014-07-29', '2014-10-06'),
    (10001, 3, '2014-10-07', '2014-10-10'),
    (10001, 1, '2014-10-11', '2014-10-31'),
    (10001, 1, '2014-11-01', '2014-11-15'),
    (10001, 3, '2014-11-16', '2014-11-25'),
    (10001, 3, '2014-11-26', NULL),
    (20002, 3, '2014-07-25', '2014-07-28'),
    (20002, 3, '2014-07-29', '2014-10-06'),
    (20002, 3, '2014-10-07', NULL),
    (30003, 3, '2014-07-25', '2014-11-13'),
    (30003, 3, '2014-11-14', '2014-11-24'),
    (30003, 2, '2014-11-25', NULL)
Query 1:
-- Collapses adjacent date ranges per (ID, Code) on SQL Server 2008 by emulating
-- LAG/LEAD with ROW_NUMBER() self-joins.
WITH CTE
AS
(
    -- Number the rows of each (ID, Code) in StartDate order.
    SELECT ID,
           Code,
           StartDate,
           EndDate,
           ROW_NUMBER() OVER (PARTITION BY ID, Code ORDER BY StartDate) AS RN
    FROM EventLog
),
CTE2
AS
(
    -- Attach the neighbouring row's boundary, but only when it is exactly one day
    -- away. A NULL here therefore means "no adjacent neighbour on that side".
    SELECT CTE.ID, CTE.Code, CTE.StartDate, CTE.EndDate,
           CASE WHEN DATEDIFF(d, PrevRow.EndDate, CTE.StartDate) = 1
                THEN PrevRow.EndDate
           END AS PrevEndDate,
           CASE WHEN DATEDIFF(d, NextRow.StartDate, CTE.EndDate) = -1
                THEN NextRow.StartDate
           END AS NextStartDate
    FROM CTE
    LEFT OUTER JOIN CTE AS PrevRow
        ON CTE.ID = PrevRow.ID AND CTE.Code = PrevRow.Code AND PrevRow.RN = CTE.RN - 1
    LEFT OUTER JOIN CTE AS NextRow
        ON CTE.ID = NextRow.ID AND CTE.Code = NextRow.Code AND NextRow.RN = CTE.RN + 1
),
StartAndEnd
AS
(
    -- Keep only the rows that open a run (no adjacent predecessor) or close one
    -- (no adjacent successor), and renumber them.
    SELECT ID,
           Code,
           StartDate,
           EndDate,
           PrevEndDate,
           NextStartDate,
           ROW_NUMBER() OVER (PARTITION BY ID, Code ORDER BY StartDate) AS RN
    FROM CTE2
    WHERE (PrevEndDate IS NULL OR NextStartDate IS NULL)
)
SELECT S.ID,
       S.Code,
       S.StartDate,
       -- A row with no adjacent successor both opens and closes its run, so it
       -- supplies its own EndDate. Taking E.EndDate unconditionally would return
       -- the EndDate of the next boundary row (or NULL) for such single-row runs.
       CASE WHEN S.NextStartDate IS NULL THEN S.EndDate ELSE E.EndDate END AS EndDate
FROM StartAndEnd AS S
LEFT JOIN StartAndEnd AS E
    ON S.ID = E.ID AND S.Code = E.Code AND E.RN = S.RN + 1
WHERE S.PrevEndDate IS NULL
ORDER BY S.ID, S.StartDate
Results:
| ID | CODE | STARTDATE | ENDDATE |
|-------|------|------------|------------|
| 10001 | 3 | 2014-07-25 | 2014-10-10 |
| 10001 | 1 | 2014-10-11 | 2014-11-15 |
| 10001 | 3 | 2014-11-16 | (null) |
| 20002 | 3 | 2014-07-25 | (null) |
| 30003 | 3 | 2014-07-25 | 2014-11-24 |
| 30003 | 2 | 2014-11-25 | (null) |

-- Collapses adjacent ranges by pairing each "run start" with the matching "run end".
--   StartDates: rows with no same-ID, same-Code row ending the day before.
--   EndDates:   rows with no same-ID, same-Code row starting the day after
--               (an open-ended NULL EndDate never finds a successor, so it is an end).
-- The joins must include ID; otherwise a row of one ID can be chained to a row of
-- another ID that has the same Code. The row numbers are taken per ID in date
-- order so that the n-th start is paired with the n-th end deterministically.
;WITH StartDates
AS (
    SELECT e1.ID, e1.Code, e1.StartDate,
           ROW_NUMBER() OVER (PARTITION BY e1.ID ORDER BY e1.StartDate) AS RowNumber
    FROM #EventLog e1
    LEFT JOIN #EventLog e2
        ON  e2.ID = e1.ID
        AND e2.Code = e1.Code
        AND e2.EndDate = DATEADD(day, -1, e1.StartDate)
    WHERE e2.ID IS NULL
),
EndDates AS (
    SELECT e1.ID, e1.Code, e1.EndDate,
           -- NULL sorts first in SQL Server; map it to a far-future date so the
           -- open-ended range is numbered last.
           ROW_NUMBER() OVER (PARTITION BY e1.ID
                              ORDER BY ISNULL(e1.EndDate, '99991231')) AS RowNumber
    FROM #EventLog e1
    LEFT JOIN #EventLog e2
        ON  e2.ID = e1.ID
        AND e2.Code = e1.Code
        AND e2.StartDate = DATEADD(day, 1, e1.EndDate)
    WHERE e2.ID IS NULL
)
SELECT s.ID, s.Code, s.StartDate, e.EndDate
FROM StartDates s
JOIN EndDates e
    ON  s.ID = e.ID
    AND s.Code = e.Code
    AND s.RowNumber = e.RowNumber
ORDER BY s.ID, s.StartDate
Related

If Value is present in two consecutive months , display only one month in sql

I want to check each ID across consecutive months: if the same ID is present in two consecutive months, then count that ID only for the first month.
If an ID's months are not consecutive, then show the distinct IDs grouped by start date month. (Only the start date is considered.)
For example, ID 1 is present in the start date months January and February, so it is counted once, in January. However, IDs 2 and 3 are
present in January and March, and in February and May, respectively, so each of them should be counted in both of its months (for example, ID 2 in January and in March).
Current Data
Table1:
ID StartDate EndDate
1 2017-01-12 2017-01-28
1 2017-01-19 2017-01-28
1 2017-01-29 2017-02-11
1 2017-02-01 2017-02-11
1 2017-02-19 2017-02-24
2 2017-01-12 2017-01-28
2 2017-01-19 2017-01-28
2 2017-03-09 2017-03-20
3 2017-02-12 2017-02-28
3 2017-02-19 2017-02-28
3 2017-05-05 2017-05-29
3 2017-05-09 2017-05-29
I tried the logic below, but I know I am missing something here.
-- Asker's attempt: self-join Table1 and keep pairs whose start months are not
-- "month + 1" apart. Only the syntax is repaired here (the join carried two
-- aliases, "t t1", which does not parse); the logic is unchanged.
select t.*
from Table1 t
join Table1 t1
on t1.ID = t.ID
and datepart(mm, t.StartDate) <> datepart(mm, t1.StartDate) + 1
Expected Result:
DistinctCount StartDateMonth(In Numbers)
1 1(Jan)
2 1(Jan)
2 3(March)
3 2(Feb)
3 5(May)
Any help is appreciated!
Here's my solution. The thinking for this is:
1) Round all the dates to the first of the month, then work with the distinct dataset of (ID, StartDateRounded). From your dataset, the result should look like this:
ID StartDateRounded
1 2017-01-01
1 2017-02-01
2 2017-01-01
2 2017-03-01
3 2017-02-01
3 2017-05-01
2) From this consolidated dataset, find all records by ID that do not have a record for the previous month (which means it's not a consecutive month and thus is a beginning of a new data point). This is your final dataset
-- Step 1: reduce every row to (ID, first day of the StartDate month), de-duplicated.
with MonthStarts AS
(
    SELECT DISTINCT
           ID,
           DATEADD(month, DATEDIFF(month, 0, StartDate), 0) AS StartDateRounded
    FROM Table1
)
-- Step 2: keep a month only when the same ID has no row in the month just before it,
-- i.e. the month begins a new run of consecutive months.
SELECT cur.ID,
       DATEPART(month, cur.StartDateRounded) AS StartDateMonth
FROM MonthStarts AS cur
WHERE NOT EXISTS
(
    SELECT 1
    FROM MonthStarts AS prev
    WHERE prev.ID = cur.ID
      AND DATEADD(month, 1, prev.StartDateRounded) = cur.StartDateRounded
);
sqlfiddler for reference. Let me know if this helps
Just need to take advantage of the lag on the inner query to compare values between rows, and apply the logic in question on the middle query, and then do a final select.
/*SAMPLE DATA*/
-- Temporary table with the question's sample rows.
create table #table1
(
    ID        int  not null,
    StartDate date not null,
    EndDate   date null
)

insert into #table1 (ID, StartDate, EndDate)
values
    (1, '2017-01-12', '2017-01-28'),
    (1, '2017-01-19', '2017-01-28'),
    (1, '2017-01-29', '2017-02-11'),
    (1, '2017-02-01', '2017-02-11'),
    (1, '2017-02-19', '2017-02-24'),
    (2, '2017-01-12', '2017-01-28'),
    (2, '2017-01-19', '2017-01-28'),
    (2, '2017-03-09', '2017-03-20'),
    (3, '2017-02-12', '2017-02-28'),
    (3, '2017-02-19', '2017-02-28'),
    (3, '2017-05-05', '2017-05-29'),
    (3, '2017-05-09', '2017-05-29')
/*ANSWER*/
--Final Select
-- Returns (ID, StartDateMonth) for every row that begins a new run of consecutive
-- months for its ID.
-- The previous row's full StartDate is compared with DATEDIFF(month, ...) rather
-- than comparing bare month numbers: month numbers alone treat e.g. Jan 2017 and
-- Feb 2018 as consecutive. DATEDIFF also covers December -> January without a
-- special case.
select c.ID
     , c.StartDateMonth
from (
        --Rule a record in/out: 0 months apart = same month, 1 = the next month
        select b.ID
             , datepart(mm, b.StartDate) as StartDateMonth
             , case when datediff(month, b.StartDatePrev, b.StartDate) in (0, 1) then 0
                    else 1 --first row of an ID (StartDatePrev is NULL) or a real gap
               end as IncludeFlag
        from (
                --pull StartDate of previous record into current record
                select a.ID
                     , a.StartDate
                     , lag(a.StartDate) over (partition by a.ID order by a.StartDate asc) as StartDatePrev
                from #table1 as a
             ) as b
     ) as c
where c.IncludeFlag = 1
Output:
+----+----------------+
| ID | StartDateMonth |
+----+----------------+
| 1 | 1 |
| 2 | 1 |
| 2 | 3 |
| 3 | 2 |
| 3 | 5 |
+----+----------------+
Try the below query,
-- Returns the first YEARMONTH (yyyymm) of every run of consecutive months per ID.
--
-- Changes from the earlier form:
--   * the window is partitioned by ID and ordered by month, instead of a LAG
--     ordered by ID only, which read an arbitrary row, possibly of another ID;
--   * consecutiveness is tested on a linear month number (year*12 + month),
--     because the difference of yyyymm values jumps from ...12 to ...01 at a
--     year boundary.
SELECT ID
      ,MIN(YM) AS YEARMONTH
FROM (
       SELECT ID
             ,YM
             -- Linear month number minus its rank stays constant inside a run
             -- of consecutive months.
             ,MONTH_NO - ROW_NUMBER() OVER (PARTITION BY ID ORDER BY MONTH_NO) AS GRP
       FROM (
              SELECT DISTINCT ID
                    ,YEAR([StartDate]) * 100 + MONTH([StartDate]) AS YM
                    ,YEAR([StartDate]) * 12 + MONTH([StartDate]) AS MONTH_NO
              FROM #Table1
            ) AS M
     ) AS T
GROUP BY ID
        ,GRP
ORDER BY ID
        ,YEARMONTH
Output:
ID YEARMONTH
1 201701
2 201701
2 201703
3 201702
3 201705
Thank you all. Most of the suggested logic seemed to work, but I tried just the query below and it was good enough for me.
-- Lists, per ID, the start-date month numbers for which the same ID has no row
-- whose month number is exactly one lower (anti-join: LEFT JOIN ... IS NULL).
-- NOTE(review): only month numbers are compared, so the year is ignored and a
-- January row can never match a preceding December (that would need month 0).
-- NOTE(review): there is no DISTINCT, so an ID with several rows in the same
-- month is returned once per row.
-- NOTE(review): DatesTable and its Startdate column are not defined in this
-- snippet; presumably the asker's own table - verify.
SELECT t1.ID, DatePart(month,t1.Startdate) AS StartDateMonth
FROM DatesTable t1
LEFT JOIN DatesTable t2
ON t1.ID = t2.ID
AND DatePart(month,t1.Startdate) = DatePart(month,t2.Startdate)+1
WHERE t2.ID IS NULL;
Thanks again
OK, I wrote my first query without testing it, believing it would work correctly. This is my updated version, which should be faster than the other solutions:
-- First month (and year) of every run of consecutive months per ID.
-- st is a linear month number: (year - 1) * 12 + month, with month in 1..12.
-- st minus its rank within the ID is constant inside a run of consecutive months.
select
    id
    -- Decode with (st - 1): a plain st % 12 yields 0 for December, and st / 12 + 1
    -- then reports the following year.
    , (min(st) - 1) % 12 + 1 as start_month
    , (min(st) - 1) / 12 + 1 as start_year --just in case you need it
from (
        select
            id, st, gr = st - row_number() over (partition by ID order by st)
        from (
                select
                    distinct ID, st = (year(StartDate) - 1) * 12 + month(StartDate)
                from
                    #table2
             ) t
     ) t
group by id, gr

how can i find changes in a specific column and get the old value

Good morning all
I would appreciate any help you can give me in this subject
I have a table that grows in time with the same Id1
but some time Id2 change , like a historic of a park.
I would like to find the best way with a query to retrieve
the rows where id2 changes and time
example if table contents are
Id1 Id2 time
1 1 10:00
1 1 10:30
1 2 10:40
1 2 10:45
1 2 11:00
1 3 11:45
1 3 12:45
query output would be
Id1 oldId2 newId2 time
1 1 2 10:40
1 2 3 11:45
I have done this with a stored procedure, but I was wondering if there is a faster/cleaner way to get this.
thanks in advance
You can do this by Ranking functions..
Schema:
-- Sample history: one row per observation of (Id1, Id2) at time timeS.
CREATE TABLE #TAB
(
    Id1   INT,
    Id2   INT,
    timeS TIME
)

INSERT INTO #TAB (Id1, Id2, timeS)
VALUES
    (1, 1, '10:00'),
    (1, 1, '10:30'),
    (1, 2, '10:40'),
    (1, 2, '10:45'),
    (1, 2, '11:00'),
    (1, 3, '11:45'),
    (1, 3, '12:45')
Now do select with ROW_NUMBER and CTE for retrieving previous/next row values.
-- Returns one row per change of Id2 within an Id1: the old value, the new value,
-- and the time of the first row carrying the new value.
--
-- Each row is paired with the row immediately before it (same Id1, by time) and
-- kept only when Id2 differs. This replaces the "first row per Id2, numbered by
-- ORDER BY (SELECT 1)" approach, which depended on an arbitrary row order,
-- ignored Id1 when pairing rows, and missed a change back to an Id2 value that
-- had occurred before.
;WITH Ordered
AS (
    SELECT Id1
        ,Id2
        ,timeS
        ,ROW_NUMBER() OVER (PARTITION BY Id1 ORDER BY timeS) AS RNO
    FROM #TAB
    )
SELECT Cur.Id1
    ,Prev.Id2 AS OLD_ID2
    ,Cur.Id2 AS NEW_ID2
    ,Cur.timeS
FROM Ordered Cur
INNER JOIN Ordered Prev ON Prev.Id1 = Cur.Id1
    AND Prev.RNO = Cur.RNO - 1
WHERE Prev.Id2 <> Cur.Id2
ORDER BY Cur.Id1
    ,Cur.timeS
Result:
+-----+---------+---------+------------------+
| Id1 | OLD_ID2 | NEW_ID2 | timeS |
+-----+---------+---------+------------------+
| 1 | 1 | 2 | 10:40:00.0000000 |
| 1 | 2 | 3 | 11:45:00.0000000 |
+-----+---------+---------+------------------+
Note: If you want to get previous/next row values into the current row, you can use the LEAD and LAG functions, but they are only supported in SQL Server 2012+.
The above Left Join with CTE will work for lower versions too.
-- Table variable with the sample history (table variables take the @ prefix).
declare @t table (Id1 int, Id2 int, [time] time)

insert into @t (Id1, Id2, [time])
select 1, 1, '10:00' union all
select 1, 1, '10:30' union all
select 1, 2, '10:40' union all
select 1, 2, '10:45' union all
select 1, 2, '11:00' union all
select 1, 3, '11:45' union all
select 1, 3, '12:45'

-- One row per change of Id2 within an Id1: old value, new value, time of change.
-- The previous value is taken from the row immediately before in time. Looking
-- up "top 1 Id2 below the current Id2 order by Id2" returns the smallest earlier
-- Id2 (1 for the 2 -> 3 change), and a fixed "Id2 > 1" filter only suits this data.
;with ordered as
(
    select Id1, Id2, [time],
           rn = row_number() over (partition by Id1 order by [time])
    from @t
)
select cur.Id1, oldId = prev.Id2, newId = cur.Id2, [time] = cur.[time]
from ordered cur
join ordered prev on prev.Id1 = cur.Id1 and prev.rn = cur.rn - 1
where prev.Id2 <> cur.Id2
order by cur.Id1, cur.[time]
i have done some changes to the code from Shakeer Mirza.
the pratical problem that originated the question in the first place is:
i have a table that represents the history of an equipment. Being machine internal id(Num_TPA).
Each time there is a malfunction, the machine is replaced by another it keeps the same Num_TPA but Serial_number changes
i needed to know what is the historic on internal_id->Num_TPA . the new and the old serial_number , and the date of replacement
and this is what it came out.
-- Lists serial-number replacements per Num_TPA as (old serial, new serial, date).
-- CTE: numbers the rows of tab_values per Num_TPA in [Data_Hora_Ficheiro] order
-- (SNO) and keeps every row except the first of each Num_TPA (SNO > 1).
-- RNO is a row number over a constant ORDER BY, so its order is arbitrary.
-- NOTE(review): the CTE projects [Time], while the outer queries read
-- C2.[Data_Hora_Ficheiro], which the CTE does not list - presumably the same
-- column under two names in the post; verify against the real schema.
;WITH CTE
AS (
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS RNO
,[Num_TPA]
,[Serial_number]
,[Time]
,a.SNO
FROM (
SELECT ROW_NUMBER() OVER (PARTITION BY [Num_TPA]
ORDER BY [Data_Hora_Ficheiro]) AS SNO
,*
FROM tab_values
) A
WHERE SNO > 1
)
-- Branch 1: pairs rows of tab_values with the SNO = 2 row of the same Num_TPA
-- when that row is later in time and has a different serial number.
SELECT C1.[Num_TPA]
,C1.[Serial_number] AS OLD_ID2
,C2.[Serial_number] AS NEW_ID2
,C2.[Data_Hora_Ficheiro]
,c2.SNO
,c2.RNO
FROM tab_values C1
LEFT JOIN CTE C2 ON (
C1.[Num_TPA] = C2.[Num_TPA]
AND c1.[Serial_number] != c2.[Serial_number]
AND C2.[Time] > c1.TIME
)
-- The IS NOT NULL filter discards the unmatched rows of the LEFT JOIN.
WHERE C2.[Num_TPA] IS NOT NULL
AND SNO = 2
-- UNION (not UNION ALL): duplicate result rows are removed.
UNION
-- Branch 2: pairs each CTE row with the next row (SNO + 1) of the same Num_TPA,
-- for next rows with SNO > 2.
-- NOTE(review): this branch does not compare serial numbers, so it appears to
-- return consecutive rows even when the serial did not change - confirm.
SELECT C1.[Num_TPA]
,C1.[Serial_number] AS OLD_ID2
,C2.[Serial_number] AS NEW_ID2
,C2.[Data_Hora_Ficheiro]
,c2.SNO
,c2.RNO
FROM CTE C1
LEFT JOIN CTE C2 ON (
C1.SNO + 1 = C2.SNO
AND C1.[Num_TPA] = C2.[Num_TPA]
)
WHERE C2.[Num_TPA] IS NOT NULL
AND C2.SNO > 2

Group by a set of Values (Check-In or Check-Out)

I have a simple table which records people clocking-in and clocking out like so.
Id | EmployeeNumber | InOutId | InOutDateTime
-----------------------------------------------------
1 | 505 | IN | 2015-03-24 08:32:42:000
1 | 506 | IN | 2015-03-24 08:35:47:000
1 | 507 | IN | 2015-03-24 08:46:12:000
1 | 505 | OUT | 2015-03-24 16:59:00:000
1 | 506 | OUT | 2015-03-24 17:05:00:000
I want to show the total people currently IN and those currently OUT. In other words:
- Total IN means those that do not have a corresponding OUT for that given day. - Total OUT means those that do have an IN and an OUT for that given day.
So, based on my table above, I want to get the following results:
TotalCurrentlyIn | TotalCurrentlyOut
-----------------------------------------
1 | 2
This is what I have so far:
-- Asker's starting point: all attendance rows of one calendar day.
-- Local variables take the @ prefix; "#d" is not a valid variable name.
DECLARE @d date;
SET @d = CAST('2015-03-24 15:02:42.000' AS date);

SELECT EmployeeNumber, InOutId, InOutDateTime
FROM MyAttendance
WHERE
    -- Half-open range [@d, @d + 1 day): covers the whole day and leaves the
    -- InOutDateTime column free of function calls.
    InOutDateTime >= @d
    AND InOutDateTime < DATEADD(day, 1, @d)
ORDER BY
    EmployeeNumber, InOutId
I need to be able to sum and group by - any ideas?
try,
-- Counts, for one day, how many employees clocked IN, how many of them also
-- clocked OUT, and how many are therefore still in.
-- Local variables take the @ prefix; "#d" is not a valid variable name.
DECLARE @d date;
SET @d = CAST('2015-03-24 15:02:42.000' AS date);

-- cte: one row per IN event of the day, with out1 = 'OUT' when the same
-- employee has an OUT event on that day and NULL otherwise.
;WITH cte AS
(
    SELECT t.EmployeeNumber,
           t.InOutId  AS in1,
           t1.InOutId AS out1,
           t.InOutDateTime
    FROM #t t
    LEFT JOIN (SELECT EmployeeNumber, InOutId, InOutDateTime
               FROM #t
               WHERE InOutId = 'OUT'
                 AND CAST(InOutDateTime AS date) = @d) t1
        ON  t.EmployeeNumber = t1.EmployeeNumber
        AND CAST(t.InOutDateTime AS date) = CAST(t1.InOutDateTime AS date)
    WHERE t.InOutId = 'IN'
      AND CAST(t.InOutDateTime AS date) = @d
)
SELECT COUNT(in1)  AS Totalin,
       COUNT(out1) AS Totalout,
       -- IN rows without a matching OUT are the people still in.
       SUM(CASE WHEN out1 IS NULL THEN 1 ELSE 0 END) AS TotalCurrentlyIn,
       COUNT(out1) AS TotalCurrentlyOut
FROM cte
data
-- Sample attendance rows. A name that starts with # is a temporary table, which
-- is created with CREATE TABLE; DECLARE ... TABLE only accepts @-prefixed names.
create table #t (Id int, EmployeeNumber int, InOutId varchar(3), InOutDateTime datetime)

insert into #t (Id, EmployeeNumber, InOutId, InOutDateTime) values
(1 , 505 , 'IN' , '2015-03-24 08:32:42:000'),
(1 , 506 , 'IN' , '2015-03-24 08:35:47:000'),
(1 , 507 , 'IN' , '2015-03-24 08:46:12:000'),
(1 , 505 , 'OUT' , '2015-03-24 16:59:00:000'),
(1 , 506 , 'OUT' , '2015-03-24 17:05:00:000')
CheckIn = 1 and CheckOut = 2, so you need to check the last entry of each user.
-- Latest action time per employee and action type within the day of @d.
-- @d must be declared by the caller; "#d" is not a valid variable reference.
Select EmployeeId, ActionType, Max(ActionDateTime) As LastActionDateTime
From AttendanceLog
Where
    -- Half-open range [midnight of @d, midnight of the next day)
    ActionDateTime >= DATEADD(day, DATEDIFF(day, 0, @d), 0)
    and ActionDateTime < DATEADD(day, DATEDIFF(day, 0, @d) + 1, 0)
Group by
    EmployeeId, ActionType
Order by
    EmployeeId, ActionType
If I understand the question, you need to know how many people are in the office right now.
The first query returns the latest action date for each employee; you then join it back to the table to get the ActionType of that latest action.
-- Step 1: latest action time per employee, stored in a temporary table.
-- AttendanceLog stands for your attendance table (the bare word "table" is a
-- reserved keyword and cannot be used as a table name unquoted).
select
    EmployeeId, max(ActionDateTime) as MaxActionDateTime
into #temptable
from AttendanceLog
group by EmployeeId

-- Step 2: join back to fetch the ActionType of that latest action and count
-- employees per type. T-SQL compares with "=", not "==", and both sides of the
-- join use the EmployeeId column produced in step 1.
select count(a.EmployeeId) as EmployeeCount, a.ActionType
from AttendanceLog as a
inner join #temptable as latest
    on  a.EmployeeId = latest.EmployeeId
    and a.ActionDateTime = latest.MaxActionDateTime
group by a.ActionType
Using a windowing function you can get the last action for every employee and count those
-- Counts employees by their most recent action.
-- recency = 1 marks each employee's latest row (rows ranked newest first).
Select TotalCurrentlyIn  = Count(CASE WHEN latest.InOutId = 'IN' THEN 1 END)
     , TotalCurrentlyOut = Count(CASE WHEN latest.InOutId = 'OUT' THEN 1 END)
From (
        Select InOutId
             , recency = ROW_NUMBER() OVER (PARTITION BY EmployeeNumber
                                            ORDER BY InOutDateTime DESC)
        From table1
     ) As latest
Where latest.recency = 1

Update table with overlap date range and change status

I have a table with following column and I would like to update it as following.
The logic is: for rows of the same member whose date ranges overlap, the start date is updated to the earliest start date in the group and the end date to the end date of the latest overlapping row. The status of the remaining overlapping rows is updated to 2. I hope someone can help.
ID MemberID StartDate EndDate Status
1 2 2015-01-01 2015-02-28 1
2 2 2015-02-01 2015-02-03 1
3 2 2015-02-01 2015-03-01 1
4 1 2015-02-01 2015-02-28 1
5 3 2015-02-01 2015-02-28 1
6 2 2015-05-01 2015-05-20 1
I would like to update to
ID MemberID StartDate EndDate Status
1 2 2015-01-01 2015-03-01 1
2 2 2015-01-01 2015-03-01 2
3 2 2015-01-01 2015-03-01 2
4 1 2015-02-01 2015-02-28 1
5 3 2015-02-01 2015-02-28 1
6 2 2015-05-01 2015-05-20 1
I think this should do it :
-- Merges overlapping date ranges per member: every row of a group of overlapping
-- rows gets the group's earliest startdate and latest enddate; the row with the
-- lowest id keeps its status and the other rows of the group get status 2.
--
-- Taking MIN/MAX over all rows of a member would also merge ranges that do not
-- overlap (the 2015-05 row of member 2 in the sample), so the rows are first
-- split into groups of overlapping ranges.
-- Requires SQL Server 2012+ (window frames). Assumes id is unique.
;with running as
(
    -- Latest enddate seen so far among the member's earlier rows.
    select id, memberID, startdate, enddate,
           max(enddate) over (partition by memberID
                              order by startdate, id
                              rows between unbounded preceding and 1 preceding) as prev_max_end
    from #table
),
islands as
(
    -- A row opens a new group when no earlier row reaches its startdate;
    -- the running count of such rows numbers the groups.
    select id, memberID, startdate, enddate,
           sum(case when prev_max_end >= startdate then 0 else 1 end)
               over (partition by memberID
                     order by startdate, id
                     rows unbounded preceding) as island_no
    from running
),
merged as
(
    select memberID, island_no,
           min(startdate) as startdate,
           max(enddate)   as enddate,
           min(id)        as keeper_id
    from islands
    group by memberID, island_no
)
update a set
    a.startdate = m.startdate,
    a.enddate   = m.enddate,
    a.status    = case when a.id = m.keeper_id then a.status else 2 end
from #table a
join islands i on i.id = a.id
join merged m on m.memberID = i.memberID and m.island_no = i.island_no
Try this,
---- Creating CTE for finding overlapped dates
-- CTE: pairs every row A with each row B of the same member whose date range
-- intersects A's range. ID is A's id, MAPPED_ID is B's id, and the dates and
-- status are B's.
-- NOTE(review): only rows that overlap A directly are paired; a chain such as
-- "A overlaps B, B overlaps C, A does not overlap C" is not followed - confirm
-- this is sufficient for the data.
;WITH CTE AS (
SELECT A.ID,
B.ID AS MAPPED_ID,
A.MEMBERID,
B.STARTDATE,
B.ENDDATE,
B.STATUS
FROM #YOUR_TABLE A
JOIN #YOUR_TABLE B ON B.STARTDATE <= A.ENDDATE-- Condition for finding the overlapped dates
AND B.ENDDATE >= A.STARTDATE
AND A.MEMBERID = B.MEMBERID)-- end here
-- Overwrite each row with the merged range computed for it in the derived table.
UPDATE T
SET T.STARTDATE = A.STARTDATE,
T.ENDDATE = A.ENDDATE,
T.STATUS = A.STATUS
FROM #YOUR_TABLE T
JOIN (SELECT ID,
MEMBERID,
STARTDATE,
ENDDATE,
-- The first row (lowest ID) of each merged range gets status 1, the others 2.
STATUS=CASE
WHEN RN > 1 THEN 2
ELSE 1
END
FROM (SELECT T.ID,
T.MEMBERID,
CS1.STARTDATE,
CS2.ENDDATE,
ROW_NUMBER() -- ROWNUMBER FOR FINDING THE STATUS
OVER(
PARTITION BY T.MEMBERID, CS1.STARTDATE, CS2.ENDDATE
ORDER BY T.ID) AS RN
FROM #YOUR_TABLE T
-- CS1: earliest STARTDATE among the rows paired with T in the CTE (cast to DATETIME).
CROSS APPLY (SELECT CAST(MIN(STARTDATE)AS DATETIME) AS STARTDATE --- FINDING MIN(STARTDATE) FOR THE OVERLAPPED GROUP
FROM CTE A
WHERE A.ID = T.ID) CS1
-- CS2: ENDDATE of the paired row with the highest MAPPED_ID, i.e. the row with
-- the highest id, not necessarily the latest ENDDATE.
CROSS APPLY (SELECT ENDDATE -- FINDING LAST ENDDATE FOR THE OVERLAPPED GROUP (IE RN=1)
FROM (SELECT ENDDATE,--- ROW_NUMBER FOR THE OVERLAPPED GROUPS
ROW_NUMBER()
OVER(
ORDER BY B.MAPPED_ID DESC) AS RN
FROM CTE B
WHERE B.ID = T.ID)A
WHERE A.RN = 1)CS2)A)A ON A.ID = T.ID
-- Show the table after the update.
SELECT *
FROM #YOUR_TABLE

issue in a date range creation

I have been creating a date range, but in some cases I have a problem:
This is what I have: TABLE_1
date customer_id status total
---- ------------- -------- -------
20120201 1 a 10
20120202 1 a 20
20120203 1 b 20
20120204 1 b 20
20120205 1 a 20
20120206 1 a 20
20120201 2 d 30
20120202 2 e 40
After the execution of my procedure, I have this: TABLE_2
customer_id status start_date end_date
------------- -------- ----------- ---------
1 a 20120201 NULL
1 b 20120203 20120131
2 d 20120201 20120201
2 e 20120202 NULL
But this is what i want, a table with date ranges based on customer_id and status (end_date represents register with most recent date): TABLE_3
customer_id status start_date end_date
------------- -------- ----------- ---------
1 a 20120201 20120202
1 b 20120203 20120204
1 a 20120205 NULL
2 d 20120201 20120201
2 e 20120202 NULL
My store procedure look like this:
-- Asker's procedure body, with only the syntax repaired: the select list of TEMP
-- was missing the commas between its columns. The logic is unchanged, so TEMP2
-- still groups by (customer_id, status) and produces one range per status even
-- when a status occurs in two separate periods.
;WITH TEMP AS (
    -- Distinct (date, customer_id, status) combinations.
    SELECT
        date,
        customer_id,
        status
    FROM table_1
    GROUP BY
        date,
        customer_id,
        status
)
,TEMP2 AS (
    -- One row per customer and status, numbered from the most recent status back.
    SELECT
        ID = ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY MAX(date) DESC),
        start_date = MIN(date),
        end_date = MAX(date),
        [customer_id],
        [status]
    FROM TEMP
    GROUP BY
        [customer_id],
        [status]
)
SELECT
    A.customer_id,
    A.status,
    A.start_date,
    -- A range ends the day before the next more recent range (ID - 1) starts;
    -- the most recent range has no such row and gets NULL.
    end_date = DATEADD(DAY,-1,B.start_date)
FROM TEMP2 A
LEFT JOIN TEMP2 B
    ON A.customer_id = B.customer_id
    AND A.ID = B.ID + 1
I know my error is in the creation of the CTE TEMP2: because of its GROUP BY clause, this code cannot distinguish a status that occurs in two separate time ranges for the same customer_id.
I can't figure out how to do that...
Try this. Hope it works now.
-- Sample data. A name that starts with # is a temporary table, which is created
-- with CREATE TABLE; DECLARE ... TABLE only accepts @-prefixed table variables.
CREATE TABLE #table_1 (
    date DATETIME,
    customer_id INT,
    status CHAR(1),
    total INT
)

INSERT #table_1 (date, customer_id, status, total)
VALUES
    ('20120201', 1, 'a', 10),
    ('20120202', 1, 'a', 20),
    ('20120203', 1, 'b', 20),
    ('20120204', 1, 'b', 20),
    ('20120205', 1, 'a', 20),
    ('20120206', 1, 'a', 20),
    ('20120201', 2, 'd', 30),
    ('20120202', 2, 'e', 40)
-- Builds one date range per run of identical consecutive statuses per customer.
--   CTE_1: numbers each customer's rows in date order (seq).
--   CTE_2: walks the rows one by one (recursive). flg = 1 marks a row whose
--          status differs from the previous row's (the first row always gets
--          flg = 1); seq2 is the running number of the run the row belongs to.
-- The final query keeps the first row of each run and takes as end_date the day
-- before the next run starts (NULL for the customer's last run).
;WITH CTE_1 AS (
    SELECT
        customer_id,
        status,
        date,
        ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY date ASC) AS seq
    FROM #table_1
),
CTE_2 AS (
    -- Anchor: the first row of every customer opens run 1.
    SELECT
        customer_id,
        status,
        date,
        seq,
        1 AS flg,
        1 AS seq2
    FROM CTE_1
    WHERE
        seq = 1
    UNION ALL
    -- Step: move to the customer's next row; open a new run when the status changes.
    SELECT
        CTE_1.customer_id,
        CTE_1.status,
        CTE_1.date,
        CTE_1.seq,
        CASE WHEN CTE_2.status = CTE_1.status THEN 0 ELSE 1 END,
        CASE WHEN CTE_2.status = CTE_1.status THEN CTE_2.seq2 ELSE CTE_2.seq2 + 1 END
    FROM CTE_1
    INNER JOIN CTE_2
        ON CTE_1.customer_id = CTE_2.customer_id
        AND CTE_1.seq = CTE_2.seq + 1
)
SELECT
    st.customer_id,
    st.status,
    st.date AS start_date,
    DATEADD(DAY, -1, en.date) AS end_date
FROM CTE_2 AS st
LEFT JOIN CTE_2 AS en
    ON st.customer_id = en.customer_id
    AND st.seq2 = en.seq2 - 1
    AND en.flg = 1
WHERE
    st.flg = 1
ORDER BY
    st.customer_id,
    st.seq2
-- The recursion advances one row per level, so a customer with more rows than the
-- default limit of 100 levels would abort the query; 0 removes the limit.
OPTION (MAXRECURSION 0)

Resources