Finding the Datediff between Records in same Table - sql-server

IP QID ScanDate Rank
101.110.32.80 6 2016-09-28 18:33:21.000 3
101.110.32.80 6 2016-08-28 18:33:21.000 2
101.110.32.80 6 2016-05-30 00:30:33.000 1
I have a Table with certain records, grouped by Ipaddress and QID.. My requirement is to find out which record missed the sequence in the date column or other words the date difference is more than 30 days. In the above table date diff between rank 1 and rank 2 is more than 30 days.So, i should flag the rank 2 record.

You can use LAG in Sql 2012+
declare #Tbl Table (Ip VARCHAR(50), QID INT, ScanDate DATETIME,[Rank] INT)
INSERT INTO #Tbl
VALUES
('101.110.32.80', 6, '2016-09-28 18:33:21.000', 3),
('101.110.32.80', 6, '2016-08-28 18:33:21.000', 2),
('101.110.32.80', 6, '2016-05-30 00:30:33.000', 1)
;WITH Result
AS
(
SELECT
T.Ip ,
T.QID ,
T.ScanDate ,
T.[Rank],
LAG(T.[Rank]) OVER (ORDER BY T.[Rank]) PrivSRank,
LAG(T.ScanDate) OVER (ORDER BY T.[Rank]) PrivScanDate
FROM
#Tbl T
)
SELECT
R.Ip ,
R.QID ,
R.ScanDate ,
R.Rank ,
R.PrivScanDate,
IIF(DATEDIFF(DAY, R.PrivScanDate, R.ScanDate) > 30, 'This is greater than 30 day. Rank ' + CAST(R.PrivSRank AS VARCHAR(10)), '') CFlag
FROM
Result R
Result:
Ip QID ScanDate Rank CFlag
------------------------ ----------- ----------------------- ----------- --------------------------------------------
101.110.32.80 6 2016-05-30 00:30:33.000 1
101.110.32.80 6 2016-08-28 18:33:21.000 2 This is greater than 30 day. Rank 1
101.110.32.80 6 2016-09-28 18:33:21.000 3 This is greater than 30 day. Rank 2

While Window Functions could be used here, I think a self join might be more straight forward and easier to understand:
SELECT
t1.IP,
t1.QID,
t1.Rank,
t1.ScanDate as endScanDate,
t2.ScanDate as beginScanDate,
datediff(day, t2.scandate, t1.scandate) as scanDateDays
FROM
table as t1
INNER JOIN table as t2 ON
t1.ip = t2.ip
t1.rank - 1 = t2.rank --get the record from t2 and is one less in rank
WHERE datediff(day, t2.scandate, t1.scandate) > 30 --only records greater than 30 days
It's pretty self-explanatory. We are joining the table to itself and joining the ranks together where rank 2 gets joined to rank 1, rank 3 gets joined to rank 2, and so on. Then we just test for records that are greater than 30 days using the datediff function.

I would use windowed function to avoid self join which in many case will perform better.
WITH cte
AS (
SELECT
t.IP
, t.QID
, LAG(t.ScanDate) OVER (PARTITION BY t.IP ORDER BY T.ScanDate) AS beginScanDate
, t.ScanDate AS endScanDate
, DATEDIFF(DAY,
LAG(t.ScanDate) OVER (PARTITION BY t.IP ORDER BY t.ScanDate),
t.ScanDate) AS Diff
FROM
MyTable AS t
)
SELECT
*
FROM
cte c
WHERE
Diff > 30;

Related

How to Sum (MAX values) from different value groups in same column SQL Server

I have a table like this:
Date
Consec_Days
2015-01-01
1
2015-01-03
1
2015-01-06
1
2015-01-07
2
2015-01-09
1
2015-01-12
1
2015-01-13
2
2015-01-14
3
2015-01-17
1
I need to Sum the max value (days) for each of the consecutive groupings where Consec_Days are > 1. So the correct result would be 5 days.
This is a type of gaps-and-islands problem.
There are many solutions, here is one simple one
Get the start points of each group using LAG
Calculate a grouping ID using a windowed conditional count
Group by that ID and take the highest sum
WITH StartPoints AS (
SELECT *,
IsStart = CASE WHEN LAG(Consec_Days) OVER (ORDER BY Date) = 1 THEN 1 END
FROM YourTable t
),
Groupings AS (
SELECT *,
GroupId = COUNT(IsStart) OVER (ORDER BY Date)
FROM StartPoints
WHERE Consec_Days > 1
)
SELECT TOP (1)
SUM(Consec_Days)
FROM Groupings
GROUP BY
GroupId
ORDER BY
SUM(Consec_Days) DESC;
db<>fiddle
with cte as (
select Consec_Days,
coalesce(lead(Consec_Days) over (order by Date), 1) as next
from YourTable
)
select sum(Consec_Days)
from cte
where Consec_Days <> 1 and next = 1
db<>fiddle

Group records based on time interval starting from timestamp of first record in each group

Struggling with this; need to group records within a specific time interval starting from the first timestamp (FREEZE_TIME) - but the first record outside the first group is the starting point for the time interval for the next group and so on. Expected result, THAW_COUNT, is the count of all groups for a PARENT_SAMPLE_ID. So for table:
SAMPLE_ID
FREEZE_TIME
PARENT_SAMPLE_ID
1
null
null
2
2015-11-27 10:23:10
1
3
2015-11-27 10:59:23
1
4
2015-11-27 11:05:43
1
5
2015-11-27 12:53:48
1
6
2015-11-27 13:42:25
1
I would like to get a result of:
PARENT_SAMPLE_ID
THAW_COUNT
1
2
So sample_id:s 2,3 and 4 should be in the same group and sample id:s 5 and 6 are in the next group.
I have tried something like:
with SampleList as
(
select PARENT_SAMPLE_ID, FREEZE_TIME,
ROW_NUMBER() OVER (partition by PARENT_SAMPLE_ID order by FREEZE_TIME asc) RN
from
SAMPLE
)
,
FirstSample as
(
select PARENT_SAMPLE_ID, FREEZE_TIME
from SampleList
where RN = 1
)
,
SelectedSample as
(
select
s.PARENT_SAMPLE_ID,
ABS(DATEDIFF(MINUTE, s.FREEZE_TIME, sFirst.FREEZE_TIME))/60 DiffToFirst
from SampleList s
inner join FirstSample sFirst ON s.PARENT_SAMPLE_ID = sFirst.PARENT_SAMPLE_ID
group by s.PARENT_SAMPLE_ID, ABS(DATEDIFF(MINUTE, s.FREEZE_TIME, sFirst.FREEZE_TIME))/60
)
select PARENT_SAMPLE_ID, count(*) THAW_COUNT
from SelectedSample
group by PARENT_SAMPLE_ID
But this will return a THAW_COUNT of 3 as sampleId:s 5 and 6 will be in different groups because the grouping is based on hour intervals from freeze time of sampleId 2 only. How do I get the grouping for group 2 to start from the first record outside the first group (sampleId 5) and so on?
This can be treated as a gaps and islands problem. Using some windows functions to check counts and using LAG to look at the "previous" row we can solve this. If you have multiple values for SAMPLE_ID you will want to add some partitioning.
create table #Something
(
SAMPLE_ID int
, FREEZE_TIME datetime
, PARENT_SAMPLE_ID int
)
insert #Something
select 1, null, null union all
select 2, '2015-11-27 10:23:10', 1 union all
select 3, '2015-11-27 10:59:23', 1 union all
select 4, '2015-11-27 11:05:43', 1 union all
select 5, '2015-11-27 12:53:48', 1 union all
select 6, '2015-11-27 13:42:25', 1;
with MyGroups as
(
select *
, GroupNum = count(IsNewGroup) over (order by FREEZE_TIME rows unbounded preceding)
from
(
select *
, IsNewGroup = case when LAG(FREEZE_TIME, 1, '') over(order by FREEZE_TIME) < dateadd(hour, -1, FREEZE_TIME) then 1 end
from #Something
) x
)
select coalesce(PARENT_SAMPLE_ID, SAMPLE_ID)
, count(distinct GroupNum)
from MyGroups
group by coalesce(PARENT_SAMPLE_ID, SAMPLE_ID)
drop table #Something

Is it possible to use the SQL DATEADD function but exclude dates from a table in the calculation?

Is it possible to use the DATEADD function but exclude dates from a table?
We already have a table with all dates we need to exclude. Basically, I need to add number of days to a date but exclude dates within a table.
Example: Add 5 days to 01/08/2021. Dates 03/08/2021 and 04/08/2021 exist in the exclusion table. So, resultant date should be: 08/08/2021.
Thank you
A bit of a "wonky" solution, but it works. Firstly we use a tally to create a Calendar table of dates, that exclude your dates in the table, then we get the nth row, where n is the number of days to add:
DECLARE #DaysToAdd int = 5,
#StartDate date = '20210801';
WITH N AS(
SELECT N
FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
Tally AS(
SELECT 0 AS I
UNION ALL
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS I
FROM N N1, N N2, N N3), --Up to 1,000
Calendar AS(
SELECT DATEADD(DAY,T.I, #StartDate) AS D,
ROW_NUMBER() OVER (ORDER BY T.I) AS I
FROM Tally T
WHERE NOT EXISTS (SELECT 1
FROM dbo.DatesTable DT
WHERE DT.YourDate = DATEADD(DAY,T.I, #StartDate)))
SELECT D
FROM Calendar
WHERE I = #DaysToAdd+1;
A best solution is probably a calendar table.
But if you're willing to traverse through every date, then a recursive CTE can work. It would require tracking the total iterations and another column to substract if any traversed date was in the table. The exit condition uses the total difference.
An example dataset would be:
CREATE TABLE mytable(mydate date); INSERT INTO mytable VALUES ('20210803'), ('20210804');
And an example function run in it's own batch:
ALTER FUNCTION dbo.fn_getDays (#mydate date, #daysadd int)
RETURNS date
AS
BEGIN
DECLARE #newdate date;
WITH CTE(num, diff, mydate) AS (
SELECT 0 AS [num]
,0 AS [diff]
,DATEADD(DAY, 0, #mydate) [mydate]
UNION ALL
SELECT num + 1 AS [num]
,CTE.diff +
CASE WHEN DATEADD(DAY, num+1, #mydate) IN (SELECT mydate FROM mytable)
THEN 0 ELSE 1 END
AS [diff]
,DATEADD(DAY, num+1, #mydate) [mydate]
FROM CTE
WHERE (CTE.diff +
CASE WHEN DATEADD(DAY, num+1, #mydate) IN (SELECT mydate FROM mytable)
THEN 0 ELSE 1 END) <= #daysadd
)
SELECT #newdate = (SELECT MAX(mydate) AS [mydate] FROM CTE);
RETURN #newdate;
END
Running the function:
SELECT dbo.fn_getDays('20210801', 5)
Produces output, which is the MAX(mydate) from the function:
----------
2021-08-08
For reference the MAX(mydate) is taken from this dataset:
n diff mydate
----------- ----------- ----------
0 0 2021-08-01
1 1 2021-08-02
2 1 2021-08-03
3 1 2021-08-04
4 2 2021-08-05
5 3 2021-08-06
6 4 2021-08-07
7 5 2021-08-08
You can use the IN clause.
To perform the test, I used a W3Schools Test DB
SELECT DATE_ADD(BirthDate, INTERVAL 10 DAY) FROM Employees WHERE FirstName NOT IN (Select FirstName FROM Employees WHERE FirstName LIKE 'N%')
This query shows all the birth dates + 10 days except for the only employee with name starting with N (Nancy)

Accumulative Update for previous records

I have table that shows these information
Month NewClients OnHoldClients
5-2017 10 2
6-2017 16 4
7-2017 11 1
8-2017 15 6
9-2017 18 7
I am trying to find the accumulative total for each month
which is
(NewClients - OnHoldClients) + Previous Month Total
Something like this
Month NewClients OnHoldClients Total
5-2017 10 2 8
6-2017 16 4 20
7-2017 11 1 30
8-2017 15 6 39
9-2017 18 7 50
the query i tried to build was something like this but I think should be an easier way to do that
UPDATE MyTable
SET Total = (SELECT TOP 1 Total FROM MyTable B WHERE B.Month < A.Month) + NewClients - OnHoldClients
FROM MyTable A
Before we begin, note the mere fact that you're facing such calculative problem is a symptom that maybe you don't have the best possible design. Normally for this purpose calculated values are being stored along the way as the records are inserted. So i'd say you'd better have a total field to begin with and calculate it as records amass.
Now let's get down to the problem at hand. i composed a query which does that nicely but it's a bit verbose due to recursive nature of the problem. However, it yields the exact expected result:
DECLARE #dmin AS date = (SELECT min(mt.[Month]) from dbo.MyTable mt);
;WITH cte(_Month, _Total) AS (
SELECT mt.[Month] AS _Month, (mt.NewClients - mt.OnHoldClients) AS _Total
FROM dbo.MyTable mt
WHERE mt.[Month] = #dmin
UNION ALL
SELECT mt.[Month] AS _Month, ((mt.NewClients - mt.OnHoldClients) + ccc._Total) AS _Total
FROM dbo.MyTable mt
CROSS APPLY (SELECT cc._Total FROM (SELECT c._Total,
CAST((row_number() OVER (ORDER BY c._Month DESC)) AS int) as _Rank
FROM cte c WHERE c._Month < mt.[Month]) as cc
WHERE cc._Rank = 1) AS ccc
WHERE mt.[Month] > #dmin
)
SELECT c._Month, max(c._Total) AS Total
FROM cte c
GROUP BY c._Month
It is a recursive CTE structure that goes about each record all along the way to the initial month and adds up to the final Total value. This query only includes Month and Total fields but you can easily add the other 2 to the list of projection.
Try this
;WITH CTE([Month],NewClients,OnHoldClients)
AS
(
SELECT '5-2017',10,2 UNION ALL
SELECT '6-2017',16,4 UNION ALL
SELECT '7-2017',11,1 UNION ALL
SELECT '8-2017',15,6 UNION ALL
SELECT '9-2017',18,7
)
SELECT [Month],
NewClients,
OnHoldClients,
SUM(MonthTotal)OVER( ORDER BY [Month]) AS Total
FROM
(
SELECT [Month],
NewClients,
OnHoldClients,
SUM(NewClients-OnHoldClients)OVER(PArtition by [Month] Order by [Month]) AS MonthTotal
FROM CTE
)dt
Result,Demo:http://rextester.com/DKLG54359
Month NewClients OnHoldClients Total
--------------------------------------------
5-2017 10 2 8
6-2017 16 4 20
7-2017 11 1 30
8-2017 15 6 39
9-2017 18 7 50

reset window function when the time gap is over one hour

I have a dataset already sorted by a window function in sql:
ROW_NUMBER() OVER (PARTITION BY LOAN_NUMBER, CAST(CREATED_DATE AS DATE) ORDER BY LOAN_NUMBER, CREATED_DATE) AS ROW_IDX
shown as above. I wonder if there's a way that reset the ROW_IDX when the CREATED_DATE has begun to have a value with over one hour gap to the minimum datetime in a specific day.
For example, the row index for row 3 should be 1 because the time gap between 2016-11-03 15:39:16.000 and 2016-11-03 12:44:11.000 is over one hour.And row index of row 4 will be 2.
I've tried several ways to manipulate the datatime column, since the consideration is about 'gap' instead of moments of the day, no rounding methods worked perfectly.
Are mean ,when the gap more than 60 minutes, will restart at 1?
Which version are you use? If it is SQL Server 2012+, you can try this.
The following query is not satisfying, but wish can give you help.
Calculating the diff minutes between continuous two line.
Check the diff minutes whether greater than one hour
Get row number base on the gap time has same situation continuously.
Sorry if I can not describe clear. My english is not well.
;WITH tb(RptDate,ISSUE_ID,ACCOUNT,CREATED_DATE )AS(
select '2017-01-17','35775','76505156','2016-11-03 12:44:11.000' UNION
select '2017-01-17','35793','76505156','2016-11-03 12:51:43.000' UNION
-- select '2017-01-17','35793','76505156','2016-11-03 13:47:43.000' UNION
-- select '2017-01-17','35793','76505156','2016-11-03 14:45:43.000' UNION
select '2017-01-17','36097','76505156','2016-11-03 15:39:16.000' UNION
select '2017-01-17','36132','76505156','2016-11-03 15:52:51.000' UNION
select '2017-01-17','41391','76505156','2016-11-10 10:49:30.000'
)
SELECT *,ROW_NUMBER()OVER(PARTITION BY tt.ACCOUNT,a ORDER BY tt.ACCOUNT, rn) AS ROW_IDX FROM (
SELECT * ,rn-ROW_NUMBER () OVER (PARTITION BY ACCOUNT, CAST(CREATED_DATE AS DATE),n ORDER BY rn) AS a
FROM (
SELECT *, ROW_NUMBER()OVER(PARTITION BY ACCOUNT ORDER BY CREATED_DATE) AS rn
,CASE WHEN DATEDIFF(MINUTE, LAG(CREATED_DATE)OVER(PARTITION BY ACCOUNT ORDER BY CREATED_DATE),tb.CREATED_DATE)>60 THEN 1 ELSE 0 END AS n
,ISNULL(DATEDIFF(MINUTE, LAG(CREATED_DATE)OVER(PARTITION BY ACCOUNT ORDER BY CREATED_DATE),tb.CREATED_DATE),0) AS DiffMin
FROM tb
) AS t
) AS tt
ORDER BY rn
RptDate ISSUE_ID ACCOUNT CREATED_DATE rn n DiffMin a ROW_IDX
---------- -------- -------- ----------------------- -------------------- ----------- ----------- -------------------- --------------------
2017-01-17 35775 76505156 2016-11-03 12:44:11.000 1 0 0 0 1
2017-01-17 35793 76505156 2016-11-03 12:51:43.000 2 0 7 0 2
2017-01-17 36097 76505156 2016-11-03 15:39:16.000 3 1 168 2 1
2017-01-17 36132 76505156 2016-11-03 15:52:51.000 4 0 13 1 1
2017-01-17 41391 76505156 2016-11-10 10:49:30.000 5 1 9777 4 1
It is another script,Do not use the LAG function, Each step has a statement:
;WITH tb(RptDate,ISSUE_ID,ACCOUNT,CREATED_DATE )AS(
select '2017-01-17','35775','76505156','2016-11-03 12:44:11.000' UNION
select '2017-01-17','35793','76505156','2016-11-03 12:51:43.000' UNION
-- select '2017-01-17','35793','76505156','2016-11-03 13:47:43.000' UNION
-- select '2017-01-17','35793','76505156','2016-11-03 14:45:43.000' UNION
select '2017-01-17','36097','76505156','2016-11-03 15:39:16.000' UNION
select '2017-01-17','36132','76505156','2016-11-03 15:52:51.000' UNION
select '2017-01-17','41391','76505156','2016-11-10 10:49:30.000'
),t1 AS(
SELECT *, ROW_NUMBER()OVER(PARTITION BY ACCOUNT ORDER BY CREATED_DATE) AS rn FROM tb
),t2 AS (
SELECT t1.*,CASE WHEN DATEDIFF(MINUTE,tt.CREATED_DATE,t1.CREATED_DATE)>60 THEN 1 ELSE 0 END AS m
,t1.rn-ROW_NUMBER()OVER(PARTITION BY t1.ACCOUNT,CASE WHEN DATEDIFF(MINUTE,tt.CREATED_DATE,t1.CREATED_DATE)>60 THEN 1 ELSE 0 END ORDER BY t1.CREATED_DATE) AS a
FROM t1 LEFT JOIN t1 AS tt ON tt.ACCOUNT=t1.ACCOUNT AND tt.rn=t1.rn-1
),t3 AS(
SELECT *,ROW_NUMBER()OVER(PARTITION BY ACCOUNT,t2.a ORDER BY CREATED_DATE) AS ROW_IDX
FROM t2
)
SELECT * FROM t3
ORDER BY t3.ACCOUNT,t3.CREATED_DATE

Resources