T SQL - Count People with Visits in 3 consecutive months - sql-server

With the following data:
-- Sample data: one row per visit; a person can visit more than once.
-- Created as a temp table so the queries that follow can read it as #t.
CREATE TABLE #t
(
    Name      VARCHAR(1),
    VisitDate DATE
);

INSERT INTO #t (Name, VisitDate)
VALUES
    ('A', '2017-01-05'),
    ('A', '2017-03-05'),
    ('A', '2017-04-05'),
    ('A', '2017-05-05'),
    ('A', '2017-08-05'),
    ('B', '2017-03-05'),
    ('C', '2017-01-05'),
    ('C', '2017-02-05'),
    ('C', '2017-04-05'),
    ('D', '2017-01-05'),
    ('D', '2017-02-05'),
    ('D', '2017-03-05'),
    ('D', '2017-06-05'),
    ('B', '2018-01-05'),
    ('B', '2018-02-05'),
    ('B', '2018-03-05'),
    ('E', '2018-01-05'),
    ('E', '2018-02-05'),
    ('E', '2018-03-05'),
    ('E', '2018-06-05');
I need to write a query that will return the Year & Names that have VisitDates in any three consecutive months in any year.
Based on the data, I expect to see:
2017 A
2017 D
2018 B
2018 E
To be honest, I don't know where to start with this using SQL.
I would appreciate any help I can get.
Thanks!!

You can avoid the joins, or parsing the whole data set multiple times, by using the same method as used in gaps-and-islands.
http://rextester.com/SYHJ40676
-- Gaps-and-islands: within one person and year, consecutive months share the
-- same value of (month number - row number), so each run forms one group.
WITH
    visit_months AS
    (
        -- Collapse repeat visits: one row per person per calendar month.
        SELECT DISTINCT
            Name,
            YEAR(VisitDate)  AS VisitYear,
            MONTH(VisitDate) AS VisitMonth
        FROM #t
    ),
    sequenced AS
    (
        -- Number the visited months inside each person/year.
        SELECT
            Name,
            VisitYear,
            VisitMonth,
            ROW_NUMBER() OVER (PARTITION BY Name, VisitYear
                               ORDER BY VisitMonth) AS MonthSequenceID
        FROM visit_months
    )
-- DISTINCT because one person/year may contain more than one qualifying run.
SELECT DISTINCT
    Name,
    VisitYear
FROM sequenced
GROUP BY
    Name,
    VisitYear,
    VisitMonth - MonthSequenceID
HAVING COUNT(*) >= 3

Just join each visit to the same person's visits in the two following months and see what matches:
-- Self-join each visit (m1) to the same person's visits one and two months
-- later within the same year. The year must be matched as well as the month,
-- otherwise months from different years are combined. DISTINCT removes the
-- repeats produced by several visits in a month or by runs longer than three.
SELECT DISTINCT
    DATEPART(year, m1.VisitDate) AS VisitYear,
    m1.Name
FROM #t AS m1
INNER JOIN #t AS m2
    ON  m2.Name = m1.Name
    AND DATEPART(year,  m2.VisitDate) = DATEPART(year,  m1.VisitDate)
    AND DATEPART(month, m2.VisitDate) = DATEPART(month, m1.VisitDate) + 1
INNER JOIN #t AS m3
    ON  m3.Name = m1.Name
    AND DATEPART(year,  m3.VisitDate) = DATEPART(year,  m1.VisitDate)
    AND DATEPART(month, m3.VisitDate) = DATEPART(month, m1.VisitDate) + 2
Since a comment asked how to handle runs that cross a year boundary, this should work:
-- Year-boundary-safe variant: EOMONTH(date, n) gives the last day of the month
-- n months after date, so equal EOMONTH values mean "same calendar month".
-- The reported year is that of the first month of the run (m1).
-- DISTINCT removes repeats caused by several visits in one month or by runs
-- longer than three months.
SELECT DISTINCT
    DATEPART(year, m1.VisitDate) AS VisitYear,
    m1.Name
FROM #t AS m1
INNER JOIN #t AS m2
    ON  m2.Name = m1.Name
    AND EOMONTH(m1.VisitDate, 1) = EOMONTH(m2.VisitDate)
INNER JOIN #t AS m3
    ON  m3.Name = m1.Name
    AND EOMONTH(m1.VisitDate, 2) = EOMONTH(m3.VisitDate)
doc on EOMONTH: https://learn.microsoft.com/en-us/sql/t-sql/functions/eomonth-transact-sql?view=sql-server-2017
Edit: my answer is just a quick hack; it performs poorly and returns wrong results when there are multiple visits in the same month.
I suggest using this answer: https://stackoverflow.com/a/52669713/4903754

I wrote my code using PostgreSQL 9.5.0 syntax.
First I create a flag marking visits that fall in consecutive months, and then I use that flag to retrieve the required data.
To check whether two dates are in consecutive months, I compare each visit with the previous and next visit using the lag() and lead() functions.
-- PostgreSQL. Returns each name that has visits in at least three consecutive
-- calendar months, with the length of its longest such run as "count".
-- Counting rows that merely have a neighbouring month would also accept two
-- separate two-month runs, so runs are measured explicitly (gaps-and-islands).
with visit_months as (
    -- One row per person per calendar month, as an absolute month number so
    -- that December -> January is still consecutive.
    select distinct
        name,
        (date_part('year', visitdate::date) * 12
            + date_part('month', visitdate::date))::int as month_no
    from TT
),
islands as (
    -- Consecutive months share the same (month_no - row_number) value.
    select
        name,
        month_no - row_number() over (partition by name order by month_no) as island_id
    from visit_months
),
runs as (
    select
        name,
        island_id,
        count(1) as run_length
    from islands
    group by name, island_id
)
select
    name,
    max(run_length) as count
from runs
where run_length >= 3
group by name

Related

Sum values if they are between date range sql

I want to sum the values whose date falls between CreationDate and EndDate; the result is the ValueEnd column.
For instance, in the second row the CreationDate is the same as the EndDate, so I have to add that day's ValuePerDay to the previous value. So in the ValueEnd column it is 3.4 + 1.17 = 4.57.
I started by calculating the sum for the days where the Difference is 1, like this:
-- Totals Values_an per (CreationDate, EndDate) pair for rows whose Difference is 1.
-- Style 103 renders the dates as dd/mm/yyyy text.
SELECT
    CONVERT(CHAR(10), CreationDate, 103) AS CreationDate,
    CONVERT(CHAR(10), EndDate, 103)      AS EndDate,
    SUM(Values_an)                       AS Values_an
FROM Dat1
WHERE Difference = 1
GROUP BY
    CONVERT(CHAR(10), CreationDate, 103),
    CONVERT(CHAR(10), EndDate, 103),
    Difference
However, I'm having trouble summing the values where the difference is higher than 1. Can someone help me, please?
OK, judging by the provided information - and as far as I understood everything right - the following approach might solve your problem:
-- Sample data held in a table variable (table variables are declared with @).
DECLARE @t TABLE (
    CreationDate DATE,
    EndDate      DATE,
    Value_An     DECIMAL(19,4)
);

INSERT INTO @t (CreationDate, EndDate, Value_An)
VALUES
    ('2019-03-01', '2019-03-01', 3.4),
    ('2019-03-01', '2019-03-03', 3.5),
    ('2019-05-01', '2019-05-01', 3.6),
    ('2019-06-01', '2019-06-04', 3.7);

WITH cteMultiRow AS (
    -- Creation dates that occur on more than one row.
    SELECT
        CreationDate,
        COUNT(*) AS cntRows
    FROM @t
    GROUP BY CreationDate
    HAVING COUNT(*) > 1
),
cte AS (
    -- Per row: its position within the creation date, the inclusive day span,
    -- and a per-day value. The value is divided by the span only when the
    -- creation date is shared by several rows; otherwise it is left as-is.
    SELECT
        t.CreationDate,
        t.EndDate,
        t.Value_An,
        ROW_NUMBER() OVER (PARTITION BY t.CreationDate ORDER BY t.EndDate) AS rn,
        DATEDIFF(d, t.CreationDate, t.EndDate) + 1 AS Difference,
        CASE
            WHEN m.CreationDate IS NOT NULL
                THEN t.Value_An / (DATEDIFF(d, t.CreationDate, t.EndDate) + 1)
            ELSE t.Value_An
        END AS ValuePerD
    FROM @t AS t
    LEFT JOIN cteMultiRow AS m
        ON t.CreationDate = m.CreationDate
),
cteSums AS (
    -- Total per-day value for each creation date.
    SELECT
        c.CreationDate,
        SUM(c.ValuePerD) AS ValuePerD
    FROM cte AS c
    GROUP BY c.CreationDate
)
-- Only the first row of each creation date (rn = 1) receives the summed value;
-- every other row falls back to its own Value_An.
SELECT
    c.CreationDate,
    c.EndDate,
    c.Value_An,
    c.Difference,
    c.ValuePerD,
    ISNULL(s.ValuePerD, c.Value_An) AS ValueEnd
FROM cte AS c
LEFT JOIN cteSums AS s
    ON  c.CreationDate = s.CreationDate
    AND c.rn = 1

Join for tally table. Need for each day each Cust_ID

Can you help me figure out how to produce the output table shown in the picture below? This is part of some tricky membership/gap processing I need. I could not figure out how to give EACH Cust_ID an entry for every tally date.
Sample code (the last SELECT is the one that needs to be improved):
-- Enrollment spans per customer/contract.
CREATE TABLE #test
(
    Cust_ID   VARCHAR(14),
    Contr_ID  INT,
    ENR_START DATE,
    ENR_END   DATE
);

INSERT INTO #test (Cust_ID, Contr_ID, ENR_START, ENR_END)
VALUES
    ('1', 1, '2018-01-02', '2018-01-05'),
    ('1', 2, '2018-01-07', '2018-01-08'),
    ('2', 1, '2018-01-06', '2019-01-10');

-- Reporting period. NOTE(review): the original snippet never declared these;
-- the values assume the 10-day window 2018-01-01..2018-01-10 used in the
-- answer below -- confirm.
DECLARE @Period_Start DATE = '2018-01-01',
        @Period_End   DATE = '2018-01-10';

-- Tally table #c: one row per day of the period, both ends included.
-- DATEADD is used for the "+ 1 day" because DATE does not support "+ 1".
SELECT TOP (DATEDIFF(DAY, @Period_Start, DATEADD(DAY, 1, @Period_End)))
    DATEADD(dd, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1, @Period_Start) AS dt
INTO #c
FROM master..spt_values;

-- Asker's attempt: every tally day with whatever enrollment covers it.
-- Days covered by no enrollment come back with NULL customer columns.
SELECT
    t.Cust_ID,
    t.Contr_ID,
    t.ENR_START,
    t.ENR_END,
    c.dt
FROM #c AS c
LEFT JOIN #test AS t
    ON c.dt BETWEEN t.ENR_START AND t.ENR_END
ORDER BY
    t.Cust_ID,
    c.dt
I think this is the logic that you want:
-- Build every customer/date combination for the period first, then attach
-- the enrollment row (if any) that covers that date for that customer.
SELECT
    cu.cust_id,
    c.dt,
    t.*
FROM (SELECT DISTINCT cust_id FROM test) AS cu
INNER JOIN c
    ON c.dt BETWEEN '2018-01-01' AND '2018-01-10'
LEFT JOIN test AS t
    ON  t.cust_id = cu.cust_id
    AND c.dt BETWEEN t.enr_start AND t.enr_end
ORDER BY
    cu.cust_id,
    c.dt;
The idea is to generate all cust_id/date combinations and then left join to the original data to get any matches.
Here is a db<>fiddle.

How to reference the current column you are defining using lag?

I have a salary table like this:
-- Salary history sample; OrderedID numbers each employee's rows by effective date.
-- Table variables are declared with @.
DECLARE @t TABLE (OrderedID INT, EmpID INT, EffDate DATE, Salary MONEY);

INSERT INTO @t (OrderedID, EmpID, EffDate, Salary)
VALUES
    (1, 1234, '20150101', 100),
    (2, 1234, '20160101', 100),
    (3, 1234, '20170101', 100),
    (4, 1234, '20180101', 300),
    (1, 2351, '20150101', 100);
I am trying to get an initial effective date on each row:
First 3 rows have 1/1/2015
4th row has new value 1/1/2018
Here is what I tried with a case and a lag but i can't figure out how to reference the prior value of the column I am creating.
-- Asker's attempted expression (reported as not working): keep EffDate on the
-- first row or when the salary changes, otherwise carry the previous row's
-- computed value forward.
case when OrderedID = 1 then EFFDaTe
-- NOTE(review): this LAG has no OVER clause, unlike the one two lines below.
when Salary != LAG(Salary,1) then EFFDaTe
-- SalaryEFFDT is the alias being defined by this same expression.
else lag(SalaryEFFDT,1) over (order by 1)
end as SalaryEFFDT
Thanks for your help.
As you haven't provided the expected output, I think this is what you want:
-- Salary history sample, including a return to an earlier salary (row 5).
DECLARE @t TABLE (OrderedID INT, EmpID INT, EffDate DATE, Salary MONEY);

INSERT INTO @t (OrderedID, EmpID, EffDate, Salary)
VALUES
    (1, 1234, '20150101', 100),
    (2, 1234, '20160101', 100),
    (3, 1234, '20170101', 100),
    (4, 1234, '20180101', 300),
    (1, 2351, '20150101', 100),
    (5, 1234, '20190101', 100);

WITH cte AS (
    -- Rows of one uninterrupted salary period share the same grp value:
    -- OrderedID and the per-salary row number advance together.
    SELECT
        OrderedID,
        EmpID,
        EffDate,
        Salary,
        OrderedID - ROW_NUMBER() OVER (PARTITION BY EmpID, Salary
                                       ORDER BY OrderedID) AS grp
    FROM @t
),
cte1 AS (
    -- First effective date of each salary period. Salary is part of the key
    -- because grp is only unique within one (EmpID, Salary) partition; two
    -- different salaries can produce the same grp number.
    SELECT
        EmpID,
        Salary,
        grp,
        MIN(EffDate) AS effDate
    FROM cte
    GROUP BY EmpID, Salary, grp
)
SELECT
    t.OrderedID,
    t.EmpID,
    t.EffDate,
    t.Salary,
    c.effDate AS computeddate
FROM cte AS t
INNER JOIN cte1 AS c
    ON  t.EmpID  = c.EmpID
    AND t.Salary = c.Salary
    AND t.grp    = c.grp
ORDER BY t.OrderedID
So you are trying to get the first effective date for each EmpID? The code below should do that. If that is not your desired output, can you show what the output should look like?
-- Salary history sample. Created as a temp table so the follow-up query
-- further down can also read it as #t.
CREATE TABLE #t (OrderedID INT, EmpID INT, EffDate DATE, Salary MONEY);

INSERT INTO #t (OrderedID, EmpID, EffDate, Salary)
VALUES
    (1, 1234, '20150101', 100),
    (2, 1234, '20160101', 100),
    (3, 1234, '20170101', 100),
    (4, 1234, '20180101', 300),
    (1, 2351, '20140101', 100),
    (2, 2351, '20150101', 100);

-- Every row together with the earliest effective date of its employee.
SELECT
    T.*,
    FE.FirstEff
FROM #t AS T
INNER JOIN (
    SELECT
        EmpID,
        MIN(EffDate) AS FirstEff
    FROM #t
    GROUP BY EmpID
) AS FE
    ON FE.EmpID = T.EmpID
The second query is for when you need the first date on which they had that salary; however, it will give misleading results if someone gets a raise and later returns to an earlier salary.
-- Every row together with the earliest effective date on which that employee
-- had that same salary amount.
SELECT
    T.*,
    FE.FirstEff
FROM #t AS T
INNER JOIN (
    SELECT
        EmpID,
        Salary,
        MIN(EffDate) AS FirstEff
    FROM #t
    GROUP BY
        EmpID,
        Salary
) AS FE
    ON  FE.EmpID  = T.EmpID
    AND FE.Salary = T.Salary

sql query that gets the difference between 2 recent rows for every row item that occurs more than once in a table

Sql query that gets the difference between 2 recent rows for every value that occurs more than once in a table.
for example
book value date
A 4 2017-07-17 09:16:44.480
A 2 2017-08-15 10:05:58.273
B 3 2017-04-15 10:05:58.273
C 2 2017-08-15 10:05:58.273
B 3 2017-04-13 10:05:58.273
B 3 2017-04-12 10:05:58.273
should return
A 2
B 0
Here is a solution:
-- Rank each book's rows newest-first, keep the two most recent, and report
-- the spread between their values. Books with a single row are dropped.
WITH ranked AS (
    SELECT
        book,
        value,
        ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS rownum
    FROM t
)
SELECT
    book,
    MAX(value) - MIN(value) AS difference
FROM ranked
WHERE rownum <= 2
GROUP BY book
-- rownum is unique per book, so two surviving rows means rownum 2 exists.
HAVING COUNT(*) = 2
And here it is in SQLFiddle
-- Template: bracketed names are placeholders to fill in.
SELECT id_pk FROM [table] GROUP BY [fields you want to compare by] HAVING COUNT(*) > 1
This select returns the list of primary keys of the elements that are repeated.
So, in another select you might wrap it like this:
-- Template: bracketed names are placeholders to fill in.
-- The subquery is closed once; "limit 2" applies to the outer select.
SELECT * FROM [table] WHERE id_pk IN (
    SELECT id_pk FROM [table] GROUP BY [fields you want to compare by] HAVING COUNT(*) > 1
) limit 2
This works, though it may not be efficient, as I have not analysed its complexity.
Add a rownumber before calculating:
-- Sample data.
CREATE TABLE #test ([book] CHAR(1), [value] INT, [date] DATETIME);

INSERT INTO #test ([book], [value], [date])
VALUES
    ('A', 4, '2017-07-17 09:16:44.480'),
    ('A', 2, '2017-08-15 10:05:58.273'),
    ('B', 3, '2017-04-15 10:05:58.273'),
    ('C', 2, '2017-08-15 10:05:58.273'),
    ('B', 3, '2017-04-13 10:05:58.273'),
    ('B', 3, '2017-04-12 10:05:58.273');

-- Number all rows by book and date, then pair each row with later rows of the
-- same book and compare the book's latest values.
WITH cte AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY [book], [date]) AS rownumber,
        *
    FROM #test
)
SELECT DISTINCT
    earlier.book,
    ABS(
        FIRST_VALUE(earlier.[Value]) OVER (PARTITION BY earlier.book
                                           ORDER BY earlier.rownumber DESC)
        - later.val2
    ) AS [Difference]
FROM cte AS earlier
INNER JOIN (
    -- val2: the value on the book's highest-numbered (latest) row.
    SELECT
        rownumber,
        book,
        FIRST_VALUE([Value]) OVER (PARTITION BY book
                                   ORDER BY rownumber DESC) AS val2
    FROM cte
) AS later
    ON  earlier.book = later.book
    AND earlier.rownumber < later.rownumber
I would use analytic functions:
-- For each book, compare its newest row with the row just before it.
;WITH CTE AS (
    SELECT
        book,
        value,
        -- value on the chronologically preceding row of the same book
        LAG(value) OVER (PARTITION BY book ORDER BY date) AS previous_value,
        -- 1 = newest row of the book
        ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS rn
    FROM MyTable
)
SELECT
    book,
    value - previous_value AS value_change
FROM CTE
WHERE rn = 1
  AND previous_value IS NOT NULL
LAG() was added in SQL Server 2012, but even if you're on a higher version, your database must have the compatibility version set to 110 or higher for them to be available. Here's an alternative that should work on SQL Server 2005 or higher, or a database compatibility 90 or higher.
-- Variant without LAG(): rank each book's rows newest-first, then join the
-- newest row (rn = 1) to the second newest (rn = 2). Books with only one row
-- have no rn = 2 row and are therefore left out.
;WITH CTE AS (
    SELECT
        book,
        value,
        ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS rn
    FROM MyTable
)
SELECT
    c1.book,
    c1.value - c2.value AS value_change
FROM CTE AS c1
INNER JOIN CTE AS c2
    ON c1.book = c2.book
WHERE c1.rn = 1
  AND c2.rn = 2

Backfill dates using CTE in SQL Server

I have a table with IDs and with null startdate. I am trying to back fill them using CTE.
If the EndDate is '2011-12-31' for Record#1, the StartDate for Record#2 is EndDate from Record#1 + 1 day i.e. '2012-01-01'
-- Input rows: every startDate is NULL and is to be back-filled.
CREATE TABLE dbo.input
(
    inputid   INT           NULL,
    startDate SMALLDATETIME NULL,
    endDate   SMALLDATETIME NULL
);

INSERT INTO dbo.input (inputid, startDate, endDate)
VALUES
    (111, NULL, '2011-05-31'),
    (111, NULL, '2012-05-31'),
    (111, NULL, '2013-05-31'),
    (111, NULL, '2014-05-31'),
    (111, NULL, '2015-05-31'),
    (222, NULL, '2010-06-30'),
    (222, NULL, '2011-06-30'),
    (222, NULL, '2012-06-30'),
    (222, NULL, '2013-06-30'),
    (222, NULL, '2014-06-30');
This is the expected output.
-- Expected result, reproduced exactly as the asker listed it.
CREATE TABLE dbo.output
(
    outputid  INT           NULL,
    startDate SMALLDATETIME NULL,
    endDate   SMALLDATETIME NULL
);

INSERT INTO dbo.output (outputid, startDate, endDate)
VALUES
    (111, NULL,         '2011-05-31'),
    (111, '2011-06-01', '2012-05-31'),
    (111, '2012-05-31', '2013-05-31'),
    (111, '2013-05-31', '2014-05-31'),
    (111, '2014-05-31', '2015-05-31'),
    (222, NULL,         '2010-06-30'),
    (222, '2010-06-30', '2011-06-30'),
    (222, '2011-06-30', '2012-06-30'),
    (222, '2012-06-30', '2013-06-30'),
    (222, '2013-06-30', '2014-06-30');
This is what I tried
-- Asker's attempt, reproduced as written: rows are numbered per inputid by
-- endDate and each row is joined to the rows numbered one lower and one
-- higher. The joins compare rn only, not inputid.
WITH CTE AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY p.inputid
                           ORDER BY p.inputid, endDate) AS rn,
        p.inputid,
        EndDate
    FROM dbo.input AS p
)
SELECT DISTINCT
    DATEADD(day, 1, prev.enddate) AS startd,
    cte.inputid,
    cte.endDate
FROM CTE
LEFT JOIN CTE AS prev
    ON prev.rn = CTE.rn - 1
LEFT JOIN CTE AS nex
    ON nex.rn = CTE.rn + 1
ORDER BY
    cte.inputid,
    startd
It is not grouping by inputid properly and I do not know how to fix this?
Any help is appreciated.
Thanks
MR
Here is your existing query, adjusted; the results are not exactly the same as your expected output, though. Should StartDate be the day after the previous EndDate, rather than the same day as the previous EndDate?
-- Number each inputid's rows by endDate, then join every row to the one
-- numbered just before it within the same inputid. The new startDate is that
-- earlier row's endDate plus one day; the first row of each inputid has no
-- predecessor and keeps a NULL startDate.
;WITH cte AS (
    SELECT
        rn = ROW_NUMBER() OVER (PARTITION BY inputid ORDER BY endDate),
        *
    FROM #input
)
SELECT
    a.inputid,
    DATEADD(DD, 1, b.endDate) AS startDate,
    a.endDate
FROM cte AS a
LEFT JOIN cte AS b
    ON  a.inputid = b.inputid
    AND a.rn = b.rn + 1
-- Sort on endDate: a.startDate is the source column, which is NULL on every
-- row, so ordering by it would leave rows within an inputid in arbitrary order.
ORDER BY
    a.inputid,
    a.endDate;
As long as you're using SQL Server 2012 or later, you can use the following...
-- startDate is the previous endDate of the same inputid (NULL on each
-- inputid's first row). One extra day is added only on the row that is second
-- in the overall (inputid, endDate) ordering; that row number is not
-- partitioned by inputid.
SELECT
    i.inputid,
    DATEADD(
        dd,
        CASE ROW_NUMBER() OVER (ORDER BY i.inputid, i.endDate)
            WHEN 2 THEN 1
            ELSE 0
        END,
        LAG(i.endDate) OVER (PARTITION BY i.inputid ORDER BY i.endDate)
    ) AS startDate,
    i.endDate
FROM #input AS i;

Resources