To remove duplication of data if within 7 days - sql-server

Following is my table and sample data
-- Sample data for the de-duplication question.
-- A local temporary table is created here because DECLARE only accepts
-- @-prefixed table variables ("DECLARE #Employee_Log table" does not parse),
-- and the queries in this thread read from #Employee_Log.
CREATE TABLE #Employee_Log
(
    ID            int,
    eid           int,
    ecode         varchar(100),
    emp_startdate date
);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO #Employee_Log (ID, eid, ecode, emp_startdate)
VALUES
    (1, 1, 'aaa', '2019-01-01'),
    (2, 1, 'aaa', '2019-01-05'),
    (3, 1, 'bbb', '2019-01-03'),
    (4, 2, 'aaa', '2019-01-03'),
    (5, 1, 'aaa', '2019-02-01'),
    (6, 1, 'aaa', '2019-02-15'),
    (7, 1, 'aaa', '2019-02-19'),
    (8, 1, 'aaa', '2019-02-28');
In the above data I want to remove duplicates based on eid and ecode. If the emp_startdate values are within 7 days of each other, take the latest row and ignore the rest.
I tried the following code but how to add the condition check for week range
-- Number the rows of each (eid, ecode) group, newest emp_startdate first.
-- No 7-day condition is applied yet; that is what the question asks about.
SELECT
    ROW_NUMBER() OVER (
        PARTITION BY log_rows.eid, log_rows.ecode
        ORDER BY log_rows.emp_startdate DESC
    ) AS rownum,
    log_rows.ID,
    log_rows.eid,
    log_rows.ecode,
    log_rows.emp_startdate
FROM #Employee_Log AS log_rows;
I want the result as shown below
ID eid ecode emp_startdate
2 1 aaa 2019-01-05
5 1 aaa 2019-02-01
4 2 aaa 2019-01-03
7 1 aaa 2019-02-19
8 1 aaa 2019-02-28
3 1 bbb 2019-01-03

I am still not sure what you want to happen if more than 2 events happen in the same 7 days. But this solution will get the latest date of all series of dates where the difference between dates is 7 days or less.
-- For every row, measure the number of days to the next row (by emp_startdate)
-- of the same (eid, ecode) group, then keep the row only when there is no next
-- row or the next row is more than 7 days later.
;WITH gap_to_next AS (
    SELECT
        log_rows.ID,
        log_rows.eid,
        log_rows.ecode,
        log_rows.emp_startdate,
        DATEDIFF(
            DAY,
            log_rows.emp_startdate,
            LEAD(log_rows.emp_startdate) OVER (
                PARTITION BY log_rows.eid, log_rows.ecode
                ORDER BY log_rows.emp_startdate
            )
        ) AS days_to_next
    FROM #Employee_Log AS log_rows
)
SELECT
    ID,
    eid,
    ecode,
    emp_startdate
FROM gap_to_next
WHERE days_to_next IS NULL      -- last row of its group
   OR days_to_next > 7;         -- next row is outside the 7-day range
ID eid ecode emp_startdate
-- --- ----- -------------
3 1 bbb 2019-01-03
2 1 aaa 2019-01-05
5 1 aaa 2019-02-01
7 1 aaa 2019-02-19
8 1 aaa 2019-02-28
4 2 aaa 2019-01-03

The following query gives you what you asked for in plain English in your question, but your sample data and desired output contradict that description:
-- Rank rows inside each (eid, ecode, year, week number) bucket, newest first,
-- and return only the newest row of every bucket. The buckets are fixed
-- DATEPART(WEEK, ...) weeks, not a rolling 7-day range between rows.
;WITH weekly_ranked AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY
                log_rows.eid,
                log_rows.ecode,
                YEAR(log_rows.emp_startdate),
                DATEPART(WEEK, log_rows.emp_startdate)
            ORDER BY log_rows.emp_startdate DESC
        ) AS rownum,
        log_rows.ID,
        log_rows.eid,
        log_rows.ecode,
        log_rows.emp_startdate
    FROM #Employee_Log AS log_rows
)
SELECT
    rownum,
    ID,
    eid,
    ecode,
    emp_startdate
FROM weekly_ranked
WHERE rownum = 1;

Related

Filter table to show only most recent values [duplicate]

This question already has answers here:
Get top 1 row of each group
(19 answers)
Closed 11 months ago.
I have a table that looks like this.
Category
Type
fromDate
Value
1
1
1/1/2022
5
1
2
1/1/2022
10
2
1
1/1/2022
7.5
2
2
1/1/2022
15
3
1
1/1/2022
3.5
3
2
1/1/2022
5
3
1
4/1/2022
5
3
2
4/1/2022
10
I'm trying to filter this table down to keep only the most recent row for each Category/Type grouping, i.e. rows 5 and 6 would be removed by the query since they are older records.
So far I have the below query but I am getting an aggregate error due to not aggregating the "Value" column. My question is how do I get around this without aggregating? I want to keep the actual value that is in the column.
-- Attempted query from the question; it raises the aggregate error described above
-- because T1.Value is selected but is neither aggregated nor listed in GROUP BY.
SELECT T1.Category, T1.Type, T2.maxDate, T1.Value
-- T2: latest fromDate per (Category, Type).
FROM (SELECT Category, Type, MAX(fromDate) AS maxDate
FROM Table GROUP BY Category,Type) T2
-- NOTE(review): the join compares Category only; Type and fromDate = maxDate are not matched.
INNER JOIN Table T1 ON T1.Category=T2.Category
GROUP BY T1.Category, T1.Type, T2.MaxDate
This has been asked and answered dozens and dozens of times. But it was quick and painless to type up an answer. This should work for you.
-- Sample data held in a table variable (table variables need the @ prefix).
DECLARE @MyTable TABLE
(
    Category int,
    Type     int,
    fromDate date,
    Value    decimal(5,2)
);

-- Dates use the unseparated yyyymmdd form, which does not depend on the session's
-- DATEFORMAT/language. '4/1/2022' from the question is read as month/day/year
-- (April 1st) -- TODO confirm; either reading is later than 2022-01-01, so the
-- result of the query below is the same.
INSERT INTO @MyTable (Category, Type, fromDate, Value)
VALUES
    (1, 1, '20220101', 5),
    (1, 2, '20220101', 10),
    (2, 1, '20220101', 7.5),
    (2, 2, '20220101', 15),
    (3, 1, '20220101', 3.5),
    (3, 2, '20220101', 5),
    (3, 1, '20220401', 5),
    (3, 2, '20220401', 10);

-- Keep the most recent row of every (Category, Type) pair: number the rows of
-- each pair newest-first and return only row 1, so Value comes from that exact
-- row and no aggregate is needed.
SELECT
    ranked.Category,
    ranked.Type,
    ranked.fromDate,
    ranked.Value
FROM
(
    SELECT
        src.Category,
        src.Type,
        src.fromDate,
        src.Value,
        ROW_NUMBER() OVER (
            PARTITION BY src.Category, src.Type
            ORDER BY src.fromDate DESC
        ) AS RowNum
    FROM @MyTable AS src
) AS ranked
WHERE ranked.RowNum = 1
ORDER BY
    ranked.Category,
    ranked.Type;

T-SQL - 3 month moving sum - preceding null values

Using SQL Server 2016. I have the following data table (sample)
Target Date Total
-----------------
2018-01-24 1
2018-02-28 1
2018-03-02 1
2018-03-08 1
2018-03-15 1
2018-03-30 1
2018-04-16 1
2018-04-18 1
2018-04-30 1
I would like to get a 3-month moving sum (grouped by month):
Target Date Total_Sum
-----------------------
2018-01-01 1
2018-02-01 2
2018-03-01 6
2018-04-01 8
OK, this should get the answer you want. First you need to total the values for each month; then you can add each month's total to the totals of the two months before it:
-- Demo table for the 3-month moving sum.
CREATE TABLE SampleTable (TargetDate date, Total int);
GO
INSERT INTO SampleTable (TargetDate, Total)
VALUES ('20180124', 1),
       ('20180228', 1),
       ('20180302', 1),
       ('20180308', 1),
       ('20180315', 1),
       ('20180330', 1),
       ('20180416', 1),
       ('20180418', 1),
       ('20180430', 1);
GO
SELECT TargetDate,
       Total
FROM SampleTable;
GO
-- Months: one row per calendar month (first day of the month) with that month's total.
WITH Months AS (
    SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, TargetDate), 0) AS TargetMonth,
           SUM(Total) AS MonthTotal
    FROM SampleTable
    GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, TargetDate), 0))
-- Sum each month with the months that fall in the two calendar months before it.
-- The window is defined by date range rather than "2 preceding rows", so a month
-- with no data does not pull an older month into the 3-month window.
SELECT cur.TargetMonth,
       SUM(prev.MonthTotal) AS Last3Months
FROM Months AS cur
INNER JOIN Months AS prev
    ON prev.TargetMonth > DATEADD(MONTH, -3, cur.TargetMonth)
   AND prev.TargetMonth <= cur.TargetMonth
GROUP BY cur.TargetMonth
ORDER BY cur.TargetMonth;
GO
DROP TABLE SampleTable;
GO
Pls try the below code
-- 3-month moving sum over inline sample data.
-- CTE: the raw rows, with TargetDate converted to a real date.
;WITH CTE (TargetDate, Total) AS
(
    SELECT CAST(v.TargetDate AS date), v.Total
    FROM (VALUES
        ('20180124', 1),
        ('20180228', 1),
        ('20180302', 1),
        ('20180308', 1),
        ('20180315', 1),
        ('20180330', 1),
        ('20180416', 1),
        ('20180418', 1),
        ('20180430', 1)
    ) AS v (TargetDate, Total)
),
-- MonthTotals: one row per calendar month (first day of the month) with its total.
MonthTotals AS
(
    SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, TargetDate), 0) AS TargetMonth
          ,SUM(Total) AS MonthTotal
    FROM CTE
    GROUP BY DATEADD(MONTH, DATEDIFF(MONTH, 0, TargetDate), 0)
)
-- Each month is summed with the months in the two calendar months before it.
-- For the sample data this yields 1, 2, 6, 8 (April = Feb + Mar + Apr), whereas
-- a running total ordered by MONTH() would yield 9 for April.
SELECT CAST(cur.TargetMonth AS date) AS TargetDate
      ,SUM(prev.MonthTotal) AS Total_Sum
FROM MonthTotals AS cur
INNER JOIN MonthTotals AS prev
    ON prev.TargetMonth > DATEADD(MONTH, -3, cur.TargetMonth)
   AND prev.TargetMonth <= cur.TargetMonth
GROUP BY cur.TargetMonth
ORDER BY cur.TargetMonth;
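Result (as posted with the original answer; note that the April value is 9, a cumulative total of all rows, whereas the 3-month sum requested in the question is 8)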
TargetDate Total_Sum
---------------------
2018-01-01 1
2018-02-01 2
2018-03-01 6
2018-04-01 9

Finding the Datediff between Records in same Table

IP QID ScanDate Rank
101.110.32.80 6 2016-09-28 18:33:21.000 3
101.110.32.80 6 2016-08-28 18:33:21.000 2
101.110.32.80 6 2016-05-30 00:30:33.000 1
I have a table with certain records, grouped by IP address and QID. My requirement is to find out which records break the sequence in the date column, in other words where the date difference from the previous record is more than 30 days. In the table above, the date difference between rank 1 and rank 2 is more than 30 days, so I should flag the rank 2 record.
You can use LAG in Sql 2012+
-- Sample data in a table variable (table variables need the @ prefix).
DECLARE @Tbl TABLE (Ip VARCHAR(50), QID INT, ScanDate DATETIME, [Rank] INT);

-- ISO 8601 literals (yyyy-mm-ddThh:mi:ss.mmm) are parsed the same way for
-- DATETIME regardless of the session's language/DATEFORMAT.
INSERT INTO @Tbl (Ip, QID, ScanDate, [Rank])
VALUES
    ('101.110.32.80', 6, '2016-09-28T18:33:21.000', 3),
    ('101.110.32.80', 6, '2016-08-28T18:33:21.000', 2),
    ('101.110.32.80', 6, '2016-05-30T00:30:33.000', 1);

-- Result: every row plus the rank and scan date of the previous row (by Rank)
-- of the same Ip/QID group, so rows of one group are never compared with
-- rows of another group.
;WITH Result AS
(
    SELECT
        T.Ip,
        T.QID,
        T.ScanDate,
        T.[Rank],
        LAG(T.[Rank])   OVER (PARTITION BY T.Ip, T.QID ORDER BY T.[Rank]) AS PrivSRank,
        LAG(T.ScanDate) OVER (PARTITION BY T.Ip, T.QID ORDER BY T.[Rank]) AS PrivScanDate
    FROM @Tbl AS T
)
SELECT
    R.Ip,
    R.QID,
    R.ScanDate,
    R.[Rank],
    R.PrivScanDate,
    -- Flag the row when it is more than 30 days after the previous scan; the
    -- first row of a group has no previous scan and gets an empty flag.
    CASE
        WHEN DATEDIFF(DAY, R.PrivScanDate, R.ScanDate) > 30
            THEN 'This is greater than 30 day. Rank ' + CAST(R.PrivSRank AS VARCHAR(10))
        ELSE ''
    END AS CFlag
FROM Result AS R;
Result:
Ip QID ScanDate Rank CFlag
------------------------ ----------- ----------------------- ----------- --------------------------------------------
101.110.32.80 6 2016-05-30 00:30:33.000 1
101.110.32.80 6 2016-08-28 18:33:21.000 2 This is greater than 30 day. Rank 1
101.110.32.80 6 2016-09-28 18:33:21.000 3 This is greater than 30 day. Rank 2
While Window Functions could be used here, I think a self join might be more straight forward and easier to understand:
-- Pair every record with the record of the same IP and QID whose rank is one
-- lower, and return the pairs that are more than 30 days apart.
-- MyTable stands for the real table name (TABLE itself is a reserved keyword
-- and cannot be used unquoted).
SELECT
    cur.IP,
    cur.QID,
    cur.[Rank],
    cur.ScanDate AS endScanDate,
    prev.ScanDate AS beginScanDate,
    DATEDIFF(DAY, prev.ScanDate, cur.ScanDate) AS scanDateDays
FROM
    MyTable AS cur
    INNER JOIN MyTable AS prev ON
        cur.IP = prev.IP
        AND cur.QID = prev.QID
        AND cur.[Rank] - 1 = prev.[Rank]   -- prev is the record one less in rank
WHERE DATEDIFF(DAY, prev.ScanDate, cur.ScanDate) > 30;   -- only gaps greater than 30 days
It's pretty self-explanatory. We are joining the table to itself and joining the ranks together where rank 2 gets joined to rank 1, rank 3 gets joined to rank 2, and so on. Then we just test for records that are greater than 30 days using the datediff function.
I would use a window function to avoid the self join, which in many cases will perform better.
-- cte: every scan together with the previous scan date of the same IP and QID
-- (the question groups records by both columns).
;WITH cte
AS (
    SELECT
        t.IP
      , t.QID
      , LAG(t.ScanDate) OVER (PARTITION BY t.IP, t.QID ORDER BY t.ScanDate) AS beginScanDate
      , t.ScanDate AS endScanDate
    FROM
        MyTable AS t
)
-- Return the scans that come more than 30 days after the previous one.
-- The first scan of a group has no previous date (NULL) and is filtered out.
SELECT
    c.IP
  , c.QID
  , c.beginScanDate
  , c.endScanDate
  , DATEDIFF(DAY, c.beginScanDate, c.endScanDate) AS Diff
FROM
    cte AS c
WHERE
    DATEDIFF(DAY, c.beginScanDate, c.endScanDate) > 30;

IF statement in SQL

I have a table_changes (Id, start_date, end_date) and I want to add two columns, rank_end_date and new_end_date.
The problem in my data is that there is not always continuity (at the month level; the day of the month is not of interest) between an end_date and the start_date that comes just after it (see example 1), so in some cases I need to "stretch" end_date so that there is continuity at the month level.
In example 1, the new_end_date is 1/2/2015 and does not have to be 28/2/2015. If the end_date in rank 1 is earlier than 31/12/2015, stretch it to 31/12/9999.
Some Examples:
Ex1:
Id --start date --end_date --rank_end_date new_end_date
111 01/01/1970 1/1/1980 2 1/2/2015
111 01/03/2015 31/12/9999 1 31/12/9999
Ex2:
Id --start_date --end_date --rank_end_date new_end_date
111 01/01/1970 1/1/1980 1 31/12/9999
Ex3:
Id --start_date --end_date --rank_end_date new_end_date
111 01/01/1970 1/1/1980 2 01/05/1990
111 01/05/1990 31/12/1995 1 31/12/9999
Ex4:
Id --start_date --end_date --rank__end_date new_end_date
111 01/03/2015 31/12/9999 1 31/12/9999
Ex5:
Id --start_Date --end_date --rank__end_date new_end_date
111 01/02/2015 31/5/2015 2 01/5/2015
111 01/06/2015 31/12/9999 1 31/12/9999
the syntax should be something like this but I don't know how to write those IF statements in SQL:
if rank_end_date ==2 then new_end_date == 1/Month(start_date(rank_end_date - 1)) - 1 /2015
if rank_end_date ==1 then new_end_date == 31/12/2015
else new_end_date = end_date
-- Attempt from the question: rank each Id's rows by EndDate (latest = 1) and
-- derive New_End_Date from the rank.
SELECT
    t.[Id],
    t.[StartDate],
    t.[EndDate],
    t.Rank_End_Date,
    CASE
        WHEN t.Rank_End_Date = 2 THEN
            -- Open question from the post: this should use the StartDate of the
            -- record with Rank_End_Date = 1, but as written it rebuilds the date
            -- from the current (rank 2) record's own StartDate.
            CAST(CAST(YEAR(t.[StartDate]) AS varchar) + '-'
               + CAST(MONTH(t.[StartDate]) AS varchar) + '-'
               + CAST(DAY(t.[StartDate]) AS varchar) AS DATE)
        WHEN t.Rank_End_Date = 1 THEN '9999-12-31'
        ELSE t.[EndDate]
    END AS New_End_Date
FROM (
    -- Only Id, StartDate, EndDate and Rank_End_Date are exposed here, so the
    -- outer query can reference just these names.
    SELECT
        [Id],
        [StartDate],
        [EndDate],
        RANK() OVER (PARTITION BY [Id] ORDER BY [EndDate] DESC) AS Rank_End_Date
    FROM [dbo].[Changes]
) AS t;
Could anybody help in achieving the result?
If I've understood your question right, and you can only have values in rank_end_date of 1 or 2 then something like this query should give you the answer you're looking for. Either way, the LEAD (or LAG function if you sort the records ascending) will allow you to fetch the value from a different record.
-- Derive new_end_date from rank_end_date:
--   rank 1: stretch an end_date earlier than 2015-12-31 (the cutoff given in
--           the question) to 9999-12-31, otherwise keep end_date;
--   rank 2: take the start_date of the same ID's next record (its rank 1 row).
-- Literals use the yyyymmdd form and an explicit CAST because 'dd/mm/yyyy'
-- strings fail to convert under the default month/day/year setting.
-- NOTE(review): assumes start_date/end_date are date-typed and that
-- rank_end_date is available as a column of dbo.Changes -- confirm.
SELECT
    ID
  , start_date
  , end_date
  , rank_end_date
  , CASE
        WHEN rank_end_date = 1 THEN
            CASE
                WHEN end_date < CAST('20151231' AS date) THEN CAST('99991231' AS date)
                ELSE end_date
            END
        WHEN rank_end_date = 2 THEN
            -- PARTITION BY ID keeps LEAD from reading a row of a different ID.
            LEAD(start_date, 1) OVER (PARTITION BY ID ORDER BY rank_end_date DESC)
    END AS new_end_date
FROM dbo.Changes;
You can't use LEAD OR LAG functions in SQL Server 2008, so you can try this solution.
-- ranked_changes: each Id's rows numbered by StartDate, latest first.
;WITH ranked_changes AS
(
    SELECT
        ch.[Id] AS ID,
        ch.[StartDate] AS StartDate,
        ch.[EndDate] AS EndDate,
        ROW_NUMBER() OVER (PARTITION BY ch.[Id] ORDER BY ch.[StartDate] DESC) AS rn_Start_Date
    FROM [dbo].[Changes] AS ch
)
-- Pair every row with the same Id's row that is numbered one lower (the row
-- with the next-later StartDate). New_End_Date is the first day of the month
-- before that later StartDate, or 9999-12-31 when there is no such row.
SELECT
    cur.[Id],
    cur.[StartDate],
    cur.[EndDate],
    cur.rn_Start_Date AS Rank_end_date,
    ISNULL(
        DATEADD(MONTH, DATEDIFF(MONTH, 0, later.[StartDate]) - 1, 0),
        CAST('9999-12-31' AS DATE)
    ) AS New_End_Date
FROM ranked_changes AS cur
LEFT JOIN ranked_changes AS later
    ON later.[ID] = cur.[ID]
   AND later.rn_Start_Date = cur.rn_Start_Date - 1

Hard time creating PIVOT in SQL Server

Hi all I am very much confused about how to implement PIVOT in this scenario
AccId Year Month AccType Value
225 2012 7 1 2
225 2012 7 2 0
225 2012 7 3 0
226 2012 7 1 3
226 2012 7 2 0
226 2012 7 3 0
The main problem I am not able to solve is that AccId is joined with AccountTable and AccType is also joined with the AccountType table. I need output like this:
AccId AccName Year Month AccType AccTypeName Value AccType AccTypeName Value AccType AccTypeName Value
225 ABC 2012 7 1 AAA 2 2 BBB 0 3 CCC 0
226 ABC 2012 7 1 AAA 3 2 BBB 0 3 CCC 0
Any Help please. Thanks.
-- Fact rows from the question: one Value per account, period and account type.
-- (Table variables need the @ prefix.)
DECLARE @Table1 TABLE
    (AccId int, [Year] int, [Month] int, AccType int, Value int);

INSERT INTO @Table1
    (AccId, [Year], [Month], AccType, Value)
VALUES
    (225, 2012, 7, 1, 2),
    (225, 2012, 7, 2, 0),
    (225, 2012, 7, 3, 0),
    (226, 2012, 7, 1, 3),
    (226, 2012, 7, 2, 0),
    (226, 2012, 7, 3, 0);

-- Stand-ins for the account and account-type lookup tables mentioned in the
-- question; the names come from the desired output shown there.
-- TODO confirm the real table and column names.
DECLARE @Account TABLE
    (AccId int, AccName varchar(50));

INSERT INTO @Account
    (AccId, AccName)
VALUES
    (225, 'ABC'),
    (226, 'ABC');

DECLARE @AccountType TABLE
    (AccType int, AccTypeName varchar(50));

INSERT INTO @AccountType
    (AccType, AccTypeName)
VALUES
    (1, 'AAA'),
    (2, 'BBB'),
    (3, 'CCC');

-- One output row per account and period, with a (type, type name, value)
-- column triple for each of the three account types. Conditional aggregation
-- is used instead of PIVOT so the type name and the value are carried together
-- and the value columns hold Value itself, not a row number.
SELECT
    v.AccId,
    a.AccName,
    v.[Year],
    v.[Month],
    1 AS Acct1,
    MAX(CASE WHEN v.AccType = 1 THEN ty.AccTypeName END) AS Type1,
    COALESCE(MAX(CASE WHEN v.AccType = 1 THEN v.Value END), 0) AS val1,
    2 AS Acct2,
    MAX(CASE WHEN v.AccType = 2 THEN ty.AccTypeName END) AS Type2,
    COALESCE(MAX(CASE WHEN v.AccType = 2 THEN v.Value END), 0) AS val2,
    3 AS Acct3,
    MAX(CASE WHEN v.AccType = 3 THEN ty.AccTypeName END) AS Type3,
    COALESCE(MAX(CASE WHEN v.AccType = 3 THEN v.Value END), 0) AS val3
FROM @Table1 AS v
INNER JOIN @Account AS a
    ON a.AccId = v.AccId
INNER JOIN @AccountType AS ty
    ON ty.AccType = v.AccType
GROUP BY
    v.AccId,
    a.AccName,
    v.[Year],
    v.[Month]
ORDER BY
    v.AccId,
    v.[Year],
    v.[Month];

Resources