Update column with 4 consecutive purchases - sql-server

I need to update the Result column to 'Yes' for every row of a user if that user made 4 consecutive purchases without receiving a bonus in between. How can this be done? Please see my code below:
-- drop table #Test
-- Sample data for a single user: bonuses on days 0, -5 and -7, a run of four
-- purchases on days -1..-4, and one isolated purchase on day -6.
CREATE TABLE #Test
(
    UserID  int,
    TheType VARCHAR(10),
    TheDate DATETIME,
    Result  VARCHAR(10)
);

INSERT INTO #Test (UserID, TheType, TheDate, Result)
VALUES
    (1234, 'Bonus',    GETDATE(),     NULL),
    (1234, 'Purchase', GETDATE() - 1, NULL),
    (1234, 'Purchase', GETDATE() - 2, NULL),
    (1234, 'Purchase', GETDATE() - 3, NULL),
    (1234, 'Purchase', GETDATE() - 4, NULL),
    (1234, 'Bonus',    GETDATE() - 5, NULL),
    (1234, 'Purchase', GETDATE() - 6, NULL),
    (1234, 'Bonus',    GETDATE() - 7, NULL);

SELECT UserID, TheType, TheDate, Result
FROM #Test
ORDER BY TheDate;
Again, please note that the purchases need to be consecutive (By TheDate)

You can do it as shown below:
-- Flags every row that belongs to a run of 4 or more consecutive purchases
-- (consecutive by TheDate, per user, with no other row type in between).
;WITH numbered AS
(
    -- RowId numbers all of a user's rows by date; TypeRowId numbers them per
    -- type. Inside an unbroken run of one type both advance together, so
    -- (RowId - TypeRowId) is constant for the run and identifies it.
    SELECT
        t.UserID,
        t.TheType,
        t.TheDate,
        ROW_NUMBER() OVER (PARTITION BY t.UserID ORDER BY t.TheDate) AS RowId,
        ROW_NUMBER() OVER (PARTITION BY t.UserID, t.TheType ORDER BY t.TheDate) AS TypeRowId
    FROM #Test AS t
), purchase_streaks AS
(
    -- One row per qualifying run. Only purchases count: without the type
    -- filter, four consecutive bonuses would be flagged as well.
    SELECT
        n.UserID,
        MIN(n.RowId) AS MinId,
        MAX(n.RowId) AS MaxId
    FROM numbered AS n
    WHERE n.TheType = 'Purchase'
    GROUP BY
        n.UserID,
        n.RowId - n.TypeRowId
    HAVING COUNT(*) >= 4
)
SELECT
    n.UserID,
    n.TheType,
    n.TheDate,
    CASE WHEN s.MinId IS NULL THEN NULL ELSE 'YES' END AS Result
FROM numbered AS n
LEFT JOIN purchase_streaks AS s
    -- RowId restarts for every user, so the join must match on UserID too.
    ON  s.UserID = n.UserID
    AND n.RowId >= s.MinId
    AND n.RowId <= s.MaxId
ORDER BY n.UserID, n.TheDate;
Result:
UserID TheType TheDate Result
----------- ---------- ----------------------- - ------
1234 Bonus 2017-06-06 11:06:03.130 NULL
1234 Purchase 2017-06-07 11:06:03.130 NULL
1234 Bonus 2017-06-08 11:06:03.130 NULL
1234 Purchase 2017-06-09 11:06:03.130 YES
1234 Purchase 2017-06-10 11:06:03.130 YES
1234 Purchase 2017-06-11 11:06:03.130 YES
1234 Purchase 2017-06-12 11:06:03.130 YES
1234 Bonus 2017-06-13 11:06:03.130 NULL

First derive a grouping column that identifies each run of consecutive rows of the same type, then group by that column (keeping the groups with a count of 4) and inner join the result back to the original table.
-- Rebuild the sample table from scratch so the script can be re-run.
drop table if exists #Test;

create table #Test
(
    UserID  int,
    TheType varchar(10),
    TheDate date,
    Result  varchar(10)
);

-- Same eight sample rows as in the question, one per calendar day.
insert into #Test (UserID, TheType, TheDate, Result)
values
    (1234, 'Bonus',    getdate(),     null),
    (1234, 'Purchase', getdate() - 1, null),
    (1234, 'Purchase', getdate() - 2, null),
    (1234, 'Purchase', getdate() - 3, null),
    (1234, 'Purchase', getdate() - 4, null),
    (1234, 'Bonus',    getdate() - 5, null),
    (1234, 'Purchase', getdate() - 6, null),
    (1234, 'Bonus',    getdate() - 7, null);
drop table if exists #temp;

-- For every row, record the date and type of the same user's previous row
-- (by TheDate). LagType is NULL on each user's first row.
select
    t.UserID
  , t.TheType
  , t.TheDate
  , t.Result
  , lag(t.TheDate, 1) over (partition by t.UserID order by t.TheDate) as Lag01
  , lag(t.TheType, 1) over (partition by t.UserID order by t.TheDate) as LagType
into
    #temp
from #Test t;

-- Walk each run of consecutive same-type rows and stamp every row with the
-- date of the run's first row (Root).
-- NOTE(review): the chain is linked by date, so this assumes TheDate is unique
-- per user; duplicate dates would make the links ambiguous. Runs longer than
-- the default recursion limit of 100 need an OPTION (MAXRECURSION n) hint.
with cteHierarchy
as
(
    -- Anchor: the first row of each run. A user's very first row has no
    -- predecessor (LagType is NULL) and must be included explicitly, because
    -- "NULL <> TheType" is never true.
    select
        t.UserID
      , t.TheType
      , t.TheDate
      , t.Result
      , t.Lag01
      , t.TheDate as Root
    from #temp t
    where t.LagType is null
       or t.LagType <> t.TheType
    union all
    -- Recursive step: attach the next row of the same user when it has the
    -- same type as the row reached so far.
    select
        t.UserID
      , t.TheType
      , t.TheDate
      , t.Result
      , t.Lag01
      , cte.Root as Root
    from #temp t
    inner join cteHierarchy cte on t.UserID = cte.UserID
                               and t.Lag01 = cte.TheDate
                               and t.TheType = cte.TheType
)
-- Mark every row of a user who has at least one run of 4 or more purchases.
update test
set
    Result = 'Yes'
from #Test test
where test.UserID in (
    select
        h.UserID
    from cteHierarchy h
    where h.TheType = 'Purchase'
    group by h.UserID, h.Root
    having count(*) >= 4
);

select t.UserID, t.TheType, t.TheDate, t.Result
from #Test t
order by t.TheDate;

Related

Find absent dates of employee and one date before & after present

I have the following sample data:
--Table 1:
CREATE TABLE tbl_Emp_1
(
    EmpID INT,
    ColDate DATE
);
INSERT INTO tbl_Emp_1 (EmpID, ColDate)
VALUES
    (1,   '2019-11-01'),
    (2,   '2019-11-02'),
    (3,   '2019-11-11'),
    (4,   '2019-11-12'),
    (9,   '2019-11-13'),
    (6,   '2019-11-16'),
    (408, '2019-11-25');

--Table 2:
CREATE TABLE tbl_Emp_2
(
    EmpID INT,
    ColDate DATE
);
INSERT INTO tbl_Emp_2 (EmpID, ColDate)
VALUES
    (11, '2019-11-02'),
    (22, '2019-11-06'),
    (22, '2019-11-08'),
    (33, '2019-11-10'),
    (44, '2019-11-15');

--Table 3:
CREATE TABLE tbl_Emp_3
(
    EmpID INT,
    ColDate DATE
);
INSERT INTO tbl_Emp_3 (EmpID, ColDate)
VALUES
    (111, '2019-11-12'),
    (222, '2019-11-16'),
    (333, '2019-11-17'),
    (444, '2019-11-19'),
    (5,   '2019-11-22');
GO

-- View over the three tables; TableID records which table each row came from.
-- CREATE VIEW has to be the only statement in its batch, hence the GO
-- separators around it. Columns are listed explicitly so that a later schema
-- change in one table cannot silently break the UNION ALL.
CREATE VIEW vw_Emp AS
SELECT EmpID, ColDate, 1 AS TableID FROM tbl_Emp_1
UNION ALL
SELECT EmpID, ColDate, 2 AS TableID FROM tbl_Emp_2
UNION ALL
SELECT EmpID, ColDate, 3 AS TableID FROM tbl_Emp_3;
GO
Expected Output:
EmpID ColDate
--------------------------------
2 2019-11-02 ---TABLE 1 Starts
NULL 2019-11-03 - 2019-11-10
3 2019-11-11
9 2019-11-13
NULL 2019-11-14 - 2019-11-15
6 2019-11-16
NULL 2019-11-17 - 2019-11-24
408 2019-11-25
11 2019-11-02 ---TABLE 2 Data Starts
NULL 2019-11-03 - 2019-11-05
22 2019-11-06
NULL 2019-11-07
22 2019-11-08
NULL 2019-11-09
33 2019-11-10
NULL 2019-11-11 - 2019-11-14
44 2019-11-15
111 2019-11-12 ---TABLE 3 Data Starts
NULL 2019-11-13 - 2019-11-15
222 2019-11-16
333 2019-11-17
NULL 2019-11-18
444 2019-11-19
NULL 2019-11-20 - 2019-11-21
5 2019-11-22
About the output: display the date ranges on which no employee row exists (the absent dates), together with the employee rows for the dates immediately before and after each such range.
My try:
-- Builds #TempEmpData: one row per calendar day between the earliest and the
-- latest ColDate of each source table (TableID) in vw_Emp.
-- Local variables must be prefixed with @ (and the fetch status is
-- @@FETCH_STATUS); a # prefix denotes a temporary table and is not valid here.
DECLARE @TableID INT,
        @MinDate DATE,
        @MaxDate DATE;

-- One cursor row per TableID with that table's first and last date.
DECLARE Cur_Get_MinMax1 CURSOR FOR
    SELECT v1.TableID,
           MIN(v1.ColDate),
           MAX(v1.ColDate)
    FROM vw_Emp v1
    GROUP BY v1.TableID;

IF OBJECT_ID('tempdb..#TempEmpData') IS NOT NULL
    DROP TABLE #TempEmpData;

CREATE TABLE #TempEmpData
(
    Dates DATE,
    TableID int
);

OPEN Cur_Get_MinMax1;

FETCH NEXT FROM Cur_Get_MinMax1 INTO
    @TableID,
    @MinDate,
    @MaxDate;

WHILE @@FETCH_STATUS = 0
BEGIN
    PRINT(@TableID);
    PRINT(@MinDate);
    PRINT(@MaxDate);

    -- The cross join of sys.all_objects is used only as a row source; TOP keeps
    -- exactly as many rows as there are days in [@MinDate, @MaxDate], and
    -- ROW_NUMBER() turns them into consecutive dates starting at @MinDate.
    INSERT INTO #TempEmpData (Dates, TableID)
    SELECT TOP (DATEDIFF(DAY, @MinDate, @MaxDate) + 1)
           Date = DATEADD(DAY, ROW_NUMBER() OVER(ORDER BY a.object_id) - 1, @MinDate),
           @TableID
    FROM sys.all_objects a
    CROSS JOIN sys.all_objects b;

    FETCH NEXT FROM Cur_Get_MinMax1 INTO
        @TableID,
        @MinDate,
        @MaxDate;
END;

CLOSE Cur_Get_MinMax1;
DEALLOCATE Cur_Get_MinMax1;
Query 1:
-- Every generated calendar date, with the employee present on that date in
-- the same source table (EmpID is NULL when nobody is present).
SELECT
    emp.EmpID,
    cal.Dates
FROM #TempEmpData AS cal
LEFT JOIN vw_Emp AS emp
    ON  emp.TableID = cal.TableID
    AND emp.ColDate = cal.Dates
ORDER BY
    cal.TableID,
    cal.Dates;
Edit:
Query 2:
-- Attempt to label every generated date in #TempEmpData with either a
-- 'min~max' string or the plain date.
;WITH CTE AS
(
SELECT DISTINCT TableID,Dates,EmpID,
-- The correlated subquery is restricted to t1.Dates = a.Dates, so MIN(Dates)
-- and MAX(Dates) are taken over rows that all carry the same date; it only
-- contributes rows when a.rr = 1. STUFF(..., 1, 0, '') deletes zero characters
-- and inserts an empty string. COALESCE falls back to the plain date when the
-- subquery expression is NULL.
-- NOTE(review): as written this cannot produce a multi-day range, because no
-- step groups adjacent missing dates together.
coalesce(stuff((select distinct CAST(MIN(Dates) as varchar(10))+'~'+ CAST(MAX(Dates) as varchar(10)) from #TempEmpData t1 where a.rr = 1 AND t1.Dates=a.Dates for xml path('')),1,0,''),cast(Dates as varchar(10))) Coldate
FROM
(
SELECT v.EmpID,
t.Dates,
t.TableID,
-- RANK over EmpID across the whole result (no PARTITION BY).
-- NOTE(review): presumably meant to give rr = 1 to the rows whose EmpID is
-- NULL (dates with no employee) - confirm how NULLs are ordered here.
RANK() OVER(ORDER BY v.EmpID) rr
FROM vw_Emp v
-- RIGHT JOIN keeps every row of #TempEmpData; EmpID is NULL where vw_Emp has
-- no row for that date and TableID.
RIGHT JOIN #TempEmpData t ON v.ColDate = t.Dates AND v.TableID = t.TableID
GROUP BY t.TableID,v.EmpID,t.Dates,v.TableID
) a
)
SELECT EmpID,ColDate
FROM CTE
ORDER BY TableID,Dates
This uses the window functions LAG() and LEAD() to find the previous and next ColDate, based on ColDate ordering.
The first query returns the rows immediately before and after each gap in the dates. The second query returns the date range of each gap.
;WITH all_emp AS
(
    -- Stack the three tables, tagging each row with its table number.
    SELECT 1 AS tbl, EmpID, ColDate FROM tbl_Emp_1
    UNION ALL
    SELECT 2 AS tbl, EmpID, ColDate FROM tbl_Emp_2
    UNION ALL
    SELECT 3 AS tbl, EmpID, ColDate FROM tbl_Emp_3
),
neighbours AS
(
    -- Attach the previous and next date found in the same table.
    SELECT
        e.tbl,
        e.EmpID,
        e.ColDate,
        LAG(e.ColDate)  OVER (PARTITION BY e.tbl ORDER BY e.ColDate) AS prevColDate,
        LEAD(e.ColDate) OVER (PARTITION BY e.tbl ORDER BY e.ColDate) AS nextColDate
    FROM all_emp AS e
)
-- First query: rows that sit directly before or after a gap.
SELECT
    n.tbl,
    n.EmpID,
    n.ColDate,
    NULL AS EndDate
FROM neighbours AS n
WHERE n.ColDate <> DATEADD(DAY, 1, n.prevColDate)
   OR n.ColDate <> DATEADD(DAY, -1, n.nextColDate)

UNION ALL

-- Second query: the gap itself, from the day after this row to the day
-- before the next row.
SELECT
    n.tbl,
    NULL AS EmpID,
    DATEADD(DAY, 1, n.ColDate) AS ColDate,
    DATEADD(DAY, -1, n.nextColDate) AS EndDate
FROM neighbours AS n
WHERE n.ColDate <> DATEADD(DAY, -1, n.nextColDate)

ORDER BY tbl, ColDate;
Note: I didn't concatenate ColDate and EndDate the way you have shown in your expected result.
-- Missing date ranges in tbl_Emp_1, found by comparing each distinct date
-- with the one before it.
;WITH distinct_dates AS
(
    SELECT DISTINCT ColDate
    FROM dbo.tbl_Emp_1
),
with_previous AS
(
    SELECT
        d.ColDate,
        LAG(d.ColDate) OVER (ORDER BY d.ColDate) AS PreviousDate
    FROM distinct_dates AS d
)
SELECT
    CAST(NULL AS INT)               AS EmpId,
    DATEADD(day, 1, p.PreviousDate) AS StartDate,
    DATEADD(day, -1, p.ColDate)     AS EndDate
FROM with_previous AS p
-- A gap exists whenever this date is not the day right after the previous one.
WHERE p.ColDate <> DATEADD(day, 1, p.PreviousDate);
-- Missing date ranges in tbl_Emp_1 without window functions.
;WITH range_starts AS
(
    -- Dates that have no row for the following day: a gap starts after them.
    SELECT a.ColDate AS StartDate
    FROM dbo.tbl_Emp_1 AS a
    WHERE NOT EXISTS (SELECT * FROM dbo.tbl_Emp_1 AS b WHERE b.ColDate = DATEADD(day, 1, a.ColDate))
),
range_ends AS
(
    -- Dates that have no row for the preceding day: a gap ends before them.
    SELECT a.ColDate AS EndDate
    FROM dbo.tbl_Emp_1 AS a
    WHERE NOT EXISTS (SELECT * FROM dbo.tbl_Emp_1 AS b WHERE b.ColDate = DATEADD(day, -1, a.ColDate))
),
emptyperiods AS
(
    -- Pair each gap start with the nearest later gap end.
    SELECT
        DATEADD(day, 1, sd.StartDate)     AS StartDate,
        DATEADD(day, -1, MIN(ed.EndDate)) AS EndDate
    FROM range_starts AS sd
    INNER JOIN range_ends AS ed
        ON sd.StartDate < ed.EndDate
    GROUP BY sd.StartDate
)
SELECT
    CAST(NULL AS INT) AS EmpId,
    ep.StartDate,
    ep.EndDate
FROM emptyperiods AS ep;

SQL Server: max of date

Table 1
RefId Name
----- ----
1 A
2 B
Table 2
RefId Date
----- -----
1 29/03/2018 07:15
1 29/03/2018 07:30
2 29/03/2018 07:35
2 29/03/2018 07:40
I would like the result to be as follows (Refid name and the max(date) from table 1 and 2 for that refid)
1 A 29/03/2018 07:30
2 B 29/03/2018 07:40
Query used
-- Asker's original attempt, kept as posted; it fails with the error quoted below.
-- table1.refId and table1.name are selected but are not in the GROUP BY
-- (which lists only table2.refid) and are not inside an aggregate.
-- The scalar subquery is not correlated to the outer row, so it would return
-- the overall maximum date of table2 rather than one per refid.
select
table1.refId, table1.name,
(select max(date) from table2)
from
-- comma join; the join condition is in the WHERE clause
table1, table2
where
table1.refid = table2.refid
group by
table2.refid
I am getting the following error message
Column is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
Use JOIN and the aggregate function MAX with GROUP BY to select the max date for each RefId.
Query
-- Latest date per RefId, together with the name from Table1.
-- Every non-aggregated column in the select list is also in the GROUP BY.
select
    [t1].[RefId],
    [t1].[Name],
    max([t2].[date]) as [date]  -- closing parenthesis was missing after the column
from [Table1] [t1]
join [Table2] [t2]
    on [t1].[RefId] = [t2].[RefId]
group by
    [t1].[RefId],
    [t1].[Name];
'29/03/2018 07:15' is an nvarchar value; to take a correct MAX you need a datetime.
nvarchar convert to datetime: SELECT CONVERT(datetime, '29/03/2018 07:15', 103)
Answer to your example:
-- Table variables must be declared with an @ prefix; "DECLARE #name TABLE"
-- is not valid T-SQL.
DECLARE @Table1 TABLE (RefId int, Name nvarchar(10));
INSERT INTO @Table1 (RefId, Name)
VALUES (1, 'A'), (2, 'B');

-- Dates are stored as dd/mm/yyyy hh:mi text, as in the question.
DECLARE @Table2 TABLE (RefId int, [Date] nvarchar(50));
INSERT INTO @Table2 (RefId, [Date])
VALUES
    (1, '29/03/2018 07:15'),
    (1, '29/03/2018 07:30'),
    (2, '29/03/2018 07:35'),
    (2, '29/03/2018 07:40');

SELECT t1.RefId, t1.Name, t2.[Date]
FROM @Table1 AS t1
INNER JOIN
    -- Convert with style 103 (dd/mm/yyyy) before aggregating, so MAX compares
    -- real datetimes instead of strings.
    (SELECT RefId, MAX(CONVERT(datetime, [Date], 103)) AS [Date]
     FROM @Table2
     GROUP BY RefId) AS t2
    ON t1.RefId = t2.RefId;

Retrieve records based on preference

I have a table with sample data below.
PatId NetType
100 In
100 Out
100 NA
101 Out
101 NA
102 NA
103 In
When there are multiple NetType values for the same patient, return only the top one, prioritized in the order In, Out, NA. That is: when In/Out/NA are all available for a PatId, return only In; when only Out/NA are available for a PatId, return only Out. If there is no duplicate, just return the row as is. The output for the above scenario should be:
PatId NetType
100 In
101 Out
102 NA
103 In
Use row_number() to order your table by NetType
-- Keep one row per patient: the NetType with the highest priority
-- (In first, then Out, then anything else).
with prioritized as (
    select
        PatId,
        NetType,
        row_number() over (
            partition by PatId
            order by case
                         when NetType = 'In'  then 1
                         when NetType = 'Out' then 2
                         else 3
                     end
        ) as rn
    from myTable
)
select
    p.PatId,
    p.NetType
from prioritized as p
where p.rn = 1
Similar to uzi
-- Table variables take an @ prefix ("DECLARE #T AS TABLE" is not valid T-SQL),
-- and the variable is referenced with one consistent casing throughout.
DECLARE @T AS TABLE (PatId int, NetType varchar(20));
insert into @T (PatId, NetType) values
    (100, 'In')
  , (100, 'Out')
  , (100, 'NA')
  , (101, 'Out')
  , (101, 'NA')
  , (102, 'NA')
  , (103, 'In');

-- Lookup table that defines the priority of each NetType (lower ord wins).
DECLARE @O AS TABLE (ord int primary key, NetType varchar(20));
insert into @O (ord, NetType) values (1, 'In'), (2, 'Out'), (3, 'NA');

select tt.PatId, tt.NetType
from ( select t.PatId
            , t.NetType
            , ROW_NUMBER() over (partition by t.PatId order by o.ord) as rn
       from @T t
       -- inner join: a NetType that is missing from @O is dropped from the result
       join @O o
         on t.NetType = o.NetType
     ) tt
where tt.rn = 1;

Delete or Select Only row with same id but have values for certain fields

I would like to select/delete data with different rows but with same id.
For Example.
ID ColumnA
A Honda
A NULL
B Yamaha
B NULL
C NULL
C Merc
D NULL
E NULL
Output:
ID ColumnA
A Honda
B Yamaha
C Merc
D NULL
E NULL
First of all, I have already searched Google for a solution but found no answers. Any help is greatly appreciated.
You could use Row_number and TOP 1 WITH TIES
-- Table variables take an @ prefix; "DECLARE #SampleData AS TABLE" is not
-- valid T-SQL.
DECLARE @SampleData AS TABLE
(
    ID varchar(10),
    ColumnA varchar(20)
);

INSERT INTO @SampleData (ID, ColumnA)
VALUES
    ('A', 'Honda'),
    ('A', NULL),
    ('B', 'Yamaha'),
    ('B', NULL),
    ('C', NULL),
    ('C', 'Merc'),
    ('D', NULL),
    ('E', NULL);

-- ROW_NUMBER() is 1 for exactly one row per ID; TOP (1) WITH TIES returns
-- every row that ties on that ordering value, i.e. one row per ID.
-- Ordering ColumnA DESC puts a non-NULL value ahead of NULL within an ID.
SELECT TOP (1) WITH TIES
    sd.ID,
    sd.ColumnA
FROM @SampleData sd
ORDER BY ROW_NUMBER() OVER (PARTITION BY sd.ID ORDER BY sd.ColumnA DESC);
Return
ID ColumnA
------------
A Honda
B Yamaha
C Merc
D NULL
E NULL
-- Inline sample data, then keep the first row per ID when ColumnA is
-- ordered descending (a non-NULL value comes before NULL).
;WITH cte (ID, ColumnA) AS
(
    SELECT v.ID, v.ColumnA
    FROM (VALUES
            ('A', 'Honda'),
            ('A', NULL),
            ('B', 'Yamaha'),
            ('B', NULL),
            ('C', NULL),
            ('C', 'Merc'),
            ('D', NULL),
            ('E', NULL)
         ) AS v (ID, ColumnA)
),
numbered AS
(
    SELECT
        c.ID,
        c.ColumnA,
        ROW_NUMBER() OVER (PARTITION BY c.ID ORDER BY c.ColumnA DESC) AS Seq
    FROM cte AS c
)
SELECT n.ID, n.ColumnA
FROM numbered AS n
WHERE n.Seq = 1
Output:
ID ColumnA
A Honda
B Yamaha
C Merc
D NULL
E NULL
try this:
-- Table variables take an @ prefix; "declare #tb table" is not valid T-SQL.
declare @tb table (ID varchar(50), ColumnA varchar(50));

insert into @tb (ID, ColumnA)
values
    ('A', 'Honda'),
    ('A', null),
    ('B', 'Yamaha'),
    ('B', null),
    ('C', null),
    ('C', 'Merc'),
    ('D', null),
    ('E', null);

-- For IDs with several rows keep only the non-NULL ones; for IDs with a
-- single row keep that row whatever its value.
-- The original "having count(1)>1 or count(1)=1" was true for every group and
-- has been dropped; the AND/OR precedence is now explicit.
-- NOTE(review): an ID with several rows that are all NULL returns no row.
select a.ID, b.ColumnA
from (select ID, count(1) as cnt from @tb group by ID) as a
left join @tb as b
    on a.ID = b.ID
where (b.ColumnA is not null and a.cnt > 1)
   or a.cnt = 1;
result:
A Honda
B Yamaha
C Merc
D NULL
E NULL
-- Recreate the sample table so the script can be re-run.
drop table if exists dbo.Motorcycle;

create table dbo.Motorcycle
(
    ID      char(1),
    ColumnA varchar(100)
);

insert into dbo.Motorcycle (ID, ColumnA)
values
    ('A', 'Honda'),
    ('A', null),
    ('B', 'Yamaha'),
    ('B', null),
    ('C', null),
    ('C', 'Merc'),
    ('D', null),
    ('E', null);

-- Number the rows of each ID with ColumnA descending and keep the first one.
with ranked as (
    select
        m.ID,
        m.ColumnA,
        row_number() over (partition by m.ID order by m.ColumnA desc) as RBr
    from dbo.Motorcycle as m
)
select
    r.ID,
    r.ColumnA
from ranked as r
where r.RBr = 1

SQL Server Ranking issue

I am trying to apply ranking to my data set the logic is as follows:
For each ID, ordering by ID2 ASC and IsMaster DESC, give the first row rank 1 and only increase the rank when the ID4 value changes.
My dataset and desired output looks like:
Test data
CREATE TABLE Test_Table
(
    ID       INT,
    ID2      INT,
    IsMaster INT,
    ID4      VARCHAR(10)
)
GO
-- The trailing comment on each row is the rank the query should produce.
INSERT INTO Test_Table (ID, ID2, IsMaster, ID4)
VALUES
    (1, 101, 1, 'AAA'),  -- 1
    (1, 102, 0, 'AAA'),  -- 1
    (1, 103, 0, 'AAB'),  -- 2
    (1, 104, 0, 'AAB'),  -- 2
    (1, 105, 0, 'CCC'),  -- 3
    (2, 101, 1, 'AAA'),  -- 1
    (2, 102, 0, 'AAA'),  -- 1
    (2, 103, 0, 'AAA'),  -- 1
    (2, 104, 0, 'AAB'),  -- 2
    (2, 105, 0, 'CCC')   -- 3
this is what I have tried so far:
-- Asker's attempt: dense rank within each ID, ordered by ID2 and then IsMaster.
SELECT
    tt.ID,
    tt.ID2,
    tt.IsMaster,
    tt.ID4,
    DENSE_RANK() OVER (
        PARTITION BY tt.ID
        ORDER BY tt.ID2 ASC, tt.IsMaster DESC
    ) AS rn
FROM Test_Table AS tt
please please please help me thank you.
This is a gaps-and-islands problem.
First, use LAG() to see whether the previous row in the same partition has a different ID4.
It is important that you also partition by IsMaster.
Then you start a new island whenever ID4 changes.
Finally, use a cumulative SUM() to get the proper rank.
Sql Demo
WITH with_previous AS (
    -- ID4 of the preceding row in the same (ID, IsMaster) partition.
    SELECT
        t.ID,
        t.ID2,
        t.IsMaster,
        t.ID4,
        LAG(t.ID4) OVER (PARTITION BY t.ID, t.IsMaster ORDER BY t.ID2) AS prev
    FROM Test_Table AS t
),
flagged AS (
    -- island = 1 marks the first row of a new island, 0 a continuation.
    SELECT
        p.ID,
        p.ID2,
        p.IsMaster,
        p.ID4,
        p.prev,
        CASE WHEN p.ID4 = p.prev THEN 0 ELSE 1 END AS island
    FROM with_previous AS p
)
-- The running total of the island flags is the rank.
SELECT
    f.ID,
    f.ID2,
    f.IsMaster,
    f.ID4,
    f.prev,
    f.island,
    SUM(f.island) OVER (PARTITION BY f.ID, f.IsMaster ORDER BY f.ID2) AS rank
FROM flagged AS f
ORDER BY f.ID, f.ID2, f.IsMaster DESC
;
OUTPUT: You can see that when ID4 = PREV no new "island" is created, so the row keeps the same rank.
EDIT: You can simplify this by merging the first two queries into one.
WITH flagged AS (
    -- island = 1 when ID4 differs from the previous row of the same
    -- (ID, IsMaster) partition, or when there is no previous row.
    SELECT
        t.ID,
        t.ID2,
        t.IsMaster,
        t.ID4,
        CASE
            WHEN t.ID4 = LAG(t.ID4) OVER (PARTITION BY t.ID, t.IsMaster ORDER BY t.ID2) THEN 0
            ELSE 1
        END AS island
    FROM Test_Table AS t
)
-- The running total of the island flags is the rank.
SELECT
    f.ID,
    f.ID2,
    f.IsMaster,
    f.ID4,
    f.island,
    SUM(f.island) OVER (PARTITION BY f.ID, f.IsMaster ORDER BY f.ID2) AS rank
FROM flagged AS f
ORDER BY f.ID, f.ID2, f.IsMaster DESC
;
Another way probably less efficient but it will work.
WITH numbered AS
(
    -- Position of each row inside its ID, ordered by ID2.
    SELECT
        tt.ID,
        tt.ID2,
        tt.IsMaster,
        tt.ID4,
        ROW_NUMBER() OVER (PARTITION BY tt.ID ORDER BY tt.ID2) AS RowNum
    FROM dbo.Test_Table AS tt
),
ranked AS
(
    -- Anchor: the master row of each ID starts at rank 1.
    SELECT n.ID, n.ID2, n.IsMaster, n.ID4, n.RowNum, 1 AS [Rank]
    FROM numbered AS n
    WHERE n.IsMaster = 1

    UNION ALL

    -- Step to the next row of the same ID; the rank goes up by one only
    -- when ID4 differs from the row just visited.
    SELECT
        nxt.ID,
        nxt.ID2,
        nxt.IsMaster,
        nxt.ID4,
        nxt.RowNum,
        cur.[Rank] + CASE WHEN nxt.ID4 <> cur.ID4 THEN 1 ELSE 0 END AS [Rank]
    FROM ranked AS cur
    INNER JOIN numbered AS nxt
        ON  nxt.ID = cur.ID
        AND nxt.RowNum = cur.RowNum + 1
)
SELECT r.ID, r.ID2, r.IsMaster, r.ID4, r.[Rank]
FROM ranked AS r
ORDER BY r.ID, r.ID2, r.IsMaster, r.ID4, r.[Rank]
OPTION (MAXRECURSION 0);
Are you sure that your orders of ID2 and IsMaster affect the desired result, considering the rest of the data in ID and ID4?
I just tried to use the following code:
-- Rank each distinct ID4 within its ID (alphabetical order of ID4) and attach
-- that rank to every row.
-- PARTITION BY ID makes the rank restart at 1 for every ID; without it the
-- rank is global across all IDs and only matches the expected output when
-- every ID happens to contain the same set of ID4 values.
-- Reads Test_Table, the table created in the question.
; WITH CTE AS (
    SELECT DISTINCT
        ID,
        ID4,
        DENSE_RANK() OVER (PARTITION BY ID ORDER BY ID4) AS Rnk
    FROM Test_Table
)
SELECT t.*, c.Rnk
FROM Test_Table t
INNER JOIN CTE c ON t.ID = c.ID AND t.ID4 = c.ID4;
... and even with changing the order of ID2 and IsMaster I can't get it to "misbehave" - IF there's only one IsMaster = 1 per a group of ID4's and no duplicates in ID2.

Resources