T-SQL Identifying gaps in broken sequence of dates - sql-server

Please can you help with a problem I am having, which I believe is related to the gaps-and-islands problem in T-SQL? I am using SQL Server 2014.
I am trying to count the number of consecutive occurrences of a table/index combination, using a date column to distinguish between the broken chains.
Please see the below T-SQL to demonstrate what I am trying to achieve, in particular how do I calculate the Rnk column which for demo purposes I have manually hard-coded?
-- Demo fixture: one row per (FileDate, TableName, IndexName) observation.
-- Rnk is hard-coded to show the wanted result: it restarts at 1 whenever the
-- run of consecutive dates for a table/index pair is broken.
CREATE TABLE #test
(
    RowID     INT IDENTITY(1,1),
    FileDate  DATE,
    TableName VARCHAR(100),
    IndexName VARCHAR(100),
    Rnk       INT
);

INSERT INTO #test (FileDate, TableName, IndexName, Rnk)
VALUES
    ('2015-10-31', 't1', 'idx1', 1),
    ('2015-10-30', 't1', 'idx1', 2),
    ('2015-10-27', 't1', 'idx1', 1),
    ('2015-10-26', 't1', 'idx1', 2),
    ('2015-10-25', 't1', 'idx1', 3),
    ('2015-10-23', 't1', 'idx1', 1),
    ('2015-10-22', 't1', 'idx1', 2),
    ('2015-10-21', 't1', 'idx1', 3),
    ('2015-10-20', 't1', 'idx1', 4),
    ('2015-10-19', 't1', 'idx1', 5),
    ('2015-10-15', 't1', 'idx1', 1),
    ('2015-10-13', 't1', 'idx1', 1),
    ('2015-10-10', 't1', 'idx1', 1),
    ('2015-10-09', 't1', 'idx1', 2),
    ('2015-10-27', 't3', 'idx13', 1),
    ('2015-10-26', 't3', 'idx13', 2),
    ('2015-10-25', 't3', 'idx15', 1),
    ('2015-10-24', 't3', 'idx15', 2),
    ('2015-10-21', 't3', 'idx13', 1);

SELECT RowID, FileDate, TableName, IndexName, Rnk
FROM #test;

DROP TABLE #test;
In the screenshot I've attached, the portion of results highlighted would show I want the Rnk column to sequence the consecutive appearance of t1/idx between 2015-10-27 - 2015-10-25, but reset the number for the next appearance at 2015-10-23 through to 2015-10-19.
Can anyone assist me please?
Thanks.

Subtract a sequence of numbers from the date -- and the groups you have identified will have a constant value. Then you can use row_number():
-- seqnum numbers the rows of each table/index pair by ascending date.
-- filedate minus seqnum days is constant within a run of consecutive dates,
-- so it serves as the island key when numbering rows newest-first.
select numbered.*,
       row_number() over (
           partition by tablename,
                        indexname,
                        dateadd(day, -seqnum, filedate)
           order by filedate desc
       ) as rnk
from (
    select src.*,
           row_number() over (
               partition by tablename, indexname
               order by filedate
           ) as seqnum
    from t as src
) as numbered

I would use a cumulative approach:
-- Ranks every row inside its run of consecutive dates, per table/index pair.
--   gap : days from this row to the next-later date (1 for the latest row)
--   grp : running count of gaps larger than one day, i.e. the island number
-- All windows are ordered by filedate descending (rowid only breaks ties), so
-- the result no longer depends on rows having been inserted in date order.
select t.FileDate, t.TableName, t.IndexName,
       row_number() over (partition by tablename, indexname, grp
                          order by filedate desc, rowid) as Rnk
from (select t.*,
             sum(case when gap > 1 then 1 else 0 end)
                 over (partition by tablename, indexname
                       order by filedate desc, rowid
                       rows unbounded preceding) as grp
      from (select t.*,
                   isnull(datediff(day, filedate,
                                   lag(filedate) over (partition by tablename, indexname
                                                       order by filedate desc, rowid)),
                          1) as gap
            from #test t
           ) t
     ) t;

Similar to the answer from Yogesh, who beat me to it.
(hint: don't expect to be faster when typing an answer on your phone)
;WITH gaps AS
(
    -- DateGap is 1 on the first row of each run (no later row, or a later row
    -- more than one day away) and 0 otherwise.
    SELECT RowID, FileDate, TableName, IndexName,
           CASE
               WHEN DATEDIFF(DAY, FileDate,
                             LAG(FileDate) OVER (PARTITION BY TableName, IndexName
                                                 ORDER BY FileDate DESC)) <= 1
                   THEN 0
               ELSE 1
           END AS DateGap
    FROM #Test
),
islands AS
(
    -- The running total of DateGap numbers the runs, newest first.
    SELECT gaps.*,
           SUM(DateGap) OVER (PARTITION BY TableName, IndexName
                              ORDER BY FileDate DESC) AS DateRank
    FROM gaps
)
SELECT RowID, FileDate, TableName, IndexName,
       ROW_NUMBER() OVER (PARTITION BY TableName, IndexName, DateRank
                          ORDER BY FileDate DESC) AS Rnk
FROM islands
ORDER BY RowID;

Related

Get top column base on maximum other column using group by?

How can I get the top value of one column, based on the maximum of another column, using GROUP BY?
My raw data:
-- Sample data: price of each app per ID.
-- A temporary table is created because "DECLARE #TB TABLE" does not parse in
-- T-SQL (table variables are named @x); the #TB name is kept unchanged.
CREATE TABLE #TB (ID INT, APP VARCHAR(25), PRICE MONEY);

INSERT INTO #TB (ID, APP, PRICE)
VALUES
(1, 'Apple', 10),
(1, 'Banana', 30),
(1, 'Orange', 20),
(2, 'Apple', 20),
(2, 'Banana', 30),
(2, 'Orange', 40);
This is what I want:
Explain:
TOP_APP = Banana because MAX(PRICE) GROUP BY ID,
TOTAL = 60 because SUM(PRICE) GROUP BY ID.
You can use ROW_NUMBER and aggregation to achieve your required output-
DEMO HERE
-- Per ID: the app with the highest price, plus the sum of all prices for that ID.
SELECT ranked.ID,
       ranked.App,
       ranked.[SUM]
FROM (
    SELECT src.*,
           SUM(src.Price) OVER (PARTITION BY src.ID) AS [SUM],
           ROW_NUMBER() OVER (PARTITION BY src.ID ORDER BY src.Price DESC) AS RN
    FROM #tb AS src
) AS ranked
WHERE ranked.RN = 1
Demo on db<>fiddle
-- Same idea with a derived table: keep the highest-priced row of each ID and
-- report its app together with the ID's total price.
SELECT ranked.Id,
       ranked.app AS TOP_APP,
       ranked.Total
FROM (
    SELECT Id,
           app,
           price,
           SUM(Price) OVER (PARTITION BY ID) AS Total,
           ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Price DESC) AS [Row_Number]
    FROM #TB
) AS ranked
WHERE ranked.[Row_Number] = 1
Output
Id TOP_APP Total
1 Banana 60.0000
2 Orange 90.0000
Assuming the result you want is the total per app:
-- Total price per app, written to #T with a generated ID column.
-- NOTE(review): the ORDER BY is presumably meant to drive the IDENTITY
-- numbering; confirm that the numbering order is guaranteed for SELECT ... INTO.
SELECT
    IDENTITY(INT, 1, 1) AS ID,
    APP                 AS TOP_APP,
    SUM(PRICE)          AS TOTAL
INTO #T
FROM #TB
GROUP BY APP
ORDER BY SUM(PRICE);

SELECT * FROM #t;

Aggregate over the column that is not in group by list

For example I have the following table:
-- Sample readings: several values per day, each with its own time of day.
-- A temporary table is created because "declare #table table" does not parse
-- in T-SQL (table variables are named @x); the #table name is kept unchanged.
create table #table (val int, dt datetime);

insert into #table (val, dt) values
(10, '2018-3-20 16:00'),
(12, '2018-3-20 14:00'),
(14, '2018-3-20 12:00'),
(16, '2018-3-20 10:00'),
(10, '2018-3-19 14:00'),
(12, '2018-3-19 12:00'),
(14, '2018-3-19 10:00'),
(10, '2018-3-18 12:00'),
(12, '2018-3-18 10:00');
I try to aggregate using the column in group by, it is okay:
-- Highest val for each day-of-month, grouping directly on the expression.
select DATEPART(DAY, dt) as day,
       MAX(val)          as max_by_value
from #table
group by DATEPART(DAY, dt)
It returns:
day max_by_value
18 12
19 14
20 16
Now I also need the value at the latest time of each day, so I expect 10 as the result for each day.
I tried to use OVER, but it says: Column '#table.dt' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
-- Asker's failing attempt, kept as posted.
-- The query groups by DATEPART(DAY, dt), but the window's ORDER BY uses the raw
-- dt column, which is neither aggregated nor grouped; this is the statement
-- that produces the "invalid in the select list" error quoted in the question.
select DATEPART(DAY, dt), MAX(val) as max_by_value
,ROW_NUMBER() over (partition by DATEPART(DAY, dt) order by dt desc) as max_by_date
from #table
group by DATEPART(DAY, dt)
I understand why I get this error but don't understand how to fix my issue. Could you please help to find some way to fill [max_by_date] column?
As result I expect the following output:
day max_by_value max_by_time
18 12 10
19 14 10
20 16 10
Starting from 2012 version, you can use the First_value window function:
-- One row per day-of-month:
--   max_by_value : largest val of the day
--   max_by_date  : val of the row with the latest dt of the day
-- Both are window functions, so DISTINCT collapses the per-row repeats.
-- The first column is now named [day], matching the expected output.
SELECT DISTINCT
       DATEPART(DAY, dt) AS [day],
       MAX(val) OVER (PARTITION BY DATEPART(DAY, dt)) AS max_by_value,
       FIRST_VALUE(val) OVER (PARTITION BY DATEPART(DAY, dt) ORDER BY dt DESC) AS max_by_date
FROM #table
Note: I've used the OVER clause for the MAX function instead of using group by.
With 2008 version, you can use a subquery instead:
-- Variant without FIRST_VALUE: a correlated TOP 1 subquery fetches the val of
-- the latest row of the same day-of-month.
-- The first column is named [day], and every column reference is qualified so
-- it is clear that the subquery sorts on its own row's dt (t1), not the outer one.
SELECT DISTINCT
       DATEPART(DAY, t0.dt) AS [day],
       MAX(t0.val) OVER (PARTITION BY DATEPART(DAY, t0.dt)) AS max_by_value,
       (
           SELECT TOP 1 t1.val
           FROM #table AS t1
           WHERE DATEPART(DAY, t1.dt) = DATEPART(DAY, t0.dt)
           ORDER BY t1.dt DESC
       ) AS max_by_date
FROM #table AS t0

sql query that gets the difference between 2 recent rows for every row item that occurs more than once in a table

Sql query that gets the difference between 2 recent rows for every value that occurs more than once in a table.
for example
book value date
A 4 2017-07-17 09:16:44.480
A 2 2017-08-15 10:05:58.273
B 3 2017-04-15 10:05:58.273
C 2 2017-08-15 10:05:58.273
B 3 2017-04-13 10:05:58.273
B 3 2017-04-12 10:05:58.273
should return
A 2
B 0
Here is a solution:
-- Difference (largest minus smallest) between the two most recent values of
-- each book; books that have only one row are filtered out by the HAVING.
SELECT recent.book,
       MAX(recent.value) - MIN(recent.value) AS difference
FROM (
    SELECT book,
           value,
           ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS rownum
    FROM t
) AS recent
WHERE recent.rownum <= 2
GROUP BY recent.book
HAVING MAX(recent.rownum) >= 2
And here it is in SQLFiddle
-- Template: keys of the rows whose comparison fields occur more than once.
-- Bracketed names are placeholders to fill in; the stray closing parenthesis
-- at the end of the statement has been removed.
SELECT id_pk FROM [table] GROUP BY [fields you want to compare by] HAVING COUNT(*) > 1
This select returns the list of primary keys of the elements that are repeated.
So, in an outer select, you might wrap it like this:
-- Template: rows whose key is in the repeated-key list, limited to two rows.
-- Bracketed names are placeholders to fill in; the parentheses are now
-- balanced (the posted version closed one more than it opened).
Select * from [table] where id_pk in (
SELECT id_pk FROM [table] GROUP BY [fields you want to compare by] HAVING COUNT(*) > 1) limit 2
This is functional, but it may not be efficient, as I have not analysed its complexity.
Add a rownumber before calculating:
-- Fixture: the sample rows from the question.
create table #test ([book] char(1), [value] int, [date] datetime)
insert into #test values ('A', 4, '2017-07-17 09:16:44.480')
insert into #test values ('A', 2, '2017-08-15 10:05:58.273')
insert into #test values ('B', 3, '2017-04-15 10:05:58.273')
insert into #test values ('C', 2, '2017-08-15 10:05:58.273')
insert into #test values ('B', 3, '2017-04-13 10:05:58.273')
insert into #test values ('B', 3, '2017-04-12 10:05:58.273')
-- cte: number all rows globally, ordered by book and then date, so within a
-- book a higher rownumber means a later date.
;with cte as(
Select ROW_NUMBER () OVER (order by [book], [date] ) as rownumber, *
from #test)
-- [1] rows survive the join only if the same book has a row with a higher
-- rownumber, so the latest row of each book never appears on the [1] side and
-- single-row books drop out. first_value over the surviving [1] rows (highest
-- rownumber first) is therefore the second most recent value; val2 is the most
-- recent one. DISTINCT collapses the repeated join rows to one row per book.
select distinct [1].book, abs(first_value([1].[Value]) over (partition by [1].book order by [1].rownumber desc) - [2].val2) as [Difference]
from cte [1]
inner join
-- [2]: every row, carrying val2 = the value of its book's highest rownumber.
(select rownumber, book, first_value([Value]) over (partition by book order by rownumber desc) as val2
from cte) [2] on [1].book = [2].book and [1].rownumber < [2].rownumber
I would use analytic functions:
-- For each book, pair every row with the value of the row before it (by date)
-- and flag the most recent row with rn = 1; the final select reports the change
-- on that most recent row, skipping books that have no earlier row.
;WITH book_history AS (
    SELECT
        book,
        value,
        LAG(value) OVER (PARTITION BY book ORDER BY date) AS last_value,
        ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) AS rn
    FROM MyTable
)
SELECT
    book,
    value - last_value AS value_change
FROM book_history
WHERE last_value IS NOT NULL
  AND rn = 1
LAG() was added in SQL Server 2012, but even if you're on a higher version, your database must have the compatibility version set to 110 or higher for them to be available. Here's an alternative that should work on SQL Server 2005 or higher, or a database compatibility 90 or higher.
-- LAG-free variant: number each book's rows newest-first, then join the newest
-- row (rn = 1) to the one before it (rn = 2). Books with a single row have no
-- rn = 2 partner and are dropped by the inner join.
-- The comma between c1.book and the value_change expression was missing, which
-- made the statement a syntax error.
;with CTE as (
    SELECT book
          ,value
          ,ROW_NUMBER() OVER (PARTITION BY book ORDER BY date DESC) rn
    FROM MyTable
)
SELECT c1.book
      ,c1.value - c2.value as value_change
FROM CTE c1
INNER JOIN CTE c2
    ON c1.book = c2.book
WHERE c1.rn = 1
  AND c2.rn = 2

Count and Row_Number

I want to get the top 5 Zipcodes for each Store with the highest Customers in them (zipcodes).
Please find below my query:
-- Asker's non-working attempt, kept exactly as posted.
-- NOTE(review): problems visible in the statement itself:
--   * no comma between Count(T.[Customer]) and row_number() in the inner select
--   * the select lists use T.[Store] while PARTITION BY / GROUP BY use
--     T.[StoreGitanjali] - presumably one of the two names is a typo; confirm
--     against the Poscus table
--   * the outer query counts T.[Customer], but the derived table does not
--     expose a Customer column
SELECT T.[Store], T.[ZipCode], Count(T.[Customer])
FROM ( SELECT T.[Store], T.[ZipCode],
Count(T.[Customer]) row_number() over (Partition By T.[StoreGitanjali] Order By Count (T.[Customer]) desc) as RN
FROM [Marketing].[dbo].[Poscus] as T
Group By T.[StoreGitanjali], T.[ZipCode]) as T
where T.RN <=5
Group By T.[StoreGitanjali], T.[ZipCode]
Please let me know how to use Count here in this scenario.
Thank you!
Example
-- Fixture: one row per customer with the store and ZIP code they belong to.
CREATE TABLE #t
(
    ID       INT IDENTITY(1,1),
    Customer NVARCHAR(3),
    Store    NVARCHAR(5),
    ZIP      INT
);

INSERT INTO #t (Customer, Store, ZIP)
VALUES
    ('a', 'XYZ', 1234),
    ('b', 'XYZ', 1234),
    ('c', 'PQR', 1231),
    ('d', 'PQR', 1231),
    ('e', 'PQR', 1231),
    ('f', 'XYZ', 1232),
    ('g', 'XYZ', 1232),
    ('h', 'XYZ', 1232),
    ('i', 'PQR', 1236),
    ('j', 'PQR', 1236),
    ('k', 'LMN', 1237);

SELECT ID, Customer, Store, ZIP
FROM #t;
Here is a solution; adjust the WHERE Part < 2 filter to your requirement (for the top 5 ZIP codes per store, use Part <= 5).
-- Customers per Store/ZIP, ranked within each store from most to fewest.
-- The ranking sorts on the customer count (ZIP breaks ties); sorting on Store
-- inside a Store partition would number the ZIP codes arbitrarily.
SELECT TotalCustomer, Store, ZIP, Part FROM (
    SELECT
        COUNT(1) AS TotalCustomer,
        Store,
        ZIP,
        ROW_NUMBER() OVER (PARTITION BY Store ORDER BY COUNT(1) DESC, ZIP) AS Part
    FROM #t
    GROUP BY Store, ZIP
) t
WHERE Part < 2        -- top ZIP per store; widen (e.g. Part <= 5) for more
ORDER BY Store, Part
-- Step 1: distinct customers per Store/ZIP.
-- Step 2: number each store's ZIP codes from the highest count down.
-- Final select: keep the two best-ranked ZIP codes of every store.
;WITH zip_counts AS
(
    SELECT Store,
           Zip,
           COUNT(DISTINCT Customer) AS CustCount
    FROM #t
    GROUP BY Store, Zip
),
ranked AS
(
    SELECT zip_counts.*,
           ROW_NUMBER() OVER (PARTITION BY Store ORDER BY CustCount DESC) AS DenRank
    FROM zip_counts
)
SELECT ranked.*
FROM ranked
WHERE ranked.DenRank <= 2

How do I form a query with a running count retaining the order

I have a trace table which looks like this
I'd like to get a running count which looks like the following output. It's very important that I retain the order, as this is the execution order of the stored procedures; it will help me analyze bottlenecks in the system.
I have tried
-- First attempt: one row per objectname, so separate runs of the same
-- procedure are merged into a single count.
select
    max(RowNumber),
    objectname,
    count(1)
from rob
where eventclass = 42
group by objectname
But that mucks up the order
Is this even possible in SQL?
UPDATE:
I tried this
-- Second attempt: RowNumber is part of the grouping key, so every trace row
-- becomes its own group.
select
    RowNumber,
    objectname,
    count(1)
from rob
where eventclass = 42
group by objectname, RowNumber
order by RowNumber
But this, as the query quite rightly says, groups by RowNumber (which has to be in the GROUP BY for me to use it in the ORDER BY).
-- grp is the difference between a row's position in the whole trace and its
-- position among the rows of the same objectname; it is constant within an
-- unbroken run of one objectname. Grouping on (grp, objectname) counts each
-- run, and ordering on the run's first RowNumber keeps execution order.
select runs.objectname,
       count(*)
from (
    select trace.RowNumber,
           trace.objectname,
           row_number() over (order by trace.RowNumber)
             - row_number() over (order by trace.objectname, trace.RowNumber) as grp
    from rob as trace
    where trace.eventclass = 42
) as runs
group by runs.grp, runs.objectname
order by min(runs.RowNumber)
Working sample using a table variable.
-- Self-contained sample: the trace rows live in a table variable.
-- Table variables are named with @; "declare #T table" does not parse.
declare @T table
(
RowNumber int,
objectname varchar(50)
)
insert into @T (RowNumber, objectname) values
(8, 'f_system_log_init'),
(10, 'f_purge_system_log'),
(25, 'f_system_log_msg'),
(65, 'f_system_log_msg'),
(104, 'f_system_log_msg'),
(143, 'f_system_log_msg'),
(182, 'f_system_log_msg'),
(221, 'f_system_log_msg'),
(5015, 'f_get_system_logs_parent_log_id_for_dataloader'),
(5055, 'f_system_log_msg'),
(5096, 'f_system_log_msg')
-- grp is constant within an unbroken run of one objectname: it is the row's
-- position in the whole trace minus its position among rows of the same name.
-- Ordering on the first RowNumber of each run preserves execution order.
select objectname,
count(*)
from (
select RowNumber,
objectname,
row_number() over(order by RowNumber) - row_number() over(order by objectname, RowNumber) as grp
from @T
) as T
group by grp, objectname
order by min(RowNumber)
Result:
objectname
-------------------------------------------------- -----------
f_system_log_init 1
f_purge_system_log 1
f_system_log_msg 6
f_get_system_logs_parent_log_id_for_dataloader 1
f_system_log_msg 2
Try this:
-- CTE  : numbers the trace rows in execution order (rn).
-- CTE1 : walks the rows one by one; incr stays the same while objectname
--        repeats and increases by one each time it changes, so incr numbers
--        the runs.
-- The final select counts the rows of every run. ORDER BY incr returns the
-- runs in execution order, and MAXRECURSION 0 lifts the default limit of 100
-- recursion levels, which a longer trace would otherwise exceed.
;WITH CTE as (select *,ROW_NUMBER() over(order by rownumber,objectname) rn from test101)
,CTE1 as(
select *,1 as incr from CTE where rn=1
union all
select t.*,
CASE WHEN t.objectname=c.objectname then incr else incr+1 end as incr
from CTE t inner join CTE1 c
on t.rn=c.rn+1
)
select max(objectname),count(incr) from CTE1
group by incr
order by incr
option (maxrecursion 0)

Resources