Select rows based on every nth interval of time

Select rows based on every nth interval of time - sql-server

I have a table with a primary key (bigint), a datetime, a value, and a foreign key to a configuration table; it consists of hundreds of thousands of rows. I want to be able to obtain one row per variable time interval. For example:
Select Timestamp, value from myTable where configID=3
AND{most recent for 15 min interval}
I have a CTE query that returns multiple rows for each interval
-- Readings for ConfigID 435 whose timestamp lies in a minute that is an exact
-- multiple of 15; every reading inside such a minute is returned.
WITH Time_Interval (timestamp, value, minutes) AS
(
    SELECT
        src.timestamp,
        src.value,
        DATEPART(MINUTE, src.Timestamp)
    FROM myTable AS src
    WHERE src.ConfigID = 435
      AND src.Timestamp >= '12/01/2012'
      AND src.Timestamp <= 'Jan 10, 2013'
      AND DATEPART(MINUTE, src.Timestamp) % 15 = 0
)
-- DISTINCT removes exact duplicates, which is what grouping by every
-- selected column does.
SELECT DISTINCT
    ti.Timestamp,
    ti.value,
    ti.minutes
FROM Time_Interval AS ti
ORDER BY ti.Timestamp
such as:
2012-12-19 18:15:22.040 6.98 15
2012-12-19 18:15:29.887 6.98 15
2012-12-19 18:15:33.480 7.02 15
2012-12-19 18:15:49.370 7.01 15
2012-12-19 18:30:41.920 6.95 30
2012-12-19 18:30:52.437 6.93 30
2012-12-19 19:15:18.467 7.13 15
2012-12-19 19:15:34.250 7.11 15
2012-12-19 19:15:49.813 7.12 15
But as can be seen there are 4 for the 1st 15 minute interval, 2 for the next interval, etc... Worse,
if no data was obtained at an exact timestamp on the 15-minute mark, then there will be no value at all for that interval.
What I want is the most recent value for each fifteen-minute interval, even if the only data for that interval occurred 1 second after the start of the interval.
I was thinking of LEAD/OVER, but again, the rows are not organized that way. The primary key is a bigint and is the clustered index. Both the timestamp column and the ConfigID column are indexed. The above query returns 4583 rows in under a second.
Thanks for any help.

Try this on for size. It will even handle returning one row for instances when you have multiple timestamps for a given interval.
NOTE: This assumes your Bigint PK column is named: idx. Just substitute where you see "idx" if it is not.
-- Returns the most recent myTable row in each 15-minute interval for
-- ConfigID 435 between 1 Dec 2012 and 10 Jan 2013.
-- NOTE: assumes the bigint primary-key column of myTable is named idx.
;WITH Interval_Helper([minute], minute_group)
AS
(
    -- Lookup from minute-of-hour (0-59) to quarter-hour group (1-4).
    -- UNION ALL: the 60 rows are already distinct, so no de-duplication is needed.
    SELECT 0, 1 UNION ALL SELECT 1, 1 UNION ALL SELECT 2, 1 UNION ALL SELECT 3, 1 UNION ALL SELECT 4, 1
    UNION ALL SELECT 5, 1 UNION ALL SELECT 6, 1 UNION ALL SELECT 7, 1 UNION ALL SELECT 8, 1 UNION ALL SELECT 9, 1
    UNION ALL SELECT 10, 1 UNION ALL SELECT 11, 1 UNION ALL SELECT 12, 1 UNION ALL SELECT 13, 1 UNION ALL SELECT 14, 1
    UNION ALL SELECT 15, 2 UNION ALL SELECT 16, 2 UNION ALL SELECT 17, 2 UNION ALL SELECT 18, 2 UNION ALL SELECT 19, 2
    UNION ALL SELECT 20, 2 UNION ALL SELECT 21, 2 UNION ALL SELECT 22, 2 UNION ALL SELECT 23, 2 UNION ALL SELECT 24, 2
    UNION ALL SELECT 25, 2 UNION ALL SELECT 26, 2 UNION ALL SELECT 27, 2 UNION ALL SELECT 28, 2 UNION ALL SELECT 29, 2
    UNION ALL SELECT 30, 3 UNION ALL SELECT 31, 3 UNION ALL SELECT 32, 3 UNION ALL SELECT 33, 3 UNION ALL SELECT 34, 3
    UNION ALL SELECT 35, 3 UNION ALL SELECT 36, 3 UNION ALL SELECT 37, 3 UNION ALL SELECT 38, 3 UNION ALL SELECT 39, 3
    UNION ALL SELECT 40, 3 UNION ALL SELECT 41, 3 UNION ALL SELECT 42, 3 UNION ALL SELECT 43, 3 UNION ALL SELECT 44, 3
    UNION ALL SELECT 45, 4 UNION ALL SELECT 46, 4 UNION ALL SELECT 47, 4 UNION ALL SELECT 48, 4 UNION ALL SELECT 49, 4
    UNION ALL SELECT 50, 4 UNION ALL SELECT 51, 4 UNION ALL SELECT 52, 4 UNION ALL SELECT 53, 4 UNION ALL SELECT 54, 4
    UNION ALL SELECT 55, 4 UNION ALL SELECT 56, 4 UNION ALL SELECT 57, 4 UNION ALL SELECT 58, 4 UNION ALL SELECT 59, 4
)
,Time_Interval([timestamp], value, [date], [hour], minute_group)
AS
(
    -- Tags every reading in range with its calendar day, hour and quarter-hour group.
    SELECT A.[Timestamp]
          ,A.value
           -- Truncate to midnight arithmetically; a round trip through a
           -- mm/dd/yyyy string depends on the session's DATEFORMAT/language.
          ,DATEADD(DAY, DATEDIFF(DAY, 0, A.[Timestamp]), 0)
          ,DATEPART(HOUR, A.[Timestamp])
          ,B.minute_group
    FROM myTable A
    JOIN Interval_Helper B
        ON DATEPART(minute, A.[Timestamp]) = B.[minute]
    -- yyyymmdd literals are interpreted the same way under every DATEFORMAT setting.
    WHERE A.[Timestamp] >= '20121201'
      AND A.[Timestamp] <= '20130110'
      AND A.ConfigID = 435
)
,Time_Interval_TimeGroup([date], [hour], [minute], MaxTimestamp)
AS
(
    -- Latest timestamp per (day, hour, quarter-hour group).
    SELECT [date]
          ,[hour]
          ,minute_group
          ,MAX([Timestamp]) AS MaxTimestamp
    FROM Time_Interval
    GROUP BY [date]
            ,[hour]
            ,minute_group
)
,Time_Interval_TimeGroup_Latest(MaxTimestamp, MaxIdx)
AS
(
    -- If several rows share the latest timestamp, keep the one with the highest idx.
    SELECT B.MaxTimestamp
          ,MAX(A.idx) AS MaxIdx
    FROM myTable A
    JOIN Time_Interval_TimeGroup B
        ON A.[Timestamp] = B.MaxTimestamp
    -- Without this filter a row from a different configuration that happens to
    -- share the timestamp could supply the idx and be returned by mistake.
    WHERE A.ConfigID = 435
    GROUP BY B.MaxTimestamp
)
SELECT A.*
FROM myTable A
JOIN Time_Interval_TimeGroup_Latest B
    ON A.idx = B.MaxIdx
ORDER BY A.[timestamp]
This is another take on the clever time group function from @MntManChris below:
-- Maps a datetime to the ordinal of the time bucket it falls in.
--   @DatePart  selects the bucket size (1-8, see the CASE branches below)
--   @Date      the value to classify
-- Returns the bucket number, or -1 when @DatePart is not one of 1-8.
CREATE FUNCTION dbo.fGetTimeGroup (@DatePart tinyint, @Date datetime)
RETURNS int
AS
BEGIN
    RETURN CASE @DatePart
               WHEN 1 THEN DATEPART(mi, @Date)          -- 1 min:    0-59
               WHEN 2 THEN DATEPART(mi, @Date)/5 + 1    -- 5 min:    1-12
               WHEN 3 THEN DATEPART(mi, @Date)/15 + 1   -- 15 min:   1-4
               WHEN 4 THEN DATEPART(mi, @Date)/30 + 1   -- 30 min:   1-2
               WHEN 5 THEN DATEPART(hh, @Date)          -- hr:       0-23
               WHEN 6 THEN DATEPART(hh, @Date)/6 + 1    -- 6 hours:  1-4
               WHEN 7 THEN DATEPART(hh, @Date)/12 + 1   -- 12 hours: 1-2
               WHEN 8 THEN DATEPART(d, @Date)           -- day of month
               ELSE -1
           END
END

If you want to partition in 15 minute interval use datediff in minutes and divide by 15.
And use that partition to rank each interval.
-- Latest reading in each 15-minute bucket.
WITH myTbl AS
(
    SELECT
        [timestamp],
        value,
        -- DATEDIFF(Mi, 0, Timestamp) / 15 is the count of whole 15-minute
        -- buckets since the zero date, so each bucket is its own partition;
        -- descending order gives the newest row in the bucket rank 1.
        -- Rows with identical timestamps tie and all receive the same rank.
        RANK() OVER (PARTITION BY DATEDIFF(Mi, 0, [Timestamp]) / 15 ORDER BY [Timestamp] DESC) AS RK
    FROM myTable
    --WHERE Timestamp BETWEEN '' AND ''
)
SELECT [timestamp], value, RK
FROM myTbl
WHERE RK <= 1

As my comment above says, I've used Rob's answer but implemented a user function to eliminate the Interval_Helper table and the first join. Here is the code for the user function.
BEGIN
    -- @DatePart selects the bucket size; the matching branch returns the
    -- ordinal of the bucket that @Date falls in.
    IF @DatePart = 1 -- min
        RETURN DATEPART(mi, @Date)
    IF @DatePart = 2 -- 5 min
        RETURN DATEPART(mi, @Date)/5 + 1
    IF @DatePart = 3 -- 15 min
        RETURN DATEPART(mi, @Date)/15 + 1
    IF @DatePart = 4 -- 30 min
        RETURN DATEPART(mi, @Date)/30 + 1
    IF @DatePart = 5 -- hr
        RETURN DATEPART(hh, @Date)
    IF @DatePart = 6 -- 6 hours
        RETURN DATEPART(hh, @Date)/6 + 1
    IF @DatePart = 7 -- 12 hours
        RETURN DATEPART(hh, @Date)/12 + 1
    IF @DatePart = 8 -- day
        RETURN DATEPART(d, @Date)
    -- Unrecognised @DatePart value.
    RETURN -1
END
This then made the Time_Interval table look like
-- Tags each reading for ConfigID 435 with its calendar day, day-of-month and
-- the time group computed by dbo.fGetTimeGroup for the requested @tInterval.
;WITH Time_Interval([timestamp], value, [date], [day], time_group)
AS
(
    SELECT A.[Timestamp]
          ,A.value
           -- Truncate to midnight arithmetically; a round trip through a
           -- mm/dd/yyyy string depends on the session's DATEFORMAT/language.
          ,DATEADD(DAY, DATEDIFF(DAY, 0, A.[Timestamp]), 0)
          ,DATEPART(dd, A.[Timestamp])
          ,dbo.fGetTimeGroup(@tInterval, A.[Timestamp])
    FROM myTable A
    -- yyyymmdd literals are interpreted the same way under every DATEFORMAT setting.
    WHERE A.[Timestamp] >= '20121201'
      AND A.[Timestamp] <= '20130110'
      AND A.ConfigID = 435
)
Since there is a switch from "hours" to "days" as @TimeInterval goes from 1 hr to 6 hr, 12 hr, or every day, I also had to have the Time_Interval_TimeGroup table switch from grouping by [hour] to grouping by [day], and of course include that column in the select list.
Since this is part of a much larger abstract DB schema where both the table in question and the database are functions of the ConfigID, and thus required dynamic SQL, implementing this switch in grouping was not an issue: I simply implemented two different dynamic SQL sections based on the value of @TimeInterval.
Thanks

Related

Uninterrupted occurrences of weekly events

-- Sample chart data: one row per title (CH_TTL_ID) per weekly issue date.
DECLARE @t TABLE ([CH_FMT_ID] nvarchar(3), [CH_ISS_DT] date, [CH_RANK] int, [CH_TTL_ID] bigint);

INSERT INTO @t ([CH_FMT_ID], [CH_ISS_DT], [CH_RANK], [CH_TTL_ID]) VALUES
('HSI', '6/15/2002', 28, 397130),
('HSI', '6/8/2002', 24, 397130),
('HSI', '6/1/2002', 23, 397130),
('HSI', '5/25/2002', 20, 397130),
('HSI', '5/18/2002', 13, 397130),
('HSI', '5/11/2002', 12, 397130),
('HSI', '5/4/2002', 11, 397130),
('HSI', '6/15/2002', 28 , 111111),
('HSI', '6/8/2002', 24 , 111111),
('HSI', '6/1/2002', 23 , 111111),
('HSI', '5/25/2002', 20 , 111111),
('HSI', '5/18/2002', 13 , 111111),
('HSI', '5/11/2011', 12 , 111111),
('HSI', '5/4/2011', 11 , 111111);

-- One summary row per title and chart format.
SELECT chart.CH_TTL_ID
      ,chart.[CH_FMT_ID] AS Chart
      ,MIN(chart.[CH_RANK]) AS Peak
      ,MAX(chart.[CH_RANK]) AS Trough
      ,COUNT(chart.[CH_RANK]) AS Weeks
      ,MIN(chart.CH_ISS_DT) AS EntryDate
      ,MAX(chart.CH_ISS_DT) AS ExitDate
       -- Whole weeks between the first and last issue date; this spans any
       -- weeks in between for which no row exists.
      ,(DATEDIFF(day, MIN(chart.CH_ISS_DT), MAX(chart.CH_ISS_DT)) / 7) AS Weeks_Charted
FROM @t chart
GROUP BY chart.CH_TTL_ID
        ,chart.[CH_FMT_ID]
ORDER BY Weeks_Charted DESC
The CH_ISS_DT is a date which is updated 1x / week. So the dates are 2017-12-31,2017-12-24, 2017-12-17, etc. The problem comes with the Weeks_Charted column. If something happens the first time in 2007 and doesn't happen again until 2017, the Weeks_Charted will count all weeks in between, even when the data is not present all the time.
This sample returns:
CH_TTL_ID,Chart,Peak,Trough,Weeks,EntryDate,ExitDate,Weeks_Charted
111111,HSI,11,28,7,2002-05-18,2011-05-11,468
397130,HSI,11,28,7,2002-05-04,2002-06-15,6
I am indifferent if the CH_TTL_ID returns 2 rows, one with a 2 as weeks_charted and the other with a 5, or just returns the 2 for the most recent data and the max, mins are different.

Try this query. Query groups by continuous weeks.
-- Summarises each run of consecutive weekly chart entries per title and format.
SELECT
    runs.CH_TTL_ID,
    runs.CH_FMT_ID AS Chart,
    MIN(runs.CH_RANK) AS Peak,
    MAX(runs.CH_RANK) AS Trough,
    COUNT(runs.CH_RANK) AS Weeks,
    MIN(runs.CH_ISS_DT) AS EntryDate,
    MAX(runs.CH_ISS_DT) AS ExitDate
FROM (
    -- Running total of the "new run" flags numbers the runs within a title/format.
    SELECT
        flagged.*,
        SUM(flagged.N) OVER (PARTITION BY flagged.CH_TTL_ID, flagged.CH_FMT_ID
                             ORDER BY flagged.CH_ISS_DT) AS grp
    FROM (
        -- N = 0 when the previous entry is exactly 7 days earlier, else 1
        -- (including the first entry, where LAG returns NULL).
        -- The partition matches the outer GROUP BY so that entries of different
        -- formats of the same title are never compared with each other.
        SELECT
            chart.*,
            CASE
                WHEN DATEDIFF(dd,
                              LAG(chart.CH_ISS_DT) OVER (PARTITION BY chart.CH_TTL_ID, chart.CH_FMT_ID
                                                         ORDER BY chart.CH_ISS_DT),
                              chart.CH_ISS_DT) = 7
                THEN 0
                ELSE 1
            END AS N
        FROM @t AS chart
    ) AS flagged
) AS runs
GROUP BY runs.CH_TTL_ID, runs.CH_FMT_ID, runs.grp
One option is to use Recursive CTE in SQL 2008 for this kind of problems
-- Same grouping into runs of consecutive weeks, without LAG: rows are numbered
-- per title/format and walked one at a time by a recursive CTE.
;WITH cte AS (
    SELECT
        chart.CH_FMT_ID,
        chart.CH_ISS_DT,
        chart.CH_RANK,
        chart.CH_TTL_ID,
        ROW_NUMBER() OVER (PARTITION BY chart.CH_TTL_ID, chart.CH_FMT_ID
                           ORDER BY chart.CH_ISS_DT) AS rn
    FROM @t AS chart
)
, rcte AS (
    -- Anchor: the earliest entry of every title/format starts run 1.
    SELECT
        CH_FMT_ID, CH_ISS_DT, CH_RANK, CH_TTL_ID, rn,
        1 AS grp
    FROM cte
    WHERE rn = 1

    UNION ALL

    -- Step to the next entry; stay in the same run only when it is exactly
    -- 7 days after the previous one, otherwise start a new run.
    SELECT
        a.CH_FMT_ID, b.CH_ISS_DT, b.CH_RANK, a.CH_TTL_ID, b.rn,
        a.grp + CASE WHEN DATEDIFF(dd, a.CH_ISS_DT, b.CH_ISS_DT) = 7 THEN 0 ELSE 1 END
    FROM rcte a
    JOIN cte b
        ON a.CH_FMT_ID = b.CH_FMT_ID
       AND a.CH_TTL_ID = b.CH_TTL_ID
       AND a.rn + 1 = b.rn
)
SELECT
    CH_TTL_ID,
    CH_FMT_ID AS Chart,
    MIN(CH_RANK) AS Peak,
    MAX(CH_RANK) AS Trough,
    COUNT(CH_RANK) AS Weeks,
    MIN(CH_ISS_DT) AS EntryDate,
    MAX(CH_ISS_DT) AS ExitDate
FROM rcte
GROUP BY CH_TTL_ID, CH_FMT_ID, grp
ORDER BY CH_FMT_ID, CH_TTL_ID
-- Recursion depth equals the number of entries per title/format, so lift the
-- default limit of 100.
OPTION (MAXRECURSION 0)

writing a query and giving a score

I am trying to write a query for the following:
Count the number of transactions in the previous 90 days. Note: This needs to
be averaged out for new members = (No of transactions / Days being a
member) x 90
New members are the one whose DateCreated is between 0 and 90 days from todays date.
Table structure:
Column Name Datatype
---------------------------------
Member_No nvarchar(255)
Order_No int
Transaction_Date datetime
Net money
Date_Created datetime
Also, the final step is after counting the transactions, I need to give a score. So if a member has count more than 8 then give a score of 5.
Following are the ranges:`
Transaction count Score
>8 5
6-8 4
4-6 3
2-4 2
0-2 1
Let me know if any queries

Hope the below code works for you (Not tested because not having Schema.)
-- Scores each member by the number of distinct orders in the last 90 days.
-- For members created within the last 90 days the count is scaled to a
-- 90-day rate: (orders / days as member) * 90.
;WITH RANKS AS
(
    -- Inclusive transaction-count ranges and the score each one earns;
    -- a NULL upper bound means "no upper limit".
    SELECT 0 AS FROM_TR, 2 AS TO_TR, 1 AS SCORE
    UNION ALL
    SELECT 3 AS FROM_TR, 4 AS TO_TR, 2 AS SCORE
    UNION ALL
    SELECT 5 AS FROM_TR, 6 AS TO_TR, 3 AS SCORE
    UNION ALL
    SELECT 7 AS FROM_TR, 8 AS TO_TR, 4 AS SCORE
    UNION ALL
    SELECT 9 AS FROM_TR, NULL AS TO_TR, 5 AS SCORE
)
, MEMBER_TRANS AS (
    SELECT Member_No AS MEMBER_NO
          ,Date_Created AS DATE_CREATED
          ,COUNT(DISTINCT Order_No) AS ACTUAL_TRANSACT_COUNT
          ,CASE
               WHEN Date_Created BETWEEN DATEADD(DD, -90, GETDATE()) AND GETDATE()
               THEN CAST(
                        -- Multiply by 90.0 first: integer division would truncate
                        -- orders/days to 0 whenever orders < days.
                        COUNT(DISTINCT Order_No) * 90.0
                        -- A member created today has 0 elapsed days; treat that as
                        -- 1 day to avoid a divide-by-zero error.
                        / CASE
                              WHEN DATEDIFF(DD, Date_Created, GETDATE()) = 0 THEN 1
                              ELSE DATEDIFF(DD, Date_Created, GETDATE())
                          END
                    AS INT)
               ELSE COUNT(DISTINCT Order_No)
           END AS TRANSACT_COUNT
    FROM TABLE1
    WHERE Transaction_Date BETWEEN DATEADD(DD, -90, GETDATE()) AND GETDATE()
    GROUP BY Member_No
            ,Date_Created
)
SELECT MT.Member_No, MT.TRANSACT_COUNT, R.SCORE
FROM MEMBER_TRANS MT
INNER JOIN RANKS R
    ON MT.TRANSACT_COUNT BETWEEN R.FROM_TR AND ISNULL(R.TO_TR, MT.TRANSACT_COUNT)
--Added Extra below code from your comments
UNION ALL
-- Members whose only transactions are older than 90 days get the lowest score.
-- NOT EXISTS keeps a member who also has recent transactions from appearing a
-- second time with a conflicting score.
SELECT T.Member_No, COUNT(DISTINCT T.Order_No), 1 AS SCORE
FROM TABLE1 T
WHERE T.Transaction_Date < DATEADD(DD, -90, GETDATE())
  AND NOT EXISTS (
          SELECT 1
          FROM TABLE1 RECENT
          WHERE RECENT.Member_No = T.Member_No
            AND RECENT.Transaction_Date BETWEEN DATEADD(DD, -90, GETDATE()) AND GETDATE()
      )
GROUP BY T.Member_No

The code will return the desired result as per your requirement.Please find the snapshot of output for below query.
-- Scores each member on the number of transactions in the previous 90 days,
-- scaled to a 90-day rate for members created within the last 90 days.
-- NOTE(review): assumes #Sample1 carries the Transaction_Date column from the
-- question's table structure - confirm.
WITH Mycte
AS
(
    SELECT Member_No
          ,CASE WHEN DATEDIFF(dd, Date_Created, GETDATE()) > 90 THEN 0 ELSE 1 END AS New_Member
          ,DATEDIFF(dd, Date_Created, GETDATE()) AS DaysAsMember
           -- Count only orders dated within the last 90 days; members with none
           -- still appear, with a count of 0.
          ,COUNT(CASE
                     WHEN Transaction_Date >= DATEADD(dd, -90, GETDATE())
                      AND Transaction_Date <= GETDATE()
                     THEN Order_No
                 END) AS TransactionCount
    FROM #Sample1
    GROUP BY Member_No, Date_Created
)
, Scaled
AS
(
    SELECT Member_No
           -- New members: (transactions / days as member) * 90. 90.0 forces
           -- decimal division; a membership of less than one day counts as one
           -- day to avoid dividing by zero.
          ,CASE
               WHEN New_Member = 1
               THEN TransactionCount * 90.0
                    / CASE WHEN DaysAsMember < 1 THEN 1 ELSE DaysAsMember END
               ELSE TransactionCount
           END AS TransactionCount
    FROM Mycte
)
SELECT Member_No
       -- Non-overlapping bands; for whole counts: 9+ => 5, 6-8 => 4, 4-5 => 3,
       -- 3 => 2, 0-2 => 1.
      ,CASE
           WHEN TransactionCount > 8 THEN 5
           WHEN TransactionCount >= 6 THEN 4
           WHEN TransactionCount >= 4 THEN 3
           WHEN TransactionCount > 2 THEN 2
           ELSE 1
       END AS Score
FROM Scaled

Get contiguous date ranges grouped into a single line

I have a table which holds a lot of rows (Currently, 500K, expected to rise to 15 Million in the next 3 years). The table holds payments made for a certain event for a specific day. And event can have 1 or many payments for the same day, but the payments on the same day must have different PaymentTypes.
Below is a table variable creation that creates the basic data (Input), and then a hardcoded select which is the expected output. I need to group continuous date ranges for a payment type, for a placement, in one row, with a From and To date, and then when there's a break - no rows, and then for the next date range.
For example:
Placement 1 got payments from the 1st to the 2nd (2 days) for payment type 5, and then from the 4th until the 6th for the same type. Therefore, two rows. Placement 1 also got payments from the 1st to the 3rd for payment type 10. So, that is another row.
-- Input: one row per placement, payment type and day.
DECLARE @Temp TABLE
(
    Id INT NOT NULL IDENTITY(1,1),
    PlacementId INT NOT NULL,
    PaymentTypeId INT NOT NULL,
    DateValue DATETIME NOT NULL,
    Amount DECIMAL(16,2) NOT NULL
);

-- UNION ALL: the sample rows are all distinct, so no de-duplication is needed.
INSERT INTO @Temp (PlacementId, PaymentTypeId, DateValue, Amount)
SELECT 1, 5, '01-JAN-2015', 100 UNION ALL
SELECT 1, 5, '02-JAN-2015', 150 UNION ALL
SELECT 1, 5, '04-JAN-2015', 78 UNION ALL
SELECT 1, 5, '05-JAN-2015', 89 UNION ALL
SELECT 1, 5, '06-JAN-2015', 22 UNION ALL
SELECT 1, 10, '01-JAN-2015', 10 UNION ALL
SELECT 1, 10, '02-JAN-2015', 10 UNION ALL
SELECT 1, 10, '03-JAN-2015', 15 UNION ALL
SELECT 2, 5, '01-JAN-2015', 200 UNION ALL
SELECT 2, 5, '02-JAN-2015', 5 UNION ALL
SELECT 2, 5, '03-JAN-2015', 50 UNION ALL
SELECT 3, 5, '01-JAN-2015', 80 UNION ALL
SELECT 4, 5, '07-JAN-2015', 100 UNION ALL
SELECT 4, 5, '08-JAN-2015', 12 UNION ALL
SELECT 4, 5, '12-JAN-2015', 66 UNION ALL
SELECT 4, 5, '14-JAN-2015', 4 UNION ALL
SELECT 5, 10, '08-JAN-2015', 10;

SELECT Id, PlacementId, PaymentTypeId, DateValue, Amount FROM @Temp;

-- Expected output (hard-coded): one row per contiguous date range for each
-- placement and payment type, with the summed amount.
SELECT 1 AS PlacementId, 5 AS PaymentTypeId, '2015-01-01' AS FromDate, '2015-01-02' AS ToDate, 250 AS Amount UNION ALL
SELECT 1, 10, '2015-01-01', '2015-01-03', 35 UNION ALL
SELECT 1, 5, '2015-01-04', '2015-01-06', 189 UNION ALL
SELECT 2, 5, '2015-01-01', '2015-01-03', 255 UNION ALL
SELECT 3, 5, '2015-01-01', '2015-01-01', 80 UNION ALL
SELECT 4, 5, '2015-01-07', '2015-01-08', 112 UNION ALL
SELECT 4, 5, '2015-01-12', '2015-01-12', 66 UNION ALL
SELECT 4, 5, '2015-01-14', '2015-01-14', 4 UNION ALL
SELECT 5, 10, '2015-01-08', '2015-01-08', 10;
Note, there is a NCI on PlacementID and PaymentTypeID.
We're doing it at the moment with a crazy load of cursors, and are having extreme speed issues. (The 500K lines takes 4 minutes to process).
Is there an efficient method to achieve the desired output?

This is a problem called Grouping Islands of Contiguous Dates. Read this article by Jeff Moden for more info.
SQL Fiddle
-- Collapses contiguous daily payments per placement and payment type into one
-- row with the range's first date, last date and total amount.
;WITH Cte AS (
    SELECT
        PlacementId,
        PaymentTypeId,
        DateValue,
        Amount,
        -- Subtracting the row's sequence number (in days) from its date gives
        -- the same value for every row of an unbroken run of consecutive days,
        -- and a different value after each gap. Relies on there being at most
        -- one row per placement, payment type and day.
        IslandKey = DATEADD(DAY,
                            - ROW_NUMBER() OVER (PARTITION BY PlacementId, PaymentTypeId
                                                 ORDER BY DateValue),
                            DateValue)
    FROM @Temp
)
SELECT
    PlacementId,
    PaymentTypeId,
    FromDate = MIN(DateValue),
    ToDate = MAX(DateValue),
    Amount = SUM(Amount)
FROM Cte
GROUP BY PlacementId, PaymentTypeId, IslandKey
ORDER BY PlacementId, PaymentTypeId, FromDate

a SQL query for sum of all the values of a column

i want a sql query the result like the below :
LineNumber UnitPrice Quantity
1 14 12
2 09 10
3 34 5
4 18 9
5 42 40
6 07 10
7 45 15
-----
101
pls help me....

Another way
-- Inline sample data, then every detail row plus one grand-total row.
WITH YourTable (LineNumber, UnitPrice, Quantity) AS
(
    SELECT v.LineNumber, v.UnitPrice, v.Quantity
    FROM (VALUES
            (1, 14, 12),
            (2, 09, 10),
            (3, 34, 5),
            (4, 18, 9),
            (5, 42, 40),
            (6, 07, 10),
            (7, 45, 15)
         ) AS v (LineNumber, UnitPrice, Quantity)
)
SELECT
    yt.LineNumber,
    yt.UnitPrice,
    SUM(yt.Quantity) AS Quantity
FROM YourTable AS yt
-- First set: one group per detail row. Empty set (): a single group over all
-- rows, whose LineNumber and UnitPrice come back as NULL.
GROUP BY GROUPING SETS ((yt.LineNumber, yt.UnitPrice, yt.Quantity), ())

To get the total you will use an aggregate:
-- Grand total of the quantity column.
SELECT SUM(yt.quantity) AS Total
FROM yourtable AS yt
To return the data from your table:
-- Every detail row.
SELECT
    yt.LineNumber,
    yt.UnitPrice,
    yt.Quantity
FROM yourTable AS yt
To return them together you can use a UNION ALL:
-- Detail rows followed by a single total row (LineNumber and UnitPrice are 0
-- on the total row).
SELECT
    combined.LineNumber,
    combined.UnitPrice,
    combined.Quantity
FROM (
    SELECT LineNumber, UnitPrice, Quantity, 0 AS IsTotal
    FROM yourTable
    UNION ALL
    SELECT 0, 0, SUM(Quantity), 1
    FROM yourTable
) AS combined
-- Without an ORDER BY the position of the total row is not guaranteed;
-- IsTotal sorts it after all detail rows.
ORDER BY combined.IsTotal, combined.LineNumber
See SQL Fiddle with Demo

-- Every row of the table.
SELECT
    st.[LineNumber],
    st.[UnitPrice],
    st.[Quantity]
FROM [SomeTable] AS st
-- Total of the Quantity column.
SELECT SUM(st.[Quantity]) AS [Sum]
FROM [SomeTable] AS st

Find top sales people

I want to find the top and bottom 10% of sales people. How can I do this using SQL Server 2005 or 2008?
-- Sample data: one row per sale; a sales person can appear several times.
DECLARE @Sales TABLE
(
    SalesPersonID varchar(10), TotalSales int
)
INSERT INTO @Sales (SalesPersonID, TotalSales)
SELECT 1, 200 UNION ALL
SELECT 2, 300 UNION ALL
SELECT 7, 300 UNION ALL
SELECT 4, 100 UNION ALL
SELECT 5, 600 UNION ALL
SELECT 5, 600 UNION ALL
SELECT 2, 200 UNION ALL
SELECT 5, 620 UNION ALL
SELECT 4, 611 UNION ALL
SELECT 3, 650 UNION ALL
SELECT 7, 611 UNION ALL
SELECT 9, 650 UNION ALL
SELECT 3, 555 UNION ALL
SELECT 9, 755 UNION ALL
SELECT 8, 650 UNION ALL
SELECT 3, 620 UNION ALL
SELECT 5, 633 UNION ALL
SELECT 6, 720
-- NOTE: a table variable lives only for the batch that declares it, so any
-- query that reads @Sales has to run before this GO.
GO
Also If i add department, then how can i write same query to find top 10% and bottom 10% in each department? I please want both queries.

TOP 10 %
-- Total sales per sales person, highest first, so TOP keeps the best 10%.
-- GROUP BY has to come before ORDER BY.
SELECT TOP 10 PERCENT SalesPersonID, SUM(TotalSales) AS Total
FROM Sales
GROUP BY SalesPersonID
ORDER BY SUM(TotalSales) DESC
BOTTOM 10 %
-- Total sales per sales person, lowest first, so TOP keeps the worst 10%.
-- GROUP BY has to come before ORDER BY.
SELECT TOP 10 PERCENT SalesPersonID, SUM(TotalSales) AS Total
FROM Sales
GROUP BY SalesPersonID
ORDER BY SUM(TotalSales) ASC

--Top 10%: highest totals first
SELECT TOP 10 PERCENT SalesPersonID, SUM(TotalSales) AS Total
FROM @Sales
GROUP BY SalesPersonID
ORDER BY SUM(TotalSales) DESC
--Bottom 10%: lowest totals first
SELECT TOP 10 PERCENT SalesPersonID, SUM(TotalSales) AS Total
FROM @Sales
GROUP BY SalesPersonID
ORDER BY SUM(TotalSales) ASC
If you added a column Department varchar(20) for example:
--By Dept: top and bottom 10% of sales people within each department
;WITH PersonTotals AS
(
    SELECT Department, SalesPersonID, SUM(TotalSales) AS Total
    FROM @Sales
    GROUP BY Department, SalesPersonID
)
, Ranked AS
(
    SELECT Department
          ,SalesPersonID
          ,Total
          ,ROW_NUMBER() OVER (PARTITION BY Department ORDER BY Total DESC) AS RankFromTop
          ,ROW_NUMBER() OVER (PARTITION BY Department ORDER BY Total ASC) AS RankFromBottom
          ,COUNT(*) OVER (PARTITION BY Department) AS PeopleInDept
    FROM PersonTotals
)
SELECT Department
      ,SalesPersonID
      ,Total
       -- Someone who qualifies for both brackets (very small department) is
       -- reported once, as 'Top 10%'.
      ,CASE
           WHEN RankFromTop <= CEILING(PeopleInDept * 0.10) THEN 'Top 10%'
           ELSE 'Bottom 10%'
       END AS Bracket
FROM Ranked
-- CEILING rounds a fractional row count up, as TOP ... PERCENT does.
WHERE RankFromTop <= CEILING(PeopleInDept * 0.10)
   OR RankFromBottom <= CEILING(PeopleInDept * 0.10)
ORDER BY Department, Total DESC

cte version:
-- Sample data: one row per sale; a sales person can appear several times.
DECLARE @Sales TABLE (SalesPersonID varchar(10), TotalSales int);

INSERT INTO @Sales (SalesPersonID, TotalSales)
SELECT 1, 200 UNION ALL
SELECT 2, 300 UNION ALL
SELECT 7, 300 UNION ALL
SELECT 4, 100 UNION ALL
SELECT 5, 600 UNION ALL
SELECT 5, 600 UNION ALL
SELECT 2, 200 UNION ALL
SELECT 5, 620 UNION ALL
SELECT 4, 611 UNION ALL
SELECT 3, 650 UNION ALL
SELECT 7, 611 UNION ALL
SELECT 9, 650 UNION ALL
SELECT 3, 555 UNION ALL
SELECT 9, 755 UNION ALL
SELECT 8, 650 UNION ALL
SELECT 3, 620 UNION ALL
SELECT 5, 633 UNION ALL
SELECT 6, 720;

-- Total per sales person, then keep those in the top 10% or the bottom 10%.
WITH PersonTotals AS
(
    SELECT SalesPersonID, SUM(TotalSales) AS Total
    FROM @Sales
    GROUP BY SalesPersonID
)
SELECT pt.SalesPersonID, pt.Total
FROM PersonTotals AS pt
-- Each sales person has exactly one row in PersonTotals, so the two
-- membership tests can be applied to it directly; no self-join is needed.
WHERE pt.SalesPersonID IN (SELECT TOP 10 PERCENT hi.SalesPersonID
                           FROM PersonTotals AS hi
                           ORDER BY hi.Total DESC)
   OR pt.SalesPersonID IN (SELECT TOP 10 PERCENT lo.SalesPersonID
                           FROM PersonTotals AS lo
                           ORDER BY lo.Total)
ORDER BY pt.Total DESC;