-- Top salespeople per week of 2010, ranked by number of transactions.
-- rank() gives tied salespeople the same rank, so a week can return more
-- than 10 rows when there is a tie at the cut-off.
select *
from
(
    select year,
           week,
           salesperson,
           count(*) as transactions,
           -- partitioning by week alone is enough here because the WHERE
           -- clause restricts the rows to a single year
           rank() over (partition by week order by count(*) desc) as ranking
    from sales
    where year = '2010'   -- the stray trailing comma here was a syntax error
    group by year,
             week,
             salesperson
) temp
where ranking <= 10
The query returns a list of the top 10 salespeople (in terms of # of transactions) for each week of the year.
How can I go about adding columns to my results for:
Previous week's ranking for that
salesperson
Total weeks in the Top 10 this year
Consecutive weeks in the Top 10 (starting at week 1)
Consecutive weeks in the Top 10 (starting in previous year, if possible)
Can you give any general advice on how to go about these sorts of problems?
PS: Using SQL server 2008
Actually, I'm not convinced that views are the best way to go. You can do this sort of logic in CTEs and combine the entire thing into a single query. For example, here is what I have for everything except the consecutive logic:
-- Per-salesperson summary: number of weeks spent in the weekly top 10 during
-- the current calendar year, plus last week's sale count and rank.
;With
SalesDateParts As
(
    -- Split each sale date into its week number and year.
    Select DatePart(wk, SaleDate) As WeekNum, DatePart(yy, SaleDate) As [Year], SalesPersonId
    From #Sales
)
, SalesByWeek As
(
    -- Sales per person per week, ranked within each (year, week).
    Select [Year], WeekNum, SalesPersonId, Count(*) As SaleCount
        , RANK() OVER( PARTITION BY [Year], [WeekNum] ORDER BY Count(*) DESC ) As SaleRank
    From SalesDateParts
    Group By [Year], WeekNum, SalesPersonId
)
, PrevWeekTopSales As
(
    -- Rows for the week containing the date seven days ago. SaleRank is
    -- exposed so the caller gets the previous week's ranking, not just the count.
    Select [Year], [WeekNum], SalesPersonId, SaleCount, SaleRank
    From SalesByWeek
    Where [Year] = DatePart(yyyy, DateAdd(d, -7, CURRENT_TIMESTAMP))
        And WeekNum = DatePart(wk, DateAdd(d, -7, CURRENT_TIMESTAMP))
)
, WeeksInTop10 As
(
    -- Weeks in the top 10, limited to the current year ("this year" in the
    -- question); without the year filter every year in #Sales was counted.
    Select SalesPersonId, Count(*) As Top10Count
    From SalesByWeek
    Where SaleRank <= 10
        And [Year] = DatePart(yyyy, CURRENT_TIMESTAMP)
    Group By SalesPersonId
)
Select *
From Salespersons As sp   -- aliased so only one spelling of the table name is used
    Left Join WeeksInTop10
        On WeeksInTop10.SalesPersonId = sp.SalesPersonId
    Left Join PrevWeekTopSales
        On PrevWeekTopSales.SalesPersonId = sp.SalesPersonId
The logic for "consecutive" is probably going to require a calendar table which contains a value for every day along with columns for the given date's year and week.
My advice is to do the other queries separately in views and then join them in by salesperson (which I assume is the key).
The logic in this query is nice and clean and easy to follow. Otherwise, I think the way to attack this would be to start writing T-SQL functions to calculate the other values, but I think those functions would have the queries in them anyway.
Related
I'm writing a T-SQL query to calculate percentages of orders that have a ship time of over X days.
What I have now works but it feels clunky and I'm always looking to improve my query writing skills.
The first CTE is getting total orders from a time frame. The second CTE is getting any orders that have a ship date that is 3 days older (or less) than the order date.
-- Percentage of orders per month (by OrderDate, from 2020-05-01 onward)
-- whose ship date is at most 3 day boundaries after the order date.
;WITH TotalOrders
AS (
    -- All orders per month.
    SELECT COUNT(*) AS TotalOrders
        , MONTH(OrderDate) AS [Month]
        , YEAR(OrderDate) AS [Year]
    FROM Orders
    WHERE OrderDate >= '2020-05-01'
    GROUP BY MONTH(OrderDate), YEAR(OrderDate)
),
Under3
AS (
    -- Orders per month shipped within 3 days.
    -- NOTE(review): this CTE reads SorMaster while TotalOrders reads Orders;
    -- confirm both are meant to use the same table.
    SELECT COUNT(*) AS Under3Days
        , MONTH(OrderDate) AS [Month]
        , YEAR(OrderDate) AS [Year]
    FROM SorMaster SM
    WHERE OrderDate >= '2020-05-01'
        AND DATEDIFF(D, OrderDate, ShipDate) <= 3
    GROUP BY MONTH(OrderDate), YEAR(OrderDate)
)
-- Drive from TotalOrders with a LEFT JOIN so a month with no fast shipments
-- is reported as 0 instead of disappearing from the result.
SELECT ROUND(COALESCE(Under3.Under3Days, 0) * 100.0 / TPO.TotalOrders, 1) AS PercentOfTotal
    , TPO.[Month]
    , TPO.[Year]
FROM TotalOrders TPO
LEFT JOIN Under3
    ON Under3.[Month] = TPO.[Month]
    AND Under3.[Year] = TPO.[Year]
I know I can do this in temp tables too but wondering if there's a best practice to follow when doing these calculations
You can use conditional aggregation:
-- Share of orders per month whose ShipDate is no later than OrderDate + 3 days.
-- Each order contributes 100.0 or 0, so the per-group average is the percentage.
SELECT
    ROUND(
        AVG(CASE
                WHEN ShipDate <= DATEADD(day, 3, OrderDate) THEN 100.0
                ELSE 0
            END),
        1
    ) AS PercentOfTotal
  , MONTH(OrderDate) AS [Month]
  , YEAR(OrderDate)  AS [Year]
FROM Orders
WHERE OrderDate >= '2020-05-01'
GROUP BY YEAR(OrderDate), MONTH(OrderDate)
I am trying to create a 12-month grid view of all questions that were submitted in each month of that 12-month period.
-- Document count per calendar month of 2017 (only months that have rows).
SELECT
    YEAR(h.metaInsert) AS [Year],
    MONTH(h.metaInsert) AS [Month],
    DATENAME(MONTH, h.metaInsert) AS [Month Name],
    COUNT(1) AS [Total Documents]
FROM
    Document_Count_History AS h
WHERE
    -- Half-open date range instead of YEAR(h.metaInsert) = 2017 so an index
    -- on metaInsert can be used (assumes metaInsert is a date/datetime column).
    h.metaInsert >= '20170101'
    AND h.metaInsert < '20180101'
GROUP BY
    YEAR(h.metaInsert), MONTH(h.metaInsert), DATENAME(MONTH, h.metaInsert)
ORDER BY
    -- named columns instead of positional 1, 2
    [Year], [Month]
This returns the data perfectly for the months that have it, but I get no data returned for those with 0 records for that specific month.
My goal is to see all 12 months along with the count of documents. If there are no documents, it will simply be a 0 for that month but it will be included in the result set.
How can I take what I have and apply the missing months?
You could use something like this to generate the sequence of months for your query:
-- Generate one row per month starting at @StartDate, then left join the
-- documents so that months without documents still appear with a count of 0.
-- Local variables must start with @ (the original "#StartDate" does not parse).
declare @StartDate date = '20170101'
      , @NumberOfYears int = 1;
;with Months as (
    -- row_number() over spt_values is used purely as a number generator
    select top (12*@NumberOfYears)
        [Month] = dateadd(Month, row_number() over (order by number) -1, @StartDate)
      , NextMonth = dateadd(Month, row_number() over (order by number), @StartDate)
    from master.dbo.spt_values
)
select
    year(m.Month) [Year],
    Month(m.Month) [Month],
    datename(Month,m.Month) [Month Name],
    -- count(h.*) is not valid T-SQL; counting a column from h yields 0 for
    -- months where the left join found no documents
    count(h.metaInsert) [Total Documents]
from Months as m
    left join Document_Count_History AS h
        -- the date filter stays in ON: moving it to WHERE would drop the
        -- empty months again
        on h.metaInsert >= m.Month
       and h.metaInsert < m.NextMonth
group by m.Month
order by m.Month
Although you may want to consider adding a Calendar table, or Date Dimension.
Calendar and Numbers table references:
Generate a set or sequence without loops - 1 - Aaron Bertrand
The "Numbers" or "Tally" Table: What it is and how it replaces a loop - Jeff Moden
Creating a Date Table/Dimension in SQL Server 2008 - David Stein
Calendar Tables - Why You Need One - David Stein
Creating a date dimension or calendar table in SQL Server - Aaron Bertrand
An example months table:
-- Example month dimension: one row per month with its half-open date range
-- [MonthStart, NextMonthStart) and pre-computed year / month / month name.
create table dbo.Months(
    MonthStart date not null primary key
  , NextMonthStart date not null
  , [Year] smallint not null
  , [Month] tinyint not null
  , [MonthName] varchar(16) not null
);
-- Local variables must start with @ (the original "#StartDate" does not parse).
declare @StartDate date = '20100101'
      , @NumberOfYears int = 30;
-- [MonthName] is NOT NULL and the select supplies five values, so it has to be
-- listed in the insert column list as well.
insert dbo.Months(MonthStart,NextMonthStart,[Year],[Month],[MonthName])
select top (12*@NumberOfYears)
    [MonthStart] = dateadd(month, row_number() over (order by number) -1, @StartDate)
  , NextMonthStart = dateadd(month, row_number() over (order by number), @StartDate)
  , [year] = year(dateadd(month, row_number() over (order by number) -1, @StartDate))
  , [Month] = Month(dateadd(month, row_number() over (order by number) -1, @StartDate))
  , MonthName = datename(Month,dateadd(month, row_number() over (order by number) -1, @StartDate))
from master.dbo.spt_values;
and your query would simplify to:
-- Document count for every month of 2017, driven from the Months table so
-- months without documents are returned with a count of 0.
select
    m.[Year],
    m.[Month],
    m.[MonthName],
    -- count(h.*) is not valid T-SQL; count a column from the outer-joined table
    count(h.metaInsert) [Total Documents]
from Months as m
    left join Document_Count_History AS h
        on h.metaInsert >= m.MonthStart
       and h.metaInsert < m.NextMonthStart
where m.[Year] = 2017
-- MonthStart must be grouped because the ORDER BY uses it
group by m.MonthStart, m.[Year], m.[Month], m.[MonthName]
order by m.MonthStart
You need a date dimension. Specifically, you need a table that has all the values for months. Then, you can do a left-join on the table that gets the totals, and pull out a sum value.
*Edit (Hopefully to be more clear)
Table below, I would like to count ids and count duplicate ids where the createddate has a gap of 3 months or more for that ID.
Query I have so far...
-- Step 1: take 100 rows of (id, CreatedDate) from tbl into #temp.
-- (An optional filter such as year(CreatedDate) = '2015' can be added here.)
if object_id('tempdb..#temp') is not null
    drop table #temp

select top 100
    src.id,
    src.CreatedDate
into #temp
from tbl src

-- Step 2: number of sampled rows per id.
if object_id('tempdb..#temp2') is not null
    drop table #temp2

select
    smp.id,
    count(smp.id) as Total_Cnt
into #temp2
from #temp smp
group by smp.id

-- Step 3: list each distinct (id, CreatedDate) with its per-id total and its
-- dense rank by CreatedDate within the id.
select distinct
    cnt.Total_Cnt,
    cnt.id,
    det.CreatedDate,
    DENSE_RANK() over (partition by det.id order by det.CreatedDate) RK
from #temp2 cnt
    inner join #temp det
        on cnt.id = det.id
order by cnt.Total_Cnt desc
Results:
Total_cnt id createddate rk
3 1 01-01-2015 1
3 1 03-02-2015 2
3 1 01-02-2015 3
2 2 05-01-2015 1
2 2 05-02-2015 2
1 3 06-01-2015 1
1 4 07-01-2015 1
Count ids and only count duplicate ids when the createddate from the id is greater than 3 months.
Something like this...
Total_cnt id Countwith3monthgap
3 1 2
2 2 1
1 3 1
1 4 1
You can use a cte and ROW_NUMBER to get your order and self join the cte based on the order..
-- Number each ID's rows by CreatedDate, pair every row with the row just
-- before it, and count the rows that either have no predecessor or come at
-- least 3 months after it.
WITH ordered AS
(
    SELECT
        ID,
        CreatedDate,
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY CreatedDate) AS Rn
    FROM Test
)
SELECT
    cur.ID,
    COUNT(CASE
              WHEN prev.CreatedDate IS NULL
                OR cur.CreatedDate >= DATEADD(month, 3, prev.CreatedDate)
              THEN 1
          END)
FROM ordered AS cur
    LEFT JOIN ordered AS prev
        ON prev.ID = cur.ID
       AND prev.Rn + 1 = cur.Rn
GROUP BY cur.ID
You also need to use a conditional count where the Previous CreatedDate is null or if the Current CreatedDate is >= the Previous CreatedDate + 3 months
If you happen to be using SQL 2012+ you can also use LAG here to get the same result
-- Same count using LAG: keep a row when it is the first for its ID or when
-- it falls at least 3 months after the previous row's date.
;WITH dated AS
(
    SELECT
        ID,
        CreatedDate AS CurrentDate,
        LAG(CreatedDate) OVER (PARTITION BY ID ORDER BY CreatedDate) AS PreviousDate
    FROM Test
)
SELECT
    ID,
    COUNT(*)
FROM dated
WHERE PreviousDate IS NULL
   OR CurrentDate >= DATEADD(month, 3, PreviousDate)
GROUP BY ID
You can use a lag to get the previous date, Null for the first in the list
-- Each row alongside the CreatedDate of the preceding row for the same id
-- (NULL for the first row of an id). LAG's default offset is 1.
SELECT
    id,
    LAG(CreatedDate) OVER (PARTITION BY Id ORDER BY CreatedDate) AS PreviousCreateDate,
    CreatedDate
FROM #t
You can use that as a subquery and get the difference in months using DATEDIFF
-- Month difference to the previous row, keeping rows that are the first of
-- their id or whose DATEDIFF in months is 3 or more.
;WITH sub AS
(
    SELECT
        id,
        LAG(CreatedDate) OVER (PARTITION BY Id ORDER BY CreatedDate) AS PreviousCreateDate,
        CreatedDate
    FROM #t
)
SELECT
    sub.id,
    DATEDIFF(month, sub.PreviousCreateDate, sub.CreatedDate)
FROM sub
WHERE sub.PreviousCreateDate IS NULL
   OR DATEDIFF(month, sub.PreviousCreateDate, sub.CreatedDate) >= 3
You can then take your totals
-- Per id: number of rows that are first for the id or at least 3 month
-- boundaries (per DATEDIFF) after the previous row.
;WITH sub AS
(
    SELECT
        id,
        LAG(CreatedDate) OVER (PARTITION BY Id ORDER BY CreatedDate) AS PreviousCreateDate,
        CreatedDate
    FROM #t
)
SELECT
    sub.id,
    COUNT(sub.id) AS cnt
FROM sub
WHERE sub.PreviousCreateDate IS NULL
   OR DATEDIFF(month, sub.PreviousCreateDate, sub.CreatedDate) >= 3
GROUP BY sub.id
Note that datediff() only counts month boundaries crossed, so with datediff the last day of January is three months before the first day of April. That appears to be the logic you were after.
You might want to define your three month gap criteria as
-- Alternative filter (fragment): keep a row when the previous date is at or
-- before CreatedDate minus 3 calendar months, or when there is no previous row.
WHERE sub.PreviousCreateDate <= DATEADD(month, -3, sub.CreatedDate)
OR sub.PreviousCreateDate IS NULL
or
-- Alternative filter (fragment): keep a row when CreatedDate is at or after
-- the previous date plus 3 calendar months, or when there is no previous row.
WHERE sub.CreatedDate >= DATEADD(month, +3, sub.PreviousCreateDate )
OR sub.PreviousCreateDate IS NULL
I'm guessing that your desired definition of three-month gap doesn't coincide with datediff()'s. Most of the logic here is to look back at the previous date and decide if the gap is big enough to qualify.
When datediff() counts three months difference we still need to make sure the day of month is later than the first one (per example and ID 5). If difference is more than three months then we're good automatically.
But I'm also assuming that you would want to treat the distance from November 30th to February 28th (or 29th in a leap year) as a full three months because the end date falls on the final day of the month. By adjusting the end date by an extra day this is an easy scenario to snag as it will bump the date into the following month and increase the month difference by one as well. If that's not what you want then just remove the dateadd(day, 1, ...) portion and use only the raw CreatedDate value.
Your sample data is limited, so I'm also making the assumption that the gaps are measured between consecutive dates. If you're wanting to find blocks of runs that don't span more than three months across the set, then that's a different problem and you should clarify with more information.
Since you've indicated that you're probably on SQL Server 2008 you'll have to do without the lag() function. Although the first query could be adjusted for that it's likely easier to go with the second approach at the end.
/* Counts, per ID, the distinct values of the running number of flagged gaps.
   diffs: numbers each ID's rows by CreatedDate and sets GapFlag.
   gaps:  running sum of (1 + GapFlag) per ID in RN order.
   final: Counter - RN is the running total of GapFlag, so the distinct count
          is the number of different running totals seen for the ID. */
with diffs as (
select
ID,
row_number() over (partition by ID order by CreatedDate) as RN,
/* GapFlag = 1 when either
     (a) the month difference to the previous row is exactly 3 and the previous
         row's day-of-month is <= this row's day-of-month, or
     (b) the month difference from the previous row to (CreatedDate + 1 day) is >= 4.
   AND binds tighter than OR, so the condition reads as (a) OR (b).
   For the first row of an ID lag() is NULL, the comparisons are unknown,
   and the flag falls through to 0. */
case when
datediff(
month,
lag(CreatedDate, 1) over (partition by ID order by CreatedDate),
CreatedDate
) = 3
and
datepart(
day,
lag(CreatedDate, 1) over (partition by ID order by CreatedDate)
) <= datepart(day, CreatedDate)
or
datediff(
month,
lag(CreatedDate, 1) over (partition by ID order by CreatedDate),
/* adding one day to handle gaps like Nov30 - Feb28/29 and Jan31 - Apr30 */
dateadd(day, 1, CreatedDate)
) >= 4
then 1
else 0
end as GapFlag
from <T> /* <--- your table name here */
), gaps as (
select
ID, RN,
/* running total per ID: grows by 1 per row plus 1 more for each flagged gap */
sum(1 + GapFlag) over (partition by ID order by RN) as Counter
from diffs
)
/* Counter - RN removes the per-row increment, leaving the cumulative GapFlag */
select ID, count(distinct Counter - RN) as "Count"
from gaps
group by ID
The rest of the logic is a typical gaps and islands scenario looking for holes in the sum(1 + GapFlag) sequence with the offset of 1 acting pretty much like row_number().
http://sqlfiddle.com/#!6/61b12/3
JamieD77's approach is also valid. I was originally thinking your problem involved more than looking at the rows in sequence. Here's how I would tweak it for the gap definition I've been running with:
/* Per ID: number of rows that are the first for the ID or that follow the
   previous row (by CreatedDate) after a gap of at least three months.
   d1 is the current row, d0 the row immediately before it. */
with data as (
    select ID, CreatedDate, row_number() over (partition by ID order by CreatedDate) as RN
    from T
)
select d1.ID, count(*) as "Count"   /* ID qualified: it exists in both d0 and d1 */
from data d1 left outer join data d0
    on d0.ID = d1.ID and d0.RN = d1.RN - 1 /* connect to the one before */
where
    (
        /* exactly three month boundaries and the day-of-month has been reached;
           the right-hand side must be d1 (d0 vs d0 was always true) */
        datediff(month, d0.CreatedDate, d1.CreatedDate) = 3
        and datepart(day, d0.CreatedDate) <= datepart(day, d1.CreatedDate)
    )
    /* one day is added to the CURRENT date to catch month-end cases such as
       Nov30 - Feb28/29; adding it to d0 could never produce >= 4 */
    or datediff(month, d0.CreatedDate, dateadd(day, 1, d1.CreatedDate)) >= 4
    /* first row of an ID has no predecessor */
    or d0.ID is null
group by d1.ID
Edit: You have changed the question since yesterday.
Change this line in the first query to include the total count:
...
select count(*) as TotalCnt, ID, count(distinct Counter - RN) as GapCount
...
Second would look like:
/* Per ID: total row count plus the number of rows that are the first for the
   ID or follow the previous row after a gap of at least three months.
   d1 is the current row, d0 the row immediately before it. */
with data as (
    select ID, CreatedDate, row_number() over (partition by ID order by CreatedDate) as RN
    from T
)
select
    count(*) as TotalCnt, d1.ID,   /* ID qualified: it exists in both d0 and d1 */
    count(case when
            (
                /* the day-of-month comparison must use d1 on the right
                   (d0 vs d0 was always true) */
                datediff(month, d0.CreatedDate, d1.CreatedDate) = 3
                and datepart(day, d0.CreatedDate) <= datepart(day, d1.CreatedDate)
            )
            /* the extra day belongs on the current date d1 */
            or datediff(month, d0.CreatedDate, dateadd(day, 1, d1.CreatedDate)) >= 4
            /* first row of an ID has no predecessor */
            or d0.ID is null then 1 end
    ) as GapCount
from data d1 left outer join data d0
    on d0.ID = d1.ID and d0.RN = d1.RN - 1 /* connect to the one before */
/* no WHERE clause: every row must be kept for TotalCnt (the dangling
   "where" keyword was a syntax error) */
group by d1.ID
Within a SQL Server 2012 database, I have a table with two columns customerid and date. I am interested in getting by year-month, a count of customers that have purchased in current month but not in prior 13 months. The table is extremely large so something efficient would be highly appreciated. Results table is shown after the input data. In essence, it is a count of customers that purchased in current month but not in prior 13 months (by year and month).
---input table-----
-- Sample input: one row per purchase.
-- A table variable must be named with @ ("declare #Sales as Table" does not parse).
declare @Sales as Table ( customerid Int, date Date );
-- Dates use the unseparated yyyymmdd form, which SQL Server reads the same way
-- under every language / DATEFORMAT setting (the originals were mm/dd/yyyy).
insert into @Sales ( customerid, date) values
( 1, '20120101' ),
( 1, '20130401' ),
( 1, '20140101' ),
( 1, '20140101' ),
( 1, '20140406' ),
( 2, '20140401' ),
( 3, '20120103' ),
( 3, '20140103' ),
( 4, '20120104' ),
( 4, '20130404' ),
( 5, '20100201' ),
( 5, '20130201' ),
( 5, '20140401' );
select customerid, date
from @Sales;
---desired results ----
yearmth monthpurchasers monthpurchasernot13m
201002 1 1
201201 3 3
201302 1 1
201304 2 2
201401 2 1
201404 3 2
Thanks very much for looking at this!
Dev
You didn't provide the expected result, but I believe this is pretty close (at least logically):
-- Per year-month: number of distinct purchasers (monthpurchasers) and how many
-- of them had no purchase in the 13 months before that month (cnt).
-- Reads the @Sales table variable, so it must run in the batch that declares it.
;with g as (
    -- one row per customer per purchase month;
    -- month_index is a continuous month counter so month gaps can be subtracted
    select customerid,
           year([date]) * 100 + month([date]) as mon,
           year([date]) * 12 + month([date]) as month_index
    from @Sales
    group by customerid,
             year([date]) * 100 + month([date]),
             year([date]) * 12 + month([date])
),
x as (
    -- most recent earlier purchase month for the same customer.
    -- A "rows between 13 preceding and 1 preceding" frame counts ROWS, not
    -- months, so any earlier purchase (even years back) used to disqualify
    -- the customer.
    select customerid, mon, month_index,
           lag(month_index) over (partition by customerid order by month_index) as prev_month_index
    from g
)
select mon,
       -- g is already distinct per customer and month
       count(*) as monthpurchasers,
       -- new-in-13-months: no earlier purchase, or the latest one is more
       -- than 13 months back
       sum(case
               when prev_month_index is null
                 or month_index - prev_month_index > 13 then 1
               else 0
           end) as cnt
from x
group by mon
order by mon
Tell me if this query helps. It extracts all the rows which meet your condition into a Table variable. Then, I use your query and join to this table.
-- Collect customers who purchased between @startDate and now but not in the
-- 13 months before @startDate, then count them per year/month of Sales.
-- Local and table variables must start with @ ("#startDate" does not parse).
declare @startDate datetime
declare @todayDate datetime
declare @tbl_Custs as Table(customerid int)
set @startDate = '04/01/2014' -- mm/dd/yyyy
set @todayDate = GETDATE()
insert into @tbl_Custs
-- purchased in the current window
select s.customerid
from Sales as s
where (s.[date] >= @startDate and s.[date] <= @todayDate)
    -- NOT EXISTS instead of NOT IN: a NULL customerid in the subquery would
    -- make NOT IN return no rows at all
    and not exists
    (
        -- purchased in the 13 months before the window.
        -- The lower bound is relative to @startDate; comparing [date] with
        -- DATEADD(MONTH,-13,[date]) was always true.
        select 1
        from Sales as p
        where p.customerid = s.customerid
            and p.[date] >= DATEADD(MONTH,-13,@startDate)
            and p.[date] < @startDate
    )
-- your query goes here
select year(s.[date]) as year
    ,month(s.[date]) as month
    ,count(distinct(c.customerid)) as monthpurchasers
from Sales as s
    -- Sales drives the result (equivalent to the former RIGHT JOIN), so every
    -- month present in Sales is returned
    left join @tbl_Custs as c
        on c.customerid = s.customerid
group by year(s.[date]) , month(s.[date])
order by year(s.[date]) , month(s.[date])
The query below will produce what you are looking for. I am not sure how performance will be on a big table (how big is your table?), but it is pretty straightforward so I think it will be OK. I simply calculate the date 13 months earlier in a CTE to find my sale window, then join to the Sales table within that window / customer id and group records based on the unmatched records. You don't actually need two CTEs here; you can do the DATEADD(mm,-13,date) in the join part of the second CTE, but I thought it might be clearer this way.
P.S. If you need to change the time frame from 13 months to something else, all you have to change is the DATEADD(mm,-13,date); this simply subtracts 13 months from the date value.
Hope this helps or at least leads to a better solution
-- Per year/month: distinct customers having a purchase with no earlier
-- purchase in the 13 months before it.
-- Reads the @Sales table variable, so it must run in the batch that declares it.
;WITH PurchaseWindow AS (
    -- every sale with the start of its 13-month look-back window
    select customerid, [date], DATEADD(mm,-13,[date]) AS minsaledate
    FROM @Sales
), JoinBySaleWindow AS (
    SELECT a.customerid, a.[date], a.minsaledate, b.[date] AS earliersaledate
    FROM PurchaseWindow a
        LEFT JOIN @Sales b ON a.customerid = b.customerid
            --Find earlier sales for the customer within the 13 months before the original sale.
            --Half-open range [minsaledate, date): BETWEEN a.date AND a.minsaledate had its
            --bounds reversed (never matched), and an inclusive upper bound would match the sale itself.
            AND b.[date] >= a.minsaledate
            AND b.[date] < a.[date]
)
SELECT DATEPART(yy,[date]) AS [year], DATEPART(mm, [date]) AS [month], COUNT(DISTINCT customerid) AS monthpurchases
FROM JoinBySaleWindow
--Exclude records where a sale within the last 13 months occurred
WHERE earliersaledate IS NULL
GROUP BY DATEPART(mm, [date]), DATEPART(yy,[date])
Sorry about the typos they are fixed now.
Hi I have an SQL server database with 3 columns Activity[start_date(datetime),end_date(datetime),title(string)]
and I wish to count for a whole year, how many activities have start_date in each month, I mean I would like a return of 12 values(12 months) that count the activities within the months, thanks.
If there is an index on start_date and/or if you need to include months in the result even if there was no activity in that month, you might consider this one:
-- Activity count for each of the 12 months of @year, including months with
-- no activity. Local variables must start with @ ("#year" does not parse).
DECLARE @year INT;
SET @year = 2012;
;WITH n AS
(
    -- 12 month-start dates: DATEADD(YEAR, @year-1900, 0) is January 1st of
    -- @year (date 0 is 1900-01-01); sys.all_objects is only a row source
    SELECT TOP (12) m = DATEADD(MONTH, ROW_NUMBER() OVER
        (ORDER BY name)-1, DATEADD(YEAR, @year-1900, 0))
    FROM sys.all_objects ORDER BY name
)
SELECT [Month] = n.m, ActivityCount = COUNT(t.title)
FROM n
    LEFT OUTER JOIN dbo.unspecified_table_name AS t
        -- half-open range keeps the predicate index-friendly on start_date
        ON t.start_date >= n.m
       AND t.start_date < DATEADD(MONTH, 1, n.m)
GROUP BY n.m
ORDER BY [Month];
(If you don't want a row when there were zero activities in a given month, then change LEFT OUTER to INNER.)
-- Activities per month of 2011 (months without activities are not returned).
select month(start_date) as Month, count(*) as Count
from Activity
-- half-open range instead of year(start_date) = 2011 so an index on
-- start_date can be used; selects the same rows for a datetime column
where start_date >= '20110101'
  and start_date < '20120101'
group by month(start_date)
order by month(start_date)