SQL select top 10 for each year - sql-server

I have a fact database from which I want to make a trendline based on top 10 items based on sum quantity for each item per year.
I've done the following, but it does for example select more than 10 entities for my year 2007:
select TOP 10 sum(Quantity) as Quantity,DIM_Time.Year, DIM_Item.Name as Name
from Fact_Purchase
join DIM_Item on DIM_Item.BKey_ItemId = Fact_Purchase.DIM_Item
join DIM_Time on DIM_Time.ID = Fact_Purchase.DIM_Time_DeliveryDate
where Fact_Purchase.DIM_Company = 2 and DIM_Time.ID = FACT_Purchase.DIM_Time_DeliveryDate
Group by dim_item.Name, DIM_Time.Year
Order by Quantity DESC
How do I select top 10 items with the highest quantity through all my years, with only 10 top entities for each year?
As you can guess, the company is individual, and Is going to be a parameter in my report

I think this is what you're going for. My apologies if I messed up on translating your tables across.
select *
from (
select DIM_Time.[Year], dim_item.Name, SUM(Quantity) Quantity, RANK() OVER (PARTITION BY DIM_Time.[Year] ORDER BY SUM(Quantity) DESC) salesrank
from Fact_Purchase
join DIM_Item on DIM_Item.BKey_ItemId = Fact_Purchase.DIM_Item
join DIM_Time on DIM_Time.ID = Fact_Purchase.DIM_Time_DeliveryDate
where Fact_Purchase.DIM_Company = 2 and DIM_Time.ID = FACT_Purchase.DIM_Time_DeliveryDate
group by dim_item.Name, DIM_Time.[Year]
) tbl
where salesrank <= 10
order by [Year], salesrank
The subquery groups by name/year, and the RANK() OVER part sets up a sort of row index that increments by SUM(Quantity) and restarts for each Year. From there you just have to filter out anything with a salesrank (index) that's over 10.

SELECT
_year,
Name,
_SUM,
RANK_iD
FROM
(
SELECT
_year,
Name,
_SUM,
DENSE_RANK()OVER(PARTITION BY _year,_Month ORDER BY _SUM DESC) AS RANK_iD
FROM(
Select
DIM_Time AS _year,
DIM_Item as Name,
sum(Quantity) AS _SUM
from
#ABC
GROUP BY
_year,
Name
)A
)B
WHERE RANK_iD<=10

Related

SQL - Return a value sum only once when grouped

I want to count the unique record of a string but grouping by dates, and if the string already appeared previously on a group it shouldn't be counted anymore.
I've tried using distinct and it does show the unique count of the record but the record is counted again on every month.
Actual and minified SQL query:
select
date,
count(distinct d.name) as count
from ...
group by date
Sample and desired output
Image
Grab unique names and tag them with the earliest date. At that point it's just a matter of regrouping the resulting rows by date. Each name will uniquely correspond to only one date as desired:
with data as (select name, min("date") as dt from T group by name)
select dt, count(name) as cnt from data group by dt;
If you still need to see the original dates even when no names are counted, then flag each row according to whether it should be counted and then count the flags per date:
with data as (
select *,
case when "date" = min("date") over (partition by name)
then 1 end as flag
from T
)
select "date", count(flag) as cnt
from data
group by "date";
So you want the name only count once:
SELECT COUNT(u.name) as name_count, u.[date]
FROM (
SELECT d.name,MIN(d.date) AS [date]
FROM yourTable d
GROUP BY d.name) u
GROUP BY u.[date];
You can add a ROW_NUMBER() that is Partitioned by name and ordered by date and add a WHERE clause that only returns the rows with Row_Number = 1.
You can check this following option-
SELECT A.Date,COUNT(B.[Name]) Count
FROM
(
SELECT DISTINCT Date FROM your_table
)A
LEFT JOIN
(
SELECT * FROM
(
SELECT *,ROW_NUMBER() OVER(PARTITION BY [Name] ORDER BY Date) RN
FROM your_table
)A WHERE RN = 1
)B ON A.Date = B.Date
GROUP BY A.Date
But the best option if I modify a bit the concept from Shawnt00 is as below-
SELECT A.Date,COUNT(B.[Name]) Count
FROM
(
SELECT DISTINCT Date FROM your_table
)A
LEFT JOIN
(
SELECT [Name],MIN(Date) Date FROM your_table GROUP BY [Name]
)B ON A.Date = B.Date
GROUP BY A.Date
Both case the output will be-
Date Count
20190101 2
20190201 0
20190301 1

SQL Server 2014 Consolidate Tables avoiding duplicates

I have 36 Sales tables each referred to one store:
st1.dbo.Sales
st2.dbo.Sales
...
st35.dbo.Sales
st36.dbo.Sales
Each record has the following key columns:
UserName, PostalCode, Location, Country, InvoiceAmount, ItemsCount, StoreID
Here is SQLFiddle
I need to copy into Customers table all Username (and their details) that are not already present into Customers
in case of duplicated it is required to use the fields of record where InvoiceAmount is MAX
I tried to build a query but looks too complicated and it is also wrong because in CROSS APPLY should consider the full list of Sales Tables
INSERT INTO Customers (.....)
SELECT distinct
d.UserName,
w.postalCode,
w.location,
W.country,
max(w.invoiceamount) invoiceamount,
max(w.itemscount) itemscount,
w.storeID
FROM
(SELECT * FROM st1.dbo.Sales
UNION
SELECT * FROM st2.dbo.Sales
UNION
...
SELECT * FROM st36.dbo.Sales) d
LEFT JOIN
G.dbo.Customers s ON d.Username = s.UserName
CROSS APPLY
(SELECT TOP (1) *
FROM s.dbo.[Sales]
WHERE d.Username=w.Username
ORDER BY InvoiceAmount DESC) w
WHERE
s.UserName IS NULL
AND d.username IS NOT NULL
GROUP BY
d.UserName, w.postalCode, w.location,
w.country, w.storeID
Can somebody please give some hints?
As a basic SQL query, I'd create a row_number in the inner subquery and then join to customers and then isolated the max invoice number for each customer not in the customer table.
INSERT INTO Customers (.....)
SELECT w.UserName,
w.postalCode,
w.location,
w.country,
w.invoiceamount,
w.itemscount,
w.storeID
FROM (select d.*,
row_number() over(partition by d.Username order by d.invoiceamount desc) rownumber
from (SELECT *
FROM st1.dbo.Sales
UNION
SELECT *
FROM st2.dbo.Sales
UNION
...
SELECT *
FROM st36.dbo.Sales
) d
LEFT JOIN G.dbo.Customers s
ON d.Username = s.UserName
WHERE s.UserName IS NULL
AND d.username IS NOT NULL
) w
where w.rownumber = 1
Using your fiddle this will select distinct usernames rows with max invoiceamount
with d as(
SELECT * FROM Sales
UNION
SELECT * FROM Sales2
)
select *
from ( select *,
rn = row_number() over(partition by Username order by invoiceamount desc)
from d) dd
where rn=1;
step 1 - use cte .
select username , invoiceamount ,itemscount from Sales
UNION all
select user name , invoiceamount ,itemscount from Sales
.....
...
step 2
next cte use group by and get max invoiceamount ,itemscount for user of last result set.
,cte2 as (
select user name , max (invoiceamount) as invoiceamount ,max(itemscount) as itemscount from cte)
step3
use left join with user table and find missing record and itemscount invoiceamount

rank function with sum

I have two tables.
abc(CID(pk), cname,)
order(order_id(pk), CID(fk), number_of_rentals)
I want to determine top 10 customers on the basis of number of movies they rented.
select
orders.cid,orders.no_rentals, abc.name,
rank() over (order by no_rentals desc) "rank"
from abc
inner join orders on orders.CID = abc.CID;
I used this query but it's not universal. How can I use sum function on number_of_rentals with this query?
Select Top 10
orders.cid
, abc.name
, SUM(orders.no_rentals) TotalRentals
, rank() over (order by SUM(orders.no_rentals) desc) [rank]
from abc
inner join orders on orders.CID = abc.CID
Group By orders.cid, abc.name
Order By TotalRentals DESC

RANK() Over Partition BY not working

When I run the code below the ROWID is always 1.
I need to the ID to start at 1 for each item with the same Credit Value.
;WITH CTETotal AS (SELECT
TranRegion
,TranCustomer
,TranDocNo
,SUM(TranSale) 'CreditValue'
FROM dbo.Transactions
LEFT JOIN customers AS C
ON custregion = tranregion
AND custnumber = trancustomer
LEFT JOIN products AS P
ON prodcode = tranprodcode
GROUP BY
TranRegion
,TranCustomer
,TranDocNo)
SELECT
r.RegionDesc
,suppcodedesc
,t.tranreason as [Reason]
,t.trandocno as [Document Number]
,sum(tranqty) as Qty
,sum(tranmass) as Mass
,sum(transale) as Sale
,cte.CreditValue AS 'Credit Value'
,RANK() OVER (PARTITION BY cte.CreditValue ORDER BY cte.CreditValue)AS ROWID
FROM transactions t
LEFT JOIN dbo.Regions AS r
ON r.RegionCode = TranRegion
LEFT JOIN CTETotal AS cte
ON cte.TranRegion = t.TranRegion
AND cte.TranCustomer = t.TranCustomer
AND cte.TranDocNo = t.TranDocNo
GROUP BY
r.RegionDesc
,suppcodedesc
,t.tranreason
,t.trandocno
,cte.CreditValue
ORDER BY CreditValue ASC
EDIT
All the credit values with 400 must have the ROWID set to 1. And all the credit values with 200 must have the ROWID set to 2. And so on and so on.
Do you need something like this?
with cte (item,CreditValue)
as
(
select 'a',8 as CreditValue union all
select 'b',18 union all
select 'a',8 union all
select 'b',18 union all
select 'a',8
)
select CreditValue,dense_rank() OVER (ORDER BY item)AS ROWID from cte
Result
CreditValue ROWID
----------- --------------------
8 1
8 1
8 1
18 2
18 2
In your code replace
,RANK() OVER (PARTITION BY cte.CreditValue ORDER BY cte.CreditValue)AS ROWID
by
,DENSE_RANK() OVER (ORDER BY cte.CreditValue)AS ROWID
You just don't have to use PARTITION, just DENSE_RANK() OVER (ORDER BY cte.CreditValue)
I think the problem is with the RANK() OVER (PARTITION BY clause
you have to partition it by item not by CreditValue
Try this
RANK() OVER (PARTITION BY cte.CreditValue ORDER BY cte.RegionDesc)AS ROWID
Edit: The issue here isn't actually the nesting of the subquery, it's potentially based on partition by having columns that truly make each row unique (or 1)
Rather than ranking within your complex query like this
select
rank() over(partition by...),
*
from
data_source
join table1
join table2
join table3
join table4
order by
some_column
Try rank() or row_number() on the resulting data set, not within it.
For example, using the query above, remove rank() and implement it this way:
select
rank() over(partition by...),
results.*
from (
select
*
from
data_source
join table1
join table2
join table3
join table4
order by
some_column
) as results

select top 1 with a group by

I have two columns:
namecode name
050125 chris
050125 tof
050125 tof
050130 chris
050131 tof
I want to group by namecode, and return only the name with the most number of occurrences. In this instance, the result would be
050125 tof
050130 chris
050131 tof
This is with SQL Server 2000
I usually use ROW_NUMBER() to achieve this. Not sure how it performs against various data sets, but we haven't had any performance issues as a result of using ROW_NUMBER.
The PARTITION BY clause specifies which value to "group" the row numbers by, and the ORDER BY clause specifies how the records within each "group" should be sorted. So partition the data set by NameCode, and get all records with a Row Number of 1 (that is, the first record in each partition, ordered by the ORDER BY clause).
SELECT
i.NameCode,
i.Name
FROM
(
SELECT
RowNumber = ROW_NUMBER() OVER (PARTITION BY t.NameCode ORDER BY t.Name),
t.NameCode,
t.Name
FROM
MyTable t
) i
WHERE
i.RowNumber = 1;
select distinct namecode
, (
select top 1 name from
(
select namecode, name, count(*)
from myTable i
where i.namecode = o.namecode
group by namecode, name
order by count(*) desc
) x
) as name
from myTable o
SELECT max_table.namecode, count_table2.name
FROM
(SELECT namecode, MAX(count_name) AS max_count
FROM
(SELECT namecode, name, COUNT(name) AS count_name
FROM mytable
GROUP BY namecode, name) AS count_table1
GROUP BY namecode) AS max_table
INNER JOIN
(SELECT namecode, COUNT(name) AS count_name, name
FROM mytable
GROUP BY namecode, name) count_table2
ON max_table.namecode = count_table2.namecode AND
count_table2.count_name = max_table.max_count
I did not try but this should work,
select top 1 t2.* from (
select namecode, count(*) count from temp
group by namecode) t1 join temp t2 on t1.namecode = t2.namecode
order by t1.count desc
Here are to examples that you could use but the temp table use is more efficient than the view, but was done on a small data sample. You would want to check your own statistics.
--Creating A View
GO
CREATE VIEW StateStoreSales AS
SELECT t.state,t.stor_id,t.stor_name,SUM(s.qty) 'TotalSales'
,ROW_NUMBER() OVER (PARTITION BY t.state ORDER BY SUM(s.qty) DESC) AS 'Rank'
FROM [dbo].[sales] s
JOIN [dbo].[stores] t ON (s.stor_id = t.stor_id)
GROUP BY t.state,t.stor_id,t.stor_name
GO
SELECT * FROM StateStoreSales
WHERE Rank <= 1
ORDER BY TotalSales Desc
DROP VIEW StateStoreSales
---Using a Temp Table
SELECT t.state,t.stor_id,t.stor_name,SUM(s.qty) 'TotalSales'
,ROW_NUMBER() OVER (PARTITION BY t.state ORDER BY SUM(s.qty) DESC) AS 'Rank' INTO #TEMP
FROM [dbo].[sales] s
JOIN [dbo].[stores] t ON (s.stor_id = t.stor_id)
GROUP BY t.state,t.stor_id,t.stor_name
SELECT * FROM #TEMP
WHERE Rank <= 1
ORDER BY TotalSales Desc
DROP TABLE #TEMP

Resources