I have 3 tables. Examples below.
Weight
Channel WeightFirst WeightMiddle WeightLast
Dir 40 45 50
NatS 0 0 0
PC 20 25 30
UnRef 40 45 50
Sales
saleID revenue
32150 1600.00
32153 516.00
Visits
visitID saleID visitDate channel visitRevenue
4479433 32153 2014-12-09 15:00:41.000 NatS NULL
4479434 32153 2014-12-09 14:55:21.000 PC NULL
4479435 32153 2014-12-09 15:09:01.000 UnRef NULL
4755575 32150 2014-12-07 16:41:24.000 NatS NULL
4756323 32150 2014-12-07 16:52:56.000 PC NULL
4756324 32150 2014-12-06 20:49:41.000 Dir NULL
I need to calculate visitRevenue in the Visits table based on the WeightFirst, WeightMiddle, and WeightLast in the Weight table.
First visitDate in the Visits table gets WeightFirst, last visitDate gets WeightLast, and everything in between those dates gets WeightMiddle.
For example saleID 32153 has the first visitDate as visitID 4479434, so PC gets a WeightFirst of 20, then visitID 4479433 gets 0 for NatS, and visitID 4479435 gets 50 for UnRef. Total weight is 70. With saleID revenue in Sales being 516.00
I need to divide 516.00 by 70, then multiply the result by each weight and update visitRevenue in the Visits table with that result.
So PC would get 147.4285714285714 and UnRef would get 368.5714285714286. Add them together and it's 516.
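That split can be sanity-checked outside SQL; here is a minimal Python sketch of the same arithmetic (illustrative only, using the weights quoted above):

```python
# Weights for saleID 32153: PC is the first visit (WeightFirst = 20),
# NatS is a middle visit (WeightMiddle = 0), UnRef is the last (WeightLast = 50).
weights = {"PC": 20, "NatS": 0, "UnRef": 50}
revenue = 516.00

total_weight = sum(weights.values())  # 70
shares = {ch: revenue / total_weight * w for ch, w in weights.items()}

print(shares["PC"])     # ~147.4285714
print(shares["UnRef"])  # ~368.5714286
```

The per-channel shares add back up to the sale's revenue, which is a useful invariant to assert after any UPDATE.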
The tables I have hold more than just 2 saleIDs, many channels, and large numbers of visitDates. I need some SQL to update visitRevenue
with these calculated figures but am having trouble getting started. Any help would be most welcome! And if any more info is required, please ask.
Thanks
This, I think, does the trick. I don't know your system, so I did not know whether you could alter the underlying tables; this does everything with table variables.
BTW, it really helps to have the tables set up first :)
DECLARE @weight TABLE (
    Channel varchar(max),
    WeightFirst int,
    WeightMiddle int,
    WeightLast int);

INSERT INTO @weight VALUES
('Dir',  40, 45, 50),
('NatS',  0,  0,  0),
('PC',   20, 25, 30),
('UnRef',40, 45, 50);

DECLARE @sales TABLE (
    salesID int,
    revenue float);

INSERT INTO @sales VALUES
(32150, 1600.00),
(32153,  516.00);

DECLARE @visits TABLE (
    visitID int,
    salesID int,
    visitDate datetime,
    channel varchar(max),
    VisitRevenue float);

INSERT INTO @visits VALUES
(4479433, 32153, '2014-12-09 15:00:41.000', 'NatS', NULL),
(4479434, 32153, '2014-12-09 14:55:21.000', 'PC',   NULL),
(4479435, 32153, '2014-12-09 15:09:01.000', 'UnRef',NULL),
(4755575, 32150, '2014-12-07 16:41:24.000', 'NatS', NULL),
(4756323, 32150, '2014-12-07 16:52:56.000', 'PC',   NULL),
(4756324, 32150, '2014-12-06 20:49:41.000', 'Dir',  NULL);

DECLARE @visitWeight TABLE (
    visitID int,
    salesID int,
    visitDate datetime,
    channel varchar(max),
    VisitRevenue float,
    visitNumber int,
    visitWeight int,
    totalWeight float,
    revenue float);

INSERT INTO @visitWeight
SELECT visitID, v.salesID, visitDate, channel, VisitRevenue,
       ROW_NUMBER() OVER (PARTITION BY v.salesID ORDER BY visitDate ASC) AS visitNumber,
       NULL, NULL, revenue
FROM @visits v
JOIN @sales s ON v.salesID = s.salesID;

-- This sets the first weight, and everything else to the middle weight for now
UPDATE vw
SET visitWeight = CASE WHEN visitNumber = 1 THEN WeightFirst ELSE WeightMiddle END
FROM @visitWeight vw
JOIN @weight w ON vw.channel = w.channel;

-- This sets the last weight (note the join must match on salesID as well as visitNumber)
UPDATE vw
SET visitWeight = WeightLast
FROM @visitWeight vw
JOIN (SELECT salesID, MAX(visitNumber) AS maxVisit
      FROM @visitWeight
      GROUP BY salesID) AS t
  ON t.salesID = vw.salesID AND t.maxVisit = vw.visitNumber
JOIN @weight w ON vw.channel = w.channel;

-- Apportion the revenue: revenue / total weight * this visit's weight
UPDATE vw
SET totalWeight = s.sumWeight,
    VisitRevenue = revenue / s.sumWeight * visitWeight
FROM @visitWeight vw
JOIN (SELECT salesID, SUM(visitWeight) AS sumWeight
      FROM @visitWeight
      GROUP BY salesID) AS s
  ON s.salesID = vw.salesID;

SELECT * FROM @visitWeight ORDER BY salesID, visitDate;
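The same first/middle/last assignment can be prototyped procedurally. This Python sketch (illustrative only, mirroring the table-variable data above) reproduces the apportionment for both sales:

```python
from collections import defaultdict

# In-memory copies of the three tables from the question.
weight = {  # channel -> (WeightFirst, WeightMiddle, WeightLast)
    "Dir": (40, 45, 50), "NatS": (0, 0, 0),
    "PC": (20, 25, 30), "UnRef": (40, 45, 50),
}
sales = {32150: 1600.00, 32153: 516.00}
visits = [  # (visitID, saleID, visitDate, channel)
    (4479433, 32153, "2014-12-09 15:00:41", "NatS"),
    (4479434, 32153, "2014-12-09 14:55:21", "PC"),
    (4479435, 32153, "2014-12-09 15:09:01", "UnRef"),
    (4755575, 32150, "2014-12-07 16:41:24", "NatS"),
    (4756323, 32150, "2014-12-07 16:52:56", "PC"),
    (4756324, 32150, "2014-12-06 20:49:41", "Dir"),
]

by_sale = defaultdict(list)
for v in visits:
    by_sale[v[1]].append(v)

visit_revenue = {}
for sale_id, vs in by_sale.items():
    vs.sort(key=lambda v: v[2])  # ORDER BY visitDate
    # first visit -> WeightFirst, last -> WeightLast, everything else -> WeightMiddle
    w = [weight[v[3]][0 if i == 0 else (2 if i == len(vs) - 1 else 1)]
         for i, v in enumerate(vs)]
    total = sum(w)
    for (visit_id, *_), wt in zip(vs, w):
        visit_revenue[visit_id] = sales[sale_id] / total * wt
```

For each sale the shares sum back to the sale's revenue, matching the worked example in the question.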
I have 2 tables. Table A has Date, ISBN (for Book), Demand(demand for that date). Table B has Date, ISBN (for Book), and SalesRank.
The sample data is as follows:
The DailyBookFile table has about 150k records for each date, going back to 2010 (i.e. roughly 150k * 365 days * 8 years rows). Similarly, the SalesRank table has about 500k records for each date.
DailyBookFile
Date Isbn13 CurrentModifiedDemandTotal
20180122 9780955153075 13
20180122 9780805863567 9
20180122 9781138779396 1
20180122 9780029001516 9
20180122 9780470614150 42
SalesRank
importdate ISBN13 SalesRank
20180122 9780029001516 69499
20180122 9780470614150 52879
20180122 9780805863567 832429
20180122 9780955153075 44528
20180122 9781138779396 926435
Required Output
Date Avg_Rank Book_Group
20180122 385154 Elite
20180121 351545 Elite
20180120 201545 Elite
I want to get the top 200 CurrentModifiedDemand for each day and take the average rank.
I am unable to work out a solution as I am new to SQL.
I started by getting the top 200 CurrentModifiedDemand for yesterday and the average rank over the last year.
SELECT DBF.Filedate AS [Date],
AVG(AMA.SalesRank) AS Avg_Rank,
'Elite' AS Book_Group
FROM [ODS].[wholesale].[DailyBookFile] AS DBF
INNER JOIN [ODS].[MarketplaceMonitor].[SalesRank] AS AMA ON (DBF.Isbn13 = AMA.ISBN13
AND DBF.FileDate = AMA.importdate)
WHERE DBF.Isbn13 IN (SELECT TOP 200 Isbn13
FROM [ODS].[wholesale].[DailyBookFile]
WHERE FileDate = 20180122
AND CAST(CurrentModifiedDemandTotal AS int) > 200)
AND DBF.Filedate > 20170101
GROUP BY DBF.Filedate;
But the result is not what I want. So now I want the ISBNs for the top 200 CurrentModifiedDemand for each day and their average rank. I tried this:
DECLARE @i int;
SET @i = 20180122;
WHILE (SELECT DISTINCT(DBF.Filedate)
       FROM [ODS].[wholesale].[DailyBookFile] AS DBF
       WHERE DBF.Filedate = @i) IS NOT NULL
BEGIN
    SELECT DBF.Filedate AS [Date],
           AVG(AMA.SalesRank) AS Avg_Rank,
           'Elite' AS Book_Group
    FROM [ODS].[wholesale].[DailyBookFile] AS DBF
    INNER JOIN [ODS].[MarketplaceMonitor].[SalesRank] AS AMA ON DBF.Isbn13 = AMA.ISBN13
                                                            AND DBF.FileDate = AMA.importdate
    WHERE DBF.Isbn13 IN (SELECT TOP 200 Isbn13
                         FROM [ODS].[wholesale].[DailyBookFile]
                         WHERE FileDate = @i
                         AND CAST(CurrentModifiedDemandTotal AS int) > 500)
      AND DBF.Filedate = @i
    GROUP BY DBF.Filedate;
    SET @i = @i + 1;
END
With this I get one SELECT result set per iteration. Is there any way to have the results in a single table?
P.S. The list of top 200 books every day will change according to the CurrentModifiedDemand. I want to take their avg. sales rank for that day.
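The intent here — per day, pick the top-N books by demand, then average their ranks — can be sketched in Python (illustrative only; N = 2 so the toy data from the question suffices, where the real query would use 200):

```python
# (date, isbn) -> demand, from the DailyBookFile sample above
daily_demand = {
    (20180122, 9780955153075): 13,
    (20180122, 9780805863567): 9,
    (20180122, 9781138779396): 1,
    (20180122, 9780029001516): 9,
    (20180122, 9780470614150): 42,
}
# (date, isbn) -> sales rank, from the SalesRank sample above
sales_rank = {
    (20180122, 9780029001516): 69499,
    (20180122, 9780470614150): 52879,
    (20180122, 9780805863567): 832429,
    (20180122, 9780955153075): 44528,
    (20180122, 9781138779396): 926435,
}

def avg_rank_for_day(day, n):
    # top-n ISBNs by demand for that day...
    isbns = sorted((isbn for (d, isbn) in daily_demand if d == day),
                   key=lambda i: daily_demand[(day, i)], reverse=True)[:n]
    # ...then the average of their sales ranks
    ranks = [sales_rank[(day, i)] for i in isbns]
    return sum(ranks) / len(ranks)

print(avg_rank_for_day(20180122, 2))  # 48703.5
```

Note this selects the top N *by demand*, which is what the question asks for; a plain `SELECT TOP 200` without an `ORDER BY` does not guarantee that.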
Instead of immediately selecting in each iteration of the loop, you can insert the rows into a temp table (or table variable) and select everything after the loop finishes:
IF OBJECT_ID('tempdb..#books') IS NOT NULL
BEGIN
    DROP TABLE #books
END
CREATE TABLE #books (
    [Date] INT,
    [Avg_Rank] FLOAT,
    [Book_Group] VARCHAR(512)
);
DECLARE @i int;
SET @i = 20180122;
BEGIN TRY
    WHILE (SELECT DISTINCT(DBF.Filedate)
           FROM [ODS].[wholesale].[DailyBookFile] AS DBF
           WHERE DBF.Filedate = @i) IS NOT NULL
    BEGIN
        INSERT INTO #books (
            [Date],
            [Avg_Rank],
            [Book_Group]
        )
        SELECT DBF.Filedate AS [Date],
               AVG(AMA.SalesRank) AS Avg_Rank,
               'Elite' AS Book_Group
        FROM [ODS].[wholesale].[DailyBookFile] AS DBF
        INNER JOIN [ODS].[MarketplaceMonitor].[SalesRank] AS AMA ON DBF.Isbn13 = AMA.ISBN13
                                                                AND DBF.FileDate = AMA.importdate
        WHERE DBF.Isbn13 IN (SELECT TOP 200 Isbn13
                             FROM [ODS].[wholesale].[DailyBookFile]
                             WHERE FileDate = @i
                             AND CAST(CurrentModifiedDemandTotal AS int) > 500)
          AND DBF.Filedate = @i
        GROUP BY DBF.Filedate;
        SET @i = @i + 1;
    END
END TRY
BEGIN CATCH
    IF OBJECT_ID('tempdb..#books') IS NOT NULL
    BEGIN
        DROP TABLE #books
    END
END CATCH

SELECT *
FROM #books

DROP TABLE #books
Using a table variable would yield simpler code, but when storing large amounts of data, table variables start losing out to temp tables in performance. I'm not sure exactly where the cut-off is, but in my experience I've seen significant gains from changing a table variable to a temp table at 10,000+ row counts. For small row counts the opposite may apply.
This avoids a costly WHILE loop, and I believe achieves your goal:
CREATE TABLE #DailyBookFile ([Date] date,
Isbn13 bigint,
CurrentModifiedDemandTotal tinyint);
INSERT INTO #DailyBookFile
VALUES ('20180122',9780955153075,13),
('20180122',9780805863567,9 ),
('20180122',9781138779396,1 ),
('20180122',9780029001516,9 ),
('20180122',9780470614150,42);
CREATE TABLE #SalesRank (importdate date,
ISBN13 bigint,
SalesRank int);
INSERT INTO #SalesRank
VALUES ('20180122',9780029001516,69499 ),
('20180122',9780470614150,52879 ),
('20180122',9780805863567,832429),
('20180122',9780955153075,44528 ),
('20180122',9781138779396,926435);
GO
WITH Ranks AS(
SELECT SR.*,
RANK() OVER (PARTITION BY SR.importdate ORDER BY SR.SalesRank) AS Ranking
FROM #SalesRank SR
JOIN #DailyBookFile DBF ON SR.ISBN13 = DBF.Isbn13
AND SR.importdate = DBF.[Date])
SELECT importdate AS [Date],
AVG(SalesRank) AS Avg_rank,
'Elite' AS Book_Group
FROM Ranks
WHERE Ranking <= 200
GROUP BY importdate;
GO
DROP TABLE #DailyBookFile;
DROP TABLE #SalesRank;
This is not a homework question.
I'm trying to take the count of t-shirts in an order and see which price range the shirts fall into, depending on how many have been ordered.
My initial thought (I am brand new at this) was to check another table for whether the count exceeds the 1st price range's maximum, and if so, keep looking until it doesn't.
printing_range_max printing_price_by_range
15 4
24 3
33 2
So for example here, if the order count is 30 shirts they would be $2 each.
When I look into how to do that, it seems most people use BETWEEN or IF and hard-code the ranges instead of looking them up in another table. I imagine in a business setting it's best to keep the ranges in their own table so they can be changed more easily. Is there a good/built-in way to do this, or should I just write it with BETWEEN or IF statements?
EDIT:
SQL Server 2014
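Keeping the breaks in their own table is the right instinct; the lookup itself amounts to "find the first break point that is at least the order count". A Python sketch of that idea (illustrative only, not T-SQL), using the question's price table:

```python
import bisect

# Price breaks from the question: up to 15 shirts -> $4 each,
# 16-24 -> $3, 25-33 -> $2. range_max must be kept sorted ascending.
range_max = [15, 24, 33]
price_by_range = [4, 3, 2]

def price_for(count):
    # index of the first break point >= count
    # (counts above the largest break would need extra handling here)
    i = bisect.bisect_left(range_max, count)
    return price_by_range[i]

print(price_for(30))  # 2, matching the "30 shirts -> $2 each" example
```

Because the breaks live in data rather than in BETWEEN/IF logic, changing prices means editing one row, not the query.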
Let's say we have this table:
DECLARE @priceRanges TABLE(printing_range_max tinyint, printing_price_by_range tinyint);
INSERT @priceRanges VALUES (15, 4), (24, 3), (33, 2);
You can create a table with ranges that represent the correct price. Below is how you would do this in pre-2012 and post-2012 systems:
DECLARE @priceRanges TABLE(printing_range_max tinyint, printing_price_by_range tinyint);
INSERT @priceRanges VALUES (15, 4), (24, 3), (33, 2);

-- post-2012 using LAG
WITH pricerange AS
(
    SELECT
        printing_range_min = LAG(printing_range_max, 1, 0) OVER (ORDER BY printing_range_max),
        printing_range_max,
        printing_price_by_range
    FROM @priceRanges
)
SELECT * FROM pricerange;

-- pre-2012 using ROW_NUMBER and a self-join
WITH prices AS
(
    SELECT
        rn = ROW_NUMBER() OVER (ORDER BY printing_range_max),
        printing_range_max,
        printing_price_by_range
    FROM @priceRanges
),
pricerange AS
(
    SELECT
        printing_range_min = ISNULL(p2.printing_range_max, 0),
        printing_range_max = p1.printing_range_max,
        p1.printing_price_by_range
    FROM prices p1
    LEFT JOIN prices p2 ON p1.rn = p2.rn + 1
)
SELECT * FROM pricerange;
Both queries return:
printing_range_min printing_range_max printing_price_by_range
------------------ ------------------ -----------------------
0 15 4
15 24 3
24 33 2
Now that you have that you can use BETWEEN for your join. Here's the full solution:
-- Sample data
DECLARE @priceRanges TABLE
(
    printing_range_max tinyint,
    printing_price_by_range tinyint
    -- if you're on 2014+
    ,INDEX ix_xxx NONCLUSTERED(printing_range_max, printing_price_by_range)
    -- note: second column should be an INCLUDE but that's not supported in table variables
);
DECLARE @orders TABLE
(
    orderid int identity,
    ordercount int
    -- if you're on 2014+
    ,INDEX ix_xxy NONCLUSTERED(orderid, ordercount)
    -- note: second column should be an INCLUDE but that's not supported in table variables
);
INSERT @priceRanges VALUES (15, 4), (24, 3), (33, 2);
INSERT @orders(ordercount) VALUES (10), (20), (25), (30);

-- Solution:
WITH pricerange AS
(
    SELECT
        printing_range_min = LAG(printing_range_max, 1, 0) OVER (ORDER BY printing_range_max),
        printing_range_max,
        printing_price_by_range
    FROM @priceRanges
)
SELECT
    o.orderid,
    o.ordercount,
    --p.printing_range_min,
    --p.printing_range_max,
    p.printing_price_by_range
FROM pricerange p
JOIN @orders o ON o.ordercount BETWEEN printing_range_min AND printing_range_max
Results:
orderid ordercount printing_price_by_range
----------- ----------- -----------------------
1 10 4
2 20 3
3 25 2
4 30 2
I am trying to grab a series of dates and the corresponding values (if any) that exist in my database.
I have two parameters - today (date using getDate()) - and a number of days (integer). For this example, I'm using the value 10 for the days.
Code to get the sequential dates for 10 days after today:
SELECT top 10 DATEADD(DAY, ROW_NUMBER()
OVER (ORDER BY object_id), REPLACE(getDate(),'-','')) as Alldays
FROM sys.all_objects
I now need to look up several values for each day in the sequential-days code, which may or may not exist in the time table (we assume 8 hours for all dates unless otherwise specified). The lookup would be on the field recordDateTime. If no "hours" value exists in the cap_time table for that date, I need to return a default value of 8 as the number of hours. Here's the base query:
SELECT u.FullName as UserName, d2.department,
recordDateTime, ISNULL(hours,8) as hours
FROM cap_time c
left join user u on c.userID = u.userid
left join dept d2 on u.deptID = d2.DeptID
WHERE c.userid = 38 AND u.deptID = 1
My end result for the next 10 days should be something like:
Date (sequential), Department, UserName, Number of Hours
I can accomplish this using TSQL and a temp table, but I'd like to see if this can be done in a single statement. Any help is appreciated.
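The shape of the problem — generate a date sequence, then left-join recorded hours with a default of 8 — can be sketched in Python (illustrative only; the start date and recorded hours here are hypothetical):

```python
from datetime import date, timedelta

# Hypothetical recorded hours keyed by date; any day absent defaults to 8.
recorded = {date(2017, 4, 12): 5, date(2017, 4, 15): 12}

today = date(2017, 4, 11)  # fixed "today" so the output is predictable
days = 10                  # the integer parameter from the question

# One (date, hours) pair per day, like the LEFT JOIN + ISNULL(hours, 8)
schedule = [(today + timedelta(days=i + 1),
             recorded.get(today + timedelta(days=i + 1), 8))
            for i in range(days)]
```

The `dict.get(key, 8)` default plays the same role as `ISNULL(hours, 8)` after the left join.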
Without any DDL or sample data it's hard to determine exactly what you need.
I think this will get you pretty close (note my comments):
-- sample data
------------------------------------------------------------------------------------------
DECLARE @table TABLE
(
    fullName varchar(10),
    department varchar(10),
    [hours] tinyint,
    somedate date
);
INSERT @table VALUES
('bob', 'sales', 5, getdate()+1),
('Sue', 'marketing', 3, getdate()+2),
('Sue', 'sales', 12, getdate()+4),
('Craig', 'sales', 4, getdate()+8),
('Joe', 'sales', 18, getdate()+9),
('Fred', 'sales', 10, getdate()+10);
--SELECT * FROM @table
;
-- solution
------------------------------------------------------------------------------------------
WITH alldays([day]) AS -- logic to get your dates for a LEFT date table
(
SELECT TOP (10)
CAST(DATEADD
(
DAY,
ROW_NUMBER() OVER (ORDER BY object_id),
getdate()
) AS date)
FROM sys.all_objects
)
SELECT d.[day], t.fullName, department, [hours] = ISNULL([hours], 8)
FROM alldays d
LEFT JOIN @table t ON d.[day] = t.somedate;
Results:
day fullName department hours
---------- ---------- ---------- -----
2017-04-12 bob sales 5
2017-04-13 Sue marketing 3
2017-04-14 NULL NULL 8
2017-04-15 Sue sales 12
2017-04-16 NULL NULL 8
2017-04-17 NULL NULL 8
2017-04-18 NULL NULL 8
2017-04-19 Craig sales 4
2017-04-20 Joe sales 18
2017-04-21 Fred sales 10
Maybe a subquery and the IN operator, like:
SELECT u.FullName as UserName, d2.department,
recordDateTime, ISNULL(hours,8) as hours
FROM cap_time c
left join user u on c.userID = u.userid
left join dept d2 on u.deptID = d2.DeptID
WHERE c.userid = 38 AND u.deptID = 1 and recordDateTime in
(SELECT top 10 DATEADD(DAY, ROW_NUMBER()
OVER (ORDER BY object_id), REPLACE(getDate(),'-','')) as Alldays
FROM sys.all_objects)
I have 2 tables: budget and budget_rate:
Budget Table
resource period hours dollars
-------- ------ ----- -------
ADMIN03 01/31/16 160 8000
ADMIN03 02/28/16 150 7500
Rate Table
resource rate eff_date
-------- ---- --------
ADMIN03 50.00 01/01/16
ADMIN03 52.50 01/01/17
When the rates change in the rate table, I need to update the budget based on the rate that matches the resource name and is the first rate record earlier than the budget record.
Can this be accomplished with a single UPDATE?
Something like:
update b
set b.dollars = b.hours*r.rate
from
budget b join rate r on
b.resource = r.resource and
b.period >= r.eff_date
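The matching rule — for each budget row, the latest rate whose eff_date is on or before the period — can be sketched procedurally first (an illustrative Python sketch using the two rates from the question, not the UPDATE itself):

```python
from datetime import date

# Rate history for one resource, kept sorted by effective date ascending.
rates = [(date(2016, 1, 1), 50.00), (date(2017, 1, 1), 52.50)]

def rate_for(period):
    # latest rate effective on or before the budget period
    applicable = [r for eff, r in rates if eff <= period]
    return applicable[-1] if applicable else None

# 160 hours in Jan 2016 at the 01/01/16 rate reproduces the $8000 budget row.
print(160 * rate_for(date(2016, 1, 31)))  # 8000.0
```

This is why the plain `b.period >= r.eff_date` join above is not enough on its own: a 2017 budget row would match *both* rate rows, so the query needs to pick only the latest applicable one.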
I assume the rate table is relatively small, so I would recalculate it to have range columns.
with oRates as (
select resource,
rate,
eff_date,
ROW_NUMBER() over(partition by resource order by eff_date desc) rn
from Rates
),
pRates as (
select r1.resource,
r1.rate,
r1.eff_date from_date,
isnull(r2.eff_date,'2070-01-01') to_date
from oRates r1
left join oRates r2 on (r1.rn = r2.rn+1)
)
update b
set dollars = hours * r.rate
from Budget b
join pRates r on (b.resource = r.resource
and b.period >= from_date
and b.period < to_date)
One possible solution is using a computed column instead of some sort of manual update.
An example of how this could be done can be seen here: formula for computed column based on different table's column
For a working example with your data, you'd create a function like this:
CREATE FUNCTION dbo.ufn_BudgetDollars (@resource NVARCHAR(255), @date DATE, @hours INT)
RETURNS DECIMAL(10, 2)
AS BEGIN
    DECLARE @out DECIMAL(10, 2);
    SELECT @out = @hours * rate
    FROM (
        SELECT rate, ROW_NUMBER() OVER (ORDER BY eff_date DESC) rn
        FROM tblRate
        WHERE eff_date <= @date
        AND resource = @resource) T
    WHERE rn = 1;
    RETURN @out;
END
GO
When you've created your function, you would want to drop and recreate the Dollars column on the budget table...
ALTER TABLE tblBudget DROP COLUMN Dollars;
ALTER TABLE tblBudget ADD Dollars AS dbo.ufn_BudgetDollars(resource, Period, Hours);
GO
Here is the dilemma I am having...
I have 2 tables:
create table #orders
(orderNumber int,qty int,sku varchar(250),barcode varchar(250))
create table #allItemsInBox
([id] int,[date] date,[localsku] varchar(250),[box] varchar(250),barcode varchar(250))
I need to join the 2 tables on [barcode] and have only 1 result in the final table for every row in #allItemsInBox.
Please note [#allItemsInBox].[id] is unique; the other fields in [#allItemsInBox] may not be.
How would I go about doing something like this?
sample data:
[#orders]
(1,0,'10','10')
(1,0,'20','20')
(3,0,'20','20')
(4,0,'30','30')
(5,0,'40','40')
(6,0,'50','50')
#allItemsInBox
(1,'12/3/2014',10,'Box1',10)
(2,'12/2/2014',20,'Box2',20)
(3,'12/1/2014',20,'Box3',20)
(4,'11/30/2014',20,'Box4',20)
(5,'11/29/2014',30,'Box5',30)
(6,'11/28/2014',40,'Box6',40)
(7,'11/27/2014',60,'Box8',60)
(8,'11/27/2014',50,'Box10',50)
#output
(ordernumber int,uniqueitemID int,localsku varchar(250),box varchar(250))
(1,1,10,'Box1')
(1,2,20,'Box2')
(3,3,20,'Box3')
(4,5,30,'Box5')
(5,6,40,'Box6')
(6,8,50,'Box10')
This is quick but it works. Depending on the size of your data this might not be the best way performance-wise, but it will give you a start.
DECLARE @orders TABLE (
    orderNumber int,
    qty int,
    sku varchar(250),
    barcode varchar(250)
)
DECLARE @allItemsInBox TABLE (
    [id] int,
    [date] date,
    [localsku] varchar(250),
    [box] varchar(250),
    barcode varchar(250)
)
INSERT INTO @orders VALUES
(1,0,'10','10'),
(1,0,'20','20'),
(3,0,'20','20'),
(4,0,'30','30'),
(5,0,'40','40'),
(6,0,'50','50')
INSERT INTO @allItemsInBox VALUES
(1,'2014-12-03',10,'Box1',10),
(2,'2014-12-02',20,'Box2',20),
(3,'2014-12-01',20,'Box3',20),
(4,'2014-11-30',20,'Box4',20),
(5,'2014-11-29',30,'Box5',30),
(6,'2014-11-28',40,'Box6',40),
(7,'2014-11-27',60,'Box8',60),
(8,'2014-11-27',50,'Box10',50)
SELECT
    orders.orderNumber AS ordernumber
    ,(SELECT TOP 1 allItems.id FROM @allItemsInBox allItems WHERE allItems.barcode = orders.barcode AND allItems.id >= orders.orderNumber ORDER BY allItems.id) AS uniqueitemID
    ,(SELECT TOP 1 allItems.localsku FROM @allItemsInBox allItems WHERE allItems.barcode = orders.barcode AND allItems.id >= orders.orderNumber ORDER BY allItems.id) AS localsku
    ,(SELECT TOP 1 allItems.box FROM @allItemsInBox allItems WHERE allItems.barcode = orders.barcode AND allItems.id >= orders.orderNumber ORDER BY allItems.id) AS box
FROM
    @orders orders
Results in:
ordernumber uniqueitemID localsku box
1 1 10 Box1
1 2 20 Box2
3 3 20 Box3
4 5 30 Box5
5 6 40 Box6
6 8 50 Box10
Edit: I updated the answer; you now get the same output as you specified in your example/question.
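In procedural terms, the three correlated TOP 1 subqueries all select the same row: the lowest-id box whose barcode matches and whose id is at least the order number. A Python sketch of that rule (illustrative only) reproduces the output above:

```python
orders = [  # (orderNumber, qty, sku, barcode)
    (1, 0, '10', '10'), (1, 0, '20', '20'), (3, 0, '20', '20'),
    (4, 0, '30', '30'), (5, 0, '40', '40'), (6, 0, '50', '50'),
]
boxes = [  # (id, date, localsku, box, barcode), kept ordered by id
    (1, '2014-12-03', '10', 'Box1', '10'),
    (2, '2014-12-02', '20', 'Box2', '20'),
    (3, '2014-12-01', '20', 'Box3', '20'),
    (4, '2014-11-30', '20', 'Box4', '20'),
    (5, '2014-11-29', '30', 'Box5', '30'),
    (6, '2014-11-28', '40', 'Box6', '40'),
    (7, '2014-11-27', '60', 'Box8', '60'),
    (8, '2014-11-27', '50', 'Box10', '50'),
]

output = []
for order_no, _, _, barcode in orders:
    # same predicate as each TOP 1 subquery: matching barcode,
    # id >= orderNumber, lowest id first
    match = next((b for b in boxes
                  if b[4] == barcode and b[0] >= order_no), None)
    if match:
        output.append((order_no, match[0], match[2], match[3]))
```

Doing the lookup once per order row also shows why the SQL version repeats the identical predicate three times: each subquery can return only one column, but all three must agree on the chosen row.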