T-SQL - Filling in the gaps in running balance - sql-server

I am working on a Data Warehouse project and the client provides daily sales data. On-hand quantities are provided in most lines but are often missing. I need help on how to fill those missing values based on prior OH and sales information.
Here's a sample data:
Line# Store Item OnHand SalesUnits DateKey
-----------------------------------------------
1 001 A 100 20 1
2 001 A 80 10 2
3 001 A null 30 3 --[OH updated with 70 (80-10)]
4 001 A null 5 4 --[OH updated with 40 (70-30)]
5 001 A 150 10 5 --[OH untouched]
6 001 B null 4 1 --[OH untouched - new item]
7 001 B 80 12 2
8 001 B null 10 3 --[OH updated with 68 (80-12]
Lines 1 and 2 are not to be updated because OnHand quantities exist.
Lines 3 and 4 are to be updated based on their preceding rows.
Line 5 is to be left untouched because OnHand is provided.
Line 6 is to be left untouched because it is the first row for Item B
Is there a way I can do this in a set operation? I know I can do it easily using a fast_forward cursor but it will take a long time (15M+ rows).
Thanks for your help!

Test data:
declare #t table(
Line# int, Store char(3), Item char, OnHand int, SalesUnits int, DateKey int
)
insert #t values
(1, '001', 'A', 100, 20, 1),
(2, '001', 'A', 80 , 10, 2),
(3, '001', 'A', null, 30, 3),
(4, '001', 'A', null, 5, 4),
(5, '001', 'A', 150, 10, 5),
(6, '001', 'B', null, 4, 1),
(7, '001', 'B', null, 4, 2),
(8, '001', 'B', 80, 12, 3),
(9, '001', 'B', null, 10, 4)
Script to populate not using cursor:
;with a as
(
select Line#, Store, Item, OnHand, SalesUnits, DateKey, 1 correctdata from #t where DateKey = 1
union all
select t.Line#, t.Store, t.Item, coalesce(t.OnHand, a.onhand - a.salesunits), t.SalesUnits, t.DateKey, t.OnHand from #t t
join a on a.DateKey = t.datekey - 1 and a.item = t.item and a.store = t.store
)
update t
set OnHand = a.onhand
from #t t join a on a.line# = t.line#
where a.correctdata is null
Script to populate using cursor:
declare #datekey int, #store int, #item char, #Onhand int,
#calculatedonhand int, #salesunits int, #laststore int, #lastitem char
DECLARE sales_cursor
CURSOR FOR
SELECT datekey+1, store, item, OnHand -SalesUnits, salesunits
FROM #t sales
order by store, item, datekey
OPEN sales_cursor;
FETCH NEXT FROM sales_cursor
INTO #datekey, #store, #item, #Onhand, #salesunits
WHILE ##FETCH_STATUS = 0
BEGIN
SELECT #calculatedonhand = case when #laststore = #store and #lastitem = #item
then coalesce(#onhand, #calculatedonhand - #salesunits) else null end
,#laststore = #store, #lastitem = #item
UPDATE s
SET onhand=#calculatedonhand
FROM #t s
WHERE datekey = #datekey and #store = store and #item = item
and onhand is null and #calculatedonhand is not null
FETCH NEXT FROM sales_cursor
INTO #datekey, #store, #item, #Onhand, #salesunits
END
CLOSE sales_cursor;
DEALLOCATE sales_cursor;
I recommand you use the cursor version, I doubt you can get a decent performance using the recursive query. I know people in here hate cursors, but when your table has that size, it can be the only solution.

Related

Search child rows for values

I have something like this:
Transaction Customer
1 Cust1
2 Cust2
3 Cust3
4 Cust4
TransID Code
2 A
2 B
2 D
3 A
4 B
4 C
If I want to be able to do something like "IF Customer 'Cust1' Has code 'A'", how should I best build a view? I want to end up being able to query something like "Select Customer from View where Code in [Some list of codes]" OR "Select Cust1 from View Having Codes in [Some list of codes]"
While I can do something like
Customer | Codes
Cust1 | A, B, D
Etc.
SELECT Transaction from Tbl where Codes like 'A'
This seems to me to be an impractical way to do it.
Here's how I'd do it
;with xact_cust (xact, cust) as
(
select 1, 'cust1' union all
select 2, 'cust2' union all
select 3, 'cust3' union all
select 4, 'cust4'
), xact_code (xact, code) as
(
select 2, 'A' union all
select 2, 'B' union all
select 2, 'D' union all
select 3, 'A' union all
select 4, 'B' union all
select 4, 'C'
)
select Cust, Code
from xact_cust cust
inner join xact_code code
on cust.xact = code.xact
where exists (select 1
from xact_code i
where i.xact = code.xact
and i.code = 'A')
If you NEED the codes serialized into a delimited list, take a look at this article: What this query does to create comma delimited list SQL Server?
Here's another option...
IF OBJECT_ID('tempdb..#CustomerTransaction', 'U') IS NOT NULL
DROP TABLE #CustomerTransaction;
CREATE TABLE #CustomerTransaction (
TransactionID INT NOT NULL PRIMARY KEY,
Customer CHAR(5) NOT NULL
);
INSERT #CustomerTransaction (TransactionID, Customer) VALUES
(1, 'Cust1'), (2, 'Cust2'), (3, 'Cust3'),
(4, 'Cust4'), (5, 'Cust5');
IF OBJECT_ID('tempdb..#TransactionCode', 'U') IS NOT NULL
DROP TABLE #TransactionCode;
CREATE TABLE #TransactionCode (
TransactionID INT NOT NULL,
Code CHAR(1) NOT NULL
);
INSERT #TransactionCode (TransactionID, Code) VALUES
(2, 'A'), (2, 'B'), (2, 'D'), (3, 'A'), (4, 'B'), (4, 'C');
--SELECT * FROM #CustomerTransaction ct;
--SELECT * FROM #TransactionCode tc;
--=============================================================
SELECT
ct.TransactionID,
ct.Customer,
CodeList = STUFF(tcx.CodeList, 1, 1, '')
FROM
#CustomerTransaction ct
CROSS APPLY (
SELECT
', ' + tc.Code
FROM
#TransactionCode tc
WHERE
ct.TransactionID = tc.TransactionID
ORDER BY
tc.Code ASC
FOR XML PATH('')
) tcx (CodeList);
Results...
TransactionID Customer CodeList
------------- -------- -----------
1 Cust1 NULL
2 Cust2 A, B, D
3 Cust3 A
4 Cust4 B, C
5 Cust5 NULL

How to write a case when statement when there are overlaps in T-SQL

I have a table like this
How can I group it to this
Small is the sum of the count when Count <25; Large is the sum of the count when Count>=25; Total is the sum of all counts.
Try it like this...
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
DROP TABLE #TestData;
CREATE TABLE #TestData (
ID INT NOT NULL PRIMARY KEY,
nCount int NOT NULL
);
INSERT #TestData (ID, nCount) VALUES
(1, 10), (2, 15), (3, 22), (4, 23),
(5, 25), (6, 27), (7, 30);
--=====================================
WITH
cte_Totals AS (
SELECT
Total = SUM(td.nCount),
Small = SUM(CASE WHEN td.nCount < 25 THEN td.nCount ELSE 0 END),
Large = SUM(CASE WHEN td.nCount >= 25 THEN td.nCount ELSE 0 END)
FROM
#TestData td
)
SELECT
x.[Group],
x.[Count]
FROM
cte_Totals t
CROSS APPLY (VALUES (1, 'Total', t.Total), (2, 'Small', t.Small), (3, 'Large', t.Large) ) x (SortBy, [Group],[Count])
ORDER BY
x.SortBy;
Results...
Group Count
----- -----------
Total 152
Small 70
Large 82
HTH,
Jason
The simplest way is to use CASE:
SELECT
SUM(Count) as Total,
SUM(CASE WHEN Count < 25 THEN Count ELSE 0 END) as Small,
SUM(CASE WHEN Count >= 25 THEN Count ELSE 0 END) as Large
FROM table
Late answer (keep the accepted as is), but I did want to introduce a concept which may be more helpful down the line.
I maintain a generic Tier Table. The following is a simplified example, but you can take the aggregation tiers out of the code, and put it in a table... things change, and you can serve multiple masters.
Sample Data
Declare #YourTable table (ID int,[Count] int)
Insert Into #YourTable values
(1, 10), (2, 15), (3, 22), (4, 23), (5, 25), (6, 27), (7, 30)
Declare #Tier table (Tier varchar(50),Seq int,Title varchar(50),R1 int,R2 int)
Insert Into #Tier values
('MyGroup',1,'Total',0,99999)
,('MyGroup',2,'Small',0,25)
,('MyGroup',3,'Large',25,99999)
The Actual Query
Select T.Title
,[Count] = sum(D.[Count])
From #Tier T
Join #YourTable D on (T.Tier='MyGroup' and D.Count >= T.R1 and D.Count<T.R2)
Group By T.Title,T.Seq
Order By T.Seq
Returns
Title Count
Total 152
Small 70
Large 82
EDIT - There are many ways you can construct this
Example
Declare #YourTable table (ID varchar(50),[Count] int)
Insert Into #YourTable values
('Tywin', 10), ('Tywin', 15), ('Tyrion', 22), ('Bran', 23), ('Ned', 25), ('John', 27), ('Robb', 30)
Declare #Tier table (Tier varchar(50),Seq int,Title varchar(50),R1 int,R2 int,C1 varchar(50),C2 varchar(50))
Insert Into #Tier values
('MyGroup',1,'Total' ,null,null,'a','z')
,('MyGroup',2,'Group 1',null,null,'Tywin,Tyrion',null)
,('MyGroup',3,'Group 2',null,null,'Bran,Ned,John,Robb',null)
Select T.Title
,[Count] = sum(D.[Count])
From #Tier T
Join #YourTable D on T.Tier='MyGroup' and (D.ID between C1 and C2 or patindex('%,'+D.ID+',%',','+C1+',')>0)
Group By T.Title,T.Seq
Order By T.Seq
Returns
Title Count
Total 152
Group 1 47
Group 2 105

In T-SQL is there a built-in command to determine if a number is in a range from another table

This is not a homework question.
I'm trying to take the count of t-shirts in an order and see which price range the shirts fall into, depending on how many have been ordered.
My initial thought (I am brand new at this) was to ask another table if count > 1st price range's maximum, and if so, keep looking until it's not.
printing_range_max printing_price_by_range
15 4
24 3
33 2
So for example here, if the order count is 30 shirts they would be $2 each.
When I'm looking into how to do that, it looks like most people are using BETWEEN or IF and hard-coding the ranges instead of looking in another table. I imagine in a business setting it's best to be able to leave the range in its own table so it can be changed more easily. Is there a good/built-in way to do this or should I just write it in with a BETWEEN command or IF statements?
EDIT:
SQL Server 2014
Let's say we have this table:
DECLARE #priceRanges TABLE(printing_range_max tinyint, printing_price_by_range tinyint);
INSERT #priceRanges VALUES (15, 4), (24, 3), (33, 2);
You can create a table with ranges that represent the correct price. Below is how you would do this in pre-2012 and post-2012 systems:
DECLARE #priceRanges TABLE(printing_range_max tinyint, printing_price_by_range tinyint);
INSERT #priceRanges VALUES (15, 4), (24, 3), (33, 2);
-- post-2012 using LAG
WITH pricerange AS
(
SELECT
printing_range_min = LAG(printing_range_max, 1, 0) OVER (ORDER BY printing_range_max),
printing_range_max,
printing_price_by_range
FROM #priceRanges
)
SELECT * FROM pricerange;
-- pre-2012 using ROW_NUMBER and a self-join
WITH prices AS
(
SELECT
rn = ROW_NUMBER() OVER (ORDER BY printing_range_max),
printing_range_max,
printing_price_by_range
FROM #priceRanges
),
pricerange As
(
SELECT
printing_range_min = ISNULL(p2.printing_range_max, 0),
printing_range_max = p1.printing_range_max,
p1.printing_price_by_range
FROM prices p1
LEFT JOIN prices p2 ON p1.rn = p2.rn+1
)
SELECT * FROM pricerange;
Both queries return:
printing_range_min printing_range_max printing_price_by_range
------------------ ------------------ -----------------------
0 15 4
15 24 3
24 33 2
Now that you have that you can use BETWEEN for your join. Here's the full solution:
-- Sample data
DECLARE #priceRanges TABLE
(
printing_range_max tinyint,
printing_price_by_range tinyint
-- if you're on 2014+
,INDEX ix_xxx NONCLUSTERED(printing_range_max, printing_price_by_range)
-- note: second column should be an INCLUDE but not supported in table variables
);
DECLARE #orders TABLE
(
orderid int identity,
ordercount int
-- if you're on 2014+
,INDEX ix_xxy NONCLUSTERED(orderid, ordercount)
-- note: second column should be an INCLUDE but not supported in table variables
);
INSERT #priceRanges VALUES (15, 4), (24, 3), (33, 2);
INSERT #orders(ordercount) VALUES (10), (20), (25), (30);
-- Solution:
WITH pricerange AS
(
SELECT
printing_range_min = LAG(printing_range_max, 1, 0) OVER (ORDER BY printing_range_max),
printing_range_max,
printing_price_by_range
FROM #priceRanges
)
SELECT
o.orderid,
o.ordercount,
--p.printing_range_min,
--p.printing_range_max
p.printing_price_by_range
FROM pricerange p
JOIN #orders o ON o.ordercount BETWEEN printing_range_min AND printing_range_max
Results:
orderid ordercount printing_price_by_range
----------- ----------- -----------------------
1 10 4
2 20 3
3 25 2
4 30 2
Now that we have that we can

How do you validate that range doesn't overlap in a list of data?

I have a list of data :
Id StartAge EndAge Amount
1 0 2 50
2 2 5 100
3 5 10 150
4 6 9 160
I have to set Amount for various age group.
The age group >0 and <=2 need to pay 50
The age group >2 and <=5 need to pay 100
The age group >5 and <=10 need to pay 150
But
The age group >6 and <=9 need to pay 160 is an invalid input because >6 and <=9 already exist on 150 amount range.
I have to validate such kind of invalid input before inserting my data as a bulk.Once 5-10 range gets inserted anything that is within this range should not be accepted by system. For example: In above list, user should be allowed to insert 10-15 age group but any of the following should be checked as invalid.
6-9
6-11
3-5
5-7
If Invalid Input exists on my list I don't need to insert the list.
You could try to insert your data to the temporary table first.
DECLARE #TempData TABLE
(
[Id] TINYINT
,[StartAge] TINYINT
,[EndAge] TINYINT
,[Amount] TINYINT
);
INSERT INTO #TempData ([Id], [StartAge], [EndAge], [Amount])
VALUES (1, 0, 2, 50)
,(2, 2, 5, 100)
,(3, 5, 10, 150)
,(4, 6, 9, 160);
Then, this data will be transferred to your target table using INSERT INTO... SELECT... statement.
INSERT INTO <your target table>
SELECT * FROM #TempData s
WHERE
NOT EXISTS (
SELECT 1
FROM #TempData t
WHERE
t.[Id] < s.[Id]
AND s.[StartAge] < t.[EndAge]
AND s.[EndAge] > t.[StartAge]
);
I've created a demo here
We can use recursive CTE to find how records are chained by end age and start age pairs:
DECLARE #DataSource TABLE
(
[Id] TINYINT
,[StartAge] TINYINT
,[EndAge] TINYINT
,[Amount] TINYINT
);
INSERT INTO #DataSource ([Id], [StartAge], [EndAge], [Amount])
VALUES (1, 0, 2, 50)
,(2, 2, 5, 100)
,(3, 5, 10, 150)
,(4, 6, 9, 160)
,(5, 6, 11, 160)
,(6, 3, 5, 160)
,(7, 5, 7, 160)
,(9, 10, 15, 20)
,(8, 7, 15, 20);
WITH PreDataSource AS (
SELECT *, ROW_NUMBER() OVER (PARTITION BY [StartAge] ORDER BY [id]) as [pos]
FROM #DataSource
), DataSource AS
(
SELECT [Id], [StartAge], [EndAge], [Amount], [pos]
FROM PreDataSource
WHERE [id] = 1
UNION ALL
SELECT R.[Id], R.[StartAge], R.[EndAge], R.[Amount], R.[pos]
FROM DataSource A
INNER JOIN PreDataSource R
ON A.[Id] < R.[Id]
AND A.[EndAge] = R.[StartAge]
AND R.[pos] =1
)
SELECT [Id], [StartAge], [EndAge], [Amount]
FROM DataSource;
This is giving us, the following output:
Note, that before this, we are using the following statement to prepare the data:
SELECT *, ROW_NUMBER() OVER (PARTITION BY [StartAge] ORDER BY [id]) as [pos]
FROM #DataSource;
The idea is to find records with same start age and to calculated which one is inserted first. Then, in the CTE we are getting only the first.
Assuming you are bulk inserting the mentioned data into a temp table(#tmp) or table variable (#tmp).
If you are working on sql server 2012 try the below.
select *
from(select *,lag(endage,1,0)over(order by endage) as [col1]
from #tmp)tmp
where startage>=col1 and endage>col1
The result of this query should be inserted into your main table.

Creating blocks within a CTE - SQL Server

I am trying to work out how I can tag unique (what i am calling) blocks (or segments if you will) which have a start and end based consecutive 'Trip' rows ordered by 'epoch' sharing the same 'code'. In this case group by 'trip', 'code' will not work as I need to measure the duration of the 'code' remains constant for the trip. I've tried to use a CTE but I have been unable to partition the data in such a way that it gives desired result shown below. The block number I've shown could be any value, just so long as it is unique so that it tags the consecutive occurrences of the same 'code' on the trip in order of 'epoch'.
Any ideas?
declare #data table (id int, trip int, code int NULL, epoch int, value1 int, value2 int);
insert into #data (id, trip, code, epoch, value1, value2)
values
(1, 1, null, 31631613, 0, 0),
(2, 2, 1, 31631614, 10, 40),
(3, 1, 1, 31631616, 10, 60),
(4, 1, 1, 31631617, 40, 60),
(5, 2, 1, 31631617, 23, 40),
(6, 2, 2, 31631620, 27, 40),
(7, 2, 2, 31631629, 23, 40),
(9, 1, 1, 31631618, 39, 60),
(10, 1, null, 31631621, 38, 60),
(12, 1, null, 31631625, 37, 60),
(15, 1, null, 31631627, 35, 60),
(19, 1, 1, 31631630, 39, 60),
(20, 1, 1, 31631632, 40, 60),
(21, 2, 1, 31631629, 23, 40);
block id trip code epoch value1 value2
1 1 1 NULL 31631613 0 0
2 2 2 1 31631614 10 40
2 5 2 1 31631617 23 40
3 3 1 1 31631616 10 60
3 4 1 1 31631617 40 60
3 9 1 1 31631618 39 60
4 6 2 2 31631620 27 40
4 7 2 2 31631629 23 40
5 10 1 NULL 31631621 38 60
5 12 1 NULL 31631625 37 60
5 15 1 NULL 31631627 35 60
6 19 1 1 31631630 39 60
6 20 1 1 31631632 40 60
7 21 2 1 31631629 23 40
You didn't update your expected output so I'm still not 100% sure this is what you want, but give it a try...
SELECT
DENSE_RANK() OVER (ORDER BY trip, code),
*
FROM
#data
ORDER BY
trip, code, epoch
Ok, it's far from perfect by any means but it is a starter that at least identifies the start and end of a contiguous block where the 'code' has remained the same for the trip. For the sake of at least contributing something I'll post what I jerried up. If I ever get time to do a proper job I'll post it.
declare #minint int; set #minint = -2147483648;
declare #maxint int; set #maxint = 2147483647;
declare #id_data table (pk int IDENTITY(1,1), id int, trip int, code int NULL, epoch int, value1 int, value2 int);
insert into #id_data VALUES(#minint, #minint, #minint, #minint, #minint, #minint);
insert into #id_data
SELECT id, trip, coalesce(code,0), epoch, value1, value2
FROM #data
order by trip, epoch, code;
insert into #id_data VALUES(#maxint, #maxint, #maxint, #maxint, #maxint, #maxint);
WITH CTE as
(
SELECT pk, id, trip, code, epoch, value1, value2, ROW_NUMBER() OVER (PARTITION BY trip ORDER BY epoch) as row_num
FROM #id_data
)
SELECT B.*, A.code, C.min_next_code
FROM CTE A
INNER JOIN CTE B ON (B.pk = A.pk + 1) AND (A.code != B.code) -- SELECTS THE RECORDS THAT START A NEW GROUP
OUTER APPLY (
SELECT min_next_code = MIN(pk) - 1 -- LOCATION OF NEXT GROUP
FROM CTE
WHERE pk > B.pk AND (trip = B.trip) AND (code != B.code)
) C
WHERE B.id < #maxint

Resources