SQL Server Recursive query to reverse engineer - sql-server

Hi I need to write a query to back populate the CurrentQuantity field in the below table. I was originally thinking to use the while loop but I am wondering whether there is a way to write a recursive query to achieve this.
Basically, the CurrentQuantity of a record is the CurrentQuantity of the next available record plus the negated QuantityChanged of the current record. So in this scenario, the CurrentQuantity for ID -11 is the CurrentQuantity of ID 20 (which is -45) plus the negated QuantityChanged of the current record (-5 * -1 = 5), and therefore it becomes -40.
ID column is always incremental so we can make an assumption on this.
I have been trying to write the recursive query using CTE but not sure where to start really... Any help will be highly appreciated !

Not so great because your data is not really related. Managed to do it though - example code:
-- Sample data: only the newest row (id = 20) has a known currentQuantity.
create table #data (id int, currentQuantity int, quantityChanged int);
insert into #data (id, currentQuantity, quantityChanged) values (-30, NULL, -10);
insert into #data (id, currentQuantity, quantityChanged) values (-24, NULL, -10);
insert into #data (id, currentQuantity, quantityChanged) values (-22, NULL, -10);
insert into #data (id, currentQuantity, quantityChanged) values (-11, NULL, -5);
insert into #data (id, currentQuantity, quantityChanged) values (20, -45, -5);

-- State before the back-population.
select * from #data;

-- Walk backwards from the anchor row (id = 20), one row per recursion step.
-- "level" is the row's distance from the anchor when ordered by id descending,
-- so each step joins to exactly one row (level + 1). Joining on "id < anchor id"
-- instead would generate every possible path and grow exponentially.
;with ordered as (
    select id,
           currentQuantity,
           quantityChanged,
           row_number() over (order by id desc) - 1 as level
    from #data
    where id <= 20
),
rcte as (
    -- Anchor: the row whose currentQuantity is already known.
    select id, currentQuantity, quantityChanged, level
    from ordered
    where id = 20
    union all
    -- Previous row = quantity of the following row + negated change of this row.
    select o.id,
           r.currentQuantity + (o.quantityChanged * -1),
           o.quantityChanged,
           o.level
    from ordered o
    inner join rcte r
        on o.level = r.level + 1
)
update d
set currentQuantity = r.currentQuantity
from #data d
inner join rcte r
    on d.id = r.id
-- One recursion per row: lift the default limit of 100 so larger tables work.
option (maxrecursion 0);

-- State after the back-population.
select * from #data;
That should produce what you are looking for.
NB: If you are using SQL Server 2012 or later you could use the LEAD/LAG functions to do this as well.

Related

How to rank one data set using another in SQL?

I have a table with float values. I want to use a subset (A) of those values to create ranks using PERCENT_RANK(). Then I want to assign ranks to a second (non-intersecting) subset (B) of values in the table based on the ranks derived from the first subset (A). Simply joining on values from (B) to values in (A) won't work since the values from subset (B) in general won't equal values in subset (A). In that case, I'm fine using either a "closest value" approach or a "linear interpolation" approach to get the ranks. My preference is for speed and simplicity since I'm dealing with hundreds of thousands of rows.
Here is a concrete example (assume subset A is where Flag = 0 and subset B is where Flag = 1):
-- Sample data for the question: Flag = 0 rows form subset A (the ranking
-- basis), Flag = 1 rows form subset B (the values that need a rank).
-- Table variables are declared with the @ sigil.
DECLARE @Data TABLE
(
Value FLOAT,
Flag BIT
)
INSERT INTO @Data (Value, Flag) VALUES (0.081, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.831, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.798, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.722, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.322, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.186, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.494, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.757, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.996, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.146, 0)
INSERT INTO @Data (Value, Flag) VALUES (0.514, 1)
INSERT INTO @Data (Value, Flag) VALUES (0.787, 1)
INSERT INTO @Data (Value, Flag) VALUES (0.125, 1)
INSERT INTO @Data (Value, Flag) VALUES (0.324, 1)
INSERT INTO @Data (Value, Flag) VALUES (0.86, 1)
--Subset A
SELECT *,
Rnk = PERCENT_RANK() OVER (ORDER BY Value)
FROM @Data
WHERE Flag = 0
--Subset B
SELECT *,
Rnk = NULL -- ? : the unknown being asked for -- ranking based on ranks derived from subset A
FROM @Data
WHERE Flag = 1
Hmmm . . . This is one way:
-- Rank subset B (Flag = 1) against subset A (Flag = 0): each B row takes the
-- PERCENT_RANK of the closest A value that is less than or equal to it.
-- B values below every A value get a NULL rank, because OUTER APPLY keeps
-- the row even when the applied query returns nothing.
;with a as (
    select d.Value,
           percent_rank() over (order by d.Value) as rnk
    from @Data d
    where d.Flag = 0
)
select b.*, a.rnk
from @Data b
outer apply (
    -- A correlated "top 1" lookup needs APPLY; a plain join cannot reference b here.
    select top 1 a.rnk
    from a
    where a.Value <= b.Value
    order by a.Value desc
) a
where b.Flag = 1;

SQL Server: How do I get the highest value not set of an int column?

Let's take an example. These are the rows of the table I want get the data:
The column I'm talking about is the reference one. The user can set this value on the web form, but the system I'm developing must suggest the lowest reference value still not used.
As you can see, the smallest value of this column is 35. I could just take the smaller reference and sum 1, but, in that case, the value 36 is already used. So, the value I want is 37.
Is there a way to do this without a loop verification? This table will grow so much.
This is for 2012+
-- Sample data; table variables are declared with the @ sigil.
DECLARE @Tbl TABLE (id int, reference int)
INSERT INTO @Tbl
( id, reference )
VALUES
(1, 49),
(2, 125),
(3, 35),
(4, 1345),
(5, 36),
(6, 37)
-- Lowest unused reference above the smallest existing one.
-- A row "ends a run" when the next reference (LEAD) is missing or more than
-- one higher. Testing for NULL explicitly covers the last row, so a fully
-- contiguous table yields max + 1 rather than NULL; using ">" rather than
-- "!=" keeps duplicate references from being mistaken for gaps.
SELECT
MIN(A.reference) + 1 AS Result
FROM
(
SELECT
reference,
LEAD(reference) OVER (ORDER BY reference) AS Tmp
FROM
@Tbl
) A
WHERE
A.Tmp IS NULL
OR A.Tmp > A.reference + 1
Result: 38 (with the sample rows above, 35, 36 and 37 are all in use)
Here is yet another place where the tally table is going to prove invaluable. In fact it is so useful I keep a view on my system that looks like this.
-- Tally view: returns the integers 1 through 10,000 in a single column N.
-- The rows come from cross-joining constant row sets; no base table is read.
create view [dbo].[cteTally] as
with
    Ten(N) as (
        -- 10 constant rows
        select 1
        from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) as src(n)
    ),
    Hundred(N) as (
        -- 10 x 10 = 100 rows
        select 1 from Ten as x cross join Ten as y
    ),
    TenThousand(N) as (
        -- 100 x 100 = 10,000 rows
        select 1 from Hundred as x cross join Hundred as y
    ),
    Numbered(N) as (
        -- ORDER BY (SELECT NULL): any order will do, only the numbering matters
        select row_number() over (order by (select null))
        from TenThousand
    )
select N
from Numbered
Next of course we need some sample data and table to hold it.
-- Scratch table for the sample rows; id is generated by the identity column.
create table #Something (
    id          int identity,
    reference   int,
    description varchar(10)
);

insert into #Something (reference, description)
values
    (49,   'data1'),
    (125,  'data2'),
    (35,   'data3'),
    (1345, 'data4'),
    (36,   'data5'),
    (7784, 'data6');
Now comes the magic of the tally table.
-- Smallest tally number, at or above the lowest existing reference, that no
-- row in #Something uses.
select top (1) candidate.N
from cteTally as candidate
where candidate.N >= (select min(reference) from #Something)
  and not exists (
        select 1
        from #Something as used
        where used.reference = candidate.N
      )
order by candidate.N
This is ugly, but should get the job done:
-- Lowest "reference + 1" that is not itself an existing reference.
-- NOT EXISTS replaces NOT IN: with NOT IN, a single NULL reference in the
-- subquery makes the predicate UNKNOWN for every row and the query returns nothing.
select
top 1 t.reference + 1
from
[table] as t
where
-- skip NULL references explicitly; they cannot produce a usable successor
t.reference is not null
and not exists (
    select 1
    from [table] as used
    where used.reference = t.reference + 1
)
order by t.reference
I used a common table expression to get the next value. I first left outer joined the table to itself (shifting the key in the join by +1). I then looked only at rows that had no corresponding match (b.ID is null). The minimum a.Reference + 1 gives us the answer we are looking for.
-- Sample table: ID is an identity column, so the inserts supply only
-- Reference and Description.
create table MyTable (
    ID          int identity,
    Reference   int,
    Description varchar(20)
)

insert into MyTable (Reference, Description) values (10, 'Data')
insert into MyTable (Reference, Description) values (11, 'Data')
insert into MyTable (Reference, Description) values (12, 'Data')
insert into MyTable (Reference, Description) values (15, 'Data')

-- Find gap: every Reference whose successor (Reference + 1) is absent yields
-- a candidate; the smallest candidate is the new reference.
;with Gaps as (
    select cur.Reference + 1 as GapID
    from MyTable as cur
    where not exists (
        select 1
        from MyTable as nxt
        where nxt.Reference = cur.Reference + 1
    )
)
select min(GapID) as NewReference
from Gaps
NewReference
------------
13
I hope the code was clearer than my description.
-- Sample rows.
CREATE TABLE #T (
    ID            INT,
    REFERENCE     INT,
    [DESCRIPTION] VARCHAR(50)
)

INSERT INTO #T
SELECT 1, 49,   'data1' UNION ALL
SELECT 2, 125,  'data2' UNION ALL
SELECT 3, 35,   'data3' UNION ALL
SELECT 4, 1345, 'data4' UNION ALL
SELECT 5, 36,   'data5' UNION ALL
SELECT 6, 7784, 'data6'

-- Variant 1: the first reference (in ascending order) whose successor is absent.
SELECT TOP (1) CUR.REFERENCE + 1
FROM #T AS CUR
LEFT JOIN #T AS NXT
    ON NXT.REFERENCE = CUR.REFERENCE + 1
WHERE NXT.REFERENCE IS NULL   -- no row holds the successor value
ORDER BY CUR.REFERENCE

--- OR
-- Variant 2: same anti-join, aggregated with MIN instead of TOP/ORDER BY.
SELECT MIN(CUR.REFERENCE) + 1
FROM #T AS CUR
LEFT JOIN #T AS NXT
    ON NXT.REFERENCE = CUR.REFERENCE + 1
WHERE NXT.REFERENCE IS NULL
How about using a Tally table. The following illustrates the concept. It would be better to use a persisted numbers table as opposed to the cte however the code below illustrates the concept.
For further reading as to why you should use a persisted table, check out the following link: sql-auxiliary-table-of-numbers
-- Range of candidate numbers; local variables are declared with the @ sigil.
DECLARE @START int = 1, @END int = 1000
CREATE TABLE #TEST(UsedValues INT)
INSERT INTO #TEST(UsedValues) VALUES
(1),(3),(5),(7),(9),(11),(13),(15),(17)
-- Recursive CTE generating every integer from @START to @END.
;With NumberSequence( Number ) as
(
Select @START as Number
union all
Select Number + 1
from NumberSequence
where Number < @END
)
-- Smallest generated number with no matching row in #TEST.
SELECT MIN(n.Number)
FROM NumberSequence n
LEFT JOIN #TEST t
ON n.Number = t.UsedValues
WHERE t.UsedValues IS NULL
-- MAXRECURSION must be a literal; it has to cover (@END - @START) recursion
-- steps, so raise it if the range above is widened.
OPTION ( MAXRECURSION 1000 )
You could try using an ascending order:
-- Lists each distinct reference value once, smallest first.
-- NOTE(review): backtick-quoted identifiers are MySQL syntax; confirm the target engine.
SELECT DISTINCT
    reference
FROM
    `Resultsados`
ORDER BY
    `reference` ASC;
As far as I know, there is no way to do this without a loop. To prevent multiple values from returning be sure to use DISTINCT.

Column based intersect on two tables

I'm trying to do something similar to a column based intersect on two tables.
The tables are:
LogTag: a log can have zero or more tags
MatchingRule: a matching rule consists of one or more tags that define the rule
A log can have zero or more rules matched to it. I will be passing in a MatchingRuleID and expecting to return all logs that match that rule.
Expected Result: A result set of matching LogIDs. Eg. passing in MatchingRuleID = 30 should return LogID 101. MatchingRuleID = 31 should return LogID 101 & 100.
Also, the LogTag table could have millions of rows so an efficient query is preferred.
The question: How to find all LogIDs that match with a specified rule definition?
Schema:
-- Tag catalogue.
CREATE TABLE dbo.Tag (
    TagID   INT,
    TagName NVARCHAR(50)
);

INSERT INTO dbo.Tag (TagID, TagName)
VALUES
    (1, 'tag1'),
    (2, 'tag2'),
    (3, 'tag3');

-- One row per (log, tag) pair; a log may have any number of tags.
CREATE TABLE dbo.LogTag (
    LogID INT,
    TagID INT
);

INSERT INTO dbo.LogTag (LogID, TagID)
VALUES
    (100, 1),
    (101, 1),
    (101, 2),
    (101, 3),
    (101, 4),
    (102, 2),
    (102, 3);

-- One row per (rule, tag) pair; the set of tags defines the rule.
CREATE TABLE dbo.MatchingRule (
    MatchingRuleID INT,
    TagID          INT
);

INSERT INTO dbo.MatchingRule (MatchingRuleID, TagID)
VALUES
    (30, 1),
    (30, 2),
    (30, 3),
    (31, 1);
Important to have the proper clustered index on the tables. I've put an alternative index in comments for #log_tag which might improve performance for large sets. Since I do not have the proper sample to test on, you will have to verify which is best.
-- Temp-table copy of the sample schema, with primary keys so the joins below
-- can use the clustered indexes.
CREATE TABLE #tag(tag_id INT PRIMARY KEY,tag_name NVARCHAR(50));
INSERT INTO #tag (tag_id,tag_name)VALUES
(1,'tag1'),(2,'tag2'),(3,'tag3');
-- Try this key for large sets: PRIMARY KEY(tag_id,log_id));
CREATE TABLE #log_tag(log_id INT,tag_id INT,PRIMARY KEY(log_id,tag_id))
INSERT INTO #log_tag (log_id,tag_id)VALUES
(100,1),(101,1),(101,2),(101,3),(101,4),(102,2),(102,3);
CREATE TABLE #matching_rule(matching_rule_id INT,tag_id INT,PRIMARY KEY(matching_rule_id,tag_id));
INSERT INTO #matching_rule(matching_rule_id,tag_id)VALUES
(30,1),(30,2),(30,3),(31,1);
-- Rule to evaluate. This is a local variable (@ sigil), not a temp table.
DECLARE @matching_rule_id INT=31;
-- Relational division: a log matches when the number of its tags that belong
-- to the rule equals the number of tags the rule requires. The primary keys
-- guarantee neither side contains duplicate pairs, so COUNT(*) is exact.
;WITH required_tags AS (
SELECT tag_id
FROM #matching_rule
WHERE matching_rule_id=@matching_rule_id
)
SELECT lt.log_id
FROM required_tags AS rt
INNER JOIN #log_tag AS lt ON
lt.tag_id=rt.tag_id
GROUP BY lt.log_id
HAVING COUNT(*)=(SELECT COUNT(*) FROM required_tags);
-- Clean up the scratch tables.
DROP TABLE #log_tag;
DROP TABLE #matching_rule;
DROP TABLE #tag;
The results are the ones in your Expected Result for both 30 & 31.
Execution plan for the index used in the script:
Try this query
Fiddle Here
-- Rule to evaluate; local variables are declared with the @ sigil.
DECLARE @InputMatchingRuleId INT = 30
-- CTE1: every (tag, log) pair where the log carries one of the rule's tags,
-- with the tags numbered 1..k by DENSE_RANK.
;WITH CTE1
AS
(
SELECT DENSE_RANK() OVER(ORDER BY LT.TagID) AS RN,LT.TagID,LT.LogID
FROM MatchingRule MR INNER JOIN LogTag LT ON LT.TagID = MR.TagID
WHERE MR.MatchingRuleID=@InputMatchingRuleId
),
-- CTE2: starting from logs that have tag #1, extend the chain while the same
-- log also has tag #2, #3, ... RN2 is the length of the unbroken chain.
CTE2
AS
(
SELECT 1 AS RN2,LogID FROM CTE1 C1 WHERE C1.RN=1
UNION ALL
SELECT C2.RN2+1 AS RN2,C2.LogID
FROM CTE1 C1 INNER JOIN CTE2 C2 ON C1.RN = C2.RN2+1 AND C1.LogID = C2.LogID
)
-- A log matches only when its chain covers every tag of the rule. Filtering
-- on "RN2 > 1" would also return logs that carry just the first two tags of
-- a rule with three or more tags.
SELECT DISTINCT LogID FROM CTE2
WHERE RN2 = (SELECT COUNT(DISTINCT TagID)
             FROM MatchingRule
             WHERE MatchingRuleID = @InputMatchingRuleId)
NOTE: This will only work with SQL Server 2008+
Here's the query I came up with:
-- Rule to evaluate; local variables are declared with the @ sigil.
DECLARE @RuleID INT
SELECT @RuleID = 30
-- For each log, count how many of the rule's tags it carries and keep the
-- logs whose count equals the rule's total tag count (TagCount).
SELECT lt.LogID
FROM LogTag lt
INNER JOIN (
    -- TagCount: number of rows the rule has, repeated on every row of the rule
    SELECT TagID, MatchingRuleID, COUNT(*) OVER (PARTITION BY MatchingRuleID) AS TagCount
    FROM MatchingRule
) mr
ON lt.TagID = mr.TagID
AND mr.MatchingRuleID = @RuleID
GROUP BY lt.LogID, mr.TagCount
HAVING COUNT(*) = mr.TagCount
So basically I match all TagID's within the specified matching rule and then once I know that all tags match I check to see if the count of tags from the MatchingRule table matches the (now filtered and grouped) count of tags from the LogTag table.
should be
-- @MatchingRuleID is the rule id supplied by the caller (declared outside this snippet).
-- rules: the distinct tags of the rule, each row carrying cnt = number of
-- distinct tags. COUNT(*) OVER () counts the grouped rows; summing the
-- per-tag counts would re-count duplicate rule rows that GROUP BY just
-- collapsed, and then no log could ever reach cnt.
; with rules as
(
select TagID, cnt = count(*) over()
from dbo.MatchingRule
where MatchingRuleID = @MatchingRuleID
group by TagID
)
-- A log matches when it carries as many of the rule's tags as the rule has.
select lt.LogID
from rules r
inner join LogTag lt on r.TagID = lt.TagID
group by lt.LogID, r.cnt
having count(*) = r.cnt
-- Logs that carry at least one tag of rule 31 (one output row per matching
-- log/tag pair).
select
    logtag.LogID
from dbo.MatchingRule as rule_tag
inner join dbo.LogTag as logtag
    on logtag.TagID = rule_tag.TagID
where rule_tag.MatchingRuleID = 31
another approach is to identify all tags and then:
-- Log/tag rows whose tag appears in #Tags.
-- NOTE(review): #Tags is not defined in this snippet; presumably a temp table
-- holding the rule's tag ids -- confirm against the caller.
select
    logtag.LogID
from dbo.LogTag as logtag
where logtag.TagID in (select wanted.TagID from #Tags as wanted)

TSql equality on groups of rows

I have a table that contains information on groups. There can be any number of members in a group. There is a group identifier and then an element identifier. I want to be able to in a single statement determine whether or not a given set exists in the table
@groupData is an example of the data that already exists in the database
@inputData is the data that I want to check for existence in @groupData
-- Existing groups: one row per (group, element). Table variables use the @ sigil.
declare @groupData table
(
groupIdentifier int,
elementIdentifier uniqueidentifier
)
insert into @groupData (groupIdentifier, elementIdentifier) values
(1, 'dfce40b1-3719-4e4c-acfa-65f728677700'),
(1, '89e7e6be-cee8-40a7-8135-a54659e0d88c')
-- Candidate sets to look up: one row per (temporary group, element).
declare @inputData table
(
tempGroupIdentifier int,
elementIdentifier uniqueidentifier
)
insert into @inputData (tempGroupIdentifier, elementIdentifier) values
(42, 'dfce40b1-3719-4e4c-acfa-65f728677700'),
(42, '89e7e6be-cee8-40a7-8135-a54659e0d88c'),
(55, 'dfce40b1-3719-4e4c-acfa-65f728677700'),
(55, '2395a42c-94f4-4cda-a773-221b26ea5e44'),
(55, 'f22db9df-a1f4-4078-b74c-90e34376eff6')
Now I want to run a query that will show the relationship of the sets, showing which groupIdentifier is associated with which tempGroupIdentifier. If there is no matching set then I need to know that too.
desired output:
groupIdentifier, tempGroupIdentifier
1, 42
null, 55
Does anyone have any suggestions on how to approach this problem?
I could probably pivot the rows and concat all elementIdentifiers into a giant string for each group that then do equality on, but that doesn't seem like a good solution.
-- Maps every tempGroupIdentifier in @inputData to the groupIdentifier in
-- @groupData that holds exactly the same elements, or to NULL when none does.
-- Both sides are numbered in elementIdentifier order, so two equal sets
-- (duplicates included) line up position by position.
;WITH input_sets AS
(
    SELECT
        tempGroupIdentifier,
        elementIdentifier,
        COUNT(*) OVER (PARTITION BY tempGroupIdentifier) AS GroupCount,
        ROW_NUMBER() OVER (PARTITION BY tempGroupIdentifier ORDER BY elementIdentifier) AS GroupRN
    FROM @inputData
),
group_sets AS
(
    SELECT
        groupIdentifier,
        elementIdentifier,
        COUNT(*) OVER (PARTITION BY groupIdentifier) AS GroupCount,
        ROW_NUMBER() OVER (PARTITION BY groupIdentifier ORDER BY elementIdentifier) AS GroupRN
    FROM @groupData
),
full_matches AS
(
    -- Keep a pair only when EVERY position matched. Joining row by row and
    -- applying DISTINCT would report a pair as soon as one position matched,
    -- so equal-sized sets that merely overlap would look identical.
    SELECT i.tempGroupIdentifier, g.groupIdentifier
    FROM input_sets AS i
    INNER JOIN group_sets AS g
        ON g.elementIdentifier = i.elementIdentifier
       AND g.GroupCount = i.GroupCount
       AND g.GroupRN = i.GroupRN
    GROUP BY i.tempGroupIdentifier, g.groupIdentifier, i.GroupCount
    HAVING COUNT(*) = i.GroupCount
)
SELECT
    t.tempGroupIdentifier,
    m.groupIdentifier AS GroupIdentifier
FROM
    (SELECT DISTINCT tempGroupIdentifier FROM @inputData) AS t
LEFT JOIN full_matches AS m
    ON m.tempGroupIdentifier = t.tempGroupIdentifier
Edit: this will also deal with the same value in a given set
-- For every (group, temp group) pair sharing at least one element, report the
-- groupIdentifier when the shared-element count equals the size of both sets,
-- otherwise NULL. Temp groups sharing no element with any group produce no row.
SELECT
(
CASE WHEN cj.matchCount = gdc.gdCount AND cj.matchCount = idc.idCount
THEN cj.groupIdentifier
ELSE NULL
END) groupIdentifier,
cj.tempGroupIdentifier
FROM
(
-- matchCount: number of elements the two sets have in common
SELECT gd.groupIdentifier, id.tempGroupIdentifier, COUNT(1) matchCount
FROM @groupData gd
INNER JOIN @inputData id
ON id.elementIdentifier = gd.elementIdentifier
GROUP BY gd.groupIdentifier, id.tempGroupIdentifier) as cj
-- gdCount / idCount: total size of each set
CROSS APPLY (SELECT COUNT(groupIdentifier) from @groupData gdca WHERE gdca.groupIdentifier = cj.groupIdentifier) as gdc(gdCount)
CROSS APPLY (SELECT COUNT(tempGroupIdentifier) from @inputData idca WHERE idca.tempGroupIdentifier = cj.tempGroupIdentifier) as idc(idCount)

How to join sequential numbers to unrelated data (SQL Server)

This question is a followup to a previous question I had about discovering unused sequential number ranges without having to resort to cursors (Working with sequential numbers in SQL Server 2005 without cursors). I'm using SQL Server 2005.
What I need to do with those numbers is to assign those numbers to records in a table. I just can't seem to come up with a way to actually relate the numbers table with the records that need those numbers.
One possible solution that came to mind was insert the records in a temp table using an identity and using the beginning of the number range as an identity seed. The only problem with this approach is that if there are gaps in the number sequence then I'll end up with duplicate control numbers.
This is how my tables look like (overly simplified):
Numbers table:
Number
-------
102314
102315
102319
102320
102324
102329
Data table:
CustomerId PaymentAmt ControlNumber
---------- ---------- -------------
1001 4502.01 NULL
1002 890.00 NULL
9830 902923.34 NULL
I need a way to make it so i end up with:
CustomerId PaymentAmt ControlNumber
---------- ---------- -------------
1001 4502.01 102314
1002 890.00 102315
9830 902923.34 102319
Is this possible without having to use cursors? The reason I'm avoiding cursors is that our current implementation uses cursors, and since it's so slow (8 minutes over 12,000 records) I was looking for alternatives.
Note: Thanks to all who posted answers. All of them were great, I had to pick the one that seemed easier to implement and easiest to maintain for whomever comes after me. Much appreciated.
Try this:
-- Pair the n-th DataTable row (by CustomerId) with the n-th NumberTable row
-- (by Number) and copy the number into ControlNumber.
;WITH TargetRows AS
(
    SELECT ControlNumber,
           ROW_NUMBER() OVER (ORDER BY CustomerId) AS Corr
    FROM DataTable
),
AvailableNumbers AS
(
    SELECT Number,
           ROW_NUMBER() OVER (ORDER BY Number) AS Corr
    FROM NumberTable
)
UPDATE t
SET t.ControlNumber = n.Number
FROM TargetRows AS t
INNER JOIN AvailableNumbers AS n
    ON n.Corr = t.Corr
Building on Martin's code from the linked question, you could give all rows without a control number a row number. Then give all unused numbers a row number. Join the two sets together, and you get a unique number per row:
-- Inclusive range of control numbers to draw from; local variables use the @ sigil.
DECLARE @StartRange int, @EndRange int
SET @StartRange = 790123401
SET @EndRange = 790123450;
-- Inline sample data: some rows already have a control number, some do not.
; WITH YourTable(ControlNumber, CustomerId) AS
(
SELECT 790123401, 1000
UNION ALL SELECT 790123402, 1001
UNION ALL SELECT 790123403, 1002
UNION ALL SELECT 790123406, 1003
UNION ALL SELECT NULL, 1004
UNION ALL SELECT NULL, 1005
UNION ALL SELECT NULL, 1006
)
-- Number the rows that lack a control number (they all share the -1 partition).
-- Ordering by CustomerId makes the assignment deterministic; ordering by
-- ControlNumber would sort only NULLs against each other.
, YourTableNumbered(rn, ControlNumber, CustomerId) AS
(
select row_number() over (
partition by IsNull(ControlNumber, -1)
order by CustomerId)
, ControlNumber
, CustomerId
from YourTable
)
-- Every integer in the range, generated recursively.
, Nums(N) AS
(
SELECT @StartRange
UNION ALL
SELECT N+1
FROM Nums
WHERE N < @EndRange
)
-- Range values not yet used as a control number, numbered in ascending order.
, UnusedNums(rn, N) as
(
select row_number() over (order by Nums.N)
, Nums.N
from Nums
where not exists
(
select *
from YourTable yt
where yt.ControlNumber = Nums.N
)
)
-- Rows keep an existing control number; the others take the unused number
-- with the same row number.
select ytn.CustomerId
, IsNull(ytn.ControlNumber, un.N)
from YourTableNumbered ytn
left join
UnusedNums un
on un.rn = ytn.rn
-- the range may exceed the default recursion limit of 100
OPTION (MAXRECURSION 0)
All you need is a deterministic order in data table. If you have that, you can use ROW_NUMBER() as a join condition:
-- Rows still lacking a ControlNumber are numbered by CustomerId, the
-- candidate numbers by Number; equal positions are paired and the number is
-- written into ControlNumber.
with pending_rows as (
    select
        ControlNumber,
        row_number() over (order by CustomerId) as [row_number]
    from [Data Table]
    where ControlNumber is null
),
candidate_numbers as (
    select
        Number,
        row_number() over (order by Number) as [row_number]
    from [Numbers]
)
update p
set ControlNumber = c.Number
from pending_rows as p
inner join candidate_numbers as c
    on c.[row_number] = p.[row_number];
If you need it to be concurrency-proof, it does get more complex.
EDITED: added code to remove used values from @Number, via the OUTPUT clause of the UPDATE and a DELETE
try using ROW_NUMBER() to join them:
-- Pool of available control numbers. Table variables use the @ sigil.
DECLARE @Number table (Value int)
INSERT @Number (Value) VALUES (102314)
INSERT @Number (Value) VALUES (102315)
INSERT @Number (Value) VALUES (102319)
INSERT @Number (Value) VALUES (102320)
INSERT @Number (Value) VALUES (102324)
INSERT @Number (Value) VALUES (102329)
-- Rows waiting for a control number.
DECLARE @Data table (CustomerId int, PaymentAmt numeric(10,2),ControlNumber int)
INSERT @Data (CustomerId, PaymentAmt, ControlNumber) VALUES (1001, 4502.01 ,NULL)
INSERT @Data (CustomerId, PaymentAmt, ControlNumber) VALUES (1002, 890.00 ,NULL)
INSERT @Data (CustomerId, PaymentAmt, ControlNumber) VALUES (9830, 902923.34 ,NULL)
-- Collects the numbers handed out by the UPDATE so they can be removed from the pool.
DECLARE @Used table (Value int)
-- Number both sides and pair them by position.
;WITH RowNumber AS
(
SELECT Value,ROW_NUMBER() OVER(ORDER BY Value) AS RowNumber FROM @Number
)
,RowData AS
(
SELECT CustomerId,ROW_NUMBER() OVER(ORDER BY CustomerId) AS RowNumber, ControlNumber FROM @Data WHERE ControlNumber IS NULL
)
UPDATE d
SET ControlNumber=r.Value
OUTPUT r.Value INTO @Used
FROM RowData d
INNER JOIN RowNumber r ON d.RowNumber=r.RowNumber
-- Remove the numbers that were just assigned from the pool.
DELETE @Number WHERE Value IN (SELECT Value FROM @Used)
SELECT * FROM @Data
SELECT * FROM @Number
OUTPUT:
CustomerId PaymentAmt ControlNumber
----------- --------------------------------------- -------------
1001 4502.01 102314
1002 890.00 102315
9830 902923.34 102319
(3 row(s) affected)
Value
-----------
102320
102324
102329
(3 row(s) affected)
You'll need something to join the two tables together. Some data value that you can match between the two tables.
I'm assuming there's more to your numbers table than just one column of numbers. If there's anything in there that you can match to your data table you can get away with an update.
How are you updating the data table using cursors?

Resources