Need T-SQL Query for Groups and Islands - sql-server

create table #sample (rowguid int identity ,id_frm int ,id_to int)
insert into #sample values( 1,5)
insert into #sample values( 7,13)
insert into #sample values( 17,20)
In the table above, each row has a starting Id and an ending Id. I need to produce a table that contains every number falling between the starting Id and the ending Id.
I have tried looping, but the response is very slow against real-world data.
Can anybody help me with a query?
This is what I have tried so far...
declare @sql varchar(8000) = '
select top '+cast((select max(id_to) from #sample) as varchar(100))+' identity(int,1,1) as guidid into tally from sysobjects,syscolumns '
exec (@sql)
alter table Tally add slab varchar(10)
create clustered index idx on Tally(guidid)
create clustered index idx on #sample(id_frm asc,id_to desc)
update Tally set slab = rowguid
from #sample join Tally on guidid between id_frm and id_to
delete from Tally where slab is null
select * from Tally
This query works fine with small numbers, but my real table has 13-digit numbers, and it throws an arithmetic overflow error.

Assuming id_frm and id_to are relatively small integers, e.g. < 1M, one technique to approach this problem is to create a table with all values in the range and join to it:
WITH lotsOfNumbers AS
(
SELECT ROW_NUMBER() OVER (ORDER BY o1.object_id) AS id
FROM sys.objects o1 CROSS JOIN sys.objects o2
)
INSERT INTO #targetTable
SELECT l.ID
FROM lotsOfNumbers l
INNER JOIN #sample
ON l.ID BETWEEN #sample.id_frm AND #sample.id_to;
SqlFiddle here
A permanent table with the necessary range of IDs and a clustered index on the ID would, of course, improve performance.
Add in a DISTINCT if your ranges overlap, and you don't want duplicates in the result.
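For example, here is a minimal sketch of such a permanent numbers table (the dbo.Numbers name and the 1M row count are illustrative assumptions, not part of the original answer):
SELECT TOP (1000000)
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS id
INTO dbo.Numbers -- hypothetical name
FROM sys.all_objects o1
CROSS JOIN sys.all_objects o2;
-- the clustered index turns the BETWEEN predicate into a range seek
CREATE UNIQUE CLUSTERED INDEX IX_Numbers_id ON dbo.Numbers (id);
The join in the answer above then reads FROM dbo.Numbers l instead of the CTE.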

If you are able to get a full range of acceptable values into another table, you can use it without looping. The method below gets the minimum (1) and maximum (20), and the temporary table named #range will return everything in between.
drop table #sample
drop table #range
create table #sample (id_frm int ,id_to int)
insert into #sample values( 1,5)
insert into #sample values( 7,13)
insert into #sample values( 17,20)
create table #range (id int)
insert into #range select 1
go
insert into #range select top 1 max(id) + 1 from #range
go 100
declare @min int
declare @max int
set @min = (select min(id_frm) from #sample)
set @max = (select max(id_to) from #sample)
select * from #range where id between @min and @max
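For the 13-digit values in the original question, neither a global tally nor this #range table is feasible. A hedged sketch of an alternative: generate a 0-based offset per range with CROSS APPLY, so the row count depends only on the width of each range, not on the absolute size of the ids (assumes bigint id_frm/id_to columns and no range wider than the cross join of sys.all_objects, roughly a few million rows):
SELECT s.id_frm + v.n AS id
FROM #sample s
CROSS APPLY (
SELECT TOP (s.id_to - s.id_frm + 1)
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS n
FROM sys.all_objects o1 CROSS JOIN sys.all_objects o2
) v;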

Related

Substring is slow with while loop in SQL Server

One column of my table stores ~650,000 characters (each value of the column contains an entire table). I know it's bad design; however, the client will not be able to change it.
I am tasked with converting the column into multiple columns.
I chose to use the dbo.DelimitedSplit8K function.
Unfortunately, it can only handle 8,000 characters at most.
So I decided to split the column into 81 8K batches using a while loop and store the results in a table variable (a temp or normal table made no improvement):
DECLARE @tab1 table ( serialnumber int, etext nvarchar(1000))
declare @scriptquan int = (select MAX(LEN(errortext)/8000) from mytable)
DECLARE @Counter INT
DECLARE @A bigint = 1
DECLARE @B bigint = 8000
SET @Counter = 1
WHILE ( @Counter <= @scriptquan + 1)
BEGIN
insert into @tab1 select ItemNumber, Item from dbo.mytable cross apply dbo.DelimitedSplit8K(substring(errortext, @A, @B), CHAR(13)+CHAR(10))
SET @A = @A + 8000
SET @B = @B + 8000
SET @Counter = @Counter + 1
END
This is followed by the code below:
declare @tab2 table (Item nvarchar(max), itemnumber int, Colseq varchar(10)) -- declare table variable
;with cte as (
select [etext], ItemNumber, Item from @tab1 -- insert table name
cross apply dbo.DelimitedSplit8K(etext, ' ')) -- insert table columns name that contains text
insert into @tab2 Select Item, itemnumber, 'a' + cast(ItemNumber as varchar) colseq
from cte -- insert values to table variable
;WITH Tbl(item, colseq) AS(
select item, colseq from @tab2
),
CteRn AS(
SELECT item, colseq,
Rn = ROW_NUMBER() OVER(PARTITION BY colseq ORDER BY colseq)
FROM Tbl
)
SELECT
a1 Time, a2 Number, a3 Type, a4 Remarks
FROM CteRn r
PIVOT(
MAX(item)
FOR colseq IN(a1,a2,a3,a4)
)p
where a3 = 'error'
gives the desired output. However, the loop alone takes 15 minutes to complete, and the overall query completes in 27 minutes. Is there any way I can make it faster? The total row count in my table is 2, so I don't think an index can help.
The client uses Azure SQL Database, so I can't use PowerShell or Python to accomplish this either.
Please let me know if more information is needed. I tried my best to mention everything I could.
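Not an answer from the original thread, but a set-based sketch of the same 8K batching idea: drive the chunk offsets from a small numbers CTE instead of a WHILE loop (assumes the dbo.mytable/errortext names and the dbo.DelimitedSplit8K function from the question; like the loop, a chunk boundary can still split a line in two):
DECLARE @tab1 table (serialnumber int, etext nvarchar(1000));
WITH chunks AS (
-- 100 is just an upper bound on the number of 8K chunks; the WHERE below trims it
SELECT TOP (100) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS n
FROM sys.all_objects
)
INSERT INTO @tab1 (serialnumber, etext)
SELECT s.ItemNumber, s.Item
FROM dbo.mytable m
CROSS JOIN chunks c
CROSS APPLY dbo.DelimitedSplit8K(SUBSTRING(m.errortext, c.n * 8000 + 1, 8000), CHAR(13)+CHAR(10)) AS s
WHERE c.n * 8000 < LEN(m.errortext);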

Why does a join on ROW_NUMBER() between common table expressions result in a Cartesian product?

I am joining two common table expressions (CTEs) on an id created within each CTE using ROW_NUMBER(). The resulting execution plan includes a Merge Join where the estimated rows are 13,530,000,000 in my case, while the actual number of rows ends up being about 13,000.
Below is an example of what I am trying to do, and this results in a similar execution plan with 1,200,000,000 estimated rows in the Merge Join, and 12,000 actual rows.
I am stuck understanding why what seems like a simple join is creating a Cartesian product.
IF OBJECT_ID('tempdb..#Product') IS NOT NULL DROP TABLE #Product;
IF OBJECT_ID('tempdb..#Product_Id') IS NOT NULL DROP TABLE #Product_Id;
CREATE TABLE #Product (
[rowId] int identity primary key,
[id] int
)
CREATE TABLE #Product_Id (
[id] int primary key
)
DECLARE @Id int
SET @Id = 500000
WHILE @Id <= 600000
BEGIN
INSERT INTO #Product_Id
VALUES (@Id)
SET @Id += 1
END
SET @Id = 1
WHILE @Id <= 12000
BEGIN
INSERT INTO #Product
VALUES (NULL)
SET @Id += 1
END
WITH
PRODUCT_ID_CTE AS (
SELECT [id],
ROW_NUMBER() OVER(ORDER BY [id]) AS [rn]
FROM #Product_Id
),
PRODUCT_CTE AS (
SELECT [id],
ROW_NUMBER() OVER(ORDER BY [rowId]) AS [rn]
FROM #Product
)
SELECT ISNULL(a.[id], b.[id]) AS [id]
FROM PRODUCT_CTE a
JOIN PRODUCT_ID_CTE b
ON a.[rn] = b.[rn]
(Screenshots omitted: the execution plan with CTEs, and the execution plan with subqueries.)
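One workaround sketch (not from the original post): materialize each row-numbered set into an indexed temp table first, so the optimizer has real statistics on [rn] instead of guessing at the join (the #p/#pid names are invented):
SELECT [id], ROW_NUMBER() OVER (ORDER BY [id]) AS [rn]
INTO #pid
FROM #Product_Id;
SELECT [id], ROW_NUMBER() OVER (ORDER BY [rowId]) AS [rn]
INTO #p
FROM #Product;
CREATE UNIQUE CLUSTERED INDEX ix_pid ON #pid ([rn]);
CREATE UNIQUE CLUSTERED INDEX ix_p ON #p ([rn]);
SELECT ISNULL(a.[id], b.[id]) AS [id]
FROM #p a
JOIN #pid b ON a.[rn] = b.[rn];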

SQL Server query with pagination and count

I want to make a database query with pagination, so I used a common table expression and a ranking function to achieve it. Look at the example below.
declare @table table (name varchar(30));
insert into @table values ('Jeanna Hackman');
insert into @table values ('Han Fackler');
insert into @table values ('Tiera Wetherbee');
insert into @table values ('Hilario Mccray');
insert into @table values ('Mariela Edinger');
insert into @table values ('Darla Tremble');
insert into @table values ('Mammie Cicero');
insert into @table values ('Raisa Harbour');
insert into @table values ('Nicholas Blass');
insert into @table values ('Heather Hayashi');
declare @pagenumber int = 2;
declare @pagesize int = 3;
declare @total int;
with query as
(
select name, ROW_NUMBER() OVER(ORDER BY name ASC) as line from @table
)
select top (@pagesize) name from query
where line > (@pagenumber - 1) * @pagesize
Here, I can set the @pagesize and @pagenumber variables to get just the records that I want. However, this example (which comes from a stored procedure) is used for grid pagination in a web application, and the web application requires showing the page numbers. For instance, if I have 12 records in the database and the page size is 3, then I'll have to show 4 links, each one representing a page.
But I can't do this without knowing how many records there are, and this example just gives me the subset of records.
Then I changed the stored procedure to return the count(*).
declare @pagenumber int = 2;
declare @pagesize int = 3;
declare @total int;
with query as
(
select name, ROW_NUMBER() OVER(ORDER BY name ASC) as line, total = count(*) over() from @table
)
select top (@pagesize) name, total from query
where line > (@pagenumber - 1) * @pagesize
So, along with each line, it will show the total number of records. But I didn't like it.
My question is if there's a better way (performance) to do this, maybe setting the #total variable without returning this information in the SELECT. Or is this total column something that won't harm the performance too much?
Thanks
Assuming you are using SQL Server 2012 or later, you can use OFFSET and FETCH, which clean up server-side paging greatly. We've found performance is fine, and in most cases better. As for getting the total count column, just use the window function below inline; it will not include the limits imposed by OFFSET and FETCH.
For Row_Number, you can use window functions the way you did, but I would recommend that you calculate that client side as (pagenumber*pagesize + resultsetRowNumber), so if you're on the 5th page of 10 results and on the third row you would output row 53.
When applied to an Orders table with about 2 million orders, I found the following:
FAST VERSION
This ran in under a second. The nice thing about it is that you can do your filtering in the common table expression once and it applies both to the paging process and the count. When you have many predicates in the where clause, this keeps things simple.
declare @skipRows int = 25,
@takeRows int = 100,
@count int = 0
;WITH Orders_cte AS (
SELECT OrderID
FROM dbo.Orders
)
SELECT
OrderID,
tCountOrders.CountOrders AS TotalRows
FROM Orders_cte
CROSS JOIN (SELECT Count(*) AS CountOrders FROM Orders_cte) AS tCountOrders
ORDER BY OrderID
OFFSET @skipRows ROWS
FETCH NEXT @takeRows ROWS ONLY;
SLOW VERSION
This took about 10 seconds, and it was the Count(*) that caused the slowness. I'm surprised it's so slow, but I suspect it's simply calculating the total for each row. It's very clean, though.
declare @skipRows int = 25,
@takeRows int = 100,
@count int = 0
SELECT
OrderID,
Count(*) Over() AS TotalRows
FROM Location.Orders
ORDER BY OrderID
OFFSET @skipRows ROWS
FETCH NEXT @takeRows ROWS ONLY;
CONCLUSION
We've gone through this performance tuning process before and actually found that it depended on the query, predicates used, and indexes involved. For instance, the second we introduced a view it chugged, so we actually query off the base table and then join up the view (which includes the base table) and it actually performs very well.
I would suggest having a couple of straightforward strategies and applying them to high-value queries that are chugging.
DECLARE @pageNumber INT = 1,
@RowsPerPage INT = 20
SELECT *
FROM TableName
ORDER BY Id
OFFSET ((@pageNumber - 1) * @RowsPerPage) ROWS
FETCH NEXT @RowsPerPage ROWS ONLY;
What if you calculate the count beforehand?
declare @pagenumber int = 2;
declare @pagesize int = 3;
declare @total int;
SELECT @total = count(*)
FROM @table;
with query as
(
select name, ROW_NUMBER() OVER(ORDER BY name ASC) as line from @table
)
select top (@pagesize) name, @total total from query
where line > (@pagenumber - 1) * @pagesize
Another way is to calculate max(line). Check the link:
Return total records from SQL Server when using ROW_NUMBER
UPD:
For single query, check marc_s's answer on the link above.
with query as
(
select name, ROW_NUMBER() OVER(ORDER BY name ASC) as line from @table
)
select top (@pagesize) name,
(SELECT MAX(line) FROM query) AS total
from query
where line > (@pagenumber - 1) * @pagesize
With @pagenumber = 5 and @pagesize = 5, create a common table expression and write the paging predicate like this:
between ((@pagenumber - 1) * @pagesize) + 1 and (@pagenumber * @pagesize)
For example:
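A minimal sketch of that approach, reusing the @table data from the question:
declare @pagenumber int = 5;
declare @pagesize int = 5;
with query as
(
select name, ROW_NUMBER() OVER(ORDER BY name ASC) as line from @table
)
select name
from query
where line between ((@pagenumber - 1) * @pagesize) + 1 and (@pagenumber * @pagesize);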
There are many ways we can achieve pagination; I hope this information is useful to you and others.
Example 1: using the OFFSET / FETCH NEXT clause, introduced in SQL Server 2012.
declare @table table (name varchar(30));
insert into @table values ('Jeanna Hackman');
insert into @table values ('Han Fackler');
insert into @table values ('Tiera Wetherbee');
insert into @table values ('Hilario Mccray');
insert into @table values ('Mariela Edinger');
insert into @table values ('Darla Tremble');
insert into @table values ('Mammie Cicero');
insert into @table values ('Raisa Harbour');
insert into @table values ('Nicholas Blass');
insert into @table values ('Heather Hayashi');
declare @pagenumber int = 1
declare @pagesize int = 3
-- this is a CTE (common table expression), introduced in SQL Server 2005
with query as
(
select ROW_NUMBER() OVER(ORDER BY name ASC) as line, name from @table
)
-- an ORDER BY clause is required to use OFFSET-FETCH
select * from query
order by name
offset ((@pagenumber - 1) * @pagesize) rows
fetch next @pagesize rows only
Example 2: using the ROW_NUMBER() function and BETWEEN.
declare @table table (name varchar(30));
insert into @table values ('Jeanna Hackman');
insert into @table values ('Han Fackler');
insert into @table values ('Tiera Wetherbee');
insert into @table values ('Hilario Mccray');
insert into @table values ('Mariela Edinger');
insert into @table values ('Darla Tremble');
insert into @table values ('Mammie Cicero');
insert into @table values ('Raisa Harbour');
insert into @table values ('Nicholas Blass');
insert into @table values ('Heather Hayashi');
declare @pagenumber int = 2
declare @pagesize int = 3
SELECT *
FROM
(select ROW_NUMBER() OVER (ORDER BY PRODUCTNAME) AS RowNum, * from Products)
as Product
where RowNum between (((@pagenumber - 1) * @pagesize) + 1)
and (@pagenumber * @pagesize)
I hope these will be helpful to all
I don't like other solutions for being too complex, so here is my version.
Execute three SELECT queries in one go and use output parameters to get the count values. This query returns the total count, the filter count, and the page rows. It supports sorting, searching, and filtering the source data, and it's easy to read and modify.
Let's say you have two tables with a one-to-many relationship: items and their prices, which change over time, so the example query is not too trivial.
create table shop.Items
(
Id uniqueidentifier not null primary key,
Name nvarchar(100) not null
);
create table shop.Prices
(
ItemId uniqueidentifier not null,
Updated datetime not null,
Price money not null,
constraint PK_Prices primary key (ItemId, Updated),
constraint FK_Prices_Items foreign key (ItemId) references shop.Items(Id)
);
Here is the query:
select @TotalCount = count(*) over()
from shop.Items i;
select @FilterCount = count(*) over()
from shop.Items i
outer apply (select top 1 p.Price, p.Updated from shop.Prices p where p.ItemId = i.Id order by p.Updated desc) as p
where (@Search is null or i.Name like '%' + @Search + '%')/**where**/;
select i.Id as ItemId, i.Name, p.Price, p.Updated
from shop.Items i
outer apply (select top 1 p.Price, p.Updated from shop.Prices p where p.ItemId = i.Id order by p.Updated desc) as p
where (@Search is null or i.Name like '%' + @Search + '%')/**where**/
order by /**orderby**/i.Id
offset @SkipCount rows fetch next @TakeCount rows only;
You need to provide the following parameters to the query:
@SkipCount - how many records to skip, calculated from the page number.
@TakeCount - how many records to return, calculated from or equal to the page size.
@Search - a text to search for in some columns, provided by the grid search box.
@TotalCount - the total number of records in the data source, an output parameter.
@FilterCount - the number of records after the search and filtering operations, an output parameter.
You can replace the /**orderby**/ comment with a list of columns and their ordering directions if the grid must support sorting the rows by columns. You get this info from the grid and translate it into an SQL expression. We still need to order the records by some column initially; I usually use the ID column for that.
If the grid must support filtering data by each column individually, you can replace the /**where**/ comment with an SQL expression for that.
If the user is not searching and filtering the data, but only clicks through the grid pages, this query doesn't change at all and the database server executes it very quickly.
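As a hedged illustration of where those parameters live, here is a hypothetical wrapper procedure (the shop.GetItemsPage name is invented, and the counts use plain COUNT(*) rather than the windowed form above):
create procedure shop.GetItemsPage
@SkipCount int,
@TakeCount int,
@Search nvarchar(100) = null,
@TotalCount int output,
@FilterCount int output
as
begin
set nocount on;
-- total number of records in the data source
select @TotalCount = count(*) from shop.Items;
-- number of records after searching/filtering
select @FilterCount = count(*)
from shop.Items i
where (@Search is null or i.Name like '%' + @Search + '%');
-- the page rows
select i.Id as ItemId, i.Name, p.Price, p.Updated
from shop.Items i
outer apply (select top 1 p.Price, p.Updated from shop.Prices p where p.ItemId = i.Id order by p.Updated desc) as p
where (@Search is null or i.Name like '%' + @Search + '%')
order by i.Id
offset @SkipCount rows fetch next @TakeCount rows only;
end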

Using merge..output to get mapping between source.id and target.id

Very simplified, I have two tables Source and Target.
declare @Source table (SourceID int identity(1,2), SourceName varchar(50))
declare @Target table (TargetID int identity(2,2), TargetName varchar(50))
insert into @Source values ('Row 1'), ('Row 2')
I would like to move all rows from @Source to @Target and know the TargetID for each SourceID, because there are also tables SourceChild and TargetChild that need to be copied as well, and I need to add the new TargetID into the TargetChild.TargetID FK column.
There are a couple of solutions to this.
Use a while loop or cursors to insert one row (RBAR) into Target at a time and use scope_identity() to fill the FK of TargetChild.
Add a temp column to @Target and insert SourceID into it. You can then join on that column to fetch the TargetID for the FK in TargetChild (sketched below).
SET IDENTITY_INSERT ON for @Target and handle assigning the new values yourself. You get a range that you then use in TargetChild.TargetID.
I'm not all that fond of any of them. The one I have used so far is cursors.
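Here is a quick sketch of the second option; the extra SourceID mapping column on the target is the hypothetical part:
declare @Target2 table (TargetID int identity(2,2), TargetName varchar(50), SourceID int);
insert into @Target2 (TargetName, SourceID)
select SourceName, SourceID
from @Source;
-- the TargetID/SourceID pairs that drive the TargetChild copy
select TargetID, SourceID from @Target2;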
What I would really like to do is to use the output clause of the insert statement.
insert into @Target(TargetName)
output inserted.TargetID, S.SourceID
select SourceName
from @Source as S
But that is not possible:
The multi-part identifier "S.SourceID" could not be bound.
But it is possible with a merge.
merge @Target as T
using @Source as S
on 0=1
when not matched then
insert (TargetName) values (SourceName)
output inserted.TargetID, S.SourceID;
Result
TargetID SourceID
----------- -----------
2 1
4 3
I want to know if you have used this, and whether you have any thoughts about the solution or see any problems with it. It works fine in simple scenarios, but perhaps something ugly could happen when the query plan gets really complicated due to a complicated source query. The worst scenario would be that the TargetID/SourceID pairs actually aren't a match.
MSDN has this to say about the from_table_name of the output clause.
Is a column prefix that specifies a table included in the FROM clause of a DELETE, UPDATE, or MERGE statement that is used to specify the rows to update or delete.
For some reason they don't say "rows to insert, update or delete", only "rows to update or delete".
Any thoughts are welcome, and totally different solutions to the original problem are much appreciated.
In my opinion this is a great use of MERGE and OUTPUT. I've used it in several scenarios and haven't experienced any oddities to date.
For example, here is a test setup that clones a Folder and all Files (identity) within it into a newly created Folder (guid).
DECLARE @FolderIndex TABLE (FolderId UNIQUEIDENTIFIER PRIMARY KEY, FolderName varchar(25));
INSERT INTO @FolderIndex
(FolderId, FolderName)
VALUES(newid(), 'OriginalFolder');
DECLARE @FileIndex TABLE (FileId int identity(1,1) PRIMARY KEY, FileName varchar(10));
INSERT INTO @FileIndex
(FileName)
VALUES('test.txt');
DECLARE @FileFolder TABLE (FolderId UNIQUEIDENTIFIER, FileId int, PRIMARY KEY(FolderId, FileId));
INSERT INTO @FileFolder
(FolderId, FileId)
SELECT FolderId,
FileId
FROM @FolderIndex
CROSS JOIN @FileIndex; -- just to illustrate
DECLARE @sFolder TABLE (FromFolderId UNIQUEIDENTIFIER, ToFolderId UNIQUEIDENTIFIER);
DECLARE @sFile TABLE (FromFileId int, ToFileId int);
-- copy Folder structure
MERGE @FolderIndex fi
USING ( SELECT 1 [Dummy],
FolderId,
FolderName
FROM @FolderIndex [fi]
WHERE FolderName = 'OriginalFolder'
) d ON d.Dummy = 0
WHEN NOT MATCHED
THEN INSERT
(FolderId, FolderName)
VALUES (newid(), 'copy_'+FolderName)
OUTPUT d.FolderId,
INSERTED.FolderId
INTO @sFolder (FromFolderId, ToFolderId);
-- copy File structure
MERGE @FileIndex fi
USING ( SELECT 1 [Dummy],
fi.FileId,
fi.[FileName]
FROM @FileIndex fi
INNER JOIN @FileFolder fm ON
fi.FileId = fm.FileId
INNER JOIN @FolderIndex fo ON
fm.FolderId = fo.FolderId
WHERE fo.FolderName = 'OriginalFolder'
) d ON d.Dummy = 0
WHEN NOT MATCHED
THEN INSERT ([FileName])
VALUES ([FileName])
OUTPUT d.FileId,
INSERTED.FileId
INTO @sFile (FromFileId, ToFileId);
-- link new files to Folders
INSERT INTO @FileFolder (FileId, FolderId)
SELECT sfi.ToFileId, sfo.ToFolderId
FROM @FileFolder fm
INNER JOIN @sFile sfi ON
fm.FileId = sfi.FromFileId
INNER JOIN @sFolder sfo ON
fm.FolderId = sfo.FromFolderId
-- return
SELECT *
FROM @FileIndex fi
JOIN @FileFolder ff ON
fi.FileId = ff.FileId
JOIN @FolderIndex fo ON
ff.FolderId = fo.FolderId
I would like to add another example to complement @Nathan's example, as I found it somewhat confusing.
Mine uses real tables for the most part, and not temp tables.
I also got my inspiration from here: another example
-- Copy the FormSectionInstance
DECLARE @FormSectionInstanceTable TABLE(OldFormSectionInstanceId INT, NewFormSectionInstanceId INT)
;MERGE INTO [dbo].[FormSectionInstance]
USING
(
SELECT
fsi.FormSectionInstanceId [OldFormSectionInstanceId]
, @NewFormHeaderId [NewFormHeaderId]
, fsi.FormSectionId
, fsi.IsClone
, @UserId [NewCreatedByUserId]
, GETDATE() NewCreatedDate
, @UserId [NewUpdatedByUserId]
, GETDATE() NewUpdatedDate
FROM [dbo].[FormSectionInstance] fsi
WHERE fsi.[FormHeaderId] = @FormHeaderId
) tblSource ON 1=0 -- an always-false condition
WHEN NOT MATCHED
THEN INSERT
( [FormHeaderId], FormSectionId, IsClone, CreatedByUserId, CreatedDate, UpdatedByUserId, UpdatedDate)
VALUES( [NewFormHeaderId], FormSectionId, IsClone, NewCreatedByUserId, NewCreatedDate, NewUpdatedByUserId, NewUpdatedDate)
OUTPUT tblSource.[OldFormSectionInstanceId], INSERTED.FormSectionInstanceId
INTO @FormSectionInstanceTable(OldFormSectionInstanceId, NewFormSectionInstanceId);
-- Copy the FormDetail
INSERT INTO [dbo].[FormDetail]
(FormHeaderId, FormFieldId, FormSectionInstanceId, IsOther, Value, CreatedByUserId, CreatedDate, UpdatedByUserId, UpdatedDate)
SELECT
@NewFormHeaderId, FormFieldId, fsit.NewFormSectionInstanceId, IsOther, Value, @UserId, CreatedDate, @UserId, UpdatedDate
FROM [dbo].[FormDetail] fd
INNER JOIN @FormSectionInstanceTable fsit ON fsit.OldFormSectionInstanceId = fd.FormSectionInstanceId
WHERE [FormHeaderId] = @FormHeaderId
Here's a solution that doesn't use MERGE (which I've had problems with many times, so I try to avoid it if possible). It relies on two memory tables (you could use temp tables if you want) with IDENTITY columns that get matched, and, importantly, on using ORDER BY when doing the INSERTs and WHERE conditions that match between the two INSERTs; the first table holds the source IDs and the second one holds the target IDs.
-- Setup... We have a table where we need to know the old IDs and new IDs after copying.
-- We want to copy all of DocID=1
DECLARE @newDocID int = 99;
DECLARE @tbl table (RuleID int PRIMARY KEY NOT NULL IDENTITY(1, 1), DocID int, Val varchar(100));
INSERT INTO @tbl (DocID, Val) VALUES (1, 'RuleA-2'), (1, 'RuleA-1'), (2, 'RuleB-1'), (2, 'RuleB-2'), (3, 'RuleC-1'), (1, 'RuleA-3')
-- Create a break in IDENTITY values... just to simulate more realistic data
INSERT INTO @tbl (Val) VALUES ('DeleteMe'), ('DeleteMe');
DELETE FROM @tbl WHERE Val = 'DeleteMe';
INSERT INTO @tbl (DocID, Val) VALUES (6, 'RuleE'), (7, 'RuleF');
SELECT * FROM @tbl t;
-- Declare TWO table variables, each with an IDENTITY - one will hold the RuleID of the items we are copying, the other will hold the RuleID that we create
DECLARE @input table (RID int IDENTITY(1, 1), SourceRuleID int NOT NULL, Val varchar(100));
DECLARE @output table (RID int IDENTITY(1,1), TargetRuleID int NOT NULL, Val varchar(100));
-- Capture the IDs of the rows we will be copying by inserting them into the @input table
-- Important - we must specify the sort order - the best choice is the IDENTITY of the source table (t.RuleID) that we are copying
INSERT INTO @input (SourceRuleID, Val) SELECT t.RuleID, t.Val FROM @tbl t WHERE t.DocID = 1 ORDER BY t.RuleID;
-- Copy the rows, and use the OUTPUT clause to capture the IDs of the inserted rows.
-- Important - we must use the same WHERE and ORDER BY clauses as above
INSERT INTO @tbl (DocID, Val)
OUTPUT Inserted.RuleID, Inserted.Val INTO @output(TargetRuleID, Val)
SELECT @newDocID, t.Val FROM @tbl t
WHERE t.DocID = 1
ORDER BY t.RuleID;
-- Now @input and @output should have the same # of rows, and the order of both inserts was the same, so the IDENTITY columns (RID) can be matched
-- Use this as the map from old-to-new when you are copying sub-table rows
-- Technically, @input and @output don't even need the 'Val' columns, just RID and RuleID - they were included here to prove that the rules matched
SELECT i.*, o.* FROM @output o
INNER JOIN @input i ON i.RID = o.RID
-- Confirm the matching worked
SELECT * FROM @tbl t

How to expand rows from count in tsql only

I have a table that contains a number and a range value. For instance, one column has a value of 40 and the other column has a value of 100, meaning that starting at 40 the range covers 100 values, ending at 139 (inclusive of the number 40). I want to write a T-SQL statement that expands my data into individual rows.
I think I need a CTE for this but do not know how I can achieve it.
Note: when expanded I am expecting about 7M rows.
If you want a CTE, here is an example:
Initial insert:
insert into rangeTable (StartValue, RangeValue)
select 40,100
union all select 150,10
go
The query:
with r_CTE (startVal, rangeVal, generatedVal)
as
(
select r.startValue, r.rangeValue, r.startValue
from rangeTable r
union all
select r.startValue, r.rangeValue, generatedVal+1
from rangeTable r
inner join r_CTE rc
on r.startValue = rc.startVal
and r.rangeValue = rc.rangeVal
and r.startValue + r.rangeValue > rc.generatedVal + 1
)
select * from r_CTE
order by startVal, rangeVal, generatedVal
Just be aware that the default maximum number of recursions is 100. You can change it to the maximum of 32767 by calling
option (maxrecursion 32767)
or to no limit
option (maxrecursion 0)
See BOL for details
I don't know how this could be done with common table expressions, but here is a solution using a temporary table:
SET NOCOUNT ON
DECLARE @MaxValue INT
SELECT @MaxValue = max(StartValue + RangeValue) FROM MyTable
DECLARE @Numbers table (
Number INT IDENTITY(1,1) PRIMARY KEY
)
INSERT @Numbers DEFAULT VALUES
WHILE COALESCE(SCOPE_IDENTITY(), 0) <= @MaxValue
INSERT @Numbers DEFAULT VALUES
SELECT n.Number
FROM @Numbers n
WHERE EXISTS(
SELECT *
FROM MyTable t
WHERE n.Number BETWEEN t.StartValue AND t.StartValue + t.RangeValue - 1
)
SET NOCOUNT OFF
This could be optimized if the Numbers table were a regular table, so you wouldn't have to fill the temporary table on every call.
You could try this approach:
create function [dbo].[fRange](@a int, @b int)
returns @ret table (val int)
as
begin
declare @val int
declare @end int
set @val = @a
set @end = @a + @b
while @val < @end
begin
insert into @ret(val)
select @val
set @val = @val + 1
end
return
end
go
declare @ranges table(start int, noOfEntries int)
insert into @ranges (start, noOfEntries)
select 40,100
union all select 150, 10
select * from @ranges r
cross apply dbo.fRange(start, noOfEntries) fr
Not the fastest, but it should work.
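As a hedged alternative sketch, an inline table-valued function built on a tally avoids the row-by-row inserts entirely (dbo.fRangeInline is an invented name; it assumes no range is wider than the cross join of sys.all_objects, roughly a few million rows):
create function dbo.fRangeInline(@a int, @b int)
returns table
as
return
-- one row per offset 0 .. @b-1, added to the range start @a
select top (@b) @a + ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 as val
from sys.all_objects o1
cross join sys.all_objects o2;
go
It plugs into the same cross apply as above: cross apply dbo.fRangeInline(start, noOfEntries) fr.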
I would do something slightly different from splattne...
SET NOCOUNT ON
DECLARE @MaxValue INT
DECLARE @Numbers table (
Number INT IDENTITY(1,1) PRIMARY KEY CLUSTERED
)
SELECT @MaxValue = max(RangeValue) FROM MyTable
INSERT @Numbers DEFAULT VALUES
WHILE COALESCE(SCOPE_IDENTITY(), 0) <= @MaxValue
INSERT @Numbers DEFAULT VALUES
SELECT
t.StartValue + n.Number - 1
FROM
MyTable t
INNER JOIN
@Numbers n
ON n.Number <= t.RangeValue
SET NOCOUNT OFF
This will minimise the number of rows you need to insert into the table variable, then use a join to 'multiply' one table by the other...
By the nature of the query, the source table doesn't need indexing, but the "numbers" table should have an index (or primary key). Clustered indexes refer to how rows are stored on disk, so I can't see CLUSTERED being relevant here, but I left it in as I just copied from splattne.
(Large joins like this may be slow, but still much faster than millions of inserts.)
