Create a SELECT with defined number of rows - sql-server

I have a stored procedure that should insert some random rows in a table depending on the amount values
-- Stored-procedure parameters: row cap per eligibility class.
-- T-SQL parameters/variables use the @ prefix (# denotes a temp table).
@amount1 INT --EligibilityID = 1
@amount2 INT --EligibilityID = 2
@amount3 INT --EligibilityID = 3
Maybe the obvious way is to use TOP(#amount) but there are a lot of amount values and the second select is much larger. So, I was looking for a way to do it in a single statement if possible.
-- Copy at most @amountN rows of each eligibility class from dbo.tempCases
-- into dbo.CaseInfo, one INSERT per class.
-- NOTE(review): the INSERTs are positional; this assumes dbo.CaseInfo's columns
-- are (EligibilityID, CaseNumber, CaseMonth) in that order - confirm and add an
-- explicit column list.
-- NOTE: TOP without ORDER BY returns an arbitrary set of rows, not a random one;
-- add ORDER BY NEWID() inside each derived table if a true random sample is needed.

-- EligibilityID = 1
INSERT INTO [dbo].[CaseInfo]
SELECT [EligibilityID], [CaseNumber], [CaseMonth]
FROM (
    SELECT TOP (@amount1) [EligibilityID], [CaseNumber], [CaseMonth]
    FROM [dbo].[tempCases]
    WHERE [EligibilityID] = 1
) AS picked; -- a derived table must have an alias in T-SQL

-- EligibilityID = 2
INSERT INTO [dbo].[CaseInfo]
SELECT [EligibilityID], [CaseNumber], [CaseMonth]
FROM (
    SELECT TOP (@amount2) [EligibilityID], [CaseNumber], [CaseMonth]
    FROM [dbo].[tempCases]
    WHERE [EligibilityID] = 2
) AS picked;

-- EligibilityID = 3
INSERT INTO [dbo].[CaseInfo]
SELECT [EligibilityID], [CaseNumber], [CaseMonth]
FROM (
    SELECT TOP (@amount3) [EligibilityID], [CaseNumber], [CaseMonth]
    FROM [dbo].[tempCases]
    WHERE [EligibilityID] = 3
) AS picked;

I would recommend to use row_number, partitioned by eligibilityID, and then compare it with a case statement to select the correct variable each time:
-- Single-statement version: number the rows inside each EligibilityID partition,
-- then keep only the rows whose number is within that partition's cap.
-- NOTE(review): positional INSERT; assumes dbo.CaseInfo's columns are
-- (EligibilityID, CaseNumber, CaseMonth) in that order - confirm.
INSERT INTO [dbo].[CaseInfo]
SELECT [EligibilityID], [CaseNumber], [CaseMonth]
FROM (
    SELECT [EligibilityID],
           [CaseNumber],
           [CaseMonth],
           ROW_NUMBER() OVER (PARTITION BY [EligibilityID] ORDER BY [CaseNumber]) AS rn -- the question gives no ORDER BY; change it here
    FROM [dbo].[tempCases]
) AS table1
WHERE rn <= CASE
                WHEN [EligibilityID] = 1 THEN @amount1
                WHEN [EligibilityID] = 2 THEN @amount2
                WHEN [EligibilityID] = 3 THEN @amount3
                -- no ELSE: any other EligibilityID yields NULL, the comparison is
                -- UNKNOWN, and the row is filtered out
            END;

Related

ROW_NUMBER in cross apply generating "incorrect" values based on exists clause

Here is the sql:
-- Schema: table variables standing in for the real tables.
-- (Table variables are declared and referenced with the @ prefix.)
DECLARE @ModelItem TABLE (
    ModelItemId UNIQUEIDENTIFIER,
    MetamodelItemId UNIQUEIDENTIFIER
)
DECLARE @MetamodelItemAncestor TABLE (
    MetamodelItemId UNIQUEIDENTIFIER,
    ParentMetamodelItemId UNIQUEIDENTIFIER,
    AncestorLevel INT
)
DECLARE @SolutionMetamodelItem TABLE (
    MetamodelItemId UNIQUEIDENTIFIER,
    SolutionId UNIQUEIDENTIFIER
)

-- Sample data: one model item, two ancestor rows (levels 0 and 1) for its
-- metamodel item, and one solution per ancestor.
INSERT INTO @ModelItem VALUES ('EC6AC6A9-684E-E611-8117-00155D026308', '2AB1F075-684E-E611-8117-00155D026308')
INSERT INTO @MetamodelItemAncestor
VALUES ('2AB1F075-684E-E611-8117-00155D026308', '2AB1F075-684E-E611-8117-00155D026308', 0),
       ('2AB1F075-684E-E611-8117-00155D026308', 'AA12E380-CA4D-E611-8117-00155D026308', 1)
INSERT INTO @SolutionMetamodelItem
VALUES ('2AB1F075-684E-E611-8117-00155D026308', 'f612a333-ca4d-e611-8117-00155d026308'),
       ('AA12E380-CA4D-E611-8117-00155D026308', 'fc160f3e-ca4d-e611-8117-00155d026308')

-- query
-- Input lists: the model items and solutions to look up.
DECLARE @ModelItemId TABLE (EntityId UNIQUEIDENTIFIER)
DECLARE @SolutionId TABLE (EntityId UNIQUEIDENTIFIER)
INSERT INTO @ModelItemId
VALUES ('EC6AC6A9-684E-E611-8117-00155D026308')
INSERT INTO @SolutionId
VALUES ('f612a333-ca4d-e611-8117-00155d026308'), ('fc160f3e-ca4d-e611-8117-00155d026308')

-- Query 1: the outer rowset is every (model item, solution) pair, and the APPLY
-- is correlated on BOTH m.ModelItemId and m.SolutionId, so the inner SELECT
-- (including its ROW_NUMBER) is evaluated separately for each pair.
SELECT mia.*
FROM (
    SELECT M.EntityId AS ModelItemId, S.EntityId AS SolutionId
    FROM @ModelItemId AS M
    CROSS JOIN @SolutionId AS S
) AS m
CROSS APPLY (
    SELECT
        MI.ModelItemId,
        OTA.ParentMetamodelItemId AS [MetamodelItemId],
        ROW_NUMBER() OVER (PARTITION BY [MI].[ModelItemId] ORDER BY [OTA].[AncestorLevel] ASC) AS [AspectRank]
    FROM @ModelItem AS MI
    INNER JOIN @MetamodelItemAncestor AS OTA
        ON MI.MetamodelItemId = OTA.MetamodelItemId
    WHERE
        MI.ModelItemId = m.ModelItemId
        AND EXISTS (
            SELECT 1
            FROM @SolutionMetamodelItem AS MSMI
            WHERE MSMI.MetamodelItemId = OTA.ParentMetamodelItemId
                AND MSMI.SolutionId = m.SolutionId
        )
) mia

-- Query 2: the outer rowset is the model items only; the solutions are checked
-- inside the EXISTS via IN, so one evaluation of the inner SELECT sees the
-- ancestor rows for all solutions of that model item.
SELECT mia.*
FROM @ModelItemId AS m
CROSS APPLY (
    SELECT
        MI.ModelItemId,
        OTA.ParentMetamodelItemId AS [MetamodelItemId],
        ROW_NUMBER() OVER (PARTITION BY [MI].[ModelItemId] ORDER BY [OTA].[AncestorLevel] ASC) AS [AspectRank]
    FROM @ModelItem as MI
    INNER JOIN @MetamodelItemAncestor AS OTA
        ON MI.MetamodelItemId = OTA.MetamodelItemId
    WHERE
        MI.ModelItemId = m.EntityId
        AND EXISTS (
            SELECT 1
            FROM @SolutionMetamodelItem MSMI
            WHERE MSMI.MetamodelItemId = OTA.ParentMetamodelItemId
                AND MSMI.SolutionId IN (SELECT s.EntityId FROM @SolutionId AS s)
        )
) mia
Notice the AspectRank. In the second query it has correctly increased the value sequentially based on the partition.
Looking at the execution plan, for the first query it seems like the row_number (sequence project) is running concurrently to the scan of the #solution table, but I am still not fully sure why it has not increased the row number value, since there are duplicate items.
Could someone explain this? I need to use the first approach because the cross apply query is in fact a UDF with the ModelItemId and SolutionId as parameters.
I would assume the cross apply is executed separately for each of the rows in your outer query -> each of the rows returned is the 1st (and only) row.
Why do you need to have the row number inside the cross apply, instead of being in the outer query, if that's actually where your data is?

TSQL matching the first instances of multiple values in a resultset

Say I have part of a large query, as below, that returns a resultset with multiple rows of the same key information (PolNum) with different value information (PolPremium) in a random order.
Would it be possible to select the first matching row for each PolNum and sum up the PolPremium values? In this case I know that there are 2 PolNums used, so, given the screenshot of the resultset (yes, I know it starts at 14, for illustration purposes), I want to return the first values and sum the result.
First match for PolNum 000035789547
(ROW 14) PolPremium - 32.00
First match for PolNum 000035789547
(ROW 16) PolPremium - 706043.00
Total summed should be 32.00 + 706043.00 = 706075.00
Query
OUTER APPLY
(
    -- PolNum/PolPremium rows from PN20 for the policies whose ControlNum appears
    -- in the list returned by ufn_SplitMax for account 3113413's ServicedCoverages
    -- (presumably a ';'-separated string split into rows - verify the function).
    SELECT PolNum, PolPremium
    FROM PN20
    WHERE PolNum IN (
        SELECT PolNum
        FROM SvcPlanPolicyView
        WHERE SvcPlanPolicyView.ControlNum IN (
            SELECT val AS ServedCoverages
            FROM ufn_SplitMax(
                (SELECT TOP 1 ServicedCoverages FROM SV91 WHERE SV91.AccountKey = 3113413), ';')
        )
    )
    -- NOTE(review): SQL Server rejects ORDER BY inside a derived table/APPLY
    -- subquery unless TOP, OFFSET or FOR XML is also specified - confirm how the
    -- full query handles this.
    ORDER BY PN20.PolEffDate DESC
)
Resultset
Suppose that picture is the final result your query produces. Then you can do something like:
-- Stand-in for the question's result set: several premiums per policy number.
DECLARE @t TABLE
(
    PolNum     VARCHAR(20),
    PolPremium MONEY
);

INSERT INTO @t (PolNum, PolPremium)
VALUES ( '000035789547', 32 ),
       ( '000035789547', 76 ),
       ( '000071709897', 706043.00 ),
       ( '000071709897', 1706043.00 );

-- Keep the first row of each policy (here: the lowest premium) and sum them.
-- GROUPING SETS (PolNum, ()) returns one row per policy plus a grand-total row
-- whose PolNum is NULL.
SELECT t.PolNum,
       SUM(t.PolPremium) AS PolPremium
FROM (
    SELECT PolNum,
           PolPremium,
           ROW_NUMBER() OVER (PARTITION BY PolNum ORDER BY PolPremium) AS rn
    FROM @t
) AS t
WHERE t.rn = 1
GROUP BY GROUPING SETS (t.PolNum, ());
Output:
PolNum PolPremium
000035789547 32.00
000071709897 706043.00
NULL 706075.00
Just replace the table variable with your query. Also, I assume that the row with the minimum premium is the first one. You could probably filter for the top row in the OUTER APPLY part, but it is really not clear to me what is going on there without some sample data.

How to query number based SQL Sets with Ranges in SQL

What I'm looking for is a way in MSSQL to create a complex IN or LIKE clause that contains a SET of values, some of which will be ranges.
Sort of like this, there are some single numbers, but also some ranges of numbers.
EX: SELECT * FROM table WHERE field LIKE/IN '1-10, 13, 24, 51-60'
I need to find a way to do this WITHOUT having to specify every number in the ranges separately AND without having to say "field LIKE blah OR field BETWEEN blah AND blah OR field LIKE blah.
This is just a simple example but the real query will have many groups and large ranges in it so all the OR's will not work.
One fairly easy way to do this would be to load a temp table with your values/ranges:
-- One row per requested value or range: a single value has ValB = NULL,
-- a range is stored as (ValA, ValB) with both ends inclusive.
CREATE TABLE #Ranges (ValA int, ValB int);

INSERT INTO #Ranges (ValA, ValB)
VALUES
     (1, 10)
    ,(13, NULL)
    ,(24, NULL)
    ,(51, 60);

-- [Table] is the question's placeholder name; TABLE is a reserved word, so it
-- has to be bracket-quoted to parse.
SELECT *
FROM [Table] AS t
INNER JOIN #Ranges AS R
    ON (t.Field = R.ValA AND R.ValB IS NULL)                          -- single value
    OR (t.Field BETWEEN R.ValA AND R.ValB AND R.ValB IS NOT NULL);    -- inclusive range
The BETWEEN won't scale that well, though, so you may want to consider expanding this to include all values and eliminating ranges.
You can do this with CTEs.
First, create a numbers/tally table if you don't already have one (it might be better to make it permanent instead of temporary if you are going to use it a lot):
-- Build a 1..1000 numbers table in the global temp table ##Numbers.
;WITH Numbers AS
(
    SELECT
        1 AS Value
    UNION ALL
    SELECT
        Numbers.Value + 1
    FROM
        Numbers
    WHERE
        Numbers.Value < 1000 -- explicit stop condition instead of relying on TOP to end the recursion
)
SELECT
    Value
INTO ##Numbers
FROM
    Numbers
OPTION (MAXRECURSION 1000); -- 999 recursive steps are needed; the default limit is 100
Then you can use a CTE to parse the comma delimited string and join the ranges with the numbers table to get the "NewValue" column which contains the whole list of numbers you are looking for:
-- Expand a comma-delimited list of numbers and ranges ('a-b') into one row per
-- number by joining each parsed item to the ##Numbers table.
DECLARE @TestData varchar(50) = '1-10,13,24,51-60';

-- CTE walks the string: each row holds the start of one item and the position
-- of the comma that ends it (0 for the last item, which also stops the recursion).
;WITH CTE AS
(
    SELECT
        1 AS RowCounter,
        1 AS StartPosition,
        CHARINDEX(',', @TestData) AS EndPosition
    UNION ALL
    SELECT
        CTE.RowCounter + 1,
        CTE.EndPosition + 1,
        CHARINDEX(',', @TestData, CTE.EndPosition + 1)
    FROM CTE
    WHERE
        CTE.EndPosition > 0
)
SELECT
    u.Value,
    u.StartValue,
    u.EndValue,
    n.Value AS NewValue
FROM
(
    -- Split each item at '-': a plain number yields StartValue = EndValue.
    SELECT
        Value,
        SUBSTRING(Value, 1, CASE WHEN CHARINDEX('-', Value) > 0 THEN CHARINDEX('-', Value) - 1 ELSE LEN(Value) END) AS StartValue,
        SUBSTRING(Value, CASE WHEN CHARINDEX('-', Value) > 0 THEN CHARINDEX('-', Value) + 1 ELSE 1 END, LEN(Value) - CHARINDEX('-', Value)) AS EndValue
    FROM
    (
        -- Cut the item out of the string; the last item runs to the end.
        SELECT
            SUBSTRING(@TestData, StartPosition, CASE WHEN EndPosition > 0 THEN EndPosition - StartPosition ELSE LEN(@TestData) - StartPosition + 1 END) AS Value
        FROM
            CTE
    ) t
) u
INNER JOIN ##Numbers n
    -- StartValue/EndValue are strings; cast explicitly so the comparison is numeric
    ON n.Value BETWEEN CAST(u.StartValue AS int) AND CAST(u.EndValue AS int)
All you would need to do once you have that is query the results using an IN statement, so something like
SELECT * FROM MyTable WHERE Value IN (SELECT NewValue FROM (/*subquery from above*/)t)

T-SQL get row count before TOP is applied

I have a SELECT that can return hundreds of rows from a table (table can be ~50000 rows). My app is interested in knowing the number of rows returned, it means something important to me, but it actually uses only the top 5 of those hundreds of rows. What I want to do is limit the SELECT query to return only 5 rows, but also tell my app how many it would have returned (the hundreds). This is the original query:
SELECT id, a, b, c FROM table WHERE a < 2
Here is what I came up with - a CTE - but I don't feel comfortable with the total row count appearing in every column. Ideally I would want a result set of the TOP 5 and a returned parameter for the total row count.
-- Return 5 rows of the filtered set plus, on every row, the size of the whole
-- filtered set.
WITH Everything AS
(
    -- Same filter as the original query, so Total counts the rows that query
    -- would have returned. [table] is a placeholder name; TABLE is a reserved
    -- word and must be bracket-quoted.
    SELECT id, a, b, c
    FROM [table]
    WHERE a < 2
),
DetermineCount AS
(
    SELECT COUNT(*) AS Total FROM Everything
)
SELECT TOP (5) id, a, b, c, Total -- no ORDER BY: which 5 rows come back is arbitrary
FROM Everything
CROSS JOIN DetermineCount;
Can you think of a better way?
Is there a way in T-SQl to return the affected row count of a select top query before the top was applied? ##rowcount would return 5 but I wonder if there is a ##rowcountbeforetop sort of thing.
Thanks in advance for your help.
** Update **
This is what I'm doing now, and I kind of like it over the CTE, although CTEs are so elegant.
-- @count is passed in as an out param to the stored procedure
CREATE TABLE dbo.#everything (id int, a int, b int, c int);
-- Materialise the full filtered set once. [table] is a placeholder name;
-- TABLE is a reserved word and must be bracket-quoted.
INSERT INTO #everything (id, a, b, c)
SELECT id, a, b, c FROM [table] WHERE a < 2;
-- @@ROWCOUNT must be read immediately after the INSERT; any later statement resets it.
SET @count = @@ROWCOUNT;
SELECT TOP (5) id FROM #everything;
DROP TABLE #everything;
Here's a relatively efficient way to get 5 random rows and include the total count. The random element will introduce a full sort no matter where you put it.
-- Five rows in random order; the windowed COUNT carries the total row count on every row.
SELECT TOP (5)
    id,
    a,
    b,
    c,
    COUNT(*) OVER () AS total
FROM dbo.mytable
ORDER BY NEWID();
Assuming you want the top 5 ordering by id ascending, this will do it with a single pass through your table.
-- First 5 rows by id ascending, each carrying the total row count.
-- COUNT(*) OVER () is used for the total: "rn_asc + rn_desc - 1" equals the row
-- count only when id is unique, because two independent ROW_NUMBERs may order
-- tied ids differently.
; WITH Everything AS
(
    SELECT id
         , a
         , b
         , c
         , ROW_NUMBER() OVER (ORDER BY id ASC) AS rn_asc
         , COUNT(*) OVER () AS total_rows
    FROM <table>
)
SELECT id
     , a
     , b
     , c
     , total_rows
FROM Everything
WHERE rn_asc <= 5
ORDER BY rn_asc
** Update **
This is what I'm doing now, and I kind of like it over the CTE, although CTEs are so elegant. Let me know what you think. Thanks!
-- @count is passed in as an out param to the stored procedure
CREATE TABLE dbo.#everything (id int, a int, b int, c int);
-- Materialise the full filtered set once. [table] is a placeholder name;
-- TABLE is a reserved word and must be bracket-quoted.
INSERT INTO #everything (id, a, b, c)
SELECT id, a, b, c FROM [table] WHERE a < 2;
-- @@ROWCOUNT must be read immediately after the INSERT; any later statement resets it.
SET @count = @@ROWCOUNT;
SELECT TOP (5) id FROM #everything;
DROP TABLE #everything;

How to select Top % in T-SQL without using Top clause?

How to select Top 40% from a table without using the Top clause (or Top percent, the assignment is a little ambiguous) ? This question is for T-SQL, SQL Server 2008. I am not allowed to use Top for my assignment.
Thanks.
This is what I've tried but seems complicated. Isn't there an easier way ?
select top (convert (int, (select round (0.4*COUNT(*), 0) from MyTable))) * from MyTable
Try the NTILE function:
-- Template: split the rows into 10 buckets ordered by SomeColumn (descending)
-- and keep buckets 1 through 4.
;WITH YourCTE AS
(
    SELECT
        (some columns),
        NTILE(10) OVER (ORDER BY SomeColumn DESC) AS percentile
    FROM dbo.YourTable
)
SELECT *
FROM YourCTE
WHERE percentile <= 4
The NTILE(10) OVER(....) creates 10 groups of percentages over your data - and thus, the top 40% are the groups no. 1, 2, 3, 4 of that result
Use NTILE
-- Demo data: ten students with their scores.
CREATE TABLE #temp (StudentID CHAR(3), Score INT);

INSERT INTO #temp (StudentID, Score)
VALUES ('S1', 75),
       ('S2', 83),
       ('S3', 91),
       ('S4', 83),
       ('S5', 93),
       ('S6', 75),
       ('S7', 83),
       ('S8', 91),
       ('S9', 83),
       ('S10', 93);

-- Ten buckets by ascending score; buckets 1-4 are the lowest-scoring 40%.
SELECT x.NtileValue,
       x.StudentID,
       x.Score
FROM (
    SELECT NTILE(10) OVER (ORDER BY Score) AS NtileValue,
           StudentID,
           Score
    FROM #temp
) AS x
WHERE x.NtileValue <= 4
ORDER BY x.NtileValue; -- ordered by name rather than by column position
Interesting enough I blogged about NTILE today: Does anyone use the NTILE() windowing function?
A problem with the NTILE(10) answers given so far is that if the table has 15 rows they will return 8 rows (53%) rather than the correct number to make up 40% (6).
If the number of rows is not evenly divisible by number of buckets the extra rows all go into the first buckets rather than being evenly distributed.
This alternative (borrows SQL Menace's table) avoids that issue.
-- Number the rows by ascending score and keep those whose position is within
-- 40% of the total row count, rounded up.
WITH Ranked AS
(
    SELECT StudentID,
           Score,
           ROW_NUMBER() OVER (ORDER BY Score) AS RN,
           COUNT(*) OVER () AS Cnt
    FROM #temp
)
SELECT StudentID,
       Score
FROM Ranked
WHERE RN <= CEILING(0.4 * Cnt)
Using Top t-sql command:
-- First 10 rows ordered by Column_1.
SELECT TOP (10)
    [Column_1],
    [Column_2]
FROM [Table]
ORDER BY [Column_1]
Using Paging method:
-- Paging variant: number the rows by Column_1, then keep row numbers 0..10
-- (ROW_NUMBER starts at 1, so this is the first 10 rows).
SELECT
    numbered.[Column_1],
    numbered.[Column_2]
FROM (
    SELECT
        ROW_NUMBER() OVER (ORDER BY [Column_1]) AS Row,
        [Column_1],
        [Column_2]
    FROM [Table]
) AS numbered
WHERE numbered.Row BETWEEN 0 AND 10
This is finding the top 10 with order by [Column_1]...please note this is using [variable] method of documentation.
If you could provide column names and table names i could write much more beneficial t-sql, for example to find the top 40% you are going to need to do another sub-query to get count of all rows then do division, i'd likely do this as a query before i do the main query.
Calculate and set ROWCOUNT for whatever number of records.
Then execute your query for the limited set.
-- Return the first 40% of the rows of CTE (used here as a table name) by
-- capping the result with SET ROWCOUNT.
DECLARE @rc AS integer;
SELECT @rc = COUNT(*) * 0.40 FROM CTE; -- fractional part is dropped on assignment to integer

IF @rc > 0
BEGIN
    SET ROWCOUNT @rc;
    SELECT * FROM CTE;
    SET ROWCOUNT 0; -- turn the cap off again so later statements in the session are not limited
END
ELSE
BEGIN
    -- SET ROWCOUNT 0 means "no limit", so a computed limit of 0 must return an
    -- empty set explicitly instead of every row.
    SELECT * FROM CTE WHERE 1 = 0;
END
ROWCOUNT is not deprecated yet - see http://msdn.microsoft.com/en-us/library/ms188774.aspx

Resources