How to accomplish a full series in SQL - sql-server

I want to achieve a full numeric scale from 0 to the max number in the table.
Let's say we have a table T with two fields named x and y
SELECT
    x,
    y
FROM t
would show us, let's say, the results
X Y
3 11
5 23
7 45
9 1
10 34
I found this query to build sequential numbers:
-- Recursive CTE that produces the integers 1 through 1000, one per row.
WITH T_Misparim (N) AS
(
    -- Anchor member: the series starts at 1.
    SELECT 1
    UNION ALL
    -- Recursive member: add one to the previous value until 1000 is reached.
    SELECT N + 1
    FROM T_Misparim
    WHERE N < 1000
)
SELECT N
FROM T_Misparim
OPTION (MAXRECURSION 0); -- 0 removes the default recursion limit of 100 levels
from this source : http://www.sqlserver.co.il/?p=3296
My bottom line is: how do I integrate the two queries into a single query to give
the equivalent of a right outer join:
N X Y
0 null 0
1 null 0
2 null 0
3 3 11
4 null 0
5 5 23
6 null 0
7 7 45
8 null 0
9 9 1
10 10 34

You can just LEFT JOIN with the ordinal number CTE;
-- Sample data: X values with gaps (3, 5, 7, 9, 10).
select 3 as X, 11 as Y into #TEST;
insert #TEST (X, Y) values (5,23),(7,45),(9,1),(10,34);

-- Upper bound of the series: the largest X in the table, so the result runs
-- from 0 to max(X) rather than to a hard-coded limit.
declare @max_n int = (select max(X) from #TEST);

-- NUMS yields every integer from 0 to @max_n (just 0 when the table is empty).
with NUMS(n) as (
    select 0
    union all
    select n + 1 from NUMS where n < @max_n
)
select
    NUMS.n as N,
    T.X,
    isnull(T.Y, 0) as Y -- numbers with no matching row show X = NULL and Y = 0
from NUMS
left join #TEST T on T.X = NUMS.n
order by NUMS.n
option (maxrecursion 0); -- lift the default 100-level cap so a large max(X) still works
For
N X Y
0 NULL 0
1 NULL 0
2 NULL 0
3 3 11
4 NULL 0
5 5 23
6 NULL 0
7 7 45
8 NULL 0
9 9 1
10 10 34

Related

Get the rank of each string extracted from split_string

I have a TABLE with STR DNA DATA the following Table [DYS]
Id
DYS385
3
10-19
4
13-16
5
13-18
6
13-19
7
13-17
8
13-18
9
13-18
10
14-19
11
13-19
12
13-18
I am USING the following script to split the values of [DYS385]
-- One output row per '-'-separated piece of DYS385 (first 10 rows only).
SELECT TOP 10
    d.id,
    d.[DYS385],
    t.Value
FROM dys AS d
OUTER APPLY (SELECT * FROM STRING_SPLIT(d.[DYS385], '-')) AS t
WHERE d.dys385 IS NOT NULL
Output
Id
DYS385
VALUE
3
10-19
10
3
10-19
19
4
13-16
13
4
13-16
16
5
13-18
13
5
13-18
18
6
13-19
13
6
13-19
19
7
13-17
13
7
13-17
17
I want to get for each Value, the Rank
example
10-19 => 10: Rank 1, 19: Rank 2
Desired results:
Id
DYS385
VALUE
RANK
3
10-19
10
1
3
10-19
19
2
4
13-16
13
1
4
13-16
16
2
5
13-18
13
1
5
13-18
18
2
6
13-19
13
1
6
13-19
19
2
7
13-17
13
1
7
13-17
17
2
Use an alternative string-split method, such as XML or Json that can return an ordinal position, such as:
-- Splits @string on @Delimiter and returns one row per piece:
--   [value] - the piece itself
--   Seq     - its 1-based position in the original string
-- Seq is a tinyint, so at most 255 pieces can be numbered.
-- NOTE(review): the input is spliced into a JSON array unescaped, so strings containing
-- double quotes or backslashes will presumably break OpenJson - confirm before using on such data.
create function dbo.SplitString(@string varchar(1000), @Delimiter varchar(10))
returns table
as
return(
    -- 'a-b' becomes the JSON array ["a","b"]; OpenJson exposes the zero-based array index as [key].
    select j.[value], 1 + Convert(tinyint, j.[key]) Seq
    from OpenJson(Concat('["', replace(@string, @Delimiter, '","'), '"]')) j
);
go
-- CREATE FUNCTION must be alone in its batch, hence the GO above.
select value, Seq as [Rank]
from dbo.SplitString('10-19','-')
order by [Rank];
Example fiddle
Given this data:
-- Sample table holding the ten DYS385 readings used in the example.
CREATE TABLE dbo.Something
(
    Id     int,
    DYS385 varchar(10)
);

INSERT INTO dbo.Something (Id, DYS385)
VALUES
    (3,  '10-19'),
    (4,  '13-16'),
    (5,  '13-18'),
    (6,  '13-19'),
    (7,  '13-17'),
    (8,  '13-18'),
    (9,  '13-18'),
    (10, '14-19'),
    (11, '13-19'),
    (12, '13-18');
Another way to make sure you rank the broken-up strings in the right order could be to use tricks like PARSENAME(), though this can be sensitive to string lengths and to whether a dot is valid within the data:
-- PARSENAME counts parts from the right, so part 2 is the left-hand value and part 1 the
-- right-hand one; ordering by the part number descending ranks the values left to right.
SELECT
    s.Id,
    s.DYS385,
    piece.v AS [value],
    ROW_NUMBER() OVER (PARTITION BY s.Id ORDER BY part.c DESC) AS [rank]
FROM dbo.Something AS s
CROSS APPLY (VALUES (1), (2)) AS part(c)
CROSS APPLY (VALUES (PARSENAME(REPLACE(s.DYS385, '-', '.'), part.c))) AS piece(v)
ORDER BY s.Id, [rank];
Output:
Id
DYS385
value
rank
3
10-19
10
1
3
10-19
19
2
4
13-16
13
1
4
13-16
16
2
5
13-18
13
1
5
13-18
18
2
6
13-19
13
1
6
13-19
19
2
7
13-17
13
1
7
13-17
17
2
8
13-18
13
1
8
13-18
18
2
9
13-18
13
1
9
13-18
18
2
10
14-19
14
1
10
14-19
19
2
11
13-19
13
1
11
13-19
19
2
12
13-18
13
1
12
13-18
18
2
Example db<>fiddle
STRING_SPLIT has an option to output this rank, called ordinal (the enable_ordinal argument, available in Azure SQL Database and SQL Server 2022 and later). Just add a third parameter with the value of 1:
SELECT value, ordinal
FROM STRING_SPLIT('Lorem ipsum dolor sit amet.', ' ', 1);
That returns:
value ordinal
Lorem 1
ipsum 2
dolor 3
sit 4
amet. 5
In your case the query would be:
-- Same split as before, but the third argument (1) makes STRING_SPLIT also return the ordinal.
SELECT TOP 10
    d.id,
    d.[DYS385],
    t.*
FROM dys AS d
OUTER APPLY (SELECT * FROM STRING_SPLIT(d.[DYS385], '-', 1)) AS t
WHERE d.dys385 IS NOT NULL
We can also write a recursive split function that returns the rank of each item, as follows:
Link dbfiddle
-- Splits @string on the single-character @delimiter using a recursive CTE.
-- Returns one row per piece:
--   [Rank] - 1-based position of the piece in @string
--   Found  - the piece itself
-- A string that does not contain the delimiter yields exactly one row holding the whole string.
-- Each piece costs one recursion level, and the default MAXRECURSION limit is 100.
Create function dbo.Split_Recursive(@string nvarchar(max), @delimiter as varchar(1)) returns Table
as
return(
    with cte as (
        -- Anchor: the first piece, plus the text after the first delimiter.
        -- [MYSTRING] is NULL once nothing is left to split, which stops the recursion.
        select 1 as N,
            case when charindex(@delimiter, @string) = 0 then @string
                 else substring(@string, 1, charindex(@delimiter, @string) - 1) end as [Found],
            case when charindex(@delimiter, @string) = 0 then cast(null as nvarchar(max))
                 else substring(@string, charindex(@delimiter, @string) + 1, len(@string)) end as [MYSTRING]
        union all
        -- Recursive step: peel the next piece off the remaining text.
        select N + 1,
            case when charindex(@delimiter, [MYSTRING]) = 0 then [MYSTRING]
                 else substring([MYSTRING], 1, charindex(@delimiter, [MYSTRING]) - 1) end,
            case when charindex(@delimiter, [MYSTRING]) = 0 then cast(null as nvarchar(max))
                 else substring([MYSTRING], charindex(@delimiter, [MYSTRING]) + 1, len([MYSTRING])) end
        from cte
        where [MYSTRING] is not null
    )
    select N as [Rank], [Found] from cte
)
go
select * from dbo.Split_Recursive('12-16','-') order by [Rank]
Output
Rank
Found
1
12
2
16

How to Group Items in a Count statement

I'm trying to create a query that will return the total claims reported in 0-3 days, 4-7 days, 8-14 days, and 15+ days.
-- Number of claims per reporting delay (days between LossDate and DateReported).
SELECT
    d.TimeToReport,
    COUNT(lr.ClaimId) AS Num
FROM LossRun AS lr
-- Compute the delay once so it can be reused by GROUP BY and ORDER BY.
CROSS APPLY (SELECT DATEDIFF(DD, lr.LossDate, lr.DateReported) AS TimeToReport) AS d
WHERE lr.PolicyNum = '1234567890'
  AND lr.PolTerm = '201403'
GROUP BY d.TimeToReport
ORDER BY d.TimeToReport;
This is what i get
TimeToReport NumofClaims
0 5
1 3
2 1
3 4
4 3
5 2
6 2
7 2
8 1
12 1
13 1
14 2
15 2
48 1
52 1
107 1
121 1
147 1
533 1
Basically, I want to see the totals for TimeToReport values of 0-3, 4-7, 8-14, and the rest (15+).
You can try to use SUM with CASE WHEN
-- Totals the claims into four reporting-delay buckets and returns them as one row.
-- Every CASE has ELSE 0 so a bucket without matching delays contributes 0 rather than NULL.
select
SUM(CASE WHEN TimeToReport <= 3 THEN NumofClaims ELSE 0 END) AS [0-3 days],
SUM(CASE WHEN TimeToReport >= 4 AND TimeToReport <= 7 THEN NumofClaims ELSE 0 END) AS [4-7 days],
SUM(CASE WHEN TimeToReport >= 8 AND TimeToReport <= 14 THEN NumofClaims ELSE 0 END) AS [8-14 days],
SUM(CASE WHEN TimeToReport >= 15 THEN NumofClaims ELSE 0 END) AS [15+ days]
from (
-- Claims per distinct delay in days; the alias NumofClaims is what the outer SUMs read.
Select DATEDiff(DD,LossDate,DateReported) As TimeToReport,Count(ClaimId) As NumofClaims
from LossRun
where PolicyNum='1234567890'
And PolTerm='201403'
Group By DATEDiff(DD,LossDate,DateReported)
) t
The simplest way is to create your own table of bins that holds the min and max for each bucket, and then join to it.
-- Sample rows; only Salary matters for the binning below.
declare @t table (OrderedID int, EmpID int, EffDate date, Salary money)
insert into @t
values
(1,1234,'20150101',1)
,(2,1234,'20160101',2)
,(3,1234,'20170101',8)
,(4,1234,'20180101',15)
,(1,2351,'20150101',17)
,(5,1234,'20190101',4)
,(5,1234,'20190101',2)
,(5,1234,'20190101',9)
-- Each bin is an inclusive [MinVal, MaxVal] range.
-- NOTE: the last three bins share their edge values (15 and 20) and BETWEEN is inclusive,
-- so a Salary of exactly 15 or 20 is counted in two bins.
declare @Bin table (MinVal int, MaxVal int)
insert into @Bin
values
(1,3)
,(4,6)
,(7,9)
,(10,15)
,(15,20)
,(20,25)
-- Count the rows falling in each bin; bins with no rows are omitted by the inner join.
Select
B.MinVal,count(T.EmpID) as EmpsInBin
From @t T
inner join @Bin B on T.Salary between B.MinVal and B.MaxVal
group by B.MinVal
order by B.MinVal
Output
MinVal EmpsInBin
1 3
4 1
7 2
10 1
15 2

Get Min and Max Dates from SQL Server SQL

Data in my table looks like this
PAY_END_DT Sal
10/27/2013 0
11/10/2013 0
11/24/2013 2473.14
12/08/2013 0
01/19/2014 0
02/02/2014 0
02/16/2014 0
My desired result should be like as below
10/27/2013 11/10/2013
12/08/2013 02/16/2014
I need a SQL to generate this result set.. please help
-- Finds each unbroken run ("island") of pay periods with sal = 0 and returns its first
-- and last pay_end_dt, per employee.
-- NOTE(review): assumes yourTable has an employee_id column - confirm against the real schema.
SELECT
  employee_id,
  MIN(pay_end_dt)   AS island_min_pay_end_dt,
  MAX(pay_end_dt)   AS island_max_pay_end_dt
FROM
(
  SELECT
    -- employee_id and sal must be exposed here because the outer query filters and groups on them.
    employee_id,
    pay_end_dt,
    sal,
    -- Position of the row among all of the employee's pay periods.
    ROW_NUMBER() OVER (PARTITION BY employee_id
                           ORDER BY pay_end_dt  ) AS full_set_ordinal,
    -- Position of the row among the employee's pay periods with the same sal.
    ROW_NUMBER() OVER (PARTITION BY employee_id, sal
                           ORDER BY pay_end_dt  ) AS zero_set_ordinal
  FROM
    yourTable
)
  AS sorted_set
WHERE
  sal = 0
GROUP BY
  employee_id,
  -- The difference is constant within one consecutive run of sal = 0 rows and changes
  -- whenever a non-zero row interrupts the run, so it identifies the island.
  full_set_ordinal - zero_set_ordinal
;
Using your data as an example:
PAY_END_DT Sal FULL_SET_ORDINAL ZERO_SET_ORDINAL "FULL - ZERO"
10/27/2013 0 1 1 0
11/10/2013 0 2 2 0
11/24/2013 2473.14 3 1 2
12/08/2013 0 4 3 1
01/19/2014 0 5 4 1
02/02/2014 0 6 5 1
02/16/2014 0 7 6 1
Which then allows us to include only the rows WHERE sal = 0 and then GROUP BY "FULL - ZERO" to get our two sets, and then finally apply the MIN() and MAX() functions.
As stated in a comment, this is known as "Gaps and Islands".
1 1 1 1
0 0 0 0 0
1 2 3 4 5 6 7 8 9 - ordinal from the whole set
1 2 3 4 - ordinal from just the "islands"
1 2 3 4 5 - ordinal from just the "gaps"
2 2 2 3 - ordinal of the "islands" (whole_set_id - islands_id)
0 0 3 4 4 - ordinal of the "gaps" (whole_set_id - gaps_id)
;)
-- Returns the two expected rows as hard-coded literals.
-- Single quotes are required: SQL Server parses double-quoted text as an identifier by default.
SELECT '10/27/2013 11/10/2013' AS pay_end_dt_range
UNION ALL
SELECT '12/08/2013 02/16/2014'

SQL Server NTILE - Same value in different quartile

I have a scenario where I'm splitting a number of results into quartiles using the SQL Server NTILE function below. The goal is to have as equal a number of rows as possible in each class.
-- Select-list expression: labels the NTILE(4) bucket (ordered by t2.TotalStd) as
-- 'A', 'B' or 'C' for buckets 1-3; any other bucket falls through to 'D'.
case NTILE(4) over (order by t2.TotalStd)
when 1 then 'A' when 2 then 'B' when 3 then 'C' else 'D' end as Class
The result table is shown below and there is a (9,9,8,8) split between the 4 class groups A,B,C and D.
There are two results which cause me an issue, both rows have a same total std value of 30 but are assigned to different quartiles.
8 30 A
2 30 B
I'm wondering is there a way to ensure that rows with the same value are assigned to the same quartile? Can i group or partition by another column to get this behaviour?
Pos TotalStd class
1 16 A
2 23 A
3 21 A
4 29 A
5 25 A
6 26 A
7 28 A
8 30 A
9 29 A
1 31 B
2 30 B
3 32 B
4 32 B
5 34 B
6 32 B
7 34 B
8 32 B
9 33 B
1 36 C
2 35 C
3 35 C
4 35 C
5 40 C
6 38 C
7 41 C
8 43 C
1 43 D
2 48 D
3 45 D
4 47 D
5 44 D
6 48 D
7 46 D
8 57 D
You will need to re-create the NTILE function using the RANK function.
The RANK function gives the same rank to rows with the same value. The value then 'jumps' to the next rank, as if you had used ROW_NUMBER.
We can use this behavior to mimic the NTILE function, forcing it to give the same NTILE value to rows with the same value. However, this will cause the NTILE partitions to have different sizes.
See the example below for the new Ntile using 4 bins:
-- Demo data containing tied values.
declare @data table ( x int )
insert @data values
(1),(2),
(2),(3),
(3),(4),
(4),(5)
select
x,
-- RANK gives tied rows the same rank, so tied rows always land in the same bin.
-- (rank - 1) * 4 / row count uses integer division to map the rank onto bins 0-3; adding 1 gives 1-4.
1+(rank() over (order by x)-1) * 4 / count(1) over (partition by (select 1)) as new_ntile
from @data
Results:
x new_ntile
---------------
1 1
2 1
2 1
3 2
3 2
4 3
4 3
5 4
Not sure what you're expecting to happen here, really. SQL Server has divided up the data into 4 groups of as-equal-size-as-possible, as you asked. What do you want to happen? Have a look at this example:
-- Demo data containing tied values.
declare @data table ( x int )
insert @data values
(1),(2),
(2),(3),
(3),(4),
(4),(5)
select
x,
-- NTILE splits the ordered rows into 4 groups by row count, without regard to ties.
NTILE(4) over (order by x) as [ntile]
from @data
Results:
x ntile
----------- ----------
1 1
2 1
2 2
3 2
3 3
4 3
4 4
5 4
Now every ntile group shares a value with the one(s) next to it! But what else should it do?
Try this:
;with class_by_std as (
    -- One row per distinct TotalStd, so NTILE runs over distinct values and
    -- equal values can only ever receive a single class.
    select
        TotalStd,
        case ntile(4) over (order by TotalStd)
            when 1 then 'A'
            when 2 then 'B'
            when 3 then 'C'
            when 4 then 'D'
        end as Class
    from t2
    group by TotalStd
)
-- Attach the class back to every original row through its TotalStd.
select d.*, c.Class
from t2 as d
inner join class_by_std as c
    on c.TotalStd = d.TotalStd
order by Class, Pos;
Here we have a table of 34 rows.
-- Sample data: the 34 TotalStd values from the question, held in a table variable.
DECLARE @x TABLE (TotalStd INT)
INSERT @x (TotalStd) VALUES (16), (21), (23), (25), (26), (28), (29), (29), (30), (30), (31), (32), (32), (32), (32), (33), (34),
(34), (35), (35), (35), (36), (38), (40), (41), (43), (43), (44), (45), (46), (47), (48), (48), (57)
SELECT '@x', TotalStd FROM @x ORDER BY TotalStd
We want to divide into quartiles. If we use NTILE, the bucket sizes will be roughly the same size (8 to 9 rows each) but ties are broken arbitrarily:
SELECT '@x with NTILE', TotalStd, NTILE(4) OVER (ORDER BY TotalStd) quantile FROM @x
See how 30 appears twice: once in quantile 1 and once in quantile 2. Similarly, 43 appears both in quantiles 3 and 4.
What I ought to find is 8 items in quantile 1, 10 in quantile 2, 7 in quantile 3 and 9 in quantile 4 (i.e. not a perfect 9-8-9-8 split, but such a split is impossible if we are not allowed to break ties arbitrarily). I can do it using NTILE to determine cutoff points in a temporary table:
-- One row per quantile: min_value is the smallest TotalStd that NTILE placed in it.
DECLARE @cutoffs TABLE (quantile INT, min_value INT, max_value INT)
INSERT @cutoffs (quantile, min_value)
SELECT y.quantile, MIN(y.TotalStd)
FROM (SELECT TotalStd, NTILE(4) OVER (ORDER BY TotalStd) AS quantile FROM @x) y
GROUP BY y.quantile
-- The max values are the minimum values of the next quantiles
-- (the last quantile has no successor, so it gets MAX(TotalStd) + 1 as its exclusive upper bound).
UPDATE c1 SET c1.max_value = ISNULL(C2.min_value, (SELECT MAX(TotalStd) + 1 FROM @x))
FROM @cutoffs c1 LEFT OUTER JOIN @cutoffs c2 ON c2.quantile - 1 = c1.quantile
SELECT '@cutoffs', * FROM @cutoffs
We'll use the boundary values in the @cutoffs table to create the final table:
-- Half-open ranges [min_value, max_value) put every row with the same TotalStd in one quantile.
SELECT x.TotalStd, c.quantile FROM @x x
INNER JOIN @cutoffs c ON x.TotalStd >= c.min_value AND x.TotalStd < c.max_value

Partition data into subgroups based on bit fields

I have the first 4 columns of data, and I want to use the ranking functions in SQL Server 2008 R2 to derive the fifth column. What's the best way to partition the data into subgroups based on the NextItemInSubGroup and PreviousItemInSubGroup fields?
Group OrderInGroup NextItemInSubGroup PreviousItemInSubGroup SubGroup
1 1 1 0 1
1 2 1 1 1
1 3 1 1 1
1 4 0 1 1
1 5 0 0 2
1 6 0 0 3
1 7 1 0 4
1 8 1 1 4
1 9 0 1 4
2 1 0 0 1
2 2 0 0 2
2 3 0 0 3
2 4 1 0 4
2 5 0 1 4
3 1 0 0 1
4 1 0 0 1
4 2 0 0 2
4 3 0 0 3
A recursive CTE solution:
-- Sample data from the question; the SubGroup column holds the expected answer.
DECLARE @t TABLE
([Group] INT
,OrderInGroup INT
,NextItemInSubGroup INT
,PreviousItemInSubGroup INT
,SubGroup INT
)
INSERT @t
VALUES
(1,1,1,0,1),(1,2,1,1,1),(1,3,1,1,1),(1,4,0,1,1),(1,5,0,0,2),(1,6,0,0,3),
(1,7,1,0,4),(1,8,1,1,4),(1,9,0,1,4),(2,1,0,0,1),(2,2,0,0,2),(2,3,0,0,3),
(2,4,1,0,4),(2,5,0,1,4),(3,1,0,0,1),(4,1,0,0,1),(4,2,0,0,2),(4,3,0,0,3)
-- Walks each [Group] in OrderInGroup order and computes the subgroup number row by row.
;WITH recCTE
AS
(
-- Anchor: the first row of every group starts subgroup 1.
SELECT [Group], OrderInGroup,NextItemInSubGroup , PreviousItemInSubGroup, 1 AS subgroup
FROM @t
WHERE OrderInGroup = 1
UNION ALL
-- Recursive step: the next row stays in the current subgroup when the current row
-- has NextItemInSubGroup = 1; otherwise it opens the next subgroup.
SELECT r.[Group], t.OrderInGroup,t.NextItemInSubGroup , t.PreviousItemInSubGroup,
CASE WHEN r.NextItemInSubGroup = 1 THEN r.subgroup ELSE r.subgroup + 1 END
FROM recCTE AS r
JOIN @t AS t
ON t.[Group] = r.[Group]
AND t.OrderInGroup = r.OrderInGroup + 1
)
SELECT * FROM recCTE
ORDER BY [Group],OrderInGroup ;
P.S. it's best practice to avoid using SQL keywords (e.g. GROUP) as table/column names
Seems like 0 and 0 restart the ranking.
-- Assigns each row its SubGroup number within its [Group].
-- A row with PreviousItemInSubGroup = 0 opens a new subgroup, so a row's subgroup number is
-- the count of such rows at or before it (by OrderInGroup) in the same [Group].
-- Written as a correlated count so that no windowed running SUM is needed.
Select
t.[Group]
, t.[OrderInGroup]
, (
Select Count(*)
From Your_Table As s
Where s.[Group] = t.[Group]
And s.[OrderInGroup] <= t.[OrderInGroup]
And s.[PreviousItemInSubGroup] = 0
) as [SubGroup]
From Your_Table As t
Order By t.[Group], t.[OrderInGroup];

Resources