With a table as:
Name Num Value
----------------------
Peter 10 10
Mary 10,15 5,10
John 5,20 10,20
How can I get a result as the follow table with a View ?
Name Num Value
------------------
Peter 10 10
Mary 10 5
Mary 15 10
John 5 10
John 20 20
Notice Mary and John have multiple data (comma-delimited).
I've a function to do the split of a cell and returns a table but just for a specific row and cell, hos to iterate from all table ?
Aditional Info:
SELECT NAM.NAME, Data AS NUM, VAL.VALUE
FROM dbo.Split((SELECT NUM FROM t1 WHERE LineNum = 2), ','))
CROSS APPLY (
SELECT Id As Id1, Data AS VALUE
FROM dbo.Split((SELECT VALUE FROM t1 WHERE LineNum = 2), ','))
) AS VAL
CROSS APPLY (
SELECT NAME FROM t1 WHERE LineNum = 2
) AS NAM
WHERE Id = Id1
Note: LineNum is row number from t1
From Second Row of t1 (Mary | 10,15 | 5,10)
Previous Function returns table as:
Name Num Value
------------------
Mary 10 5
Mary 15 10
Split Function from a cell (10,15) returns:
Id Data
-----------
1 10
2 15
===============================================
My Split Function:
CREATE FUNCTION [dbo].[Split]
(
#String NVARCHAR(4000),
#Delimiter NCHAR(1)
)
RETURNS TABLE
AS
RETURN
(
WITH Split(stpos,endpos)
AS(
SELECT 0 AS stpos, CHARINDEX(#Delimiter,#String) AS endpos
UNION ALL
SELECT endpos+1, CHARINDEX(#Delimiter,#String,endpos+1)
FROM Split
WHERE endpos > 0
)
SELECT 'Id' = ROW_NUMBER() OVER (ORDER BY (SELECT 1)),
'Data' = SUBSTRING(#String,stpos,COALESCE(NULLIF(endpos,0),LEN(#String)+1)-stpos)
FROM Split
)
Declare #YourTable table (Name varchar(50), Num varchar(50), Value varchar(50))
Insert into #YourTable values
('Peter','10', '10'),
('Mary','10,15','5,10'),
('John','5,20', '10,20')
Select A.Name
,B.*
From #YourTable A
Cross Apply (
Select Num = A.RetVal
,Value = B.RetVal
From (
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
From (Select x = Cast('<x>'+ Replace(A.Num,',','</x><x>')+'</x>' as xml).query('.')) as A
Cross Apply x.nodes('x') AS B(i)
) A
Join (
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
From (Select x = Cast('<x>'+ Replace(A.Value,',','</x><x>')+'</x>' as xml).query('.')) as A
Cross Apply x.nodes('x') AS B(i)
) B on A.RetSeq=B.RetSeq
) B
Returns
Name Num Value
Peter 10 10
Mary 10 5
Mary 15 10
John 5 10
John 20 20
Edit - Another option with a UDF
Select A.Name
,B.*
From #YourTable A
Cross Apply (
Select Num = A.RetVal
,Value = B.RetVal
From [dbo].[udf-Str-Parse-8K](A.Num,',') A
Join [dbo].[udf-Str-Parse-8K](A.Value,',') B
on A.RetSeq=B.RetSeq
) B
The fastest UDF
CREATE FUNCTION [dbo].[udf-Str-Parse-8K] (#String varchar(max),#Delimiter varchar(10))
Returns Table
As
Return (
with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
cte2(N) As (Select Top (IsNull(DataLength(#String),0)) Row_Number() over (Order By (Select NULL)) From (Select N=1 From cte1 a,cte1 b,cte1 c,cte1 d) A ),
cte3(N) As (Select 1 Union All Select t.N+DataLength(#Delimiter) From cte2 t Where Substring(#String,t.N,DataLength(#Delimiter)) = #Delimiter),
cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(#Delimiter,#String,s.N),0)-S.N,8000) From cte3 S)
Select RetSeq = Row_Number() over (Order By A.N)
,RetVal = Substring(#String, A.N, A.L)
From cte4 A
);
--Much faster than str-Parse, but limited to 8K
--Select * from [dbo].[udf-Str-Parse-8K]('Dog,Cat,House,Car',',')
Related
I have a table in MS SQL Server, where are some null values in column "value"
Group ID Value
A 1 10
A 2
A 3
A 4 40
B 1
B 2 20
B 3 30
B 4
I want to update null values by not null in the same group with with the first higher ID, or if there is not any higher in same group, first lower. So the result should look like this.
Group ID Value
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
Thanks!
You can use windowed version of SUM function in order to determine islands of NULL valued records along with the record having the higher ID in the same group:
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp
FROM mytable
Output:
Group ID Value grp
-----------------------
A 4 40 1
A 3 30 2
A 2 NULL 2
A 1 NULL 2
B 4 40 1
B 3 NULL 1
B 2 20 2
B 1 10 3
You can now wrap the above query in a CTE and use another CTE to do the update:
;WITH CTE AS (
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp
FROM mytable
), ToUpdate AS (
SELECT [Group], ID, Value,
MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value
FROM CTE
)
UPDATE ToUpdate
SET Value = group_value
WHERE Value IS NULL
Demo here
Edit:
The above query doesn't handle the edge case where the very last record within a Group slice is NULL. To handle this case as well you can use the following query:
;WITH CTE AS (
SELECT [Group], ID, Value,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID DESC) AS grp,
SUM(CASE WHEN Value IS NULL THEN 0 ELSE 1 END) OVER
(PARTITION BY [Group] ORDER BY ID) AS grp2
FROM mytable
), ToUpdate AS (
SELECT [Group], ID, Value,
MAX(Value) OVER (PARTITION BY [Group], grp) AS group_value,
MAX(Value) OVER (PARTITION BY [Group], grp2) AS group_value2
FROM CTE
)
UPDATE ToUpdate
SET Value = COALESCE(group_value, group_value2)
WHERE Value IS NULL
Demo here
Please try this-
DATA GENERATION
DECLARE #T TABLE
(
GroupCd CHAR(1),
Id INT,
Value INT
)
INSERT INTO #T
VALUES('A',1,10),
('A',2,NULL),
('A',3,NULL),
('A',4,40),
('B',1,NULL),
('B',2,20),
('B',3,30),
('B',4,NULL)
SOLUTION
UPDATE a
SET a.Value = b.Value
FROM #T a
INNER JOIN
(
SELECT a.GroupCd,a.Id,Coalesce(a.Value,z.Value,z1.Value) Value
FROM #T a
OUTER APPLY
(
SELECT TOP 1 Value
FROM #T b
WHERE a.GroupCd = b.GroupCd
AND b.Value IS NOT NULL AND a.Id < b.Id
ORDER BY Id
)z
OUTER APPLY
(
SELECT TOP 1 Value
FROM #T b
WHERE a.GroupCd = b.GroupCd
AND b.Value IS NOT NULL AND a.Id > b.Id
ORDER BY Id DESC
)z1
)b ON a.GroupCd = b.GroupCd AND a.Id = b.Id
SELECT * FROM #T
OUTPUT
GroupCd Id Value
------- ----------- -----------
A 1 10
A 2 40
A 3 40
A 4 40
B 1 20
B 2 20
B 3 30
B 4 30
(8 rows affected)
You Can try This simple Method
DECLARE #T TABLE
(
GroupCd CHAR(1),
Id INT,
Value INT
)
INSERT INTO #T
VALUES('A',1,NULL),
('A',2,NULL),
('A',3,30),
('A',4,40),
('B',1,10),
('B',2,20),
('B',3,NULL),
('B',4,40)
SELECT
*,
NewVal = COALESCE(Value,(SELECT TOP 1 Value FROM #T WHERE GroupCd = T.GroupCd AND Id > T.Id AND Value IS NOT NULL ORDER BY Id ASC))
FROM #T T
My Result
update MY_TABLE set [value] = [newValue] from (
select [Group] [newGroup],
[Value] [newValue]
from (
select [Group], [Value],
row_number() over (partition by [group] order by [Id] desc) [rn]
from MY_TABLE
where [Value] is not null
) [a] where [rn] = 1
) where [Group] = [newGroup] and [Value] is null
I have a table with like this:
ID | FIX_1 | FTO | FIX_2 |
_____________________________________________________
1 | | 15452 |1.3-1.7-1.8-2.4-2.0 |
2 | | 15454 |1.4-1.1-1.4-2.7-2.6-1.8-2.4 |
3 | | 15454 |1.9-1.3-1.3 |
.... ...... .... .................
.... ...... .... .................
100 | | 15552 |0.4-1.7-1.2-2.1-2.6-1.6 |
I need do a select with FIX_1 field equal to the average of the hyphen separated values in FIX_2 field.
Is it possible with T-SQL without use of temporary table?
Thanks in advance
Option with a UDF
Declare #YourTable table (ID int,FIX_1 money,FTO int,FIX_2 varchar(max))
Insert Into #YourTable values
(1,null,15452,'1.3-1.7-1.8-2.4-2.0'),
(2,null,15454,'1.4-1.1-1.4-2.7-2.6-1.8-2.4'),
(3,null,15454,'1.9-1.3-1.3')
Update #YourTable Set FIX_1=B.Value
From #YourTable A
Cross Apply (
Select Value = Avg(cast(RetVal as money))
From (Select * from [dbo].[udf-Str-Parse](A.FIX_2,'-')) B1
) B
Select * From #YourTable
Option without a UDF
Update #YourTable Set FIX_1=B.Value
From #YourTable A
Cross Apply (
Select Value = Avg(cast(RetVal as money))
From (
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
From (Select x = Cast('<x>' + replace((Select replace(A.FIX_2,'-','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as A
Cross Apply x.nodes('x') AS B(i)
) B1
) B
Both would Return
ID FIX_1 FTO FIX_2
1 1.84 15452 1.3-1.7-1.8-2.4-2.0
2 1.9142 15454 1.4-1.1-1.4-2.7-2.6-1.8-2.4
3 1.50 15454 1.9-1.3-1.3
The UDF if Needed
CREATE FUNCTION [dbo].[udf-Str-Parse] (#String varchar(max),#Delimiter varchar(10))
Returns Table
As
Return (
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
From (Select x = Cast('<x>' + replace((Select replace(#String,#Delimiter,'§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as A
Cross Apply x.nodes('x') AS B(i)
);
--Thanks Shnugo for making this XML safe
--Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
--Select * from [dbo].[udf-Str-Parse]('this,is,<test>,for,< & >',',')
with a user defined function...
create FUNCTION dbo.AvgOfDashSepVals ( #vals varchar(500))
returns float as
BEGIN
declare #avg decimal
declare #cnt int = 0
declare #sum float = 0.0
While charIndex('-', #vals) > 0 Begin
if isnumeric(left(#vals, charIndex('-', #vals)-1)) = 0
return null
set #cnt+= 1
set #sum += cast(left(#vals, charIndex('-', #vals)-1) as float)
set #vals = substring(#vals, charIndex('-', #vals)+1, len(#vals))
End
RETURN case #cnt when 0 then null else #sum / #cnt end
then alter your table to add a computed column.
alter table myTable
add Fix_1 as ([dbo].[AvgOfDashSepVals]([Fix_2]))
Declare #YourTable table (ID int,FIX_1 money,FTO int,FIX_2 varchar(max))
Insert Into #YourTable values
(1,null,15452,'1.3-1.7-1.8-2.4-2.0'),
(2,null,15454,'1.4-1.1-1.4-2.7-2.6-1.8-2.4'),
(3,null,15454,'1.9-1.3-1.3'),
(4,null,15454,'1.5')
;WITH cte AS
(
SELECT ID, SUBSTRING(FIX_2, 1, CHARINDEX('-',FIX_2) - 1) AS VALUE, SUBSTRING(FIX_2, CHARINDEX('-',FIX_2) + 1, LEN(FIX_2)) AS NEW_FIX_2
FROM #YourTable
WHERE CHARINDEX('-',FIX_2) > 0
UNION ALL
SELECT cte.ID, SUBSTRING(NEW_FIX_2, 1, CHARINDEX('-',NEW_FIX_2) - 1) AS VALUE, SUBSTRING(NEW_FIX_2, CHARINDEX('-',NEW_FIX_2) + 1, LEN(NEW_FIX_2)) AS NEW_FIX_2
FROM #YourTable y
JOIN cte ON cte.ID = y.ID
WHERE CHARINDEX('-', NEW_FIX_2) > 0
UNION ALL
SELECT ID, NEW_FIX_2, NULL
FROM cte
WHERE CHARINDEX('-', NEW_FIX_2) = 0
)
SELECT t.ID, ISNULL(v.VALUE, t.FIX_2) AS FIX_1, t.FTO, t.FIX_2
FROM #YourTable t
LEFT JOIN (
SELECT cte.ID, AVG(CAST(cte.VALUE AS MONEY)) AS VALUE
FROM cte
GROUP BY cte.ID
) v ON v.ID = t.ID
I am working with an employee hierarchy string that is in the format of the following. These number represent employeeID numbers and how the are structured within the company, thus being able to follow the chain of management.
123|456|789|012|345|320
I am trying to take this string of data and turn it into a temp table so I can work with each of the ID's as their own value.
I tried making a function to split the string:
ALTER FUNCTION [dbo].[SplitString]
(#String NVARCHAR(4000),
#Delimiter NCHAR(1))
RETURNS TABLE
AS
RETURN
(WITH Split(stpos, endpos) AS
(
SELECT 0 AS stpos, CHARINDEX(#Delimiter, #String) AS endpos
UNION ALL
SELECT endpos + 1, CHARINDEX(#Delimiter, #String, endpos+1)
FROM Split
WHERE endpos > 0
)
SELECT
'Id' = ROW_NUMBER() OVER (ORDER BY (SELECT 1)),
'Data' = SUBSTRING(#String, stpos, COALESCE(NULLIF(endpos, 0), LEN(#String) + 1))
FROM
Split
)
This however resulted in the following:
Id Data
-------------------
1 123
2 456|7893
3 7893|012|345|
4 012|345|320
5 345|320
6 320
Is there a better way to approach this, maybe not needing a function at all or will it be required to achieve this?
Without a Parse Function
Declare #YourTable table (ID int,IDList varchar(Max))
Insert Into #YourTable values
(1,'123|456|789|012|345|320'),
(2,'123|456')
Select A.ID
,B.*
From #YourTable A
Cross Apply (
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
From (Select x = Cast('<x>'+ replace((Select A.IDList as [*] For XML Path('')),'|','</x><x>')+'</x>' as xml).query('.')) as A
Cross Apply x.nodes('x') AS B(i)
) B
Returns
ID RetSeq RetVal
1 1 123
1 2 456
1 3 789
1 4 012
1 5 345
1 6 320
2 1 123
2 2 456
OR with the SUPER DUPER Parse (orig source listed below / couple of tweaks)
Select A.ID
,B.*
From #YourTable A
Cross Apply [dbo].[udf-Str-Parse-8K](A.IDList,'|') B
Would Return the same as above
CREATE FUNCTION [dbo].[udf-Str-Parse-8K] (#String varchar(max),#Delimiter varchar(10))
Returns Table
As
Return (
with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
cte2(N) As (Select Top (IsNull(DataLength(#String),0)) Row_Number() over (Order By (Select NULL)) From (Select N=1 From cte1 a,cte1 b,cte1 c,cte1 d) A ),
cte3(N) As (Select 1 Union All Select t.N+DataLength(#Delimiter) From cte2 t Where Substring(#String,t.N,DataLength(#Delimiter)) = #Delimiter),
cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(#Delimiter,#String,s.N),0)-S.N,8000) From cte3 S)
Select RetSeq = Row_Number() over (Order By A.N)
,RetVal = Substring(#String, A.N, A.L)
From cte4 A
);
--Orginal Source http://www.sqlservercentral.com/articles/Tally+Table/72993/
--Much faster than str-Parse, but limited to 8K
--Select * from [dbo].[udf-Str-Parse-8K]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse-8K]('John||Cappelletti||was||here','||')
Edit - Stand Alone
Declare #String varchar(max) = '123|456|789|012|345|320'
Declare #Delim varchar(10) = '|'
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
From (Select x = Cast('<x>'+ replace((Select #String as [*] For XML Path('')),#Delim,'</x><x>')+'</x>' as xml).query('.')) as A
Cross Apply x.nodes('x') AS B(i)
If you need a string "splitter" the fastest one available for 2012 (pre- 2016) is going to be found here. This will blow the doors off of anything posted thusfar. If your items/tokens are all the same size then an even faster method would be this:
DECLARE #yourstring varchar(8000) = '123|456|789|012|345|320';
WITH E(N) AS (SELECT 1 FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))t(v)),
iTally(N) AS (SELECT TOP ((LEN(#yourstring)/4)+1) ROW_NUMBER() OVER (ORDER BY (SELECT 1))
FROM e a, e b, e c, e d)
SELECT itemNumber = ROW_NUMBER() OVER (ORDER BY N), item = SUBSTRING(#yourstring, ((N*4)-3), 3)
FROM iTally;
Results:
itemNumber item
-------------------- ----
1 123
2 456
3 789
4 012
5 345
6 320
I write more about this and provide examples of how to put this logic into a function here.
I use this version of the Split function.
CREATE FUNCTION [dbo].[Split]
(
#delimited nvarchar(max),
#delimiter nvarchar(100)
) RETURNS #t TABLE
(
-- Id column can be commented out, not required for sql splitting string
id int identity(1,1), -- I use this column for numbering splitted parts
val nvarchar(max)
)
AS
BEGIN
declare #xml xml
set #xml = N'<root><r>' + replace(#delimited,#delimiter,'</r><r>') + '</r></root>'
insert into #t(val)
select
r.value('.','varchar(max)') as item
from #xml.nodes('//root/r') as records(r)
RETURN
END
You Query would look something like....
SELECT *
FROM TableName t
CROSS APPLY [dbo].[Split](t.EmpIDs, '|')
I need a help on writing an optimal query for the below problem. Have attached the query I have with me but it is highly utilizing resources.
Below is the code to achieve above said logic. Please suggest some optimal way to achieve the same
-- drop table #me
create table #ME (memid int , EffectiveDate datetime , termdate datetime)
Insert into #ME values ('123','3-Dec-16','10-Jan-17')
Insert into #ME values ('123','11-Jan-17','6-Feb-17')
Insert into #ME values ('123','7-Feb-17','5-Mar-17')
Insert into #ME values ('123','8-Mar-17','15-Apr-17')
Insert into #ME values ('123','16-Apr-17','24-May-17')
--drop table #dim
select * from #ME
declare #StartDate datetime , #CutoffDate datetime
select #StartDate= min(effectivedate),#CutoffDate = max(termdate) From #me where termdate<>'9999-12-31 00:00:00.000'
SELECT d
into #dim
FROM
(
SELECT d = DATEADD(DAY, rn - 1, #StartDate)
FROM
(
SELECT TOP (DATEDIFF(DAY, #StartDate, #CutoffDate))
rn = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
FROM sys.all_objects AS s1
CROSS JOIN sys.all_objects AS s2
-- on my system this would support > 5 million days
ORDER BY s1.[object_id]
) AS x
) AS y;
--drop table #MemEligibilityDateSpread
select MemID, D As DateSpread Into #MemEligibilityDateSpread From #Dim dim JOIN #me ME on dim.d between ME.effectivedate and me.termdate
--drop table #DateClasified
WITH CTE AS
(
SELECT MEmID,
UniqueDate = DateSpread,
DateGroup = DATEADD(dd, - ROW_NUMBER() OVER (PARTITION BY Memid ORDER BY Memid,DateSpread), DateSpread)
FROM #MemEligibilityDateSpread
GROUP BY Memid,DateSpread
)
--===== Now, if we find the MIN and MAX date for each DateGroup, we'll have the
-- Start and End dates of each group of contiguous daes. While we're at it,
-- we can also figure out how many days are in each range of days.
SELECT Memid,
StartDate = MIN(UniqueDate),
EndDate = MAX(UniqueDate)
INTO #DateClasified
FROM cte
GROUP BY Memid,DateGroup
ORDER BY Memid,StartDate
select ME.MemID,ME.EffectiveDate,ME.TermDate,DC.StartDate,DC.EndDate from #DateClasified dc join #me ME ON Me.MemID = dc.MemID
and (ME.EffectiveDate BETWEEN DC.StartDate AND DC.EndDate
OR ME.TermDate BETWEEN DC.StartDate AND DC.EndDate)
In cte0 and cte1, we create an ad-hoc tally/calendar table. Once we have that, it is a small matter to calculate and group by Island.
Currently, the tally is has a max of 10,000 days (27 years), but you can easily expand the tally table by adding , cte0 N5
;with cte0(N) as (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N))
,cte1(R,D) as (Select Row_Number() over (Order By (Select Null))
,DateAdd(DD,-1+Row_Number() over (Order By (Select Null)),(Select MinDate=min(EffectiveDate) From #ME))
From cte0 N1, cte0 N2, cte0 N3, cte0 N4)
Select MemID
,EffectiveDate
,TermDate
,SinceFrom = Min(EffectiveDate) over (Partition By Island)
,Tildate = Max(TermDate) over (Partition By Island)
From (
Select *,Island = R - Row_Number() over (Partition By MemID Order by TermDate)
From #ME A
Join cte1 B on D Between EffectiveDate and TermDate
) A
Group By MemID,Island,EffectiveDate,TermDate
Order By 1,2
Returns
MemID EffectiveDate TermDate SinceFrom Tildate
123 2016-12-03 2017-01-10 2016-12-03 2017-03-05
123 2017-01-11 2017-02-06 2016-12-03 2017-03-05
123 2017-02-07 2017-03-05 2016-12-03 2017-03-05
123 2017-03-08 2017-04-15 2017-03-08 2017-05-24
123 2017-04-16 2017-05-24 2017-03-08 2017-05-24
Edit - Now if you want a compressed dataset
Select MemID
,EffectiveDate = Min(EffectiveDate)
,TermDate = Max(TermDate)
From (
Select *,Island = R - Row_Number() over (Partition By MemID Order by TermDate)
From #ME A
Join cte1 B on D Between EffectiveDate and TermDate
) A
Group By MemID,Island
Order By 1,2
Returns
MemID EffectiveDate TermDate
123 2016-12-03 2017-03-05
123 2017-03-08 2017-05-24
I have a column in a table which has incremented values like:
AAA0000001
AAA0000002
... and so on
I want to find if the values stored in this column are in proper sequential order or if any value is missing in between or is deleted.
How can i achieve this?
Assuming the pattern is always: AAA[0-9][0-9][0-9][0-9][0-9][0-9][0-9], you can do this with a Tally Table.
Sample Data:
CREATE TABLE Tbl(val VARCHAR(10))
INSERT INTO Tbl VALUES
('AAA0000001'), ('AAA0000002'), ('AAA0000004'), ('AAA0000011');
val
----------
AAA0000001
AAA0000002
AAA0000004
AAA0000011
SQL Fiddle
;WITH Cte AS(
SELECT *,
num = CAST(SUBSTRING(val, 4, LEN(val) - 3) AS INT)
FROM Tbl
),
E1(N) AS(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
E2(N) AS(SELECT 1 FROM E1 a CROSS JOIN E1 b),
E4(N) AS(SELECT 1 FROM E2 a CROSS JOIN E2 b),
Tally(N) AS(
SELECT TOP(SELECT MAX(num) FROM Cte)
ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
FROM E4
)
SELECT
N,
val = 'AAA' + RIGHT('0000000' + CAST(N AS VARCHAR(7)), 7)
FROM Tally
WHERE NOT EXISTS(
SELECT 1 FROM Cte WHERE num = N
)
RESULT
N val
-------------------- ----------
3 AAA0000003
5 AAA0000005
6 AAA0000006
7 AAA0000007
8 AAA0000008
9 AAA0000009
10 AAA0000010
Explanation:
The first CTE, named as Cte, extracts the numeric part of the strings and CASTs them to INT.
The succeeding CTEs, from E1 to Tally(N) generates a table with sequential values from 1 up to the MAX(num) - the INT return from the first CTE.
The final SELECT just checks for the non-existing num from the first CTE.
'AAA' + RIGHT('0000000' + CAST(N AS VARCHAR(7)), 7) transforms N so that it follows the pattern.
This is a Gaps problem. You can look into this article by Dwain Camps for more solutions on Gaps and Islands.
You can use ROW_NUMBER like this.
Sample Data
DECLARE #tab1 TABLE(id VARCHAR(20));
insert into #tab1 VALUES('AAA0000001'),('AAA0000002'),('AAA0000003'),('AAA0000004'),('AAA0000006'),('AAA0000007'),('AAA0000010');
Query
;WITH CTE as
(
SELECT convert(int,STUFF(id,1,3,'')) id,convert(int,STUFF(id,1,3,'')) - ROW_NUMBER()OVER(ORDER BY convert(int,STUFF(id,1,3,''))) rn
FROM #tab1
),CTE2 as
(
SELECT ROW_NUMBER()OVER(ORDER BY rn) as rn, MIN(id) series_start,MAX(id) series_end
FROM CTE
GROUP BY rn
)
SELECT C2.series_end,C1.series_start
FROM CTE2 C1
INNER JOIN CTE2 C2 ON C1.rn = C2.rn + 1;
SQL Fiddle
Explanation
Output of CTE is the difference of gaps between id values.
Output of CTE2 is the start and end of continuous series of numbers
Final Output gives the start and end of gaps within the series
Output
series_end series_start
4 6
7 10
If the schema is fixed then no need for complex queries. This works:
DECLARE #t TABLE ( v VARCHAR(100) );
INSERT INTO #t
VALUES ( 'AAA0000001' ),
( 'AAA0000002' ),
( 'AAA0000007' ),
( 'AAA0000008' ),
( 'AAA0000010' ),
( 'AAA0000011' ),
( 'AAA0000012' );
SELECT * FROM #t t1
CROSS APPLY(SELECT TOP 1 v FROM #t t2 WHERE t2.v > t1.v ORDER BY v) ca
WHERE RIGHT(t1.v, 7) <> RIGHT(ca.v, 7) - 1
Output:
v v
AAA0000002 AAA0000007
AAA0000008 AAA0000010
In sqlserver 2012, you can use LAG and LEAD
DECLARE #t table(col1 varchar(15))
INSERT #t values('AAA0000001'),('AAA0000002'),('AAA0000004')
SELECT
case when
stuff(lag(col1) over (order by col1), 1,3,'') + 1
= stuff(col1, 1,3,'') then 'Yes' else 'No' end previous_exists,
case when
stuff(lead(col1) over (order by col1), 1,3,'') - 1
= stuff(col1, 1,3,'') then 'Yes' else 'No' end next_exists,
col1
FROM #t
Result:
previous_exists next_exists col1
No Yes AAA0000001
Yes No AAA0000002
No No AAA0000004