How to generate sequence numbers for hierarchical data in sql server - sql-server

I have create a function in sql to get a serial number in hierarchy.I have table called Goals.the structure of table is below
GoalId ParentId Goalstatement
---------- ---------- ----------
1 0 abc
2 0 def
3 1 acc
4 2 efc
5 3 dec
6 0 efc
7 3 jhg
I want to write a function to get the result as
Serial no GoalId ParentId GoalStatement
---------- ---------- ---------- --------------------
1 1 0
2 2 0
3 6 0
1.1 3 1
1.1.1 5 3
1.1.2 7 3
2.1 4 2
----------
I have tried with common table expression
WITH Hierarchy(GoalID, ParentId, Parents)
AS
(
SELECT GoalID, GoalParentID, CAST('' AS VARCHAR(MAX))
FROM Goals AS FirtGeneration
WHERE GoalParentID =0
UNION ALL
SELECT NextGeneration.GoalID, NextGeneration.GoalParentID,
CAST(CASE WHEN Parent.Parents = ''
THEN(CAST(NextGeneration.GoalParentID AS VARCHAR(MAX)))
ELSE(Parent.Parents + '.' + CAST(NextGeneration.GoalParentID AS VARCHAR(MAX)))
END AS VARCHAR(MAX))
FROM Goals AS NextGeneration
INNER JOIN Hierarchy AS Parent ON NextGeneration.GoalParentID = Parent.GoalID
)
SELECT *
FROM Hierarchy
OPTION(MAXRECURSION 32767)
Can any one help me to write a function to create serial number in a hierarchical way

Your recursive CTE is quite close but you need to add in a ROW_NUMBER() in order to generate the sequential numbers at each level of the hierarchy. Try this;
DECLARE #Goals TABLE (GoalId INT, GoalParentID INT, Goalstatement VARCHAR(100))
INSERT #Goals VALUES
(1, 0, 'abc'),
(2, 0, 'def'),
(3, 1, 'acc'),
(4, 2, 'efc'),
(5, 3, 'dec'),
(6, 0, 'efc'),
(7, 3, 'jhg')
;WITH NumberedGoals(GoalId, GoalParentID, Goalstatement, GoalSequence) AS (
SELECT
GoalId, GoalParentID, Goalstatement, ROW_NUMBER() OVER (PARTITION BY GoalParentID ORDER BY GoalId) AS GoalSequence
FROM
#Goals
), Hierarchy(GoalID, GoalParentID, GoalSequence, Parents)
AS
(
SELECT GoalID, GoalParentID, GoalSequence, CAST(GoalSequence AS VARCHAR(MAX))
FROM NumberedGoals AS FirtGeneration
WHERE GoalParentID = 0
UNION ALL
SELECT NextGeneration.GoalID, NextGeneration.GoalParentID, NextGeneration.GoalSequence,
CAST(CASE WHEN Parent.Parents = ''
THEN(CAST(NextGeneration.GoalSequence AS VARCHAR(MAX)))
ELSE(Parent.Parents + '.' + CAST(NextGeneration.GoalSequence AS VARCHAR(MAX)))
END AS VARCHAR(MAX))
FROM NumberedGoals AS NextGeneration
INNER JOIN Hierarchy AS Parent ON NextGeneration.GoalParentID = Parent.GoalID
)
SELECT h.Parents as [Serial no], h.GoalId, h.GoalParentId, g.GoalStatement
FROM Hierarchy h
JOIN #Goals g ON g.GoalID = h.GoalID
OPTION (MAXRECURSION 32767)

;with Hierarchy
as
(
select GoalID,
ParentId,
Row_Number() over(partition by ParentId order by GoalID) as number,
cast(Row_Number() over(partition by ParentId order by GoalID) as nvarchar(200)) newnumber
from Goals where ParentId = 0
Union All
Select p.GoalId,
p.ParentId,
Row_Number() over(partition by p.ParentId order by p.GoalID) as number,
cast(cte.newnumber + '.' + cast(Row_Number() over(partition by p.ParentId order by p.GoalID) as nvarchar(200)) as nvarchar(200)) newnumber
From Goals p
Join Hierarchy cte On cte.GoalId = p.ParentId
)
select * from Hierarchy

Related

How to group ID's by score in different types in SQL Server

I have a table like this:
ID Type Score
-------------------
5 1 100
8 1 200
3 1 300
8 2 100
3 2 200
5 2 300
How do I sort them by descending score (to give them a ranking for that Type) and then create a table which a column for each Type where the ID's positions are shown such as:
ID Type1 Type2
--------------------
3 1st 2nd
5 3rd 1st
8 2nd 3rd
So far I am able able to do this by explicitly declaring the Type number such as:
SELECT ROW_NUMBER() OVER(ORDER BY Score DESC) AS Rank, ID
FROM Table
WHERE Type = 1
This returns a rank for each ID when Type is 1.
How do I join this together with the same result when Type is 2? And how do I do this for any number of types?
There are a number of ways to tackled this. My choice would be to use conditional aggregation. Here is how this might look. If you need a dynamic number of types that can be accomplished also but is a little trickier.
declare #Something table
(
ID int
, Type int
, Score int
)
;
insert #Something values
(5, 1, 100)
, (8, 1, 200)
, (3, 1, 300)
, (8, 2, 100)
, (3, 2, 200)
, (5, 2, 300)
;
with SortedValues as
(
select *
, RowNum = ROW_NUMBER() over (partition by Type order by Score)
from #Something
)
select ID
, Type1 = max(case when Type = 1 then RowNum end)
, Type2 = max(case when Type = 2 then RowNum end)
from SortedValues
group by ID
order by ID
;
-- EDIT --
I realized you said you need to have this work for any number of Types. Most people around SO like to ue a dynamic pivot. I personally find the syntax for pivot to be very obtuse. I prefer to build a dynamic version of conditional aggregation for this type of thing. Here is how you can use dynamic sql to generate the results for any number of Types.
Note I had to switch to using a temp table because a table variable would not be available in the scope of the dynamic sql unless it is declared inside the dynamic sql.
if OBJECT_ID('tempdb..#Something') is not null
drop table #Something
create table #Something
(
ID int
, Type int
, Score int
)
;
insert #Something values
(5, 1, 100)
, (8, 1, 200)
, (3, 1, 300)
, (8, 2, 100)
, (3, 2, 200)
, (5, 2, 300)
;
declare #StaticPortion nvarchar(2000) =
'with SortedValues as
(
select *, ROW_NUMBER() over(partition by Type order by Score) as RowNum
from #Something
)
select ID';
declare #DynamicPortion nvarchar(max) = '';
with E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))dt(n)),
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
cteTally(N) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E2
)
select #DynamicPortion = #DynamicPortion +
', MAX(Case when Type = ' + CAST(N as varchar(6)) + ' then RowNum end) as Type' + CAST(N as varchar(6)) + CHAR(10)
from cteTally t
where t.N <=
(
select Count(distinct Type)
from #Something
)
declare #FinalStaticPortion nvarchar(2000) = ' from SortedValues
group by ID
order by ID';
select #StaticPortion + #DynamicPortion + #FinalStaticPortion
declare #SqlToExecute nvarchar(max) = #StaticPortion + #DynamicPortion + #FinalStaticPortion;
select #SqlToExecute
exec sp_executesql #SqlToExecute

I need a more reliable sort for CTE hierarchy query

Example table:
CREATE TABLE Fruit (
ID int identity(1,1) NOT NULL,
ParentID int NULL,
Name varchar(255)
);
I want to sort parent and child records from the same table in alphabetical order (more than one level deep):
Apples
--Green
----Just Sour
----Really Sour
--Red
----Big
----Small
Bananas
--etc.
I attempted this:
;WITH CTE(ID, ParentID, Name, Sort) AS
(
SELECT
ID
,ParentID
,Name
,cast('\' + Name as nvarchar(255)) AS Sort
FROM Fruit
WHERE ParentID IS NULL
UNION ALL
SELECT
a.ID
,a.ParentID
,a.Name
,cast(b.Sort + '\' + a.Name as nvarchar(255)) AS Sort
FROM Fruit a
INNER JOIN CTE b ON a.ParentID = b.ID
)
SELECT * FROM CTE Order by Sort
This produces results for the sort like:
\Apples
\Apples\Green
\Apples\Green\Just Sour
\etc.
Just when I thought things were good, it isn't reliable. For example, if an item has more than one word. Like:
\Apples
\Apples A <-- culprit
\Apples\Green
If I can expand my question while I'm at it, I'd like to show actual hyphens or something in the results:
Parent
- Child
--Grandchild
The cruddy way I quickly did this was by adding a prefix column in the table with the value of - for all records. Then I could do this:
;WITH CTE(ID, ParentID, Name, Sort, Prefix) AS
(
SELECT
ID
,ParentID
,Name
,cast('\' + Name as nvarchar(255)) AS Sort
,Prefix
FROM Fruit
WHERE ParentID IS NULL
UNION ALL
SELECT
a.ID
,a.ParentID
,a.Name
,cast(b.Sort + '\' + a.Name as nvarchar(255)) AS Sort
,cast(b.Prefix + a.Prefix as nvarchar(10)) AS Prefix
FROM Fruit a
INNER JOIN CTE b ON a.ParentID = b.ID
)
SELECT * FROM CTE Order by Sort
But that seems incorrect or not optimal.
These hierarchical queries still give me a headache, so perhaps I'm just not seeing the obvious.
I tend to use row_number() ordered by Name in this case
Example
Declare #YourTable table (id int,ParentId int,Name varchar(50))
Insert into #YourTable values
( 1, NULL,'Apples')
,( 2, 1 ,'Green')
,( 3, 2 ,'Just Sour')
,( 4, 2 ,'Really Sour')
,( 5, 1 ,'Red')
,( 6, 5 ,'Big')
,( 7, 5 ,'Small')
,( 8, NULL,'Bananas')
Declare #Top int = null --<< Sets top of Hier Try 5
Declare #Nest varchar(25) = '|-----' --<< Optional: Added for readability
;with cteP as (
Select Seq = cast(1000+Row_Number() over (Order by Name) as varchar(500))
,ID
,ParentId
,Lvl=1
,Name
From #YourTable
Where IsNull(#Top,-1) = case when #Top is null then isnull(ParentId ,-1) else ID end
Union All
Select Seq = cast(concat(p.Seq,'.',1000+Row_Number() over (Order by r.Name)) as varchar(500))
,r.ID
,r.ParentId
,p.Lvl+1
,r.Name
From #YourTable r
Join cteP p on r.ParentId = p.ID)
Select A.ID
,A.ParentId
,A.Lvl
,Name = Replicate(#Nest,A.Lvl-1) + A.Name
,Seq --<< Can be removed
From cteP A
Order By Seq
Returns
ID ParentId Lvl Name Seq
1 NULL 1 Apples 1001
2 1 2 |-----Green 1001.1001
3 2 3 |-----|-----Just Sour 1001.1001.1001
4 2 3 |-----|-----Really Sour 1001.1001.1002
5 1 2 |-----Red 1001.1002
6 5 3 |-----|-----Big 1001.1002.1001
7 5 3 |-----|-----Small 1001.1002.1002
8 NULL 1 Bananas 1002
I'm going to guess you want this result
\Apples
\Apples\Green
\Apples A
Maybe try something like this:
SELECT *
FROM CTE
ORDER BY replace(Sort, ' ', '~')
'~' is ascii 126, You can also check using excel sorting.

Finding sequence of the last numeric value in a varchar variable

I have a column in a table which has incremented values like:
AAA0000001
AAA0000002
... and so on
I want to find if the values stored in this column are in proper sequential order or if any value is missing in between or is deleted.
How can i achieve this?
Assuming the pattern is always: AAA[0-9][0-9][0-9][0-9][0-9][0-9][0-9], you can do this with a Tally Table.
Sample Data:
CREATE TABLE Tbl(val VARCHAR(10))
INSERT INTO Tbl VALUES
('AAA0000001'), ('AAA0000002'), ('AAA0000004'), ('AAA0000011');
val
----------
AAA0000001
AAA0000002
AAA0000004
AAA0000011
SQL Fiddle
;WITH Cte AS(
SELECT *,
num = CAST(SUBSTRING(val, 4, LEN(val) - 3) AS INT)
FROM Tbl
),
E1(N) AS(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
E2(N) AS(SELECT 1 FROM E1 a CROSS JOIN E1 b),
E4(N) AS(SELECT 1 FROM E2 a CROSS JOIN E2 b),
Tally(N) AS(
SELECT TOP(SELECT MAX(num) FROM Cte)
ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
FROM E4
)
SELECT
N,
val = 'AAA' + RIGHT('0000000' + CAST(N AS VARCHAR(7)), 7)
FROM Tally
WHERE NOT EXISTS(
SELECT 1 FROM Cte WHERE num = N
)
RESULT
N val
-------------------- ----------
3 AAA0000003
5 AAA0000005
6 AAA0000006
7 AAA0000007
8 AAA0000008
9 AAA0000009
10 AAA0000010
Explanation:
The first CTE, named as Cte, extracts the numeric part of the strings and CASTs them to INT.
The succeeding CTEs, from E1 to Tally(N) generates a table with sequential values from 1 up to the MAX(num) - the INT return from the first CTE.
The final SELECT just checks for the non-existing num from the first CTE.
'AAA' + RIGHT('0000000' + CAST(N AS VARCHAR(7)), 7) transforms N so that it follows the pattern.
This is a Gaps problem. You can look into this article by Dwain Camps for more solutions on Gaps and Islands.
You can use ROW_NUMBER like this.
Sample Data
DECLARE #tab1 TABLE(id VARCHAR(20));
insert into #tab1 VALUES('AAA0000001'),('AAA0000002'),('AAA0000003'),('AAA0000004'),('AAA0000006'),('AAA0000007'),('AAA0000010');
Query
;WITH CTE as
(
SELECT convert(int,STUFF(id,1,3,'')) id,convert(int,STUFF(id,1,3,'')) - ROW_NUMBER()OVER(ORDER BY convert(int,STUFF(id,1,3,''))) rn
FROM #tab1
),CTE2 as
(
SELECT ROW_NUMBER()OVER(ORDER BY rn) as rn, MIN(id) series_start,MAX(id) series_end
FROM CTE
GROUP BY rn
)
SELECT C2.series_end,C1.series_start
FROM CTE2 C1
INNER JOIN CTE2 C2 ON C1.rn = C2.rn + 1;
SQL Fiddle
Explanation
Output of CTE is the difference of gaps between id values.
Output of CTE2 is the start and end of continuous series of numbers
Final Output gives the start and end of gaps within the series
Output
series_end series_start
4 6
7 10
If the schema is fixed then no need for complex queries. This works:
DECLARE #t TABLE ( v VARCHAR(100) );
INSERT INTO #t
VALUES ( 'AAA0000001' ),
( 'AAA0000002' ),
( 'AAA0000007' ),
( 'AAA0000008' ),
( 'AAA0000010' ),
( 'AAA0000011' ),
( 'AAA0000012' );
SELECT * FROM #t t1
CROSS APPLY(SELECT TOP 1 v FROM #t t2 WHERE t2.v > t1.v ORDER BY v) ca
WHERE RIGHT(t1.v, 7) <> RIGHT(ca.v, 7) - 1
Output:
v v
AAA0000002 AAA0000007
AAA0000008 AAA0000010
In sqlserver 2012, you can use LAG and LEAD
DECLARE #t table(col1 varchar(15))
INSERT #t values('AAA0000001'),('AAA0000002'),('AAA0000004')
SELECT
case when
stuff(lag(col1) over (order by col1), 1,3,'') + 1
= stuff(col1, 1,3,'') then 'Yes' else 'No' end previous_exists,
case when
stuff(lead(col1) over (order by col1), 1,3,'') - 1
= stuff(col1, 1,3,'') then 'Yes' else 'No' end next_exists,
col1
FROM #t
Result:
previous_exists next_exists col1
No Yes AAA0000001
Yes No AAA0000002
No No AAA0000004

Retrieve most frequent values from each column within groups

For each group, grouped using field GRP, I would like to retrieve the most frequently occurring value in column A and the most frequently occurring value in column B, and potentially do this for many other columns.
Sample Data:
GRP | A | B
-----------
Cat | 1 | 1
Cat | 2 | 1
Cat | 3 | 2
Cat | 3 | 3
Dog | 5 | 6
Dog | 5 | 7
Dog | 6 | 7
Expected Output:
GRP | A | B
-----------
Cat | 3 | 1
Dog | 5 | 7
This query achieves that result:
SELECT
freq1.GRP,
freq1.A,
freq2.B
FROM (
SELECT
GRP,
A,
ROW_NUMBER() OVER(PARTITION BY GRP ORDER BY COUNT(*) DESC) AS F_RANK
FROM MyTable
GROUP BY GRP, A
) AS freq1
INNER JOIN (
SELECT
GRP,
B,
ROW_NUMBER() OVER(PARTITION BY GRP ORDER BY COUNT(*) DESC) AS F_RANK
FROM MyTable
GROUP BY GRP, B
) AS freq2 ON freq2.GRP = freq1.GRP
WHERE freq1.F_RANK = 1 AND freq2.F_RANK = 1
It just doesn't look very efficient, and even less so if I were to add a column C, D, etc...
Is there a better way?
I wouldn't say this approach is "better" because it will generate the exact same execution plan. However, I find this type of approach a lot more maintainable as the number of columns might grow. For me this is a lot easier to read.
with GroupA as
(
select Grp
, A
, ROW_NUMBER() over(partition by grp order by count(*) desc) as RowNum
from MyTable
group by Grp, A
)
, GroupB as
(
select Grp
, B
, ROW_NUMBER() over(partition by grp order by count(*) desc) as RowNum
from MyTable
group by Grp, B
)
select a.Grp
, a.A
, b.B
from GroupA a
inner join GroupB b on a.Grp = b.Grp and b.RowNum = 1
where a.RowNum = 1;
An alternative using results ranked in a temp table:
SELECT GRP, A, B,
ROW_NUMBER() OVER (PARTITION BY A ORDER BY GRP, A) ARank,
ROW_NUMBER() OVER (PARTITION BY B ORDER BY GRP, B) BRank
INTO #TMP
FROM MyTable
SELECT t1.GRP,
(SELECT TOP 1 A FROM #TMP WHERE GRP = t1.Grp ORDER BY ARank DESC) A,
(SELECT TOP 1 B FROM #TMP WHERE GRP = t1.Grp ORDER BY BRank DESC) B
FROM MyTable t1
GROUP BY T1.GRP
DROP TABLE #TMP
Full Solution on SQL Fiddle
Schema Setup:
CREATE TABLE MyTable
([GRP] varchar(3), [A] int, [B] int)
;
INSERT INTO MyTable
([GRP], [A], [B])
VALUES
('Cat', 1, 1),
('Cat', 2, 1),
('Cat', 3, 2),
('Cat', 3, 3),
('Dog', 5, 6),
('Dog', 5, 7),
('Dog', 6, 7)
;
Query 1:
SELECT GRP, A, B,
ROW_NUMBER() OVER (PARTITION BY A ORDER BY GRP, A) ARank,
ROW_NUMBER() OVER (PARTITION BY B ORDER BY GRP, B) BRank
INTO #TMP
FROM MyTable
SELECT t1.GRP,
(SELECT TOP 1 A FROM #TMP WHERE GRP = t1.Grp ORDER BY ARank DESC) A,
(SELECT TOP 1 B FROM #TMP WHERE GRP = t1.Grp ORDER BY BRank DESC) B
FROM MyTable t1
GROUP BY T1.GRP
DROP TABLE #TMP
Results:
| GRP | A | B |
|-----|---|---|
| Cat | 3 | 1 |
| Dog | 5 | 7 |
I'll start out this answer by saying this is NOT going to be more efficient to run - it should just be easier to add/subtract columns. To do this you just add them into the code in two places.
You can use dynamic SQL to build your result set like this:
CREATE TABLE ##fields (id INT IDENTITY(1,1),fieldname VARCHAR(255))
INSERT INTO ##fields
( fieldname )
VALUES ('A'),('B') --Add field names here
DECLARE #maxid INT
SELECT #maxid = MAX(id) FROM ##fields
CREATE TABLE ##Output (GRP VARCHAR(3), A INT, B INT) --Add field names here
INSERT INTO ##Output
( GRP )
SELECT DISTINCT GRP FROM MyTable
DECLARE #SQL NVARCHAR(MAX)
DECLARE #i INT = 1
WHILE #i <=#maxid
BEGIN
SELECT #SQL = 'with cte as (SELECT GRP , ' + fieldname + ' ,
ROW_NUMBER() OVER ( PARTITION BY GRP ORDER BY COUNT(*) DESC ) AS F_RANK
FROM MyTable
GROUP BY GRP , ' + fieldname + ')
UPDATE O
SET O.' + fieldname + ' = cte.' + fieldname + '
FROM ##Output O
INNER JOIN cte ON O.GRP = cte.GRP AND cte.F_Rank = 1' FROM ##fields WHERE id = #i
EXEC sp_executesql #sql
SET #i = #i + 1
END
SELECT *
FROM ##Output
DROP TABLE ##fields
DROP TABLE ##Output
Using your simple example above, I received the following performance stats:
Dynamic SQL
CPU = 31
Reads = 504
Duration = 39
Your SQL
CPU = 0
Reads = 6
Duration = 1
Clearly, this way is not a more efficient way of doing this. I did want to throw it out there anyway as an alternative to your current method.
First we create the test data:
DECLARE #MyTable TABLE
(
GRP varchar(10),
A int,
B int
)
INSERT INTO #MyTable
( GRP, A, B)
VALUES
('Cat', 1, 1),
('Cat', 2, 1),
('Cat', 3, 2),
('Cat', 3, 3),
('Dog', 5, 6),
('Dog', 5, 7),
('Dog', 6, 7);
Now we use first_value from a subselect (or a cte if you wanted) and grab the top cat and dog columns
SELECT DISTINCT
GRP,
FIRST_VALUE(A) OVER(PARTITION BY GRP ORDER BY d.A_CNT DESC) AS A_RANK,
FIRST_VALUE(B) OVER(PARTITION BY GRP ORDER BY d.B_CNT DESC) AS B_RANK
FROM
(
SELECT
GRP,
A,
ROW_NUMBER() OVER (PARTITION BY A ORDER BY GRP, A) AS A_CNT,
B,
ROW_NUMBER() OVER (PARTITION BY B ORDER BY GRP, B) AS B_CNT
FROM #MyTable
) d
Output:
GRP A_RANK B_RANK
Cat 3 1
Dog 5 7

How to find the parent ID in a return table

I have following table:
ID ParentID
1 NULL
2 1
3 2
4 NULL
5 4
6 5
7 3
I want to find the first ID of a specific child ID.
Example: ID=7 and the result is 1
ID=6 and the result is 4
How to do it?
You need to do a bit of recursive CTE magic to solve this one.
Given the data in a table variable thusly:
declare #data table(id int, parentid int)
insert into #data
select 1, null
union select 2,1
union select 3,2
union select 4, null
union select 5,4
union select 6,5
union select 7,3
The following should do the trick:
;with recursiveTable as
(
select d.id, d.parentId, 0 as depth
from #data d
where d.id=6 -- Parameterize this
union all
select d.id, d.parentid, r.depth-1
from #data d
inner join recursiveTable r
on d.id = r.parentId
)
select top 1 id
from recursiveTable
order by depth
Plugging 6 in as above returns 4. Changing this to 7 returns 1 as requested.
Try this:
CREATE TABLE childpar(ID int,ParentID int)
INSERT INTO childpar
values(1,NULL),
(2, 1),
(3, 2),
(4, NULL),
(5, 4),
(6, 5),
(7, 3)
DECLARE #inpyID int
SET #inpyID=7
;WITH CTE1 as (
select * from childpar where id=#inpyID
union all
select c2.* from CTE1 c1 inner join childpar c2 on c1.ParentID = c2.ID
)
select top 1 id from CTE1 order by id asc

Resources