SQL Recursive CTE: Finding objects linked by property - sql-server

I'm just trying to understand CTE and recursion to solve an issue that I would previously have used a cursor for.
create table ##ACC (
AccNo int,
Property char
)
Insert into ##ACC
VALUES (1,'A'),(1,'B'),(2,'A'),(2,'C'),(3,'C'),(4,'D')
What I'm trying to achieve is to get a list of all AccNo's, and all AccNo's they're related to via Property. So my expected results are
PrimaryAccNo | LinkedAccNo
1 | 1
1 | 2
1 | 3
2 | 1
2 | 2
2 | 3
3 | 1
3 | 2
3 | 3
4 | 4
I've attempted the following code and variations but I either get 4 results (PrimaryAccNo=LinkedAccNo) only or I hit 100 recursions.
WITH Groups(PrimaryAccNo, LinkedAccNo)
AS
(
Select distinct AccNo, AccNo from ##ACC
UNION ALL
Select g.PrimaryAccNo, p.AccNo from
##ACC p inner join Groups g on p.AccNo=g.LinkedAccNo
inner join ##ACC pp on p.Property=pp.Property
where p.AccNo<> pp.AccNo
)
Select PrimaryAccNo,LinkedAccNo
from Groups
What am I doing wrong?

You're running into an infinite loop caused by cycles within your data, e.g.: 1 > 2 > 3 > 2 > ... . The solution is to keep track of the rows that have already been "consumed". Due to limitations in CTEs, this has to be done by including the history within each CTE row, e.g. by assembling the path followed to arrive at each row. You can uncomment the , Path on the final select to see what is going on.
-- Sample data.
declare #ACC as Table ( AccNo Int, Property Char );
insert into #ACC values
( 1, 'A' ), ( 1, 'B' ), ( 2, 'A' ), ( 2, 'C' ), ( 3, 'C' ), ( 4, 'D' );
select * from #ACC;
-- Recursive CTE.
with Groups as (
select distinct AccNo, AccNo as LinkedAccNo,
Cast( '|' + Cast( AccNo as VarChar(10) ) + '|' as VarChar(1024) ) as Path
from #ACC
union all
select G.AccNo, A.AccNo, Cast( Path + Cast( A.AccNo as VarChar(10) ) + '|' as VarChar(1024) )
from Groups as G inner join -- Take the latest round of new rows ...
#ACC as AP on AP.AccNo = G.LinkedAccNo inner join -- ... and get the Property for each ...
#ACC as A on A.Property = AP.Property -- ... to find new linked rows.
where G.Path not like '%|' + Cast( A.AccNo as VarChar(10) ) + '|%' )
select AccNo, LinkedAccNo -- , Path
from Groups
order by AccNo, LinkedAccNo;

Another approach similar to yours but differs in the following:
The property value is included in the recursive CTE so that it can be used later
The < is used to prevent duplicates and the resulting infinite recursion
Another CTE is added AccGroups to provide the mirror of the relations
A demo fiddle has been included below:
CREATE TABLE ##ACC (
AccNo int,
Property char
);
INSERT INTO ##ACC
VALUES (1,'A'),(1,'B'),(2,'A'),(2,'C'),(3,'C'),(4,'D');
WITH Groups(PrimaryAccNo, LinkedAccNo, Property) AS (
SELECT AccNo, AccNo, Property FROM ##ACC
UNION ALL
SELECT g.PrimaryAccNo, pp.AccNo, pp.Property
FROM Groups g
INNER JOIN ##ACC p ON g.Property=p.Property AND
g.LinkedAccNo < p.AccNo
INNER JOIN ##ACC pp ON p.AccNo = pp.AccNo
),
AccGroups AS (
SELECT DISTINCT * FROM (
SELECT PrimaryAccNo, LinkedAccNo FROM Groups
UNION ALL
SELECT LinkedAccNo, PrimaryAccNo FROM Groups
) t
)
SELECT * FROM AccGroups
ORDER BY PrimaryAccNo,LinkedAccNo
GO
PrimaryAccNo | LinkedAccNo
-----------: | ----------:
1 | 1
1 | 2
1 | 3
2 | 1
2 | 2
2 | 3
3 | 1
3 | 2
3 | 3
4 | 4
db<>fiddle here

Related

SQL SERVER update or insert after left join

I have a Table Animals
Id | Name | Count | -- (other columns not relevant)
1 | horse | 11
2 | giraffe | 20
I want to try to insert or update values from a CSV string
Is it possible to do something like the following in 1 query?
;with results as
(
select * from
(
values ('horse'), ('giraffe'), ('lion')
)
animal_csv(aName)
left join animals on
animals.[Name] = animal_csv.aName
)
update results
set
[Count] = 1 + animals.[Count]
-- various other columns are set here
where Id is not null
--else
--insert into results ([Name], [Count]) values (results.aName, 1)
-- (essentially Where id is null)
It looks like what you're looking for is a table variable or temporary table rather than a common table expression.
If I understand your problem correctly, you are building a result set based on data you're getting from a CSV, merging it by incrementing values, and then returning that result set.
As I read your code, it looks as if your results would look like this:
aName | Id | Name | Count
horse | 1 | horse | 12
giraffe | 2 | giraffe | 21
lion | | |
I think what you're looking for in your final result set is this:
Name | Count
horse | 12
giraffe | 21
lion | 1
First, you can get from your csv and table to a resultset in a single CTE statement:
;WITH animal_csv AS (SELECT * FROM (VALUES('horse'),('giraffe'), ('lion')) a(aName))
SELECT ISNULL(Name, aName) Name
, CASE WHEN [Count] IS NULL THEN 1 ELSE 1 + [Count] END [Count]
FROM animal_csv
LEFT JOIN animals
ON Name = animal_csv.aName
Or, if you want to build your resultset using a table variable:
DECLARE #Results TABLE
(
Name VARCHAR(30)
, Count INT
)
;WITH animal_csv AS (SELECT * FROM (VALUES('horse'),('giraffe'), ('lion')) a(aName))
INSERT #Results
SELECT ISNULL(Name, aName) Name
, CASE WHEN [Count] IS NULL THEN 1 ELSE 1 + [Count] END [Count]
FROM animal_csv
LEFT JOIN animals
ON Name = animal_csv.aName
SELECT * FROM #results
Or, if you just want to use a temporary table, you can build it like this (temp tables are deleted when the connection is released/closed or when they're explicitly dropped):
;WITH animal_csv AS (SELECT * FROM (VALUES('horse'),('giraffe'), ('lion')) a(aName))
SELECT ISNULL(Name, aName) Name
, CASE WHEN [Count] IS NULL THEN 1 ELSE 1 + [Count] END [Count]
INTO #results
FROM animal_csv
LEFT JOIN animals
ON Name = animal_csv.aName
SELECT * FROM #results

Count unique field when another field is all NULL

I have a table with three columns, which can contain duplicate rows
org - int NULL
id - int NULL
complete - bit NULL
So I might have data like so:
org | id | complete
-------------------
1 | 1 | 1
1 | 2 | NULL
1 | 2 | 1
1 | 3 | 1
2 | 3 | 1
2 | 4 | 1
2 | 4 | NULL
I want to get a count of all distinct id by org. That's easy enough to do with a COUNT(DISTINCT id) expression. Where I'm running into trouble now is I also want a count of all distinct id where any of the complete values isn't 1.
So from the above I'd want this output:
org | distinct id | distinct incomplete id
------------------------------------------
1 | 3 | 1
2 | 2 | 1
So for org 2, because id of 4 included a NULL value, then I can't count id 4 as fully complete, thus just id 3 is complete, thus resulting in a 1 in the distinct incomplete id column. So I don't know how to fill in the ???? part of the below query.
SELECT org, COUNT(DISTINCT id) TotalPeople, ???? IncompletePeople
FROM table
GROUP BY org
Try the following approach
DECLARE #T TABLE
(
Org INT,
Id INT,
Complete BIT
)
INSERT INTO #T
VALUES(1,1,1),(1,2,NULL),(1,2,1),(1,3,1),(2,3,1),(2,4,1),(2,4,NULL)
SELECT
Org,
DistinctId = COUNT(DISTINCT Id),
DistinctIncompleteId = SUM(CASE Complete WHEN 1 THEN 0 ELSE 1 END)
FROM #T
GROUP BY Org
You may try this way
create table #temptable ( org int, id int, comp int )
Go
insert into #temptable ( org, id, comp )
values ( 1,1,1)
,( 1, 2, null)
,( 1, 2, 1)
,( 1, 3, 1)
,( 2, 3, 1)
,( 2, 4, null)
,( 2, 4, 1)
select d.org, d.DistinctId, f.incompleteId from (
select COUNT (distinct id) as DistinctId , org from #temptable group by org) as d full outer join (
select COUNT (distinct id) as incompleteId , org from #temptable where comp is null group by org) as f on d.org=f.org
go
drop table #temptable
Group it by "org" and by "complete". Then put HAVING complete=1. Hope the code below helps you:
SELECT org, COUNT(id) TotalPeople, complete
FROM table
GROUP BY org,complete
HAVING complete=1 (complete IS NULL *for incomplete*)

Converting rows to columns

I have a query with the columns 'Name', 'Amount', and 'ReasonId'. I want to sum the amount and put the reasons on one row to keep every name to a single line. There are about 50 distinct ReasonId's so I do not want to name the column the name of the ReasonId's. Instead, I would like to name the columns 'Reason1', 'Reason2', 'Reason3', and 'Reason4'. One single name can have up to 4 different reasons.
I have this:
Name Amount ReasonId
-------------------------
Bob $5 7
Bob $8 6
John $2 8
John $5 9
John $3 9
John $8 4
I want to produce the following:
Name Amount Reason1 Reason2 Reason3 Reason4
-----------------------------------------------------
Bob $13 7 6 NULL NULL
John $18 8 9 4 NULL
One way to do this is to use the dense_rank window function to number the rows, and then use conditional aggregation to put the reason in the correct columns.
I can't see anything that would give the specific order of the reason columns though, maybe there is some column missing that provides the order?
with cte as (
select
name,
reasonid,
amount,
dense_rank() over (partition by name order by reasonid) rn
from your_table
)
select
name,
sum(amount) amount,
max(case when rn = 1 then reasonid end) reason1,
max(case when rn = 2 then reasonid end) reason2,
max(case when rn = 3 then reasonid end) reason3,
max(case when rn = 4 then reasonid end) reason4
from cte
group by name
If you have some column that gives the order you want then change the order by clause used in the dense_rank function.
Sample SQL Fiddle (using PG as MSSQL seems to be offline).
The output from the query above would be:
| name | amount | reason1 | reason2 | reason3 | reason4 |
|------|--------|---------|---------|---------|---------|
| Bob | 13 | 6 | 7 | (null) | (null) |
| John | 18 | 4 | 8 | 9 | (null) |
You could also use a pivot to achieve this; if you know the columns you can enter them in the script, but if not, you can use dynamic sql (there are reasons why you might want to avoid the dynamic solution).
The advantage of this route is that you can enter the column list in a table and then changes to that table will result in changes to your output with change to the script involved. The disadvantages are all those associated with dynamic SQL.
In the interests of variation, here is a dynamic SQL solution using temp tables to hold your data, since a different possibility has been provided:
-- set up your data
CREATE TABLE #MyTab (Name VARCHAR(4), Amount INT, ReasonId INT)
CREATE TABLE #AllPossibleReasons (Id INT,Label VARCHAR(10))
INSERT #AllPossibleReasons
VALUES
(1,'Reason1')
,(2,'Reason2')
,(3,'Reason3')
,(4,'Reason4')
,(5,'Reason5')
,(6,'Reason6')
,(7,'Reason7')
,(8,'Reason8')
,(9,'Reason9')
INSERT #MyTab
VALUES
('Bob',7,7)
,('Bob',8,6)
,('John',2,8)
,('John',5,9)
,('John',3,9)
,('John',8,4)
-----------------------------------------------------------------------------
-- The actual query
DECLARE #ReasonList VARCHAR(MAX) = ''
DECLARE #SQL VARCHAR(MAX)
SELECT #ReasonList = #ReasonList + ',' + QUOTENAME(Label)
FROM #AllPossibleReasons
SET #ReasonList = SUBSTRING(#ReasonList,2,LEN(#ReasonList))
SET #SQL =
'SELECT Name,Value,' + #ReasonList + ' FROM
(SELECT
M.Name,SUM(Amount) AS This, Label, SUM(Total.Value) AS Value
FROM
#MyTab AS M
INNER JOIN #AllPossibleReasons AS Reason ON M.ReasonId = Reason.Id
INNER JOIN(SELECT T.Name, SUM(Amount)Value
FROM #MyTab T GROUP BY T.Name) AS Total ON M.Name = Total.Name
GROUP BY M.Name, Reason.Label) AS Up
PIVOT (SUM(THis) FOR Label IN (' + #ReasonList + ')) AS Pvt'
EXEC (#SQL)
DROP TABLE #AllPossibleReasons
DROP TABLE #MyTab
Working from the information in ListAGG in SQLSERVER, I came up with this somewhat ugly example:
with tbl1 as (
-- Set up initial data set
select 'Bob' name, 5 amount, 7 ReasonId
union all select 'Bob' , 3, 4
union all select 'Bob', 2, 1
union all select 'Brian', 8, 2
union all select 'Bob', 6, 4
union all select 'Brian', 1, 3
union all select 'Tim', 2, 2)
, TBL2 AS ( -- Add a blank to separate the concatenation
SELECT NAME
, AMOUNT
, CAST(ReasonId as varchar) + ' ' ReasonId from tbl1
)
select ta.name
, Total
, ReasonIds from (
(select distinct name, stuff((select distinct '' + t2.ReasonId from tbl2 t2
where t1.name = t2.name
for xml path(''), type).value('.','NVARCHAR(MAX)'),1,0,' ') ReasonIds from tbl2 t1) ta
inner join ( select name, sum(amount) Total from tbl1 group by name) tb on ta.name = tb.name) ;
This converts TBL1 to the following:
name Total ReasonIds
Bob 16 1 4 7
Brian 9 2 3
Tim 2 2

Duplicates on Self Left Join

I'm trying to pivot out a table of data stored in a vertical model into a more horizontal, SQL Server table-like model. Unfortunately due to the nature of the data, I cannot use the real data here so I worked up a generic example that follows the same model.
There are three columns to the table, an ID, column ID and value, where the ID and column ID form the Primary Key. Additionally none of the data is required (i.e. an ID can be missing column ID = 3 without breaking anything)
PetID | ColumnID | Value
---------------------------
1 | 1 | Gilda
1 | 2 | Cat
2 | 1 | Sonny
2 | 2 | Cat
2 | 3 | Black
Due to the fact that the Primary Key is a composite of two columns I cannot use the built in PIVOT functionality, so I tried doing a self LEFT JOIN:
SELECT T1.PetID
,T2.Value AS [Name]
,T3.Value AS [Type]
,T4.Value AS [Color]
FROM #Temp AS T1
LEFT JOIN #Temp AS T2 ON T1.PetID = T2.PetID
AND T2.ColumnID = 1
LEFT JOIN #Temp AS T3 ON T1.PetID = T3.PetID
AND T3.ColumnID = 2
LEFT JOIN #Temp AS T4 ON T1.PetID = T4.PetID
AND T4.ColumnID = 3;
The idea being that I want to take the ID from T1 and then do a self LEFT JOIN to get each of the values by ColumnID. However I'm getting duplicates in the data:
PetID | Name | Type | Color
------------------------------
1 | Gilda | Cat | NULL
1 | Gilda | Cat | NULL
2 | Sonny | Cat | Black
2 | Sonny | Cat | Black
2 | Sonny | Cat | Black
I am able to get rid of these duplicates using a DISTINCT, but the dataset is rather large, so the required sort action is slowing down the query tremendously. Is there a better way to accomplish this or am I just stuck with a slow query?
You can use a CASE statement and avoid the joins altogether.
SELECT
PetID,
MAX(CASE WHEN ColumnID = 1 THEN Value ELSE NULL END) AS Name,
MAX(CASE WHEN ColumnID = 2 THEN Value ELSE NULL END) AS Type,
MAX(CASE WHEN ColumnID = 3 THEN Value ELSE NULL END) AS Color
FROM #Temp
GROUP BY PetId
It is essential that PetID, ColumnID be your primary key for this to work correctly. Otherwise it will cause problems when the same ColumnID is used multiple times for the same PetID
You can use pivot if you'd like to..
SELECT *
FROM (SELECT PetID,
(CASE ColumnID
WHEN 1 THEN 'Name'
WHEN 2 THEN 'Type'
WHEN 3 THEN 'Color'
END) ValueType,
VALUE
FROM #Temp
) t
PIVOT
( MAX(Value)
FOR ValueType IN ([Name],[Type],[Color])
) p
Another way without the Sub query would be..
SELECT PetID,
[1] [Name],
[2] [Type],
[3] [Color]
FROM #Temp
PIVOT
( MAX(Value)
FOR ColumnID IN ([1],[2],[3])
) p
I don't understand your concern about sorting. You have a primary key so you also have an index. This is the correct way to do it:
select
PetID,
min(case when ColumnID = 1 then Value end) as Name,
min(case when ColumnID = 2 then Value end) as Type,
min(case when ColumnID = 3 then Value end) as Color
from #Temp
group by PetID
A fix for your duplication is simple though and will probably improve performance as well:
FROM (select distinct PetID from #Temp) AS T1
SELECT T1.PetID
,T1.Value AS [Name]
,T2.Value AS [Type]
,T3.Value AS [Color]
--select *
FROM #Temp AS T1
LEFT JOIN #Temp AS T2 ON T1.PetID = T2.PetID
AND T2.ColumnID = 2
LEFT JOIN #Temp AS T3 ON T1.PetID = T3.PetID
AND T3.ColumnID = 3
where t1.ColumnID = 1
Your problem was that you were joining to the main table that had multiple rows.

SQL EXCEPT Not Working

; WITH cte AS
(SELECT p.BudgetNumber, t.MilestoneNumber FROM
(SELECT DISTINCT BudgetNumber FROM tblMilestones) p
CROSS JOIN
(SELECT DISTINCT MilestoneNumber FROM tblMilestoneTemplate) t)
SELECT BudgetNumber, MilestoneNumber FROM cte
EXCEPT (SELECT BudgetNumber, MilestoneNumber FROM tblMilestones)
ORDER BY BudgetNumber, MilestoneNumber
The query above creates all possible BudgetNumber and MilestoneNumber combinations using a cross join, and then attempts to locate combinations that are not in the tblMilestones table (I didn't create this database, I know the table prefixes are weird and this db isn't normalized).
There are no NULL entries in any of these fields. When I use this query with the EXCEPT clause above, I get some missing values (but not all), but I also get some non-missing values. When I change the EXCEPT to a LEFT JOIN, I get the same results. When I change the EXCEPT to a WHERE NOT EXISTS, I get no results at all. Can anyone please help?
SQLFiddle Output:
| BUDGETNUMBER | MILESTONENUMBER |
|--------------|-----------------|
| BA04001 | 0 |
| BA04001 | 99 |
| BA04005 | 0 |
| BA04005 | 99 |
| BA05001 | 0 |
| BA05001 | 99 |
| BA05002 | 0 |
| BA05002 | 99 |
Here is the way you would need to use NOT EXISTS correctly. You need specify where clause inside subquery to get correct result.
;
WITH cte
AS (
SELECT p.BudgetNumber
,t.MilestoneNumber
FROM (
SELECT DISTINCT BudgetNumber
FROM tblMilestones
) p
CROSS JOIN (
SELECT DISTINCT MilestoneNumber
FROM tblMilestoneTemplate
) t
)
SELECT BudgetNumber
,MilestoneNumber
FROM cte t
WHERE NOT EXISTS ( SELECT 1
FROM tblMilestones s
WHERE t.BudgetNumber = s.BudgetNumber
AND t.MilestoneNumber = s.MilestoneNumber )
ORDER BY BudgetNumber
,MilestoneNumber
Look at following two examples
DECLARE #NoPrecision AS TABLE ( MyNumber DECIMAL )
INSERT INTO #NoPrecision ( MyNumber ) VALUES ( 12345.123456789 )
SELECT * FROM #NoPrecision
output: 12345
DECLARE #Precision AS TABLE ( MyNumber DECIMAL(10,4) )
INSERT INTO #Precision ( MyNumber ) VALUES ( 12345.123456789 )
SELECT * FROM #Precision
output: 12345.1235

Resources