SQL Server 2012: Check for Duplicates with Criteria

SQL Server 2012: Check for Duplicates with Criteria - sql-server

On SQL Server 2012, I have a table similar to the one below:
Id | SessionId | TypeId | Operation | Data
------------------------------------------------------------------------
1 | ABC-123 | 6 | I |<Record EmployeeName="Joe" />
2 | ABC-123 | 6 | U |<Record EmployeeName="Joe" />
For us, the second row is a duplicate: I want to remove the record with the operation 'U' because an 'I' operation already exists for the same data. However, the SQL I tried also removes other records (see below).
-- Asker's original attempt, kept as posted apart from the variable sigil
-- (@sessionId is a T-SQL variable; '#' is only valid for temp-table names).
-- NOTE: RN is computed but never referenced by the DELETE below, so every
-- 'U' row of the session is deleted, not only the duplicated ones. That is
-- the behaviour the question is asking about.
;WITH CTE AS (
    SELECT [id],
           [SessionId],
           [TypeId],
           [Operation],
           [Data],
           RN = ROW_NUMBER() OVER (PARTITION BY [SessionId], [Data] ORDER BY [Data])
    FROM dbo.MyTable
    WHERE SessionId = @sessionId
)
DELETE FROM CTE
WHERE [Operation] = 'U'
Can you help?

-- Repro: a table variable holding one 'I' row and its duplicate 'U' row.
DECLARE @t TABLE (
    Id INT IDENTITY(1,1) PRIMARY KEY,
    SessionId VARCHAR(50),
    TypeId INT,
    Operation CHAR(1),
    Data XML
);

INSERT INTO @t (SessionId, TypeId, Operation, Data)
VALUES
    ('ABC-123', 6, 'I', '<Record EmployeeName="Joe" />'),
    ('ABC-123', 6, 'U', '<Record EmployeeName="Joe" />');

-- Number the rows inside each (SessionId, Data) group.
--   * XML cannot be used in PARTITION BY, so it is cast to NVARCHAR(MAX).
--     The string itself is the partition key (not a CHECKSUM of it), so two
--     different documents can never be grouped together by a hash collision.
--   * 'I' rows sort first, so a 'U' row is numbered > 1 whenever an 'I' row
--     exists for the same data, regardless of which one has the lower Id.
WITH CTE AS (
    SELECT Id,
           SessionId,
           TypeId,
           Operation,
           Data,
           RN = ROW_NUMBER() OVER (
                    PARTITION BY SessionId, CAST(Data AS NVARCHAR(MAX))
                    ORDER BY CASE Operation WHEN 'I' THEN 0 ELSE 1 END, Id)
    FROM @t
)
-- Only redundant 'U' rows are removed; the first row of each group is kept.
DELETE FROM CTE
WHERE RN > 1
  AND Operation = 'U';

SELECT Id, SessionId, TypeId, Operation, Data
FROM @t;
output -
Id SessionId TypeId Operation Data
--- ------------ ----------- --------- ------------------------------
1 ABC-123 6 I <Record EmployeeName="Joe" />

Related

Copying a branch of tree-like structured table

I have the following table, where ID is the pk of the table and is IDENTITY
+----+----------+-----------+-------------+
| ID | ParentID | SomeValue | FullPath |
+----+----------+-----------+-------------+
| 1 | NULL | A | (1) |
| 2 | 1 | A.1 | (1)/(2) |
| 3 | 2 | A.1.1 | (1)/(2)/(3) |
| 4 | NULL | B | (4) |
| 5 | 4 | B.1 | (4)/(5) |
| 6 | 4 | B.2 | (4)/(6) |
| 7 | 6 | B.2.1 | (4)/(6)/(7) |
+----+----------+-----------+-------------+
This table represents data stored in a hierarchical way. I am creating a procedure that will take as input an ID and new_ParentID as parameters; ID (and its children and children's children, etc) will be the branch to copy into new_ParentID.
I started the procedure, but I cannot figure out how to get the new ID of the parent I created in order to add its children. For example, if I want to copy A.1 (and A.1.1) into B.2, once A.1-Copied has been created, I do not know its ID to put it as the ParentID of A.1.1-Copied. I'm aware of the function SCOPE_IDENTITY, but I don't know how to use it in a CTE. Here is what I have at the moment:
-- Asker's work-in-progress: walks the branch rooted at @ID and tries to
-- insert a copy of every node. The "???" below is the unresolved part of the
-- question (the new parent ID of each copied child), so this snippet is
-- intentionally incomplete and does not compile as written.
;WITH Branch
AS
(
    -- Anchor: the root of the branch to copy.
    SELECT ID,
           ParentGroupID,
           SomeValue
    FROM
        #Table1 A
    WHERE
        ID = @ID
    UNION ALL
    -- Recursive step: every row whose parent is already in the branch.
    SELECT E.ID,
           E.ParentGroupID,
           E.SomeValue
    FROM
        #Table1 E
        INNER JOIN Branch T
            ON T.ID = E.ParentGroupID
)
INSERT INTO #Table1
SELECT
    CASE WHEN ParentGroupID IS NULL
         THEN @new_ParentID
         ELSE ???,
    SomeValue + '-Copied'
FROM
    Branch
How can I manage to use SCOPE_IDENTITY to correctly set the new parent of children of my copied branch ?
EDITS:
Suppose I want to copy branch with ID 4 (so the whole B branch) into ID 2 (so A.1 branch), we should have data as follows:
+----+----------+------------+-----------------------+
| ID | ParentID | SomeValue | FullPath |
+----+----------+------------+-----------------------+
| 1 | NULL | A | (1) |
| 2 | 1 | A.1 | (1)/(2) |
| 3 | 2 | A.1.1 | (1)/(2)/(3) |
| 4 | NULL | B | (4) |
| 5 | 4 | B.1 | (4)/(5) |
| 6 | 4 | B.2 | (4)/(6) |
| 7 | 6 | B.2.1 | (4)/(6)/(7) |
| 8 | 2 | B-Copy | (1)/(2)/(8) |
| 9 | 8 | B.1-Copy | (1)/(2)/(8)/(9) |
| 10 | 8 | B.2-Copy | (1)/(2)/(8)/(10) |
| 11 | 10 | B.2.1-Copy | (1)/(2)/(8)/(10)/(11) |
+----+----------+------------+-----------------------+
I have procedures that update the SomeValue and FullPath values after, so don't worry about those! I'm interested in how to reproduce the hierarchy
Here is the code to insert sample data:
-- Sample hierarchy: ID is generated by IDENTITY, ParentID points at the
-- parent row (NULL for roots), FullPath is the '(id)/(id)/...' path.
CREATE TABLE #Data
(
    ID INT IDENTITY(1,1),
    ParentID INT,
    SomeValue VARCHAR(30),
    FullPath VARCHAR(255)
);

-- Explicit column list (ID is an IDENTITY column and must not be supplied)
-- and integer literals for ParentID instead of implicitly converted strings.
-- A single statement keeps the IDENTITY values in the order listed here.
INSERT INTO #Data (ParentID, SomeValue, FullPath)
VALUES (NULL, 'A',     '(1)'),
       (1,    'A.1',   '(1)/(2)'),
       (2,    'A.1.1', '(1)/(2)/(3)'),
       (NULL, 'B',     '(4)'),
       (4,    'B.1',   '(4)/(5)'),
       (4,    'B.2',   '(4)/(6)'),
       (6,    'B.2.1', '(4)/(6)/(7)');

OK, let's not beat around the bush, this is pretty messy, and takes a couple of sweeps.
We need to first use a MERGE here (with no UPDATE clause) so that we can OUTPUT the new and old ID values into a table variable. Then, afterwards we need to use an UPDATE to update all the paths for the new path.
You could likely UPDATE the prior level in the MERGE and at the same time INSERT the current level within the MERGE; however, I didn't go down that path, as it was potentially messier. Therefore, after inserting the rows, I use a further rCTE (recursive CTE) to create the new paths and UPDATE them.
This gives you the below (annotated) SQL:
USE Sandbox;
GO
-- Sample hierarchy table: ID is generated by IDENTITY, ParentID is NULL for
-- root rows, FullPath holds the '(id)/(id)/...' path of the row.
CREATE TABLE dbo.Data
(
    ID INT IDENTITY(1,1),
    ParentID INT,
    SomeValue VARCHAR(30),
    FullPath VARCHAR(255)
);

--VALUES has supported multiple rows since 2008, you should be making use of it.
--The column list is explicit and ParentID uses integer literals, so the
--statement does not depend on column order or implicit string conversion.
INSERT INTO dbo.Data (ParentID, SomeValue, FullPath)
VALUES (NULL, 'A',     '(1)')
      ,(1,    'A.1',   '(1)/(2)')
      ,(2,    'A.1.1', '(1)/(2)/(3)')
      ,(NULL, 'B',     '(4)')
      ,(4,    'B.1',   '(4)/(5)')
      ,(4,    'B.2',   '(4)/(6)')
      ,(6,    'B.2.1', '(4)/(6)/(7)');
GO
--These are your parameters:
--  @BranchToCopy = ID of the root of the branch being copied
--  @CopysParent  = ID of the row the copy will be attached under
DECLARE @BranchToCopy int,
        @CopysParent int;
SET @BranchToCopy = 4;
SET @CopysParent = 2;

--Table which will have the data to INSERT in
DECLARE @NewData table (ID int,
                        ParentID int,
                        SomeValue varchar(30),
                        FullPath varchar(255),
                        Level int);

--Will be used in the MERGE's OUTPUT clause to link the new and old IDs
DECLARE @Keys table (OldID int,
                     NewID int,
                     Level int);

--Get the hierarchical data and INSERT into the @NewData variable.
--Level is 1 for the branch root and increases by 1 per generation.
WITH rCTE AS(
    SELECT D.ID,
           D.ParentID,
           D.SomeValue,
           D.FullPath,
           1 AS Level
    FROM dbo.Data D
    WHERE ID = @BranchToCopy
    UNION ALL
    SELECT D.ID,
           D.ParentID,
           D.SomeValue,
           D.FullPath,
           r.[Level] + 1
    FROM dbo.Data D
         JOIN rCTE r ON D.ParentID = r.ID)
INSERT INTO @NewData (ID,ParentID,SomeValue,FullPath,Level)
SELECT r.ID,
       r.ParentID,
       CONCAT(r.SomeValue,'-Copy'),
       r.FullPath,
       r.[Level]
FROM rCTE r;

--Uncomment to see results
--SELECT *
--FROM @NewData;

--Yes, we're using a WHILE!
--This, however, is what is known as a "set based loop": one iteration per
--level, so that every parent's new ID is already in @Keys when its children
--are inserted.
DECLARE @i int = 1;
WHILE @i <= (SELECT MAX(Level) FROM @NewData) BEGIN
    --We use MERGE here as it allows us to OUTPUT columns that weren't inserted into the table.
    --ON 0=1 never matches, so every source row goes through WHEN NOT MATCHED (a plain INSERT).
    MERGE INTO dbo.Data USING (SELECT ND.ID,
                                      --The branch root hangs under @CopysParent; every other
                                      --row hangs under the new ID of its original parent.
                                      CASE ND.ID WHEN @BranchToCopy THEN @CopysParent ELSE K.NewID END AS Parent,
                                      ND.SomeValue,
                                      ND.Level
                               FROM @NewData ND
                                    LEFT JOIN @Keys K ON ND.ParentID = K.OldID
                               WHERE ND.Level = @i) U ON 0=1
    WHEN NOT MATCHED THEN
        INSERT (ParentID, SomeValue)
        VALUES (U.Parent, U.SomeValue)
    OUTPUT U.ID, inserted.ID, U.Level
    INTO @Keys (OldID, NewID, Level);
    --Increment
    SET @i = @i + 1;
END;

--Uncomment to see results
--SELECT *
--FROM dbo.[Data];

--Now we need to do the FullPath, as that would be a pain to do on the fly
DECLARE @Paths table (ID int, NewPath varchar(255));

--Work out the new paths: start at the copy's parent (whose FullPath already
--exists) and append '/(ID)' for each newly inserted descendant. New rows
--have a NULL FullPath, so ISNULL falls back to the path built so far.
WITH rCTE AS(
    SELECT D.ID,
           D.ParentID,
           D.SomeValue,
           D.FullPath,
           CONVERT(varchar(255),NULL) AS NewPath
    FROM dbo.Data D
    WHERE D.ID = @CopysParent
    UNION ALL
    SELECT D.ID,
           D.ParentID,
           D.SomeValue,
           D.FullPath,
           CONVERT(varchar(255),CONCAT(ISNULL(r.FullPath,r.NewPath),'/(',D.ID,')'))
    FROM dbo.Data D
         JOIN rCTE r ON D.ParentID = r.ID
         JOIN @Keys K ON D.ID = K.NewID) --As we want only the new rows
INSERT INTO @Paths (ID, NewPath)
SELECT ID, NewPath
FROM rCTE
WHERE FullPath IS NULL;

--Update the table
UPDATE D
SET FullPath = P.NewPath
FROM dbo.Data D
     JOIN @Paths P ON D.ID = P.ID;

SELECT *
FROM dbo.Data;
GO
--Clean up
DROP TABLE dbo.Data;
DB<>Fiddle

Here's a solution, using only CTE's:
The part `config as ( ... )` defines the from and to ids to be used for the computation. This could all be done in a TVF.
-- Computes, in a single SELECT, the rows a branch copy would add and returns
-- them together with the original rows. Nothing is written to any table.
-- T: inline sample data standing in for the real table.
WITH T AS (
select 1 id, null parentid, 'A' somevalue, '(1)' fullpath union all
select 2 id, 1 parentid, 'A.1' somevalue, '(1)/(2)' fullpath union all
select 3 id, 2 parentid, 'A.1.1' somevalue, '(1)/(2)/(3)' fullpath union all
select 4 id, NULL parentid, 'B' somevalue, '(4)' fullpath union all
select 5 id, 4 parentid, 'B.1' somevalue, '(4)/(5)' fullpath union all
select 6 id, 4 parentid, 'B.2' somevalue, '(4)/(6)' fullpath union all
select 7 id, 6 parentid, 'B.2.1' somevalue, '(4)/(6)/(7)' fullpath
)
-- config: from_id is the root of the branch to copy, to_id the row it is copied under.
, config as (
select 4 from_id, 2 to_id
)
-- maxid: highest existing id; new ids are numbered upwards from here.
, maxid as (
select max(id) maxid from t
)
-- initpath: fullpath of the target row. Not referenced by the later CTEs in
-- this query (final_replacement looks the path up with its own subquery).
, initpath as (
select fullpath from t cross join config where id = to_id
)
-- subset_from: every row whose fullpath starts with '(from_id)', i.e. the
-- branch to copy. new_id = maxid + row number (ordered by id); rn is that row number.
, subset_from as (
select t.*, maxid + ROW_NUMBER() over (order by id) new_id, ROW_NUMBER() over (order by id) rn from t cross join config cross join maxid where fullpath like '(' + cast(from_id as varchar) + ')%'
)
-- subset_count: number of rows in the branch = number of replacement passes needed.
, subset_count as (
select count(*) subset_count from subset_from
)
-- fullpath_replacements: recursive CTE used as a loop. Pass lvl rewrites, in
-- every branch row's path, the '(id)' token of the branch row with rn = lvl
-- into '(new_id)'. The brackets delimit the token being replaced.
, fullpath_replacements (id, parentid, somevalue, new_id, fullpath, new_fullpath, lvl) as (
-- Anchor (lvl 1): replace the id of the rn = 1 row.
select id, parentid, somevalue, new_id, fullpath, replace(fullpath, '(' + cast((select sf.id from subset_from sf where rn = 1) as varchar) + ')', '(' + cast((select sf.new_id from subset_from sf where rn = 1) as varchar) + ')'), 1
from subset_from
union all
-- Recursive step: replace the id of the rn = lvl + 1 row; stops once lvl reaches subset_count.
select id, parentid, somevalue, new_id, fullpath, replace(new_fullpath, '(' + cast((select sf.id from subset_from sf where sf.rn = fr.lvl + 1) as varchar) + ')', '(' + cast((select sf.new_id from subset_from sf where sf.rn = fr.lvl + 1) as varchar) + ')'), fr.lvl + 1
from fullpath_replacements fr where fr.lvl < (select subset_count from subset_count)
)
-- final_replacement: keeps only the last pass (all ids replaced), prefixes the
-- target row's fullpath plus '/', and maps parentid to the parent's new_id;
-- a row whose parent is not in the branch (the branch root) gets to_id instead.
, final_replacement as (
select id, parentid, somevalue, new_id, fullpath, (select fullpath from t where t.id = (select to_id from config)) + '/' + new_fullpath new_fullpath, isnull((select sf.new_id from subset_from sf where sf.id = fr.parentid), (select to_id from config)) new_parentid
from fullpath_replacements fr where fr.lvl = (select subset_count from subset_count)
)
-- Result: original rows followed by the copied rows, ordered by id.
select id, parentid, somevalue, fullpath
from (
select * from t
union all
select new_id, new_parentid, somevalue, new_fullpath from final_replacement
) t order by id
The idea is to create new ids with the row_number window function (see subset_from part).
Then make the replacements in the fullpath id by id. That is done using a recursive CTE fullpath_replacements to simulate a loop.
This works because in the fullpath I can always use the brackets to identify which part of the fullpath needs to be exchanged.
This is the output:

Concatenate Multiple rows of a table in SQL Server 2014 / SQL Server 2016 [duplicate]

This question already has answers here:
How to use GROUP BY to concatenate strings in SQL Server?
(22 answers)
How to concatenate text from multiple rows into a single text string in SQL Server
(47 answers)
Closed 3 years ago.
I have a table like this :
id | movie | actorid | actor | roleid | rolename
----+---------+---------+---------+--------+------------------
1 | mi3 | 121 | tom | 6 | actor
2 | avenger | 104 | scarlett| 4 | actress
2 | avenger | 3 | russo | 2 | action director
I'm expecting the output like :
id | movie | actorid | actor | roleid | rolename
----+---------+---------+----------------+--------+--------------------------
1 | mi3 | 121 | tom | 6 | actor
2 | avenger | 104,3 | scarlett,russo | 4,2 | actress, action director
For latest SQL Server version, I saw the STRING_AGG function to concatenate columns or row data. But how can I achieve the expected output with SQL Server 2014 using STUFF ?

Try this:
-- Sample data from the question (table variable, so '@' rather than '#').
DECLARE @DataSource TABLE
(
    [id] INT
   ,[movie] VARCHAR(12)
   ,[actorid] INT
   ,[actor] VARCHAR(12)
   ,[roleid] INT
   ,[rolename] VARCHAR(36)
);

INSERT INTO @DataSource ([id], [movie], [actorid], [actor], [roleid], [rolename])
VALUES (1, 'mi3', 121, 'tom', 6, 'actor')
      ,(2, 'avenger', 104, 'scarlett', 4, 'actress')
      ,(2, 'avenger', 3, 'russo', 2, 'action director');

-- SQL Server 2017+
-- Every list uses the same WITHIN GROUP ordering so that the n-th actor id,
-- actor, role id and role name all come from the same source row. Change the
-- ORDER BY key in all four places if a different sequence is wanted.
SELECT [id]
      ,[movie]
      ,STRING_AGG(CAST([actorid] AS VARCHAR(12)), ',') WITHIN GROUP (ORDER BY [actorid], [roleid]) AS [actorid]
      ,STRING_AGG([actor], ',') WITHIN GROUP (ORDER BY [actorid], [roleid]) AS [actor]
      ,STRING_AGG(CAST([roleid] AS VARCHAR(12)), ',') WITHIN GROUP (ORDER BY [actorid], [roleid]) AS [roleid]
      ,STRING_AGG([rolename], ',') WITHIN GROUP (ORDER BY [actorid], [roleid]) AS [rolename]
FROM @DataSource
GROUP BY [id]
        ,[movie];

-- SQL Server 2014 and earlier: FOR XML PATH('') concatenation + STUFF.
--   * STUFF(..., 1, 1, '') strips the leading comma.
--   * TYPE + .value() returns the text without XML entity escaping.
--   * The lists are NOT de-duplicated and share one ORDER BY, so they stay
--     positionally aligned (a per-list DISTINCT sorts each list by its own
--     values and breaks the correspondence between them).
--   * String columns are concatenated at their declared width; casting
--     [rolename] to VARCHAR(12) would cut 'action director' short.
WITH DataSource AS
(
    SELECT DISTINCT [id]
                   ,[movie]
    FROM @DataSource
)
SELECT A.[id]
      ,A.[movie]
      ,R1.[actorid]
      ,R2.[actor]
      ,R3.[roleid]
      ,R4.[rolename]
FROM DataSource A
CROSS APPLY
(
    SELECT STUFF
    (
        (
            SELECT ',' + CAST(S.[actorid] AS VARCHAR(12))
            FROM @DataSource S
            WHERE A.[id] = S.[id]
              AND A.[movie] = S.[movie]
            ORDER BY S.[actorid], S.[roleid]
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)')
        ,1
        ,1
        ,''
    )
) R1 ([actorid])
CROSS APPLY
(
    SELECT STUFF
    (
        (
            SELECT ',' + S.[actor]
            FROM @DataSource S
            WHERE A.[id] = S.[id]
              AND A.[movie] = S.[movie]
            ORDER BY S.[actorid], S.[roleid]
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)')
        ,1
        ,1
        ,''
    )
) R2 ([actor])
CROSS APPLY
(
    SELECT STUFF
    (
        (
            SELECT ',' + CAST(S.[roleid] AS VARCHAR(12))
            FROM @DataSource S
            WHERE A.[id] = S.[id]
              AND A.[movie] = S.[movie]
            ORDER BY S.[actorid], S.[roleid]
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)')
        ,1
        ,1
        ,''
    )
) R3 ([roleid])
CROSS APPLY
(
    SELECT STUFF
    (
        (
            SELECT ',' + S.[rolename]
            FROM @DataSource S
            WHERE A.[id] = S.[id]
              AND A.[movie] = S.[movie]
            ORDER BY S.[actorid], S.[roleid]
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)')
        ,1
        ,1
        ,''
    )
) R4 ([rolename]);

Select ID for corresponding max date using GROUP BY

My table structure as below
Category Sex Last Modified Date Id
7 2 2015-01-16 87603
7 1 2014-11-27 87729
7 2 2018-09-06 87135
7 1 2017-12-27 87568
My sql query as below
-- Asker's original attempt: returns the largest Id per (Category, Sex) group,
-- which is not necessarily the Id of the most recently modified row.
SELECT
    MAX(Id) AS Id
FROM
    [Table] -- bracketed: TABLE is a reserved keyword and cannot be used bare
GROUP BY
    Category, Sex
Result as below
87603
87729
But I would like to get Id as Max Last Modified Date. Correct result should be as below
87135
87568

You can use ROW_NUMBER() to find most recent row per group:
-- Rank the rows of each (Category, Sex) group from newest to oldest and keep
-- the newest one.
WITH ranked AS (
    SELECT
        Id,
        LastModifiedDate,
        ROW_NUMBER() OVER (
            PARTITION BY Category, Sex
            ORDER BY LastModifiedDate DESC
        ) AS rnk
    FROM t
)
SELECT
    Id,
    LastModifiedDate
FROM ranked
WHERE rnk = 1
Use RANK() if you're interested in finding all rows with ties for LastModifiedDate.

You can also get it as
-- Find the latest [Last Modified Date] per (Category, Sex) and join back to
-- the table to fetch the full row(s) carrying that date.
SELECT T.*
FROM T
INNER JOIN
(
    SELECT Category,
           Sex,
           MAX([Last Modified Date]) AS [Last Modified Date]
    FROM T
    GROUP BY Category,
             Sex
) AS TT
    ON  T.[Last Modified Date] = TT.[Last Modified Date]
    AND T.Sex = TT.Sex
    AND T.Category = TT.Category;
Returns:
+----------+-----+---------------------+-------+
| Category | Sex | Last Modified Date | Id |
+----------+-----+---------------------+-------+
| 7 | 2 | 06/09/2018 00:00:00 | 87135 |
| 7 | 1 | 27/12/2017 00:00:00 | 87568 |
+----------+-----+---------------------+-------+

We can get the solution by joining the same table with its grouped set:
-- Join the table to its per-(Category, Sex) maximum LastModifiedDate and
-- return one Id per group; MIN picks the lowest Id when several rows share
-- the latest date.
-- [Table] is bracketed because TABLE is a reserved keyword.
SELECT MIN(T.Id) AS Id
FROM [Table] T
INNER JOIN (SELECT Category,
                   Sex,
                   MAX(LastModifiedDate) AS LastModifiedDate
            FROM [Table]
            GROUP BY Category, Sex) GT
    ON GT.Category = T.Category
   AND GT.Sex = T.Sex
   AND GT.LastModifiedDate = T.LastModifiedDate
GROUP BY T.Category, T.Sex

Other option is to use correlated subquery :
-- Keep each row whose LastModifiedDate equals the latest date found in its
-- own (Category, Sex) group; ties return every tied row.
-- [Table] is bracketed because TABLE is a reserved keyword.
select t.*
from [Table] t
where t.LastModifiedDate = (select max(t1.LastModifiedDate)
                            from [Table] t1
                            where t1.Category = t.Category
                              and t1.Sex = t.Sex
                           );

Here are a few different approaches... (in no particular order)
-- Start from a clean slate: drop the temp table if an earlier run left it behind.
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
BEGIN
    DROP TABLE #TestData;
END;
GO

-- Test table mirroring the structure described in the question.
CREATE TABLE #TestData
(
    Category         TINYINT NOT NULL,
    Sex              TINYINT NOT NULL,
    LastModifiedDate DATE    NOT NULL,
    Id               INT     NOT NULL
);
GO

-- The four sample rows from the question.
INSERT INTO #TestData (Category, Sex, LastModifiedDate, Id)
VALUES (7, 2, '2015-01-16', 87603),
       (7, 1, '2014-11-27', 87729),
       (7, 2, '2018-09-06', 87135),
       (7, 1, '2017-12-27', 87568);
GO

/* Unique nonclustered index keyed on group columns + date (newest first),
   carrying Id so the queries below can be answered from the index alone. */
CREATE UNIQUE NONCLUSTERED INDEX ix_TestData_Category_Sex_LastModifiedDate
    ON #TestData
    (
        Category ASC,
        Sex ASC,
        LastModifiedDate DESC
    )
    INCLUDE (Id);
GO
--====================================================
-- option 1: TOP(n) WITH TIES...
-- Every group's newest row gets row number 1; TOP (1) WITH TIES ordered by
-- that number returns all rows tied at 1, i.e. one row per group.
SELECT TOP (1) WITH TIES
    td.Id
FROM #TestData AS td
ORDER BY ROW_NUMBER() OVER (
             PARTITION BY td.Category, td.Sex
             ORDER BY td.LastModifiedDate DESC
         );
GO
-----------------------------------------------------
-- option 2: Filter on ROW_NUMBER()...
-- Same numbering, applied in a derived table and filtered to rn = 1.
SELECT
    numbered.Id
FROM
(
    SELECT
        td.Id,
        ROW_NUMBER() OVER (
            PARTITION BY td.Category, td.Sex
            ORDER BY td.LastModifiedDate DESC
        ) AS rn
    FROM #TestData AS td
) AS numbered
WHERE numbered.rn = 1;
GO
-----------------------------------------------------
-- option 3: binary concatenation...
-- Packs (date, Id) into one 8-byte value per row, takes MAX() per group and
-- unpacks the Id from the last 4 bytes.
-- The date is encoded as a non-negative INT day count converted to BINARY(4),
-- which is big-endian, so binary order equals date order. Converting the DATE
-- itself to BINARY(3) is not used: that byte layout does not sort like the
-- date, so MAX() over it is only right by coincidence on small samples.
SELECT
    Id = CONVERT(INT, SUBSTRING(MAX(bv.bin_val), 5, 4))
FROM
    #TestData td
    CROSS APPLY ( VALUES (
          CONVERT(BINARY(4), DATEDIFF(DAY, CONVERT(DATE, '00010101'), td.LastModifiedDate))
        + CONVERT(BINARY(4), td.Id)
    ) ) bv (bin_val)
GROUP BY
    td.Category,
    td.Sex;
GO
--====================================================

Select all second highest values only from temp table

Using T-SQL (SQL Server 2008 R2), I'm trying to list only the rows with the second highest value in a particular column from a temp table and then place the results into a new temp table. The PK is the ID, which can have increasing version numbers and then unique codes.
Example:
ID | Name| Version | Code
------------------------
1 | A | 1 | 10
1 | A | 2 | 20
1 | A | 3 | NULL
2 | B | 1 | 40
2 | B | 2 | 50
2 | C | 1 | 60
The desired outcome of the query is
ID | Version | Code
------------------------
1 | 2 | 20
2 | 1 | 40
To achieve this I need the below query to be adapted to pull the second highest value as long as the result gives a version number greater than 1. These results come from a temp table and will then be placed into a final results temp table. EDIT: Please note this will be applied over 33000 rows of data so I would prefer something neater than INSERT VALUES. Thanks.
Current query:
-- Asker's current approach: stage the three columns of interest from
-- #table1 into a new temp table #table2 ...
SELECT src.ID,
       src.Version,
       src.Code
INTO #table2
FROM #table1 AS src;

-- ... then list every staged row above version 1, ordered by ID.
SELECT ID,
       Version,
       Code
FROM #table2
WHERE Version > 1
ORDER BY ID ASC;

-- Clean up both temp tables.
DROP TABLE #table1;
DROP TABLE #table2;
I have tried running the where clause WHERE Version < (SELECT MAX(Version) FROM #table2), but this has no effect, presumably due to the unique code values, and in any case it wouldn't work where I have more than 3 versions.
Ideas would be gratefully received.
Thanks in advance.

I have tested the code below, and it gives the output matching your desired outcome:
-- Inline sample rows, numbered per Name from the highest Version downwards;
-- the row numbered 2 is the second-highest version of that Name.
WITH B AS (
    SELECT ID, Name, [Version], Code
    FROM (VALUES (1, 'A', 1, 10),
                 (1, 'A', 2, 20),
                 (1, 'A', 3, 30),
                 (1, 'A', 4, NULL),
                 (2, 'B', 1, 40),
                 (2, 'B', 2, 50),
                 (2, 'C', 1, 60)
         ) AS v (ID, Name, [Version], Code)
),
BASE AS (
    SELECT ROW_NUMBER() OVER (PARTITION BY Name ORDER BY [Version] DESC) AS RNK,
           ID,
           Name,
           [Version],
           Code
    FROM B
)
SELECT ID,
       Name,
       [Version],
       Code
FROM BASE
WHERE RNK = 2

If your primary key is only ID, you have duplicate rows. So I assume your primary key is something else, for example ID, Version, Name. You have two rows with the same ID and same Version, what kind of rule do you want to apply on this ? Lowest number ?
I made an example that does kind of what you want:
First declare the necessary tables:
-- Sample data in a table variable (declared with '@'; '#' is for temp tables).
declare @table1 table (
    Id int,
    Name nvarchar(20),
    [Version] int,
    Code int
);

insert into @table1 (Id, Name, [Version], Code)
values (1,'A',1,10),(1,'A',2,20),(1,'A',3,30),(1,'A',4,NULL)
      ,(2,'B',1,40),(2,'B',2,50),(2,'C',1,60);

-- And then the query to get the results:
-- HighestVersions holds the highest Version per Id; the main query joins on
-- MaxVersion - 1 to pick the second-highest one. This relies on version
-- numbers being consecutive.
with HighestVersions (Id, MaxVersion) As
(
    select Id, max([Version]) from @table1 group by Id
)
select
    t1.Id,
    t1.[Version],
    -- (Id, Version) is not unique in the sample (2/B/1 and 2/C/1), so the rows
    -- are collapsed with GROUP BY and the lowest Code is reported.
    min(t1.Code) as Code
from
    @table1 t1
inner join
    HighestVersions hv
on
    hv.Id = t1.Id
    and (hv.MaxVersion-1) = t1.[Version]
group by
    t1.Id
    ,t1.[Version]
I had to do a little dirty trick with the outermost select, this is because of the duplicate 'Id' and 'Version'. Else you would have gotten two rows with ID = 2, Version = 1
If you want to remove the NULL value you can change the WITH part (according to your last edit):
-- Variant of the HighestVersions CTE: rows whose Code is NULL are ignored
-- when determining the highest Version per Id.
with HighestVersions (Id, MaxVersion) As
(
    select Id, max([Version]) from @table1 where Code is not null group by Id
)

Try this:
-- Sample data in a table variable (declared with '@'; '#' is for temp tables).
DECLARE @List TABLE (ID int, Name char(1), Version int, Code int NULL);

INSERT INTO @List (ID, Name, Version, Code)
VALUES
    (1, 'A', 1, 10),
    (1, 'A', 2, 20),
    (1, 'A', 3, 30),
    (1, 'A', 4, NULL),
    (2, 'B', 1, 40),
    (2, 'B', 2, 50),
    (2, 'C', 1, 60);

-- Number the rows of each (ID, Name) group from the highest Version down and
-- keep the row numbered 2, i.e. the second-highest version of that group.
-- Groups with a single row produce no output.
SELECT
    a.ID, a.Name, a.Version, a.Code
FROM
(
    SELECT
        ID, Name, Version, Code,
        ROW_NUMBER() OVER (PARTITION BY ID, Name ORDER BY Version DESC) AS Rn
    FROM @List
) a
WHERE
    a.Rn = 2

Recursive sum in tree structure

I have a tree structure in a single table. The table is a tree of categories that can be nested endlessly. Each category has a ProductCount column that tells how many products are directly in the category (not summing child categories).
Id | ParentId | Name | ProductCount
------------------------------------
1 | -1 | Cars | 0
2 | -1 | Bikes | 1
3 | 1 | Ford | 10
4 | 3 | Mustang | 7
5 | 3 | Focus | 4
I would like to make a sql query that for each row/category gives me the number of products including the ones in the child categories.
The output for the table above should be
Id | ParentId | Name | ProductCount | ProductCountIncludingChildren
--------------------------------------------------------------------------
1 | -1 | Cars | 0 | 21
2 | -1 | Bikes | 1 | 1
3 | 1 | Ford | 10 | 21
4 | 3 | Mustang | 7 | 7
5 | 3 | Focus | 4 | 4
I know I should probably use a CTE, but I can't quite get it working the way it should.
Any help is appreciated!

You can use a recursive CTE in which the anchor part gets all rows and the recursive part joins to get the child rows. Carry the original Id, aliased as RootID, through from the anchor part, and do a sum aggregate in the main query grouped by RootID.
SQL Fiddle
MS SQL Server 2012 Schema Setup:
-- Category tree: ParentId = -1 marks a root; ProductCount counts only the
-- products placed directly in the category.
create table T
(
    Id           int primary key,
    ParentId     int,
    Name         varchar(10),
    ProductCount int
);

insert into T (Id, ParentId, Name, ProductCount)
values (1, -1, 'Cars',     0),
       (2, -1, 'Bikes',    1),
       (3,  1, 'Ford',    10),
       (4,  3, 'Mustang',  7),
       (5,  3, 'Focus',    4);

-- Index on ParentId that also carries ProductCount and Id.
create index IX_T_ParentID
    on T (ParentID)
    include (ProductCount, Id);
Query 1:
-- Subtree: every row paired with each of its ancestors (and with itself).
--   anchor    : each row is the root of its own subtree (RootID = Id)
--   recursive : children of a row already in the set inherit that RootID
with Subtree as
(
    select node.Id,
           node.ProductCount,
           node.Id as RootID
    from T as node
    union all
    select child.Id,
           child.ProductCount,
           parent.RootID
    from T as child
    inner join Subtree as parent
        on child.ParentId = parent.Id
),
-- Totals: one row per RootID holding the sum over its whole subtree.
Totals as
(
    select RootID,
           sum(ProductCount) as ProductCountIncludingChildren
    from Subtree
    group by RootID
)
select T.Id,
       T.ParentId,
       T.Name,
       T.ProductCount,
       S.ProductCountIncludingChildren
from T
inner join Totals as S
    on T.Id = S.RootID
order by T.Id
option (maxrecursion 0)
Results:
| ID | PARENTID | NAME | PRODUCTCOUNT | PRODUCTCOUNTINCLUDINGCHILDREN |
|----|----------|---------|--------------|-------------------------------|
| 1 | -1 | Cars | 0 | 21 |
| 2 | -1 | Bikes | 1 | 1 |
| 3 | 1 | Ford | 10 | 21 |
| 4 | 3 | Mustang | 7 | 7 |
| 5 | 3 | Focus | 4 | 4 |

This is the same concept as Tom's answer, but less code (and way faster).
-- Builds a '/id/id/.../' path (Node) for every row reachable from a root
-- (ParentId = -1), then sums ProductCount over all rows whose path starts
-- with the current row's path.
-- Node is varchar(max): a CAST to varchar without a length yields
-- varchar(30), which would silently cut off the path of deeper or
-- larger-id nodes and break the prefix match below.
with cte as
(
    select v.Id, v.ParentId, v.Name, v.ProductCount,
           cast('/' + cast(v.Id as varchar(12)) + '/' as varchar(max)) Node
    from Vehicle v
    where ParentId = -1
    union all
    select v.Id, v.ParentId, v.Name, v.ProductCount,
           cast(c.Node + cast(v.Id as varchar(12)) + '/' as varchar(max))
    from Vehicle v
    join cte c on v.ParentId = c.Id
)
select c1.Id, c1.ParentId, c1.Name, c1.ProductCount,
       -- own count + counts of all strict descendants (0 when there are none)
       c1.ProductCount + SUM(isnull(c2.ProductCount, 0)) ProductCountIncludingChildren
from cte c1
-- c2 is a strict descendant of c1 when its path begins with c1's path.
left outer join cte c2 on c1.Node <> c2.Node and left(c2.Node, LEN(c1.Node)) = c1.Node
group by c1.Id, c1.ParentId, c1.Name, c1.ProductCount
order by c1.Id
SQL Fiddle (I added some extra data rows for testing)

Actually this could be a good use of HIERARCHYID in SQL Server..
-- Category tree used by the HIERARCHYID query; ParentId = -1 marks a root.
CREATE TABLE [dbo].[CategoryTree]
(
    [Id]           INT,
    [ParentId]     INT,
    [Name]         VARCHAR(100),
    [ProductCount] INT
);
GO

INSERT INTO [dbo].[CategoryTree] ([Id], [ParentId], [Name], [ProductCount])
VALUES
    (1, -1, 'Cars',     0),
    (2, -1, 'Bikes',    1),
    (3,  1, 'Ford',    10),
    (4,  3, 'Mustang',  7),
    (5,  3, 'Focus',    4)
    --,(6, 1, 'BMW', 100)
GO
Query
-- cteRN: numbers the children of each parent; the number becomes the child's
-- position in its HIERARCHYID path. Ordering by [Id] (rather than by the
-- partition key itself, which orders nothing) makes the numbering, and with
-- it the reported Node values, repeatable from run to run.
WITH [cteRN] AS (
    SELECT *,
           ROW_NUMBER() OVER (
               PARTITION BY [ParentId] ORDER BY [Id]) AS [ROW_NUMBER]
    FROM [dbo].[CategoryTree]
),
-- cteHierarchy: assigns a HIERARCHYID to every row.
--   anchor    : roots (ParentId = -1) become '/<n>/'
--   recursive : each child appends '<n>/' to its parent's path
[cteHierarchy] AS (
    SELECT CAST(
               CAST(hierarchyid::GetRoot() AS VARCHAR(100))
               + CAST([ROW_NUMBER] AS VARCHAR(100))
               + '/' AS HIERARCHYID
           ) AS [Node],
           *
    FROM [cteRN]
    WHERE [ParentId] = -1
    UNION ALL
    SELECT CAST(
               hierarchy.Node.ToString()
               + CAST(RN.[ROW_NUMBER] AS VARCHAR(100)
               ) + '/' AS HIERARCHYID),
           rn.*
    FROM [cteRN] rn
    INNER JOIN [cteHierarchy] hierarchy
        ON rn.[ParentId] = hierarchy.[Id]
)
-- Own ProductCount plus that of every strict descendant. IsDescendantOf is
-- also true for the node itself, hence the explicit <> filter.
SELECT x.[Node].ToString() AS [Node],
       x.[Id], x.[ParentId], x.[Name], x.[ProductCount],
       x.[ProductCount] + SUM(ISNULL(child.[ProductCount],0))
           AS [ProductCountIncludingChildren]
FROM [cteHierarchy] x
LEFT JOIN [cteHierarchy] child
    ON child.[Node].IsDescendantOf(x.[Node]) = 1
    AND child.[Node] <> x.[Node]
GROUP BY x.[Node], x.[Id], x.[ParentId], x.[Name], x.[ProductCount]
ORDER BY x.[Id]
Result

This won't be optimal, but it works; it involves 2 CTEs: one main CTE, and one CTE in a table-valued function that sums up the values for each subtree.
The first CTE
-- Walks the tree top-down from the roots (parentid = -1) and, for every
-- visited row, asks SumChild for the total of the subtree rooted at that row.
;WITH cte AS
(
    -- Anchor: root categories.
    SELECT root.Id,
           root.ParentId,
           root.Name,
           root.ProductCount,
           total.Total AS ProductCountIncludingChildren
    FROM testTable root
        CROSS APPLY SumChild(root.id) total
    WHERE root.parentid = -1

    UNION ALL

    -- Recursive step: children of rows already visited.
    SELECT kid.Id,
           kid.ParentId,
           kid.Name,
           kid.ProductCount,
           total.Total AS ProductCountIncludingChildren
    FROM cte
        INNER JOIN testTable kid on kid.parentid = cte.id
        CROSS APPLY SumChild(kid.id) total
)
SELECT * from cte
And the function:
-- SumChild: inline table-valued function.
--   @id     : Id of the row in testTable that is the root of the subtree.
--   returns : a single row with one column, Total = SUM(ProductCount) over
--             the root row and all of its descendants.
CREATE FUNCTION SumChild
(
    @id int
)
RETURNS TABLE
AS
RETURN
(
    WITH cte
    AS
    (
        -- Anchor: the subtree root itself.
        SELECT
            anchor.Id,
            anchor.ParentId,
            anchor.ProductCount
        FROM
            testTable anchor
        WHERE anchor.id = @id
        UNION ALL
        -- Recursive step: children of rows already in the subtree.
        SELECT
            child.Id,
            child.ParentId,
            child.ProductCount
        FROM
            cte
            INNER JOIN testTable child on child.parentid = cte.id
    )
    SELECT SUM(ProductCount) AS Total from cte
)
GO
Which results in:
from the source table
Apologies about formatting.

I couldn't come up with a good T-SQL, set based answer, but I did come up with an answer:
The temp table mimics your table structure. The table variable is a work table.
--Initial table (temp table, '#'): mimics the structure from the question
CREATE TABLE #products (Id INT, ParentId INT, NAME VARCHAR(255), ProductCount INT);

INSERT INTO #products
        ( ID, ParentId, NAME, ProductCount )
VALUES  ( 1,-1,'Cars',0),(2,-1,'Bikes',1),(3,1,'Ford',10),(4,3,'Mustang',7),(5,3,'Focus',4);

--Work table (table variable, '@'): a copy of the data plus the running total
DECLARE @products TABLE (ID INT, ParentId INT, NAME VARCHAR(255), ProductCount INT, ProductCountIncludingChildren INT);

INSERT INTO @products
        ( ID ,
          ParentId ,
          NAME ,
          ProductCount ,
          ProductCountIncludingChildren
        )
SELECT  Id ,
        ParentId ,
        NAME ,
        ProductCount,
        0
FROM    #products;

DECLARE @i INT;
SELECT @i = MAX(ID) FROM @products;

--Stupid loop - loops suck
--Visits the IDs from the highest down to 1. Each row's total is its own
--ProductCount plus the totals already computed for its direct children, so
--this only gives correct results when every child has a higher ID than its
--parent (true for the sample data).
WHILE @i > 0
BEGIN
    UPDATE p1
    SET p1.ProductCountIncludingChildren = p1.ProductCount + ISNULL(p2.ProductCountIncludingChildren, 0)
    FROM @products p1
         --Current totals of the direct children, summed per parent
         LEFT OUTER JOIN (SELECT ParentId,
                                 SUM(ProductCountIncludingChildren) AS ProductCountIncludingChildren
                          FROM @products
                          GROUP BY ParentId) p2 ON p1.ID = p2.ParentId
    WHERE p1.ID = @i;

    SELECT @i = @i - 1;
END;

SELECT *
FROM @products;

DROP TABLE #products;
I'd be very interested to see a better, set based approach. The problem I ran into is that when you use recursive cte's, you start with the parent and work toward the children - this doesn't really work for getting a sum at the parent levels. You'd have to do some kind of backward recursive cte.