TSql equality on groups of rows - sql-server

I have a table that contains information on groups. There can be any number of members in a group. There is a group identifier and then an element identifier. I want to be able to in a single statement determine whether or not a given set exists in the table
#groupTable is an example of the data that already exists in the database
#inputData is the data that I want to see if it already exists in #groupTable
declare #groupData table
(
groupIdentifier int,
elementIdentifier uniqueidentifier
)
insert into #groupData values
(1, 'dfce40b1-3719-4e4c-acfa-65f728677700'),
(1, '89e7e6be-cee8-40a7-8135-a54659e0d88c')
declare #inputData table
(
tempGroupIdentifier int,
elementIdentifier uniqueidentifier
)
insert into #inputData values
(42, 'dfce40b1-3719-4e4c-acfa-65f728677700'),
(42, '89e7e6be-cee8-40a7-8135-a54659e0d88c'),
(55, 'dfce40b1-3719-4e4c-acfa-65f728677700'),
(55, '2395a42c-94f4-4cda-a773-221b26ea5e44'),
(55, 'f22db9df-a1f4-4078-b74c-90e34376eff6')
Now I want to run a query that will show the relationship of the sets, showing which groupIdentifier is associated with which tempGroupIdentifier. If there is no matching set then I need to know that too.
desired output:
groupIdentifier, tempGroupIdentifier
1, 42
null, 55
Does anyone any suggestions on how to approach this problem?
I could probably pivot the rows and concat all elementIdentifiers into a giant string for each group that then do equality on, but that doesn't seem like a good solution.

SELECT DISTINCT
T1.tempgroupIdentifier, T2.GroupIdentifier
FROM
(
SELECT
COUNT(*) OVER (PARTITION BY tempgroupIdentifier) AS GroupCount,
ROW_NUMBER() OVER (PARTITION BY tempgroupIdentifier ORDER BY elementIdentifier) AS GroupRN,
tempgroupIdentifier, elementIdentifier
FROM
#inputData
) T1
LEFT JOIN
(
SELECT
COUNT(*) OVER (PARTITION BY GroupIdentifier) AS GroupCount,
ROW_NUMBER() OVER (PARTITION BY GroupIdentifier ORDER BY elementIdentifier) AS GroupRN,
GroupIdentifier, elementIdentifier
FROM
#groupData
) T2 ON T1.elementIdentifier = T2.elementIdentifier AND
T1.GroupCount = T2.GroupCount AND
T1.GroupRN = T2.GroupRN
Edit: this will also deal with the same value in a given set

SELECT
(
CASE WHEN matchCount = gdCount AND matchCount = idCount
THEN groupIdentifier
ELSE NULL
END) groupIdentifier,
cj.tempGroupIdentifier
FROM
(
SELECT gd.groupIdentifier, id.tempGroupIdentifier, COUNT(1) matchCount
FROM #groupData gd
CROSS JOIN #inputData id
WHERE id.elementIdentifier = gd.elementIdentifier
GROUP BY gd.groupIdentifier, id.tempGroupIdentifier) as cj
CROSS APPLY (SELECT COUNT(groupIdentifier) from #groupData gdca WHERE gdca.groupIdentifier = cj.groupIdentifier) as gdc(gdCount)
CROSS APPLY (SELECT COUNT(tempGroupIdentifier) from #inputData idca WHERE idca.tempGroupIdentifier = cj.tempGroupIdentifier) as idc(idCount)

Related

row number without using the ROW_NUMBER window function

Suppose you have a table with non-unique values such as this:
CREATE TABLE accounts ( fname VARCHAR(20), lname VARCHAR(20))
GO
INSERT accounts VALUES ('Fred', 'Flintstone')
INSERT accounts VALUES ('Fred', 'Flintstone')
INSERT accounts VALUES ('Fred', 'Flintstone')
SELECT * FROM accounts
GO
Now using a ROW_NUMBER function, you can get a unique incrementing row number.
select *, ROW_NUMBER() over(order by (select null)) as rn
from accounts
But how do we this without using a ROW_NUMBER function. I tried giving each row a unique ID using NEWID() and then counting the rows as given below but it did not work as it gives me a non-unique number which does not start with 1.
Note that I do not want to alter the table to add a new column.
;with cte as
(select *
from accounts as e
cross apply (select newid()) as a(id)
)
select *, (select count(*)+1 from cte as c1 where c.id > c1.id) as rn
from cte as c
order by rn
SQL Fiddle for toying around is http://sqlfiddle.com/#!18/c270f/3/0
The following demonstrates why your code fails, but does not provide an alternative to Row_Number().
A column, TopId, is added to the final select that should get the minimum value generated by NewId() and report it in every row. Instead, a new value is generated for each row.
-- Sample data.
declare #Samples as Table ( FName VarChar(20), LName VarChar(20) );
insert into #Samples ( FName, LName ) values
( 'Fred', 'Flintstone' ), ( 'Fred', 'Flintstone' ), ( 'Fred', 'Flintstone' );
select * from #Samples;
-- Cross apply NewId() in a CTE.
;with cte as
( select *
from #Samples as S
cross apply ( select NewId() ) as Ph( Id ) )
select *, ( select count(*) from cte as c1 where c1.Id >= c.Id ) as RN,
-- The following column should output the minimum Id value from the table for every row.
-- Instead, it generates a new unique identifier for each row.
( select top 1 id from cte order by id ) as TopId
from cte as c
order by RN;
The execution plan shows that the CTE is treated as a view that is being evaluated repeatedly, thus generating conflicting Id values.
How about this:
SELECT
src.*,
SUM(DummyVal) OVER(ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS RowId
FROM (
SELECT a.*, 1 AS DummyVal
FROM MyTable a
) src
It's still a window function, though, not sure if that matters.
Fiddle me this
You can create an function yourself to compute the row_number,
In this example, I had to calculate an index for a lesson within a course.
Window Function version:
SELECT *, ROW_NUMBER() OVER(PARTITION BY courseId) AS row_num FROM lessons;
I created helper-function, to compute the row_number without window function:
DELIMITER $$
CREATE FUNCTION getRowNumber (lessonId int, courseId int)
RETURNS int
DETERMINISTIC
BEGIN
DECLARE count int;
select count(l2.id) into count from lessons l2 where l2.courseId=courseId
and l2.id<=lessonId;
RETURN count;
END$$
DELIMITER ;
so, the final query is:
SELECT l.*, getRowNumber(l.id,l.courseId) as row_num FROM lessons l;
got the same result as the first query!
MySQL:
SELECT #rownum := #rownum + 1 AS rank, a.*
FROM accounts a,(SELECT #rownum := 0) r;
In ORACLE it would simply be
SELECT ROWNUM, a.*
FROM accounts a;
Both without window

SQL Server: How do I get the highest value not set of an int column?

Let's take an example. These are the rows of the table I want get the data:
The column I'm talking about is the reference one. The user can set this value on the web form, but the system I'm developing must suggest the lowest reference value still not used.
As you can see, the smallest value of this column is 35. I could just take the smaller reference and sum 1, but, in that case, the value 36 is already used. So, the value I want is 37.
Is there a way to do this without a loop verification? This table will grow so much.
This is for 2012+
DECLARE #Tbl TABLE (id int, reference int)
INSERT INTO #Tbl
( id, reference )
VALUES
(1, 49),
(2, 125),
(3, 35),
(4, 1345),
(5, 36),
(6, 37)
SELECT
MIN(A.reference) + 1 Result
FROM
(
SELECT
*,
LEAD(reference) OVER (ORDER BY reference) Tmp
FROM
#Tbl
) A
WHERE
A.reference - A.Tmp != -1
Result: 37
Here is yet another place where the tally table is going to prove invaluable. In fact it is so useful I keep a view on my system that looks like this.
create View [dbo].[cteTally] as
WITH
E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))dt(n)),
E2(N) AS (SELECT 1 FROM E1 a cross join E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a cross join E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
select N from cteTally
Next of course we need some sample data and table to hold it.
create table #Something
(
id int identity
, reference int
, description varchar(10)
)
insert #Something (reference, description)
values (49, 'data1')
, (125, 'data2')
, (35, 'data3')
, (1345, 'data4')
, (36, 'data5')
, (7784, 'data6')
Now comes the magic of the tally table.
select top 1 t.N
from cteTally t
left join #Something s on t.N = s.reference
where t.N >= (select MIN(reference) from #Something)
and s.id is null
order by t.N
This is ugly, but should get the job done:
select
top 1 reference+1
from
[table]
where
reference+1 not in (select reference from [table])
order by reference
I used a table valued express to get the next value. I first left outer joined the table to itself (shifting the key in the join by +1). I then looked only at rows that had no corresponding match (b.ID is null). The minimum a.ReferenceID + 1 gives us the answer we are looking for.
create table MyTable
(
ID int identity,
Reference int,
Description varchar(20)
)
insert into MyTable values (10,'Data')
insert into MyTable values (11,'Data')
insert into MyTable values (12,'Data')
insert into MyTable values (15,'Data')
-- Find gap
;with Gaps as
(
select a.Reference+1 as 'GapID'
from MyTable a
left join MyTable b on a.Reference = b.Reference-1
where b.ID is null
)
select min(GapID) as 'NewReference'
from Gaps
NewReference
------------
13
I hope the code was clearer than my description.
CREATE TABLE #T(ID INT , REFERENCE INT, [DESCRIPTION] VARCHAR(50))
INSERT INTO #T
SELECT 1,49 , 'data1' UNION ALL
SELECT 2,125 , 'data2' UNION ALL
SELECT 3,35 , 'data3' UNION ALL
SELECT 4,1345, 'data4' UNION ALL
SELECT 5,36 , 'data5' UNION ALL
SELECT 6,7784, 'data6'
SELECT TOP 1 REFERENCE + 1
FROM #T T1
WHERE
NOT EXISTS
(
SELECT 1 FROM #T T2 WHERE T2.REFERENCE = T1.REFERENCE + 1
)
ORDER BY T1.REFERENCE
--- OR
SELECT MIN(REFERENCE) + 1
FROM #T T1
WHERE
NOT EXISTS
(
SELECT 1 FROM #T T2 WHERE T2.REFERENCE = T1.REFERENCE + 1
)
How about using a Tally table. The following illustrates the concept. It would be better to use a persisted numbers table as opposed to the cte however the code below illustrates the concept.
For further reading as to why you should use a persisted table, check out the following link: sql-auxiliary-table-of-numbers
DECLARE #START int = 1, #END int = 1000
CREATE TABLE #TEST(UsedValues INT)
INSERT INTO #TEST(UsedValues) VALUES
(1),(3),(5),(7),(9),(11),(13),(15),(17)
;With NumberSequence( Number ) as
(
Select #start as Number
union all
Select Number + 1
from NumberSequence
where Number < #end
)
SELECT MIN(Number)
FROM NumberSequence n
LEFT JOIN #TEST t
ON n.Number = t.UsedValues
WHERE UsedValues IS NULL
OPTION ( MAXRECURSION 1000 )
You could try using a descending order:
SELECT DISTINCT reference
FROM `Resultsados`
ORDER BY `reference` ASC;
As far as I know, there is no way to do this without a loop. To prevent multiple values from returning be sure to use DISTINCT.

Column based intersect on two tables

I'm trying to do something similar to a column based intersect on two tables.
The tables are:
LogTag: a log can have zero or more tags
MatchingRule: a matching rule consists of one or more tags that define the rule
A log can have zero or more rules matched to it. I will be passing in a MatchingRuleID and expecting to return all logs that match that rule.
Expected Result: A result set of matching LogIDs. Eg. passing in MatchingRuleID = 30 should return LogID 101. MatchingRuleID = 31 should return LogID 101 & 100.
Also, the LogTag table could have millions of rows so an efficient query is preferred.
The question: How to find all LogIDs that match with a specified rule definition?
Schema:
CREATE TABLE dbo.Tag
(
TagID INT,
TagName NVARCHAR(50)
)
INSERT INTO dbo.Tag (TagID, TagName)
VALUES (1, 'tag1'), (2, 'tag2'), (3, 'tag3')
CREATE TABLE dbo.LogTag
(
LogID INT,
TagID INT
)
INSERT INTO dbo.LogTag (LogID, TagID)
VALUES (100, 1), (101, 1), (101, 2), (101, 3), (101, 4), (102, 2), (102, 3)
CREATE TABLE dbo.MatchingRule
(
MatchingRuleID INT,
TagID INT
)
INSERT INTO dbo.MatchingRule (MatchingRuleID, TagID)
VALUES (30, 1), (30, 2), (30, 3), (31, 1)
Important to have the proper clustered index on the tables. I've put an alternative index in comments for #log_tag which might improve performance for large sets. Since I do not have the proper sample to test on, you will have to verify which is best.
CREATE TABLE #tag(tag_id INT PRIMARY KEY,tag_name NVARCHAR(50));
INSERT INTO #tag (tag_id,tag_name)VALUES
(1,'tag1'),(2,'tag2'),(3,'tag3');
-- Try this key for large sets: PRIMARY KEY(tag_id,log_id));
CREATE TABLE #log_tag(log_id INT,tag_id INT,PRIMARY KEY(log_id,tag_id))
INSERT INTO #log_tag (log_id,tag_id)VALUES
(100,1),(101,1),(101,2),(101,3),(101,4),(102,2),(102,3);
CREATE TABLE #matching_rule(matching_rule_id INT,tag_id INT,PRIMARY KEY(matching_rule_id,tag_id));
INSERT INTO #matching_rule(matching_rule_id,tag_id)VALUES
(30,1),(30,2),(30,3),(31,1);
DECLARE #matching_rule_id INT=31;
;WITH required_tags AS (
SELECT tag_id
FROM #matching_rule
WHERE matching_rule_id=#matching_rule_id
)
SELECT lt.log_id
FROM required_tags AS rt
INNER JOIN #log_tag AS lt ON
lt.tag_id=rt.tag_id
GROUP BY lt.log_id
HAVING COUNT(*)=(SELECT COUNT(*) FROM required_tags);
DROP TABLE #log_tag;
DROP TABLE #matching_rule;
DROP TABLE #tag;
The results are the ones in your Expected Result for both 30 & 31.
Execution plan for the index used in the script:
Try this query
Fiddle Here
DECLARE #InputMatchingRuleId INT = 30
;WITH CTE1
AS
(
SELECT DENSE_RANK() OVER(ORDER BY LT.TAGID) AS RN,LT.TagID,LT.LOGID
FROM MatchingRule MR INNER JOIN LogTag LT ON LT.TagID = MR.TagID
WHERE MatchingRuleID=#InputMatchingRuleId
),
CTE2
AS
(
SELECT 1 AS RN2,LOGID FROM CTE1 C1 WHERE C1.RN=1
UNION ALL
SELECT RN2+1 as RN2,C2.LOGID
FROM CTE1 C1 INNER JOIN CTE2 C2 ON C1.RN = C2.RN2+1 AND C1.LOGID = C2.LOGID
)
SELECT DISTINCT LOGID FROM CTE2
WHERE RN2>(CASE WHEN (SELECT MAX(RN2) FROM CTE2)=1 THEN 0 ELSE 1 END)
NOTE: This will only work with SQL Server 2008+
Here's the query I came up with:
DECLARE #RuleID INT
SELECT #RuleID = 30
SELECT LogID
FROM LogTag lt
INNER JOIN (
SELECT TagID, MatchingRuleID, COUNT(*) OVER (PARTITION BY MatchingRuleID) TagCount
FROM MatchingRule
) mr
ON lt.TagID = mr.TagID
AND mr.MatchingRuleID = #RuleID
GROUP BY LogID, TagCount
HAVING COUNT(*) = TagCount
So basically I match all TagID's within the specified matching rule and then once I know that all tags match I check to see if the count of tags from the MatchingRule table matches the (now filtered and grouped) count of tags from the LogTag table.
should be
; with rules as
(
select TagID, cnt = sum(count(*)) over()
from dbo.MatchingRule
where MatchingRuleID = #MatchingRuleID
group by TagID
)
select LogID
from rules r
inner join LogTag lt on r.TagID = lt.TagID
group by LogID, cnt
having count(*) = r.cnt
select l.LogID
from dbo.MatchingRule r
inner join dbo.LogTag l on l.TagID = r.TagID
where r.MatchingRuleID = 31
another approach is to identify all tags and then:
select l.LogID
from dbo.LogTag l
where exists(select 1 from #Tags t where t.TagID = l.TagID)

SQL - Combine rows

I have rows in a table that looks like this:
[date],[name],[duty],[holiday],[hdaypart],[sick],[sdaypart]
2015-04-27, person1, 1,0,NULL,0,NULL
2015-04-27, person1, 0 1,'fd',0,NULL
I would like to combine these rows to:
[date],[name],[duty],[holiday],[hdaypart],[sick],[sdaypart]
2015-04-27, person1, 1,1,'fd',0,NULL
The duty, holiday and sick columns as BIT columns.
Is there way to do this?
The one solution I can come up with is using subqueries, but it consumes a lot of time. A faster solution would be nice.
This is what I have now:
SELECT DISTINCT [name],[date],[region],[cluster]
,CASE WHEN (SELECT SUM(CONVERT(INT,callduty)) FROM planning AS t2
WHERE t1.[Date] = #datum AND t2.[Name] = t1.[name] AND t2.[Date] = t1.[date] ) > 0
THEN 1 ELSE 0 END AS [CallDuty]
,CASE WHEN (SELECT SUM(CONVERT(INT,holiday)) FROM planning AS t2
WHERE t1.[Date] = #datum AND t2.[Name] = t1.[name] AND t2.[Date] = t1.[date] ) > 0
THEN 1 ELSE 0 END AS [Holiday]
FROM planning AS t1
where t1.[Date] = #datum AND t1.[Name] like #naam
group by t1.[date],t1.[name], t1.Region, t1.cluster
order by t1.[name]
You seem to want to group by date and name and select either the maximum or the not null values within each group. MAX aggregate function is suitable for both of these selections:
SELECT [date],[name], MAX([duty]), MAX([holiday]),
MAX([hdaypart]), MAX([sick]), MAX([sdaypart])
FROM mytable
GROUP BY [date],[name]
By looking at your example, I assume that you want to get the maximum values for a specific user.
You could do this using a group by and max
select max([date]),[name],max([duty]),max([holiday]),max([hdaypart]),max([sick]),max([sdaypart])
from yourtable
group by name
This is not really pretty but should perform better than using subqueries.
EDIT:
If you have columns with bit sql types, use
max(cast([bitColumn] as int))
Adding the date column in the group by, as suggested by Giorgos Betsos, the result is
select [date],
[name],
max([duty]),
max([holiday]),
max(cast([hdaypart] as int)),
max(cast([sick] as int)),
max(cast([sdaypart] as int))
from yourtable
group by [date],[name]
declare #t table ([date] date,[name] varchar(10),[duty] varchar(10),[holiday] int,[hdaypart] varchar(10),[sick] int,[sdaypart]
int
)
insert into #t([date],[name],[duty],[holiday],[hdaypart],[sick],[sdaypart])values ('2015-04-27','person1',1,0,NULL,0,NULL),
('2015-04-27','person1',1,0,'fd',0,NULL)
select MAX([date]),MAX([name]),MAX([duty]),MAX([holiday]),MAX([hdaypart]), [sick],[sdaypart] from #t
group by sick,[sdaypart]
OR
select [date],[name],[duty],[holiday],MAX([hdaypart])AS H,[sick],[sdaypart] from #t
group by [date],[name],[duty],[holiday],[sick],[sdaypart]
UNION
select [date],[name],[duty],[holiday],MAX([hdaypart])AS H,[sick],[sdaypart] from #t
group by [date],[name],[duty],[holiday],[sick],[sdaypart]
CREATE TABLE #Combine
(
[date] date,
[name] VARCHAR(10),
[duty] CHAR(1),
[holiday] CHAR(1),
[hdaypart] CHAR(5),
[sick] CHAR(1),
[sdaypart] VARCHAR(10)
)
INSERT INTO #Combine VALUES('2015-04-27', 'person1', '1','0',NULL,'0',NULL),
('2015-04-27', 'person1', '0','1','fd','0',NULL)
SELECT MAX(Date) [date],MAX(name) [name],MAX(Duty) [duty],MAX(holiday) holiday,
MAX(hdaypart) hdaypart,max(sick) sick,max(sdaypart)sdaypart FROM #Combine

Get all parents for a child

I want to retrieve the parentid of an id, if that parentid has a parent again retrieve it, and so on.
Kind of hierarchy table.
id----parentid
1-----1
5-----1
47894--5
47897--47894
am new to sql server and tried, some queries like:
with name_tree as
(
select id, parentid
from Users
where id = 47897 -- this is the starting point you want in your recursion
union all
select c.id, c.parentid
from users c
join name_tree p on p.id = c.parentid -- this is the recursion
)
select *
from name_tree;
It is giving me only one row.
and also I want to insert these records into a temporary table variable.
How can I do this. thanks in advance. sorry for asking the simple question(though not to me)
Try this to get all parents of a child
;with name_tree as
(
select id, parentid
from Users
where id = 47897 -- this is the starting point you want in your recursion
union all
select C.id, C.parentid
from Users c
join name_tree p on C.id = P.parentid -- this is the recursion
-- Since your parent id is not NULL the recursion will happen continously.
-- For that we apply the condition C.id<>C.parentid
AND C.id<>C.parentid
)
-- Here you can insert directly to a temp table without CREATE TABLE synthax
select *
INTO #TEMP
from name_tree
OPTION (MAXRECURSION 0)
SELECT * FROM #TEMP
Click here to view result
EDIT :
If you want to insert into a table variable, you can do something like:
-- Declare table varialbe
Declare #TABLEVAR table (id int ,parentid int)
;with name_tree as
(
select id, parentid
from #Users
where id = 47897 -- this is the starting point you want in your recursion
union all
select C.id, C.parentid
from #Users c
join name_tree p on C.id = P.parentid -- this is the recursion
-- Since your parent id is not NULL the recursion will happen continously.
-- For that we apply the condition C.id<>C.parentid
AND C.id<>C.parentid
)
-- Here you can insert directly to table variable
INSERT INTO #TABLEVAR
select *
from name_tree
OPTION (MAXRECURSION 0)
SELECT * FROM #TABLEVAR
Click here to view result
Your query is doing recursion but in opposite direction. So if you change starting point to:
where id = 1
then you will have user 1 and all his successors
you didn't mention the desired output and input.
However you can try like this,
Declare #t table (id int ,parentid int)
insert into #t
select 1,1 union all
select 5,1 union all
select 47894,5 union all
select 47897,47894
;With CTE as
(
select * from #t where id=1
union all
Select a.* from #t a inner join cte b
on b.id=a.parentid and
a.id<>b.id
)
select * from cte

Resources