SQL Query Uniqueness with subjoin - sql-server

Help! Here is a very simple a,b,c sample of what I need to accomplish. I have been pulling my hair out. I've written this before but can't get my head around it now! So here it is, with actual and expected results demonstrated below:
set nocount on
declare #a table (id int, a varchar(10))
declare #b table (ref int, b varchar(10), c varchar(20))
insert into #a select 1, 'bingo'
insert into #a select 2, 'bongo'
insert into #b select 1, 'T5', 'asdfwef'
insert into #b select 1, 'T8', 'asfqwez'
insert into #b select 1, 'T6', 'qweoae'
insert into #b select 1, 'T8', 'qzoeqe'
insert into #b select 1, 'T9', 'oqeizef'
insert into #b select 2, 'T3', 'awega'
insert into #b select 2, 'T6', 'fhaeaw'
insert into #b select 2, 'T3', 'fqsegw'
select * from #a a join #b b on a.id = b.ref
-- Expected (Uniqueness is: a’s id to b’s ref and the first b value, ignoring b’s c value)
----1,bingo,1,T5,asdfwef
----1,bingo,1,T8,asfqwez
----1,bingo,1,T6,qweoae
----1,bingo,1,T9,oqeizef
----2,bongo,2,T3,awega
----2,bongo,2,T6,fhaeaw
-- Actual
----1,bingo,1,T5,asdfwef
----1,bingo,1,T8,asfqwez
----1,bingo,1,T6,qweoae
----1,bingo,1,T8,qzoeqe
----1,bingo,1,T9,oqeizef
----2,bongo,2,T3,awega
----2,bongo,2,T6,fhaeaw
----2,bongo,2,T3,fqsegw

Your query is returning the correct results. All the matching values from #b.
If you want the first b value, you need to do two things. First, you need to include an ordering column in b so you know what "first" is. Remember, SQL tables are unordered. This is easy:
declare #b table (id int identity(1,1) not null, ref int, b varchar(10), c varchar(20));
You then have to change the inserts to insert all but the id:
insert into #b(ref, b, c) select 1, 'T5', 'asdfwef';
Now you are ready for the actual query:
select *
from #a a join
(select b.*, row_number() over (partition by b.ref, b.b order by b.id) as seqnum
from #b b
) b
on a.id = b.ref and b.seqnum = 1

Related

TSQL IF/ELSE or CASE (UPSERT)

Not sure if IF/ELSE is the right way to go for the following. It always returns ELSE, so it seems it's not working correctly.
IF ((SELECT COUNT(CAST(StudentuserID AS int)) FROM StudentAttendance WHERE StudentUserID=1)>0)
PRINT 'Yes'
ELSE
PRINT 'No'
This test should result in yes as the data is 8>0
I will be replacing PRINT with an UPDATE ELSE INSERT statement.
-- Upsert attempt: when at least one StudentAttendance row exists for StudentUserID = 1,
-- run the UPDATE branch; otherwise INSERT a new row built from the template placeholders.
IF ((SELECT COUNT(CAST(StudentuserID AS int)) FROM StudentAttendance WHERE StudentUserID=1)>0)
UPDATE StudentAttendance
SET
CID = CAST('[querystring:CID]' AS int),
CalendarEventID = CAST('[querystring:CEID]' AS int),
StudentUserID = CAST('[StudentUserID]' AS int),
Attendance = '[Attendance]'
-- NOTE(review): the UPDATE above has no WHERE clause, so it rewrites every row in
-- StudentAttendance, not just the student tested in the IF — confirm the intended key filter.
ELSE
INSERT INTO StudentAttendance
(CID,CalendarEventID,StudentUserID,Attendance)
VALUES
-- NOTE(review): the placeholder below is '[StudentsUserID]' while the UPDATE branch uses
-- '[StudentUserID]' — confirm which token the templating layer actually substitutes.
(CAST('[querystring:CID]' AS int), CAST('[querystring:CEID]' AS int), CAST('[StudentsUserID]' AS int),'[Attendance]')
It looks like your IF/ELSE would work fine (it looks like you're doing this for one record in a stored procedure or something?). If it's currently returning 'No' and you don't think it should be, I'd perhaps do a more basic check on your table, e.g.:
SELECT *
FROM StudentAttendance
WHERE StudentUserID = 1
You can also use a MERGE statement for this, and you can use multiple source tables by joining them within the USING part. Here is a basic example of that:
DECLARE #A table (Aid int, value int)
DECLARE #B table (Aid int, Cid int)
DECLARE #C table (Cid int, value int)
INSERT INTO #A VALUES (1, 1)
INSERT INTO #B VALUES (1, 2)
INSERT INTO #B VALUES (2, 3)
INSERT INTO #C VALUES (2, 4)
INSERT INTO #C VALUES (3, 6)
;
SELECT *
FROM #A
;
MERGE INTO #A tgt
USING (SELECT B.Aid, B.Cid, C.value FROM #B B JOIN #C C ON B.Cid = C.Cid) src
ON tgt.Aid = src.Aid
WHEN MATCHED THEN UPDATE
SET tgt.value = src.value
WHEN NOT MATCHED THEN
INSERT
(
Aid
, value
)
VALUES
(
src.Aid
, src.value
)
;
SELECT *
FROM #A
;

T-SQL: Evaluate ISNUMERIC before join

I am working with a SQL query whereby I need to filter based on an ISNUMERIC return value. The ISNUMERIC is important because on my join, I implicitly convert the value to an "int", so VARCHARs are a no-no.
I've looked at the order that queries are supposed to be processed in, and the FROM is processed and then the ON which is before the WHERE. Is there a way I can suggest that the ISNUMERIC be evaluated first WITHOUT using a Subquery? I'm not against Sub-Queries, I'm just wondering.
SELECT l.* FROM [dbo].[CRM_SD_Working_1] l
LEFT JOIN [dbo].[CRM_SD_Working_1] r ON l.[PlzVon] = r.[PlzBis] + 1
WHERE
ISNUMERIC(l.[PlzVon]) = 1
AND ISNUMERIC(l.[PlzBis]) = 1
AND l.PlzVon <> l.PlzBis
AND r.ID IS NULL
Can you not just put the isnumeric into your on criteria?
This works for me:
declare #a table(a nvarchar(10))
declare #b table(b nvarchar(10))
insert into #a values('1'),('2'),('3'),('4'),('a'),('5')
insert into #b values('1'),('2'),('3'),('5'),('6'),('b')
select *
from #a a
left join #b b
on(case when isnumeric(a.a) = 1
then a.a
else null
end
=
case when isnumeric(b.b) = 1
then b.b + 1
else null
end
)
If you are on version 2012 or higher, you can also use try_convert:
declare #a table(a nvarchar(10))
declare #b table(b nvarchar(10))
insert into #a values('1'),('2'),('3'),('4'),('a'),('5')
insert into #b values('1'),('2'),('3'),('5'),('6'),('b')
select *
from #a a
left join #b b
on(try_convert(int, a.a) = try_convert(int, b.b)+1
)
I don't think this is guaranteed to work but works for this data
The answer from iamdave looks good to me
declare #a table(a varchar(10))
declare #b table(b varchar(10))
insert into #a values('1'),('2'),('3'),('4'),('5'),('6'),('c')
insert into #b values('1'),('2'),('3'),('5'),('6'),('a')
select *
from
( select a from #a where isnumeric(a) = 1 ) aa
left join
( select b from #b where isnumeric(b) = 1 ) bb
on aa.a = bb.b + 1

SQL Server select (top) two rows into two temp variables

I have a query which results in two or more rows (just one column) and I want to catch the first row value into first temp variable and second row value into second temp variable without using multiple times the select top 1 and select top 1 order by desc
Something like this;
Select row1 value into #tempvariable1, row2 value into #tempvariable2 from blah blah
You need somehow to identify the row (I am using a row ID in the example below, ordering by value - you can order by id or something else):
DECLARE #DataSource TABLE
(
[value] VARCHAR(12)
);
INSERT INTO #DataSource
VALUES ('value 1')
,('value 2')
,('value 3');
DECLARE #tempVariable1 VARCHAR(12)
,#tempVariable2 VARCHAR(12);
WITH DataSource ([value], [rowID]) AS
(
SELECT [value]
,ROW_NUMBER() OVER (ORDER BY [value])
FROM #DataSource
)
SELECT #tempVariable1 = IIF([rowID] = 1, [value], #tempVariable1)
,#tempVariable2 = IIF([rowID] = 2, [value], #tempVariable2)
FROM DataSource;
SELECT #tempVariable1
,#tempVariable2;
You can use a CTE where you will get the X values you need and then select from it:
declare #data table(id int);
insert into #data(id) values(8), (6), (4), (3);
-- Receive the smallest and second-smallest id respectively.
declare #A int, #B int;
-- Number the rows by ascending id and keep the first two.
-- TOP gets its own ORDER BY: without one, which rows TOP keeps is undefined,
-- so n = 1 / n = 2 would not be guaranteed to survive into the CTE.
with vals(id, n) as (
    Select top(2) id, ROW_NUMBER() over(order by id)
    From #data
    Order by id
)
Select #A = (Select id From vals Where n = 1)
, #B = (Select id From vals Where n = 2);
You could also use PIVOT:
Select #A = [1], #B = [2]
From (
Select id, ROW_NUMBER() over(order by id)
From #data
) v(id, n)
PIVOT (
max(id) FOR n in ([1], [2])
) as piv
You have two options
Let's say the test case is built as below
create table dbo.Test
(
value varchar(100) not null
)
GO
insert into dbo.Test
values
('A'),('B'),('NO THIS ONE'),('NO THIS ONE'),('NO THIS ONE')
GO
Now let's say you fetch your data as below
select t.value
from dbo.Test t
where t.value != 'NO THIS ONE'
GO
The first and easier option is to save the data in a temp table
declare #results as Table (value varchar(100))
insert into #results
select t.value
from dbo.Test t
where t.value != 'NO THIS ONE'
you still use TOP 1 BUT not in the entire data, only in the results.
Use TOP 1 to find the first result and a second TOP 1 where value is different from the first.
-- Pick the first value and the next different value from the saved results.
declare #A varchar(100), #B varchar(100)
-- "First" only has a meaning under an explicit sort; TOP 1 without ORDER BY may return any row.
set #A = (select top 1 r.value from #results r order by r.value)
-- First value that differs from #A under the same ordering.
set #B = (select top 1 r.value from #results r where r.value != #A order by r.value)
select #A, #B
GO
This approach has the advantage of performance.
Of course, that doesn't work well if both values are equal. You can fix it by using a second TOP 1 that orders in the inverse order.
There's a better alternative using rownumber.
It works because if you set a variable while returning multiple rows, the variable keeps the value from the last row (in fact, it is reassigned on each row iteration).
The case statement makes sure the variable #A is set only on the first row iteration.
declare #A varchar(100), #B varchar(100)
/* This way #B receives the last value and #A the first */
select #B = t.value,
#A = (case when ROW_NUMBER() OVER(order by t.Value) = 1
then t.Value else #A
end)
from dbo.Test t
where t.value != 'NO THIS ONE'
select #A, #B

How to concatenate using in sql server

I have a table where the data are like
Data
a
b
c
I need to write a SQL query to bring the following output
Data
abc
How to do the same by using in SQL Server 2000
Thanks
I don't know how/if it can be done with XML RAW. This approach works in SQL2000 though.
-- Builds a single string from the Data values in #t by appending each value to the variable.
DECLARE #Data varchar(8000)
-- Start from an empty string so the first append does not start from NULL.
set #Data =''
-- NOTE(review): the order in which a multi-row variable assignment sees the rows when combined
-- with ORDER BY is not something visible here as guaranteed — verify on the target server.
select #Data = #Data + Data
FROM #t
ORDER BY Data
SELECT #Data
Edit Oh I've just seen your other question where Cade gave you a link. Doesn't KM's answer on that link work for you?
KM's test query
--combine parent and child, children are CSV onto parent row
CREATE TABLE #TableA (RowID int, Value1 varchar(5), Value2 varchar(5))
INSERT INTO #TableA VALUES (1,'aaaaa','A')
INSERT INTO #TableA VALUES (2,'bbbbb','B')
INSERT INTO #TableA VALUES (3,'ccccc','C')
CREATE TABLE #TableB (RowID int, TypeOf varchar(10))
INSERT INTO #TableB VALUES (1,'wood')
INSERT INTO #TableB VALUES (2,'wood')
INSERT INTO #TableB VALUES (2,'steel')
INSERT INTO #TableB VALUES (2,'rock')
INSERT INTO #TableB VALUES (3,'plastic')
INSERT INTO #TableB VALUES (3,'paper')
SELECT
a.*,dt.CombinedValue
FROM #TableA a
LEFT OUTER JOIN (SELECT
c1.RowID
,STUFF(REPLACE(REPLACE(
(SELECT
', ' + TypeOf as value
FROM (SELECT
a.RowID,a.Value1,a.Value2,b.TypeOf
FROM #TableA a
LEFT OUTER JOIN #TableB b ON a.RowID=b.RowID
) c2
WHERE c2.rowid=c1.rowid
ORDER BY c1.RowID, TypeOf
FOR XML RAW
)
,'<row value="',''),'"/>','')
, 1, 2, '') AS CombinedValue
FROM (SELECT
a.RowID,a.Value1,a.Value2,b.TypeOf
FROM #TableA a
LEFT OUTER JOIN #TableB b ON a.RowID=b.RowID
) c1
GROUP BY RowID
) dt ON a.RowID=dt.RowID

T-SQL: How do I get the rows from one table whose values completely match up with values in another table?

Given the following:
declare #a table
(
pkid int,
value int
)
declare #b table
(
otherID int,
value int
)
insert into #a values (1, 1000)
insert into #a values (1, 1001)
insert into #a values (2, 1000)
insert into #a values (2, 1001)
insert into #a values (2, 1002)
insert into #b values (-1, 1000)
insert into #b values (-1, 1001)
insert into #b values (-1, 1002)
How do I query for all the values in #a that completely match up with #b?
{#a.pkid = 1, #b.otherID = -1} would not be returned (only 2 of 3 values match)
{#a.pkid = 2, #b.otherID = -1} would be returned (3 of 3 values match)
Refactoring tables can be an option.
EDIT: I've had success with the answers from James and Tom H.
When I add another case in #b, they fall a little short.
insert into #b values (-2, 1000)
Assuming this should return two additional rows ({#a.pkid = 1, #b.otherID = -2} and {#a.pkid = 2, #b.otherID = -2}, it doesn't work. However, for my project this is not an issue.
This is more efficient (it uses TOP 1 instead of COUNT), and works with (-2, 1000):
-- Returns every (pkid, otherID) pair for which each value listed under otherID in #b
-- also appears under pkid in #a. Extra values in #a are allowed, so a one-value
-- otherID such as (-2, 1000) pairs with every pkid that contains 1000.
SELECT q.pkid, q.otherID
FROM (
    SELECT ab.pkid, ab.otherID,
        (
        -- Probe for one value of this otherID that the pkid lacks.
        -- TOP 1 stops at the first hit instead of counting all of them;
        -- the constant 1 keeps the flag non-NULL even when the missing value itself is NULL.
        SELECT TOP 1 1
        FROM #b bi
        WHERE bi.otherID = ab.otherID
          AND NOT EXISTS (
                SELECT 1
                FROM #a ai
                WHERE ai.pkid = ab.pkid
                  AND ai.value = bi.value
              )
        ) unmatch
    FROM
        (
        -- every candidate pairing of a pkid with an otherID
        SELECT DISTINCT a.pkid, b.otherID
        FROM #a a CROSS JOIN #b b
        ) ab
    ) q
-- NULL means the probe found nothing missing, i.e. the pair is a complete match.
-- (Filtering on IS NOT NULL would return exactly the pairs that do NOT match.)
WHERE q.unmatch IS NULL
Probably not the cheapest way to do it:
SELECT a.pkId,b.otherId FROM
(SELECT a.pkId,CHECKSUM_AGG(DISTINCT a.value) as 'ValueHash' FROM #a a GROUP BY a.pkId) a
INNER JOIN (SELECT b.otherId,CHECKSUM_AGG(DISTINCT b.value) as 'ValueHash' FROM #b b GROUP BY b.otherId) b
ON a.ValueHash = b.ValueHash
You can see, basically I'm creating a new result set for each representing one value for each Id's set of values in each table and joining only where they match.
The following query gives you the requested results:
-- For each (pkid, otherId) pair, count the values the two share and keep the pair
-- when that count equals the number of rows #b holds for the otherId.
select A.pkid, B.otherId
from #a A
inner join #b B
    on B.value = A.value
group by A.pkid, B.otherId
having count(B.value) = (
    select count(*)
    from #b BB
    where BB.otherId = B.otherId
)
Works for your example, and I think it will work for all cases, but I haven't tested it thoroughly:
SELECT
SQ1.pkid
FROM
(
SELECT
a.pkid, COUNT(*) AS cnt
FROM
#a AS a
GROUP BY
a.pkid
) SQ1
INNER JOIN
(
SELECT
a1.pkid, b1.otherID, COUNT(*) AS cnt
FROM
#a AS a1
INNER JOIN #b AS b1 ON b1.value = a1.value
GROUP BY
a1.pkid, b1.otherID
) SQ2 ON
SQ2.pkid = SQ1.pkid AND
SQ2.cnt = SQ1.cnt
INNER JOIN
(
SELECT
b2.otherID, COUNT(*) AS cnt
FROM
#b AS b2
GROUP BY
b2.otherID
) SQ3 ON
SQ3.otherID = SQ2.otherID AND
SQ3.cnt = SQ1.cnt
-- Note, only works as long as no duplicate values are allowed in either table
DECLARE #validcomparisons TABLE (
pkid INT,
otherid INT,
num INT
)
INSERT INTO #validcomparisons (pkid, otherid, num)
SELECT a.pkid, b.otherid, A.cnt
FROM (select pkid, count(*) as cnt FROM #a group by pkid) a
INNER JOIN (select otherid, count(*) as cnt from #b group by otherid) b
ON b.cnt = a.cnt
DECLARE #comparison TABLE (
pkid INT,
otherid INT,
same INT)
insert into #comparison(pkid, otherid, same)
SELECT a.pkid, b.otherid, count(*)
FROM #a a
INNER JOIN #b b
ON a.value = b.value
GROUP BY a.pkid, b.otherid
SELECT COMP.PKID, COMP.OTHERID
FROM #comparison comp
INNER JOIN #validcomparisons val
ON comp.pkid = val.pkid
AND comp.otherid = val.otherid
AND comp.same = val.num
I've added a few extra test cases. You can change your duplicate handling by changing the way you use distinct keywords in your aggregates. Basically, I'm getting a count of matches and comparing it to a count of required matches in each #a and #b.
declare #a table
(
pkid int,
value int
)
declare #b table
(
otherID int,
value int
)
insert into #a values (1, 1000)
insert into #a values (1, 1001)
insert into #a values (2, 1000)
insert into #a values (2, 1001)
insert into #a values (2, 1002)
insert into #a values (3, 1000)
insert into #a values (3, 1001)
insert into #a values (3, 1001)
insert into #a values (4, 1000)
insert into #a values (4, 1000)
insert into #a values (4, 1001)
insert into #b values (-1, 1000)
insert into #b values (-1, 1001)
insert into #b values (-1, 1002)
insert into #b values (-2, 1001)
insert into #b values (-2, 1002)
insert into #b values (-3, 1000)
insert into #b values (-3, 1001)
insert into #b values (-3, 1001)
-- Pairs (pkid, otherId) whose sets of distinct values are identical.
-- All three counts are taken over DISTINCT values so that duplicated rows on either
-- side are compared on the same footing.
SELECT Matches.pkid, Matches.otherId
FROM
(
    -- Distinct values the pair has in common. COUNT(*) here would count every
    -- duplicate-to-duplicate join row, so a pair sharing a duplicated value could
    -- never equal the distinct counts below.
    SELECT a.pkid, b.otherId, n = COUNT(DISTINCT a.Value)
    FROM #a a
    INNER JOIN #b b
        ON a.Value = b.Value
    GROUP BY a.pkid, b.otherId
) AS Matches
INNER JOIN
(
    -- distinct values held by each pkid
    SELECT
        pkid,
        n = COUNT(DISTINCT value)
    FROM #a
    GROUP BY pkid
) AS ACount
    ON Matches.pkid = ACount.pkid
INNER JOIN
(
    -- distinct values held by each otherId
    SELECT
        otherId,
        n = COUNT(DISTINCT value)
    FROM #b
    GROUP BY otherId
) AS BCount
    ON Matches.otherId = BCount.otherId
-- shared = all of a's values = all of b's values  =>  the two sets are equal
WHERE Matches.n = ACount.n AND Matches.n = BCount.n
How do I query for all the values in #a that completely match up with #b?
I'm afraid this definition is not quite perfectly clear. It seems from your additional example that you want all pairs of a.pkid, b.otherID for which every b.value for the given b.otherID is also an a.value for the given a.pkid.
In other words, you want the pkids in #a that have at least all the values for otherIDs in b. Extra values in #a appear to be okay. Again, this is reasoning based on your additional example, and the assumption that (1, -2) and (2, -2) would be valid results. In both of those cases, the a.value values for the given pkid are more than the b.value values for the given otherID.
So, with that in mind:
select
matches.pkid
,matches.otherID
from
(
select
a.pkid
,b.otherID
,count(1) as cnt
from #a a
inner join #b b
on b.value = a.value
group by
a.pkid
,b.otherID
) as matches
inner join
(
select
otherID
,count(1) as cnt
from #b
group by otherID
) as b_counts
on b_counts.otherID = matches.otherID
where matches.cnt = b_counts.cnt
To iterate the point further:
select a.*
from #a a
inner join #b b on a.value = b.value
This will return all the values in #a that match #b
If you are trying to return only complete sets of records, you could try this. I would definitely recommend using meaningful aliases, though ...
Cervo is right, we need an additional check to ensure that a is an exact match of b and not a superset of b. This is more of an unwieldy solution at this point, so this would only be reasonable in contexts where analytical functions in the other solutions do not work.
select
a.pkid,
a.value
from
#a a
where
a.pkid in
(
select
pkid
from
(
select
c.pkid,
c.otherid,
count(*) matching_count
from
(
select
a.pkid,
a.value,
b.otherid
from
#a a inner join #b b
on a.value = b.value
) c
group by
c.pkid,
c.otherid
) d
inner join
(
select
b.otherid,
count(*) b_record_count
from
#b b
group by
b.otherid
) e
on d.otherid = e.otherid
and d.matching_count = e.b_record_count
inner join
(
select
a.pkid match_pkid,
count(*) a_record_count
from
#a a
group by
a.pkid
) f
on d.pkid = f.match_pkid
and d.matching_count = f.a_record_count
)
1) I assume that you don't have duplicate ids
2) get the keys with the same number of values
3) the row whose number of key values equals the number of equal values is the target
I hope it's what you were looking for (you aren't after performance, are you?)
declare #a table( pkid int, value int)
declare #b table( otherID int, value int)
insert into #a values (1, 1000)
insert into #a values (1, 1001)
insert into #a values (2, 1000)
insert into #a values (2, 1001)
insert into #a values (2, 1002)
insert into #a values (3, 1000)
insert into #a values (3, 1001)
insert into #a values (4, 1000)
insert into #a values (4, 1001)
insert into #b values (-1, 1000)
insert into #b values (-1, 1001)
insert into #b values (-1, 1002)
insert into #b values (-2, 1001)
insert into #b values (-2, 1002)
insert into #b values (-3, 1000)
insert into #b values (-3, 1001)
select cntok.cntid1 as cntid1, cntok.cntid2 as cntid2
from
(select cnt.cnt, cnt.cntid1, cnt.cntid2 from
(select acnt.cnt as cnt, acnt.cntid as cntid1, bcnt.cntid as cntid2 from
(select count(pkid) as cnt, pkid as cntid from #a group by pkid)
as acnt
full join
(select count(otherID) as cnt, otherID as cntid from #b group by otherID)
as bcnt
on acnt.cnt = bcnt.cnt)
as cnt
where cntid1 is not null and cntid2 is not null)
as cntok
inner join
(select count(1) as cnt, cnta.cntid1 as cntid1, cnta.cntid2 as cntid2
from
(select cnt, cntid1, cntid2, a.value as value1
from
(select cnt.cnt, cnt.cntid1, cnt.cntid2 from
(select acnt.cnt as cnt, acnt.cntid as cntid1, bcnt.cntid as cntid2 from
(select count(pkid) as cnt, pkid as cntid from #a group by pkid)
as acnt
full join
(select count(otherID) as cnt, otherID as cntid from #b group by otherID)
as bcnt
on acnt.cnt = bcnt.cnt)
as cnt
where cntid1 is not null and cntid2 is not null)
as cntok
inner join #a as a on a.pkid = cntok.cntid1)
as cnta
inner join
(select cnt, cntid1, cntid2, b.value as value2
from
(select cnt.cnt, cnt.cntid1, cnt.cntid2 from
(select acnt.cnt as cnt, acnt.cntid as cntid1, bcnt.cntid as cntid2 from
(select count(pkid) as cnt, pkid as cntid from #a group by pkid)
as acnt
full join
(select count(otherID) as cnt, otherID as cntid from #b group by otherID)
as bcnt
on acnt.cnt = bcnt.cnt)
as cnt
where cntid1 is not null and cntid2 is not null)
as cntok
inner join #b as b on b.otherid = cntok.cntid2)
as cntb
on cnta.cntid1 = cntb.cntid1 and cnta.cntid2 = cntb.cntid2 and cnta.value1 = cntb.value2
group by cnta.cntid1, cnta.cntid2)
as cntequals
on cntok.cnt = cntequals.cnt and cntok.cntid1 = cntequals.cntid1 and cntok.cntid2 = cntequals.cntid2
Several ways of doing this, but a simple one is to create a union view as
-- Stacks the rows of both tables into one rowset. UNION ALL keeps duplicate rows,
-- which a plain UNION would collapse. The name matches the qryMyUnion that the
-- follow-up query selects from.
create view qryMyUnion as
select * from table1
union all
select * from table2
be careful to use union all, not a simple union as that will omit the duplicates
then do this
select count( * ), [field list here]
from qryMyUnion
group by [field list here]
having count( * ) > 1
the Union and Having statements tend to be the most overlooked part of standard SQL, but they can solve a lot of tricky issues that otherwise require procedural code
As CQ says, a simple inner join is all you need.
-- Rows of #a for pkid 2 whose value also occurs in #b.
-- A bare * would return the columns of both joined tables, so the select list is
-- qualified with the #a alias to match the stated intent.
Select a.* -- all columns but only from #a
from #a a
inner join #b b
on a.value = b.value -- only return matching rows
where a.pkid = 2

Resources