Select row with Distinct max(column value) - sql-server

[ExactReplica].[FilteredOpportunityProduct] Table
Opportunityid baseamount
1 500
1 500
2 600
2 700
[ExactReplica].FilteredOpportunity Table
Opportunityid name
1 ABC
2 CDF
I want to take the maximum baseamount; however, am facing issue when there exists duplicate of the baseamount, how can I take only one record
My Query
select
MaxAmount.[baseamount] ,
c.name
FROM [ExactReplica].FilteredOpportunity c
Left JOIN
(
SELECT opportunityid,
MAX((baseamount)) baseamount
FROM [ExactReplica].[FilteredOpportunityProduct]
GROUP BY opportunityid
) MaxAmount ON c.opportunityid = MaxAmount.opportunityid
inner JOIN
[ExactReplica].[FilteredOpportunityProduct] p ON MaxAmount.opportunityid = p.opportunityid
AND MaxAmount.baseamount = p.baseamount

Try this:
select max(baseamount) baseamount,a.name
from
(select
baseamount, ROW_NUMBER() over (partition by p.opportunityid,baseamount order by p.baseamount desc) rn,
c.name
FROM FilteredOpportunity c
inner JOIN
[FilteredOpportunityProduct] p ON c.opportunityid = p.opportunityid) a
where rn=1
group by a.name
OUTPUT:
baseamount name
500 ABC
700 CDF

Can you try below query for expected result, I have executed the below scripts :
For Table Creation :
CREATE TABLE FILTEREDOPPORTUNITYPRODUCT (
OPPORTUNITYID INT NULL,
BASEAMOUNT VARCHAR(24) NULL
)
CREATE TABLE FILTEREDOPPORTUNITY (
OPPORTUNITYID INT NULL,
NAME VARCHAR(24) NULL
)
Insertion:
INSERT INTO FILTEREDOPPORTUNITYPRODUCT (OPPORTUNITYID,BASEAMOUNT) VALUES
(1,500),(1,500),(2,600),(2,700)
INSERT INTO FILTEREDOPPORTUNITY (OPPORTUNITYID,NAME) VALUES
(1,'ABC'),(2,'CDF')
Selection:
SELECT
A.OPPORTUNITYID,B.NAME,MAX(BASEAMOUNT) AS BASEAMOUNT
FROM FILTEREDOPPORTUNITYPRODUCT AS A
JOIN FILTEREDOPPORTUNITY AS B
ON A.OPPORTUNITYID = B.OPPORTUNITYID
GROUP BY A.OPPORTUNITYID,B.NAME

Related

Return only rows from the joined table with the latest date

By running the following query I realized that I have duplicates on the column QueryExecutionId.
SELECT DISTINCT qe.QueryExecutionid AS QueryExecutionId,
wfi.workflowdefinitionid AS FlowId,
qe.publishing_date AS [Date],
c.typename AS [Type],
c.name As Name
INTO #Send
FROM
[QueryExecutions] qe
JOIN [Campaign] c ON qe.target_campaign_id = c.campaignid
LEFT JOIN [WorkflowInstanceCampaignActivities] wfica ON wfica.queryexecutionresultid = qe.executionresultid
LEFT JOIN [WorkflowInstances] wfi ON wfica.workflowinstanceid = wfi.workflowinstanceid
WHERE qe.[customer_idhash] IS NOT NULL;
E.g. When I test with one of these QueryExecutionIds, I can two results
select * from ##Send
where QueryExecutionId = 169237
We realized the reason is that these two rows have a different FlowId (second returned value in the first query). After discussing this issue, we decided to take the record with a FlowId that has the latest date. This date is a column called lastexecutiontime that sits in the third joined table [WorkflowInstances] which is also the table where FlowId comes from.
How do I only get unique values of QueryExecutionId with the latest value of WorkflowInstances.lastexecution time and remove the duplicates?
You can use a derived table with first_value partitioned by workflowinstanceid ordered by lastexecutiontime desc:
SELECT DISTINCT qe.QueryExecutionid AS QueryExecutionId,
wfi.FlowId,
qe.publishing_date AS [Date],
c.typename AS [Type],
c.name As Name
INTO #Send
FROM
[QueryExecutions] qe
JOIN [Campaign] c ON qe.target_campaign_id = c.campaignid
LEFT JOIN [WorkflowInstanceCampaignActivities] wfica ON wfica.queryexecutionresultid = qe.executionresultid
LEFT JOIN
(
SELECT DISTINCT workflowinstanceid, FIRST_VALUE(workflowdefinitionid) OVER(PARTITION BY workflowinstanceid ORDER BY lastexecutiontime DESC) As FlowId
FROM [WorkflowInstances]
) wfi ON wfica.workflowinstanceid = wfi.workflowinstanceid
WHERE qe.[customer_idhash] IS NOT NULL;
Please note that your distinct query is pertaining to the selected variables,
eg. Data 1 (QueryExecutionId = 169237 and typename = test 1)
    Data 2 (QueryExecutionId = 169237 and typename = test 2)
The above 2 data are considered as distinct
Try partition by and selection the [seq] = 1 (the below code are partition by their date)
SELECT *
into #Send
FROM
(
SELECT *,ROW_NUMBER() OVER (PARTITION BY [QueryExecutionid] ORDER BY [Date] DESC) [Seq]
FROM
(
SELECT qe.QueryExecutionid AS QueryExecutionId,
wfi.FlowId,
qe.publishing_date AS [Date], --should not have any null values
qe.[customer_idhash]
c.typename AS [Type],
c.name As Name
FROM [QueryExecutions] qe
JOIN [Campaign] c
ON qe.target_campaign_id = c.campaignid
LEFT JOIN [WorkflowInstanceCampaignActivities] wfica
ON wfica.queryexecutionresultid = qe.executionresultid
LEFT JOIN
(
SELECT DISTINCT workflowinstanceid, FIRST_VALUE(workflowdefinitionid) OVER(PARTITION BY workflowinstanceid ORDER BY lastexecutiontime DESC) As FlowId
FROM [WorkflowInstances]
) wfi ON wfica.workflowinstanceid = wfi.workflowinstanceid
) a
WHERE [customer_idhash] IS NOT NULL
) b
WHERE [Seq] = 1
ORDER BY [QueryExecutionid]

MS SQL Server - Join two tables where ID is equivalent to new ID

I am trying to join two tables but did not able to success
Test Supplier Table
SID NAME
1 Test
2 Test2
Test Stock Table
ID NewID SupID Qty
1 101 1 2
2 102 1 5
3 103 2 6
101 1 4
101 1 7
101 2 5
103 2 10
The output I am looking for
ID NAME Qty
2 Test 5
101 Test 13
101 Test2 5
103 Test2 16
My code is -
Select S.NAME, ST.ID, SUM(ST.Qty)
From Stock ST
Inner Join ST.SupID = S.SID
I need to combine those ID's which are matching with the new ID's with another ID's. If you see the results, I need to combine ID 1 qty with ID 101 because ID 1 has new ID 101 and no need to display ID 1. I have tried inner join but did not work.
First, you find those with NEW ID and those without NEW ID. For those with NewID, use NEWID, for those without use ID (old ID). then you use UNION ALL to combine both result and join to the Supplier table to obtain the NAME.
; with
cte as
(
-- with NewID
select ID = NewID, SupID, Qty = sum(Qty)
from Stock ST
where exists
(
select *
from Stock x
where x.ID = ST.NewID
)
group by NewID, SupID
union all
-- without NewID
select ID, SupID, Qty = sum(Qty)
from Stock ST
where not exists
(
select *
from Stock x
where x.ID = ST.NewID
)
group by ID, SupID
)
select c.ID, SP.NAME, Qty = sum(Qty)
from cte c
inner join Supplier SP on c.SupID = SP.SID
group by c.ID, SP.NAME
Start with the Stock table and join to the Supplier table (remembering to name the table in the join) and then self-left join to the IDs in the Stock table to determine if they exist or not. Then group by on whichever ID you want to keep.
SELECT
COALESCE(ST2.ID, ST.ID) ID
, S.NAME NAME
, SUM(ST.Qty) Qty
FROM Stock ST
INNER JOIN Supplier S
ON ST.SupID = S.SID
LEFT JOIN (
SELECT DISTINCT ID
FROM Stock
) ST2
ON ST.NewID = ST2.ID
GROUP BY
COALESCE(ST2.ID, ST.ID)
, S.NAME

How to test against a list of items in an if statement

I have a large table (130 columns). It is a monthly dataset that is separated by month (jan,feb,mar,...). every month I get a small set of duplicate rows. I would like to remove one of the rows, it does not matter which row to be deleted.
This query seems to work ok when I only select the ID that I want to filter the dups on, but when I select everything "*" from the table I end up with all of the rows, dups included. My goal is to filter out the dups and insert the result set into a new table.
SELECT DISTINCT a.[ID]
FROM MonthlyLoan a
JOIN (SELECT COUNT(*) as Count, b.[ID]
FROM MonthlyLoan b
GROUP BY b.[ID])
AS b ON a.[ID] = b.[ID]
WHERE b.Count > 1
and effectiveDate = '01/31/2017'
Any help will be appreciated.
This will show you all duplicates per ID:
;WITH Duplicates AS
(
SELECT ID
rn = ROW_NUMBER() OVER (PARTITION BY ID ORDER BY ID)
FROM MonthlyLoan
)
SELECT ID,
rn
FROM Duplicates
WHERE rn > 1
Alternatively, you can set rn = 2 to find the immediate duplicate per ID.
Since your ID is dupped (A DUPPED ID!!!!)
all you need it to use the HAVING clause in your aggregate.
See the below example.
declare #tableA as table
(
ID int not null
)
insert into #tableA
values
(1),(2),(2),(3),(3),(3),(4),(5)
select ID, COUNT(*) as [Count]
from #tableA
group by ID
having COUNT(*) > 1
Result:
ID Count
----------- -----------
2 2
3 3
To insert the result into a #Temporary Table:
select ID, COUNT(*) as [Count]
into #temp
from #tableA
group by ID
having COUNT(*) > 1
select * from #temp

Left join with Sum Clause with more than 1 table gives incorrect Sum

I am trying to get the Sum of rows while applying a left join with more than 1 table. It seems it is creating a matrix of result which results in wrong sum function.
Example:
First Table: Customer
Second Table: TotalAssets
Third Table: TotalLiability
Table Structure:
Customer
CustID(int) CustomerName(varchar)
1 Abc
2 Def
3 Ghi
TotalAssets
CustID Amount
1 2000
1 1000
2 600
TotalLiability
CustID Amount
1 1000
1 1000
2 800
Output Expected
CustID TotalAssets TotalLiability
1 3000 2000
2 600 800
Current Query
Select c.CustID , Sum(a.Amount) , Sum(l.Amount) From Customer c
left join TotalAssests a on a.CustID = c.CustID
left join TotalLiability l on l.CustID = c.CustID
Group by c.CustID
The problem with this current query is the sum is not correct as i think the first left join create a first set with multiple records and then second one is applied.
Any help is appreciated
UPDATE:
I find some luck by following method but it seems a bad/hacky option as in my case i have over 7-8 elements in group by and adding more left clauses results in query difficult to manage.
New Query which is resulting correct result but looks very bad to maintains
Select Set1.CustID , Set1.TotalAssets, Sum(l.Amount) from (Select c.CustID , Sum(a.Amount) as TotalAssets From Customer c
left join TotalAssests a on a.CustID = c.CustID
Group by c.CustID)Set1
left join TotalLiability l on l.CustID = Set1.CustID.
Group by Set1.CustID , Set1.TotalAssets
I think this gets you what you want with minimum complexity:
select c.CustId, isnull(a.Amount, 0) as TotalAssets, isnull(l.Amount, 0) as TotalLiability
from Customers c
left join (
select CustId, sum(Amount) as Amount from TotalAssets group by CustId
) a on a.CustId = c.CustId
left join (
select CustId, sum(Amount) as Amount from TotalLiability group by CustId
) l on l.CustId = c.CustId
You need to group/sum the two tables separately, since the data in them is independent. Left-joining both to the customers table ensures that customers with no entries in either/both tables are still reported.
This should work:
Select c.CustID
, (select sum(a.amount) from TotalAssests a where a.CustId = c.CustID) as SumAsset
, (select Sum(l.Amount) TotalLiability l where l.CustID = c.CustID) as SumLiability
From Customer c
Hope the below works with less maintenance,
DECLARE #Customer TABLE (CustID int, CustomerName varchar(50)) DECLARE #TotalAssets TABLE (CustID int, Amount INT) DECLARE #TotalLiability TABLE (CustID int, Amount INT)
INSERT INTO #Customer
SELECT 1,
'ABC'
UNION
SELECT 2,
'DEF'
UNION
SELECT 3,
'GHI'
--Select * From #Customer
INSERT INTO #TotalAssets
SELECT 1,
2000
UNION
SELECT 1,
1000
UNION
SELECT 2,
600
--Select * From #TotalAssets
INSERT INTO #TotalLiability
SELECT 1,
1000
UNION
SELECT 1,
1000
UNION
SELECT 2,
800
--Select * From #TotalLiability
SELECT *
FROM #Customer
SELECT C.CustID,
C.CustomerName,
Sum(A.Amount) TotalAssets,
Sum(L.Amount) TotalLiability
FROM #Customer C
JOIN #TotalAssets A ON C.CustID = A.CustID
JOIN #TotalLiability L ON C.CustId = L.CustID
GROUP BY C.CustID,
C.CustomerName

SQL Server 2014 Consolidate Tables avoiding duplicates

I have 36 Sales tables each referred to one store:
st1.dbo.Sales
st2.dbo.Sales
...
st35.dbo.Sales
st36.dbo.Sales
Each record has the following key columns:
UserName, PostalCode, Location, Country, InvoiceAmount, ItemsCount, StoreID
Here is SQLFiddle
I need to copy into Customers table all Username (and their details) that are not already present into Customers
in case of duplicated it is required to use the fields of record where InvoiceAmount is MAX
I tried to build a query but looks too complicated and it is also wrong because in CROSS APPLY should consider the full list of Sales Tables
INSERT INTO Customers (.....)
SELECT distinct
d.UserName,
w.postalCode,
w.location,
W.country,
max(w.invoiceamount) invoiceamount,
max(w.itemscount) itemscount,
w.storeID
FROM
(SELECT * FROM st1.dbo.Sales
UNION
SELECT * FROM st2.dbo.Sales
UNION
...
SELECT * FROM st36.dbo.Sales) d
LEFT JOIN
G.dbo.Customers s ON d.Username = s.UserName
CROSS APPLY
(SELECT TOP (1) *
FROM s.dbo.[Sales]
WHERE d.Username=w.Username
ORDER BY InvoiceAmount DESC) w
WHERE
s.UserName IS NULL
AND d.username IS NOT NULL
GROUP BY
d.UserName, w.postalCode, w.location,
w.country, w.storeID
Can somebody please give some hints?
As a basic SQL query, I'd create a row_number in the inner subquery and then join to customers and then isolated the max invoice number for each customer not in the customer table.
INSERT INTO Customers (.....)
SELECT w.UserName,
w.postalCode,
w.location,
w.country,
w.invoiceamount,
w.itemscount,
w.storeID
FROM (select d.*,
row_number() over(partition by d.Username order by d.invoiceamount desc) rownumber
from (SELECT *
FROM st1.dbo.Sales
UNION
SELECT *
FROM st2.dbo.Sales
UNION
...
SELECT *
FROM st36.dbo.Sales
) d
LEFT JOIN G.dbo.Customers s
ON d.Username = s.UserName
WHERE s.UserName IS NULL
AND d.username IS NOT NULL
) w
where w.rownumber = 1
Using your fiddle this will select distinct usernames rows with max invoiceamount
with d as(
SELECT * FROM Sales
UNION
SELECT * FROM Sales2
)
select *
from ( select *,
rn = row_number() over(partition by Username order by invoiceamount desc)
from d) dd
where rn=1;
step 1 - use cte .
select username , invoiceamount ,itemscount from Sales
UNION all
select user name , invoiceamount ,itemscount from Sales
.....
...
step 2
next cte use group by and get max invoiceamount ,itemscount for user of last result set.
,cte2 as (
select user name , max (invoiceamount) as invoiceamount ,max(itemscount) as itemscount from cte)
step3
use left join with user table and find missing record and itemscount invoiceamount

Resources