Inefficient SQL query with joined tables - sql-server

I have a table with 1700 rows of data. I'm querying it using the query below, which also pulls complementary data from related tables. It's currently running very slowly (around 10 seconds).
How can I improve the efficiency of this query?
-- Job report: one output row per PM_jobs row with jobStatus >= 13.
-- Every derived column below is a correlated scalar subquery keyed on t.jobid,
-- so pm_jobs, pm_clients, PM_ClientDetails, PM_Quotes and pm_tasks are each
-- referenced several times for every output row.
SELECT [jobID] ,
-- Organisation name of the client, found through pm_jobs.orgid.
(SELECT orgname
FROM pm_clients c
WHERE c.orgID IN
(SELECT orgid
FROM pm_jobs j
WHERE j.jobid=t.jobid
AND j.jobStatus>=13)) AS orgname ,
-- Client sector: the same pm_clients / pm_jobs lookup as orgname, repeated.
(SELECT sector
FROM pm_clients c
WHERE c.orgID IN
(SELECT orgid
FROM pm_jobs j
WHERE j.jobid=t.jobid
AND j.jobStatus>=13)) AS sector ,
-- Region: the client's region when country = 1, otherwise the country name
-- from AT_A_CountryCodes.
(SELECT region= CASE country
WHEN 1 THEN region
ELSE
(SELECT countryname
FROM AT_A_CountryCodes x
WHERE x.id= l.country)
END
FROM PM_ClientDetails l
WHERE l.userid =
(SELECT userid
FROM pm_jobs j
WHERE j.jobid=t.jobid)) AS region ,
-- Postcode from the same PM_ClientDetails row (second lookup of that row).
(SELECT postcode
FROM PM_ClientDetails l
WHERE l.userid =
(SELECT userid
FROM pm_jobs j
WHERE j.jobid=t.jobid)) AS postcode ,
-- First name of the user referenced by pm_jobs.pmid.
(SELECT firstname
FROM users u
WHERE u.userid =
(SELECT pmid
FROM pm_jobs j
WHERE j.jobid=t.jobid)) AS PM ,
[creationDate] ,
-- Display name of the job's status.
(SELECT statusName
FROM pm_jobstatus j
WHERE j.[statusID]=t.jobStatus) AS JobStatus ,
[completionDate] ,
[deadline],
[jobTitle] ,
-- Currency symbol of one quote belonging to the job's tasks; TOP(1) has no
-- ORDER BY, so which quote supplies the currency is not defined.
(SELECT currencysymbol
FROM at_a_currency c
WHERE c.currencyID =
(SELECT top(1) quoteCurrency
FROM PM_Quotes q
WHERE q.taskid IN
(SELECT taskid
FROM pm_tasks x
WHERE x.jobID=t.jobid))) AS currency ,
-- The three totals below each scan PM_Quotes / pm_tasks separately for the same job.
(SELECT sum(quoteSubTotal)
FROM PM_Quotes q
WHERE q.taskid IN
(SELECT taskid
FROM pm_tasks x
WHERE x.jobID=t.jobid)) AS subtotal ,
(SELECT sum(quoteVAT)
FROM PM_Quotes q
WHERE q.taskid IN
(SELECT taskid
FROM pm_tasks x
WHERE x.jobID=t.jobid)) AS VAT ,
(SELECT sum(quoteTotal)
FROM PM_Quotes q
WHERE q.taskid IN
(SELECT taskid
FROM pm_tasks x
WHERE x.jobID=t.jobid)) AS total ,
-- purchaseOrder and clientReference are re-read from pm_jobs although the
-- outer row t already comes from PM_jobs.
(SELECT [purchaseOrder]
FROM pm_jobs j
WHERE j.jobid=t.jobid) AS purchaseOrder ,
(SELECT [clientReference]
FROM pm_jobs j
WHERE j.jobid=t.jobid) AS clientReference ,
-- completedOnTime: 1 when deadline equals 1900-01-01 (presumably a "no deadline"
-- sentinel - confirm) or completion is no later than one day after the deadline;
-- 0 when it is later than that or completiondate is NULL.
(SELECT CASE
WHEN [deadline]='1900-01-01 00:00:00' THEN 1
WHEN [completiondate]>dateadd(dd,1,[deadline]) THEN 0
WHEN [completiondate]<=dateadd(dd,1,[deadline])THEN 1
WHEN [completiondate] IS NULL THEN 0
END) AS completedOnTime
FROM [PM_jobs] t
WHERE jobStatus>=13
Edit
Thanks to @GuidoG for the response. Here's the amended query, which is much faster now.
-- Job report: one row per job with jobstatus >= 13, with client, location,
-- project-manager and status lookups plus quote totals.
-- Lookup tables are LEFT JOINed once each instead of being queried by a scalar
-- subquery per output row; a missing lookup row still yields NULL.
-- NOTE(review): the INNER JOINs to pm_tasks / pm_quotes drop jobs that have no
-- quoted task; switch them to LEFT JOIN if such jobs must be listed - confirm.
SELECT j.jobid,
       c.orgname,
       c.sector,
       -- When country = 1 the client's own region is shown, otherwise the country name.
       CASE l.country
           WHEN 1 THEN l.region
           ELSE cc.countryname
       END AS region,
       l.postcode,
       pm.firstname AS PM,
       j.creationdate,
       js.statusname AS JobStatus,
       j.completiondate,
       j.deadline,
       j.jobtitle,
       j.purchaseorder,
       j.clientreference,
       cur.currencysymbol AS currency,
       SUM(q.quotesubtotal) AS subtotal,
       SUM(q.quotevat) AS VAT,
       SUM(q.quotetotal) AS total,
       -- 1 = on time: deadline is the 1900-01-01 placeholder, or completion is
       -- within one day after the deadline; 0 = late or not completed.
       CASE
           WHEN j.deadline = '1900-01-01 00:00:00' THEN 1
           WHEN j.completiondate > DATEADD(dd, 1, j.deadline) THEN 0
           WHEN j.completiondate <= DATEADD(dd, 1, j.deadline) THEN 1
           WHEN j.completiondate IS NULL THEN 0
       END AS completedOnTime,
       -- DISTINCT: the join to pm_quotes repeats a task once per quote, so a
       -- plain COUNT(t.taskid) would count quotes rather than tasks.
       COUNT(DISTINCT t.taskid) AS taskcount
FROM pm_jobs AS j
INNER JOIN pm_clients AS c
    ON c.orgid = j.orgid
INNER JOIN pm_clientdetails AS l
    ON l.userid = j.userid
INNER JOIN pm_tasks AS t
    ON t.jobid = j.jobid
INNER JOIN pm_quotes AS q
    ON q.taskid = t.taskid
LEFT JOIN at_a_countrycodes AS cc
    ON cc.id = l.country
LEFT JOIN users AS pm
    ON pm.userid = j.pmid
LEFT JOIN pm_jobstatus AS js
    ON js.statusid = j.jobstatus
LEFT JOIN at_a_currency AS cur
    ON cur.currencyid = l.clientcurrency
WHERE j.jobstatus >= 13
GROUP BY j.jobid,
         c.orgname,
         c.sector,
         l.country,
         l.region,
         cc.countryname,
         l.postcode,
         pm.firstname,
         j.creationdate,
         j.jobstatus,
         js.statusname,
         j.completiondate,
         j.deadline,
         j.jobtitle,
         j.purchaseorder,
         j.clientreference,
         l.clientcurrency,
         cur.currencysymbol,
         j.pmid
ORDER BY j.completiondate DESC

You should consider joining instead of subquerying. Here is a small example to get you on your way:
-- Jobs at status 13 or later together with the organisation name and sector
-- of their client; pm_clients is read once through the join.
SELECT
    job.jobID,
    client.orgName,
    client.sector
FROM [SQL2012_921487_atlas].[dbo].[PM_jobs] AS job
INNER JOIN pm_clients AS client
    ON client.orgID = job.orgID
WHERE jobStatus >= 13
When subquerying like you did, you force SQL Server to read table pm_clients several times; joining enables it to read pm_clients only 1 time.

Related

SQL query filling up tempdb

I am running the below query which is failing when it fills up the tempdb (170GB). It fails after around 1 hour.
the script below :
--Select Query BOM collection retail report
-- Aggregates the rows of ExpBom_HMFI per company / BOM / product / material / unit
-- and decorates them with item names, the product unit and the material type.
-- NOTE(review): "#Company" / "#Product" are presumably the variables @Company /
-- @Product ("@" appears to have been replaced by "#" in this copy) - confirm.
Declare #Company Nvarchar(50) ='HMFI'
Declare #Product Nvarchar(50) =Null
select Upper (Be.DataAreaId)Company ,BE.BOM,BE.Product
,Max(ProItemName)ProItemName
,Max(ProUnitID)ProUnitID
,Be.Material,Max(MaterialItemName)MaterialItemName
,Be.UNITID MaterialUnitID
,Sum (Be.BOMQTY)MaterialQty
,Max (MaterialService) MaterialType
from ExpBom_HMFI BE
-- Each OUTER APPLY fetches one attribute for the row's product or material;
-- INVENTTABLE is probed three times and INVENTTABLEMODULE once per input row.
Outer Apply (SELECT UNITID ProUnitID FROM INVENTTABLEMODULE A With (Nolock) WHERE DATAAREAID = #Company AND A.ITEMID =BE.Product AND MODULETYPE = 0)ProUnitID
Outer Apply(SELECT ITEMNAME ProItemName FROM INVENTTABLE B With (Nolock) WHERE DATAAREAID = #Company AND B.ITEMID = BE.Product)ProItemName
Outer Apply(SELECT ITEMNAME MaterialItemName FROM INVENTTABLE C With (Nolock) WHERE DATAAREAID = #Company AND C.ITEMID = Be.Material)MaterialItemName
Outer Apply(SELECT Case When ITEMTYPE=0 Then 'Item' When ITEMTYPE=1 Then 'BOM' When ITEMTYPE=2 Then 'Service Item' End MaterialService
FROM INVENTTABLE With (Nolock) WHERE DATAAREAID = #Company AND ITEMID = Be.Material)MaterialService
-- A NULL product filter means "all products"; otherwise the comma-separated
-- list is expanded by the Split function.
Where BE.DataAreaId in (#Company) and (#Product Is null Or Be.Product In(Select StringValue From Split(#Product,',')))
Group by Be.DataAreaId,BE.BOM,BE.Product,Be.Material ,Be.UNITID
Order By Be.DataAreaId,BE.BOM,BE.Product,Be.Material
-- NOTE(review): MAXRECURSION only matters for recursive CTEs and this statement
-- contains none; the hint presumably belongs to the ExpBom query - confirm.
option (maxrecursion 0)
--now Viewing the data collected
-- Recursive CTE that flattens multi-level BOMs for company 'HMFI'.
--   Anchor member:    every approved, active, currently valid BOM version with
--                     its direct component lines (BomDepth = 1).
--   Recursive member: walks upwards - for each already expanded product it finds
--                     the BOM versions that use that product as a component and
--                     emits (parent product, original material) with the
--                     quantities multiplied and the parent prepended to BomPath.
-- NOTE(review): a CTE's WITH must follow a terminated statement; when this runs
-- in the same batch as a preceding statement, that statement needs a trailing ";".
-- NOTE(review): nothing stops a product from re-entering its own BomPath, so
-- cyclic BOM data would presumably keep recursing until the MAXRECURSION limit
-- is hit or tempdb is exhausted - check the data for cycles.
with ExpBom (
DataAreaId,
Bom,
Product,
Material,
BomDepth,
BOMQTY,
Unitid,
BomPath
) as (
select
bv.DataAreaId,
bv.BomId,
bv.ItemId,
b.ItemId,
1,
Convert (NUMERIC(18,8), b.BOMQTY) BOMQTY,
Convert (Nvarchar(10),b.UNITID )Unitid,
convert(Nvarchar(max), bv.ItemId + '|' + b.ItemId) as BomPath
from BomVersion bv With (Nolock)
-- The two InventTable joins select no columns; they only require that the
-- product and the component exist as items.
join InventTable ibv With (Nolock)
on ibv.DataAreaId = bv.DataAreaId
and ibv.ItemId = bv.ItemId
join Bom b With (Nolock)
on b.DataAreaId = bv.DataAreaId
and b.BomId = bv.BomId
join InventTable ib With (Nolock)
on ib.DataAreaId = b.DataAreaId
and ib.ItemId = b.ItemId
where bv.Approved = 1
and bv.Active = 1
and bv.FromDate < getdate()
-- ToDate = 1900-01-01 is accepted alongside future dates (presumably an
-- "open-ended" marker - confirm).
and (bv.ToDate = '01-01-1900' or bv.ToDate >= getdate())
and b.FromDate < getdate()
and (b.ToDate = '01-01-1900' or b.ToDate >= getdate())
and b.DATAAREAID in ('HMFI')
union all
select
bv.DataAreaId,
bv.BomId,
bv.ItemId,
eb.Material,
eb.BomDepth + 1,
-- Quantity of the material per unit of the parent = line qty * qty found so far.
Convert (NUMERIC(18,8),B.BOMQTY * eb.BOMQTY)BOMQTY,
Convert (Nvarchar(10),eb.UNITID )Unitid,
convert(Nvarchar(max), bv.ItemId + '|' + eb.BomPath) as BomPath
from BomVersion bv With (Nolock)
join InventTable ibv With (Nolock)
on ibv.DataAreaId = bv.DataAreaId
and ibv.ItemId = bv.ItemId
join Bom b With (Nolock)
on b.DataAreaId = bv.DataAreaId
and b.BomId = bv.BomId
-- Recursion step: the BOM line's item is a product that was already expanded.
join ExpBom eb
on eb.DataAreaId = b.DataAreaId
and eb.Product = b.ItemId
where bv.Approved = 1
and bv.Active = 1
and bv.FromDate < getdate()
and (bv.ToDate = '01-01-1900' or bv.ToDate >= getdate())
and b.FromDate < getdate()
and (b.ToDate = '01-01-1900' or b.ToDate >= getdate())
and b.DATAAREAID in ('HMFI')
)
select * from ExpBOM
-- Keep only rows whose Material has no approved, active, valid BOM version of
-- its own, i.e. materials that are not themselves exploded further.
-- NOTE(review): NOT IN yields no rows at all if the subquery returns a NULL
-- ITEMID; NOT EXISTS with a correlated predicate avoids that trap.
Where Material Not in (Select BOMV.ITEMID From BomVersion BOMV With (Nolock) Where BOMV.DataAreaId In( 'HMFI' ) and BOMV.Approved = 1
and BOMV.Active = 1
and BOMV.FromDate < getdate()
and (BOMV.ToDate = '01-01-1900' or BOMV.ToDate >= getdate()) )
I'm not sure if the JOINS are causing the issue
Estimated execution plan is below:
Data collection :
https://www.brentozar.com/pastetheplan/?id=S1UsXn4Po
Data view:
https://www.brentozar.com/pastetheplan/?id=BJDUBn4wi
Please advise
This report was working fine on a daily basis without filling tempdb; it usually took about 1 minute to execute. It suddenly stopped working for an unknown reason, although no changes were made at the server or database level.

Nested SQL select statement in SQL Server with aggregate function

I have a query where first table Project will be fetched as a list. So every project row should have a unique ProjectID.
Now the second table Contract should get one row for each project. Although there are multiple rows for some projects in the Contract table. But I have to get only one row based on the select count condition I have applied in the below query.
Right now my code is returning two rows and I want only the second row, not the first. The logic is that only rows with ApprovedbyCustomer 0 or 1 should be picked, and there cannot be more than one such row per project. Rows with value 2 can occur multiple times and are destroying my result set. For multiple projects with different ProjectIds, the query should still return multiple rows (one per project).
-- Projects of customer 11111 with bid, contract and payment-request figures.
-- Contract is LEFT JOINed on ProjectId only, so a project with several contract
-- rows is returned once per contract row.
SELECT
a.ProjectId
,a.Title
,a.CreationDate
,a.Status
-- Whole hours between the current time shifted to UTC+05:30 and BidEndDateTime.
,DATEDIFF(second, CAST(SWITCHOFFSET(SYSDATETIMEOFFSET(), '+05:30') AS DATETIME), BidEndDateTime) / 3600 As BidTimeLeft
,(SELECT COUNT(*) FROM Bidding b WHERE a.ProjectId = b.ProjectId) AS BidsCount
-- Company name of the partner whose bid was accepted; as a scalar subquery it
-- must not return more than one row per project.
,(SELECT e.CompanyName FROM Bidding b inner join Partner e on b.PartnerId = e.PartnerId WHERE a.ProjectId = b.ProjectId and b.BidAccepted = 1) AS BidSelected
--,h.CompanyName
--Contact table column ApprovedbyCustomer will only have one 0 or 1 for same project, Partner can only create contract either for the first time or if it is rejected by Customer
,(SELECT COUNT(*) FROM Contract x WHERE a.ProjectId = x.ProjectId and x.ApprovedbyCustomer in (0,1) ) AS ContractCount
,g.ContractId
--,(SELECT c.ContractId FROM Contract c WHERE a.ProjectId = c.ProjectId and c.ApprovedbyCustomer in (1,2)) AS ContractId
,g.ProjectValue
, g.Duration
-- Any DurationType other than 1 or 2 (including NULL) is reported as 'Months'.
, (CASE g.DurationType WHEN 1 THEN 'Days' WHEN 2 THEN 'Weeks' ELSE 'Months' END) As DurationType
, g.StartDate
, g.EndDate
, g.ApprovedByCustomer
--0 - No Action, 1- Accepted, 2- Send Back
,(SELECT COUNT(*) FROM PaymentRequest e WHERE a.ProjectId = e.ProjectId) AS PaymentCount
FROM
Project a
LEFT JOIN
Contract g ON a.ProjectId = g.ProjectId
-- LEFT JOIN Partner h ON g.PartnerId = h.PartnerId
WHERE
a.CustomerId = 11111;
Try a subquery with ROW_NUMBER() OVER (PARTITION BY a.ProjectId ORDER BY g.ApprovedByCustomer) AS RowNum. In the outer query, add the criteria WHERE RowNum = 1:
-- One row per project of customer 11111: the joined contract rows of each
-- project are numbered by ApprovedByCustomer ascending and only row 1 is kept.
SELECT *
FROM
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY proj.ProjectId ORDER BY con.ApprovedByCustomer) AS RowNum,
        proj.ProjectId,
        proj.Title,
        proj.CreationDate,
        proj.Status,
        -- Whole hours left until the bid closes, measured from "now" at UTC+05:30.
        DATEDIFF(second, CAST(SWITCHOFFSET(SYSDATETIMEOFFSET(), '+05:30') AS DATETIME), BidEndDateTime) / 3600 AS BidTimeLeft,
        (SELECT COUNT(*)
         FROM Bidding AS bid
         WHERE bid.ProjectId = proj.ProjectId) AS BidsCount,
        (SELECT prt.CompanyName
         FROM Bidding AS bid
         INNER JOIN Partner AS prt
             ON prt.PartnerId = bid.PartnerId
         WHERE bid.ProjectId = proj.ProjectId
           AND bid.BidAccepted = 1) AS BidSelected,
        -- ApprovedbyCustomer holds at most one 0 or 1 row per project: a partner
        -- creates a contract the first time or after the customer sent one back.
        (SELECT COUNT(*)
         FROM Contract AS open_con
         WHERE open_con.ProjectId = proj.ProjectId
           AND open_con.ApprovedbyCustomer IN (0, 1)) AS ContractCount,
        con.ContractId,
        con.ProjectValue,
        con.Duration,
        (CASE con.DurationType
             WHEN 1 THEN 'Days'
             WHEN 2 THEN 'Weeks'
             ELSE 'Months'
         END) AS DurationType,
        con.StartDate,
        con.EndDate,
        -- 0 = no action, 1 = accepted, 2 = sent back
        con.ApprovedByCustomer,
        (SELECT COUNT(*)
         FROM PaymentRequest AS pay
         WHERE pay.ProjectId = proj.ProjectId) AS PaymentCount
    FROM Project AS proj
    LEFT JOIN Contract AS con
        ON con.ProjectId = proj.ProjectId
    WHERE proj.CustomerId = 11111
) AS ranked
WHERE RowNum = 1;
You need to add the filter to the main query:
-- NOTE(review): placed in the WHERE clause, this predicate also removes projects
-- that have no matching contract row (the LEFT JOIN then behaves like an inner
-- join); appended to the join's ON clause it keeps those projects.
and g.ApprovedbyCustomer in (0,1)

FOLLOW UP to SQL query to retrieve the latest status of a process

The original question and schema are shown at the following link:
SQL query to retrieve the latest status of a process
The solution provided by @mendosi was perfect. However, now that the deadline for submission has passed, management wants more information. I've been able to give them the information they want using the following query (incorporating the aforementioned solution into the "EXISTS" clause):
-- Lists proposal packages with their type, primary manager contact details and
-- a review action's approvalTypeID, for proposal call 7 and approval type 50.
SELECT
proposalPackage.proposalPackageID, refProposalType.name, proposalPackage.title,
[user].lastName, [user].firstName, [user].email, [user].phone,
proposalReviewAction.approvalTypeID
FROM
-- Comma join: no predicate in this statement relates proposalReviewAction to
-- proposalPackage, so every review action is paired with every package.
proposalReviewAction, proposalPackage
INNER JOIN
refProposalType ON proposalPackage.proposalTypeID = refProposalType.proposalTypeID
INNER JOIN
proposalManagerAssignment ON proposalPackage.proposalPackageID = proposalManagerAssignment.proposalPackageID
INNER JOIN
[user] ON proposalManagerAssignment.userID = [user].userID
WHERE
-- The subquery references no column of the outer query, so EXISTS is either
-- true for every outer row or false for every outer row; it does not limit the
-- result to the packages the subquery returns.
EXISTS (SELECT ls.*
FROM
(SELECT
r.proposalPackageID, r.approvalTypeID,
-- RowNr = 1 marks the most recent review action of each package.
RowNr = ROW_NUMBER() OVER (PARTITION BY r.proposalPackageID ORDER BY r.reviewedDate DESC)
FROM
proposalReviewAction AS r
JOIN
proposalPackage AS pp ON pp.proposalPackageID = r.proposalPackageID
WHERE
pp.proposalCallID = 7) AS ls
WHERE
ls.RowNr = 1
AND (ls.approvalTypeID = 50))
GROUP BY
proposalPackage.proposalTypeID, [user].lastName, [user].firstName,
[user].email, [user].phone, proposalPackage.title,
refProposalType.name, proposalManagerAssignment.isPrimary,
proposalPackage.proposalCallID, approvalTypeID,
proposalPackage.proposalPackageID, proposalReviewAction.approvalTypeID
-- These are row-level filters applied after grouping.
HAVING
(proposalManagerAssignment.isPrimary = 1)
AND (proposalPackage.proposalCallID = 7)
AND (approvalTypeID = 50)
ORDER BY
proposalPackage.proposalPackageID
My problem seems to be that the subquery in the Exists clause returns 95 rows (as it should) limiting the results to those with a status of 50.
As I understand the EXISTS clause, the results should be limited to those records that "exist" in the subquery that follows... right? So, in this case, if a record does not exist in the subquery, it will not exist in the final result...??
The problem is, I'm getting 112 records when there are only 95 records to choose from (or join on) in the results list of the subquery.
So, I tried to limit it by adding some additional qualifiers and joins to the subquery:
-- Second attempt from the question; it does not parse. Kept as posted.
SELECT
proposalPackage.proposalPackageID, refProposalType.name,
proposalPackage.title,
[user].lastName, [user].firstName, [user].email, [user].phone,
proposalReviewAction.approvalTypeID
FROM
proposalReviewAction, proposalPackage
INNER JOIN
refProposalType ON proposalPackage.proposalTypeID = refProposalType.proposalTypeID
INNER JOIN
proposalManagerAssignment ON proposalPackage.proposalPackageID = proposalManagerAssignment.proposalPackageID
INNER JOIN
[user] ON proposalManagerAssignment.userID = [user].userID
WHERE
EXISTS (SELECT ls.*
FROM
(SELECT
r.proposalPackageID,
r.approvalTypeID,
RowNr = ROW_NUMBER() OVER (PARTITION BY r.proposalPackageID ORDER BY r.reviewedDate DESC)
FROM
proposalReviewAction AS r
JOIN
proposalPackage AS pp ON pp.proposalPackageID = r.proposalPackageID
WHERE
pp.proposalCallID = 7) AS ls
WHERE
ls.RowNr = 1
-- NOTE(review): EXISTS (...) is a predicate, not a table source, so it cannot
-- take an alias; "AS distinctified" is where the reported syntax error arises.
AND (ls.approvalTypeID = 50)) AS distinctified
-- NOTE(review): joins belong to the FROM clause and cannot follow WHERE; to join
-- against the latest-status rows, the subquery has to be a derived table in FROM.
INNER JOIN
proposalPackage ON distinctified.proposalPackageID = proposalPackage.proposalPackageID
INNER JOIN
refProposalApprovalType ON distinctified.approvalTypeID = refProposalApprovalType.approvalTypeID
GROUP BY
proposalPackage.proposalTypeID, [user].lastName, [user].firstName,
[user].email, [user].phone, proposalPackage.title, refProposalType.name,
proposalManagerAssignment.isPrimary, proposalPackage.proposalCallID,
approvalTypeID, proposalPackage.proposalPackageID, proposalReviewAction.approvalTypeID
HAVING
(proposalManagerAssignment.isPrimary = 1)
AND (proposalPackage.proposalCallID = 7)
AND (distinctified.approvalTypeID = 50)
ORDER BY
proposalPackage.proposalPackageID
Now, when I add the "AS distinctified" statement with a couple of JOINS to the subquery, I get a "SYNTAX ERROR near AS" error. I also get an "Expecting ( or SELECT" at each of the "HAVING" qualifiers.
I don't think I'm making this too complicated but that remains a possibility. It seems to me it is a matter (at this point) of overlooking a character somewhere.
Thanks in advance for the assist... AGAIN!!
This isn't really an answer to your much more complex example, but it should explain what the root cause is hopefully?
-- Minimal demonstration of correlated vs. uncorrelated EXISTS using two table
-- variables: @x holds 1, 2, 3 and @y holds 1, 3.
DECLARE @x TABLE (id INT);
INSERT INTO @x (id) SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3;
DECLARE @y TABLE (id INT);
INSERT INTO @y (id) SELECT 1 UNION ALL SELECT 3;
--This is what you are doing
-- The subquery does not look at the outer row: @y is non-empty, so EXISTS is
-- true for every row and all of 1, 2, 3 come back.
SELECT x.id FROM @x x WHERE EXISTS (SELECT * FROM @y);
--This is what you should be doing
-- Correlated on id: only 1 and 3 come back.
SELECT x.id FROM @x x WHERE EXISTS (SELECT * FROM @y y WHERE y.id = x.id);
I really have no idea what you are trying to accomplish but here is what your query might look like with some aliases and formatting. I also moved the joins before the where clause and removed the alias on your EXISTS predicate. But you are referencing distinctified in the code which I just don't get. As such there are some issues in this query still but without an understanding of what the need is I don't know what to do to help.
-- Reformatted and aliased version of the question's second query. It is a
-- readability aid only and still does not run, as the notes below point out.
SELECT pp.proposalPackageID
, pt.name
, pp.title
, u.lastName
, u.firstName
, u.email
, u.phone
, pra.approvalTypeID
FROM proposalReviewAction pra
--, proposalPackage pp --why is this table here? It is joined to again later
-- NOTE(review): the next two joins reference alias pp before the join that
-- introduces it further down.
INNER JOIN refProposalType pt ON pp.proposalTypeID = pt.proposalTypeID
INNER JOIN proposalManagerAssignment ma ON pp.proposalPackageID = ma.proposalPackageID
INNER JOIN [user] u ON ma.userID = u.userID
-- NOTE(review): "distinctified" is not defined anywhere in this statement.
INNER JOIN proposalPackage pp ON distinctified.proposalPackageID = pp.proposalPackageID
INNER JOIN refProposalApprovalType pat ON distinctified.approvalTypeID = pat.approvalTypeID
WHERE EXISTS
(
-- The subquery references no outer column, so it does not filter per package.
SELECT ls.*
FROM
(
SELECT r.proposalPackageID,
r.approvalTypeID,
RowNr = ROW_NUMBER() OVER (PARTITION BY r.proposalPackageID ORDER BY r.reviewedDate DESC)
FROM proposalReviewAction AS r
JOIN proposalPackage AS pp2 ON pp2.proposalPackageID = r.proposalPackageID
WHERE pp2.proposalCallID = 7
) AS ls
WHERE ls.RowNr = 1
AND ls.approvalTypeID = 50
)
GROUP BY pp.proposalTypeID
, u.lastName
, u.firstName
, u.email
, u.phone
, pp.title
, pt.name
, ma.isPrimary
, pp.proposalCallID
, approvalTypeID
, pp.proposalPackageID
, pra.approvalTypeID
HAVING ma.isPrimary = 1
AND pp.proposalCallID = 7
AND distinctified.approvalTypeID = 50
ORDER BY pp.proposalPackageID
So, I figured it out... once I realized I had the wrong understanding of EXISTS (thanks @Richard Hansel). Final query (properly formatted and aliased):
-- Proposal packages of call 7 whose MOST RECENT review action has approval
-- type 50, with the package type and the primary manager's contact details.
--
-- The derived table "latest" numbers each package's review actions from newest
-- to oldest; RowNr = 1 is therefore the package's current status. Joining the
-- outer query to that row ties the status filter to the package itself. An
-- EXISTS that only compares pra to pp would instead accept any package that
-- ever had a type-50 action.
SELECT latest.proposalPackageID
     , r_pt.name
     , latest.title
     , u.lastName
     , u.firstName
     , u.email
     , u.phone
     , latest.approvalTypeID
FROM (
        SELECT pp.proposalPackageID
             , pp.proposalTypeID
             , pp.title
             , pra.approvalTypeID
             , ROW_NUMBER() OVER (PARTITION BY pra.proposalPackageID
                                  ORDER BY pra.reviewedDate DESC) AS RowNr
        FROM proposalReviewAction AS pra
        INNER JOIN proposalPackage AS pp
            ON pp.proposalPackageID = pra.proposalPackageID
        WHERE pp.proposalCallID = 7
     ) AS latest
INNER JOIN refProposalType AS r_pt
    ON r_pt.proposalTypeID = latest.proposalTypeID
INNER JOIN proposalManagerAssignment AS pma
    ON pma.proposalPackageID = latest.proposalPackageID
INNER JOIN [user] AS u
    ON u.userID = pma.userID
-- Row-level filters belong in WHERE. With exactly one status row per package
-- there is no row multiplication left for a GROUP BY to collapse.
WHERE latest.RowNr = 1
  AND latest.approvalTypeID = 50
  AND pma.isPrimary = 1
ORDER BY latest.proposalTypeID

Top N percent Desc and Top M percent Asc

I am trying to get the top 5 customer types and show data for each of them; the balance (which can be any number of customer types) is shown as "Other Customer Types". My issue is that the number of rows varies and is not evenly divisible by the percentages, so with TOP ... PERCENT a customer type from the top 5 can also show up in the "Other" group, which overstates the total sales.
the Data is also being rendered in SSRS
My code using TOP PERCENT:
-- Question's attempt: the top 25 percent of customer types by YTD sales,
-- UNION ALL the bottom 75 percent relabelled 'Other', then re-aggregated.
-- The two TOP ... PERCENT row counts are each rounded independently, so a
-- customer type can fall into both halves and be counted twice.
-- NOTE(review): "#exrt", "#FStartDate", "#EndDate" and "#Vendid" are presumably
-- the parameters @exrt, @FStartDate, @EndDate and @Vendid ("@" garbled to "#"
-- in this copy) - confirm.
select final.[description], sum(final.YTDSales$) as YTDSales$
FROM(
select top 25 percent pytd2.[Description], sum(pytd2.YTDSales$) as YTDSales$
FROM(
-- ytd sales
select re.SIC_Desc as [description], sum((ol.NetAmt - ol.WhlOrdDiscAmt) / #exrt) AS YTDSales$
from dbo.order_line_invoice ol
INNER JOIN dbo.Vendor vd ON ol.Cono = vd.Cono AND vd.VendId = ol.VendId
inner join Product_Warehouse pw on ol.ProdId = pw.prodid and ol.WhseId = pw.whseid and ol.cono = pw.cono
inner join Customer c on ol.custId = c.CustId and ol.Cono = c.Cono
left join MDData.dbo.RetailEnvironment re on c.SIC = re.SIC
where ol.InvoiceDate BETWEEN #FStartDate AND #EndDate AND ol.Cono = 1 and ol.VendId IN(#Vendid) and ol.prodcatid NOT LIKE 'GP%'
group by re.SIC_Desc
)PYTD2
group by pytd2.[description]
-- NOTE(review): an ORDER BY directly in front of UNION ALL is presumably not
-- accepted unless the branch is wrapped in its own derived table - confirm.
order by sum(pytd2.YTDSales$) DESC
UNION ALL
select top 75 percent 'Other' as 'description', sum(pytd.YTDSales$) as YTDSales$
FROM(
-- ytd sales
select re.SIC_Desc as [description], sum((ol.NetAmt - ol.WhlOrdDiscAmt) / #exrt) AS YTDSales$
from dbo.order_line_invoice ol
INNER JOIN dbo.Vendor vd ON ol.Cono = vd.Cono AND vd.VendId = ol.VendId
inner join Product_Warehouse pw on ol.ProdId = pw.prodid and ol.WhseId = pw.whseid and ol.cono = pw.cono
inner join Customer c on ol.custId = c.CustId and ol.Cono = c.Cono
left join MDData.dbo.RetailEnvironment re on c.SIC = re.SIC
where ol.InvoiceDate BETWEEN #FStartDate AND #EndDate AND ol.Cono = 1 and ol.VendId IN(#Vendid) and ol.prodcatid NOT LIKE 'GP%'
group by re.SIC_Desc
)PYTD
-- NOTE(review): "Ppytd" matches no alias in this statement (the derived table
-- is named PYTD) - presumably a typo.
group by Ppytd.[description]
order by sum(pytd.YTDSales$)
)final
group by final.[Description]
order by sum(final.YTDSales$) DESC
my results:
As you can see the Large Independent and Other has the same figure of $2280.60 in YTDQty since it is being repeated
I was picturing something like this:
-- Top five customer types by YTD sales, with every remaining type rolled up
-- into a single 'Others' row, so each type is counted exactly once.
with data as (
-- your base query here grouped and summarized by customer type
), rankedData as (
    -- Rank 1 = highest YTD sales. ROW_NUMBER gives every type a distinct rank,
    -- so exactly five types are kept even when sales tie.
    select *, row_number() over (order by YTDSales$ desc) as CustTypeRank
    from data
)
select
    -- Ranks 1-5 each form their own group, so min() just picks that group's
    -- single value; everything else shares bucket 999. CustTypeRank has to be
    -- wrapped in an aggregate here because it is not itself a GROUP BY
    -- expression.
    case when min(CustTypeRank) <= 5 then min([description]) else 'Others' end as [description],
    sum(YTDSales$) as YTDSales$
from rankedData
group by case when CustTypeRank <= 5 then CustTypeRank else 999 end
-- Top five in rank order; the 'Others' bucket (lowest rank in it is 6) comes last.
order by min(CustTypeRank)
I actually used RANK instead which worked great :-
-- Prior-year-to-date quantity per customer type (SIC) and vendor, ranked by
-- prior-year net sales (NetAmt - WhlOrdDiscAmt), highest first.
-- rankytd and YTDQty are constant 0 placeholders in this result set.
-- @pcat and @Vendid are used inside IN (...); presumably multi-value report
-- parameters expanded by SSRS - confirm.
SELECT
    0 AS rankytd,
    RANK() OVER (ORDER BY SUM(ol.NetAmt - ol.WhlOrdDiscAmt) DESC) AS rankpytd,
    re.sic,
    ol.VendId,
    vd.name,
    re.SIC_Desc AS [description],
    0 AS YTDQty,
    SUM(ol.Quantity) AS PYTDQty
FROM dbo.order_line_invoice AS ol
INNER JOIN dbo.Vendor AS vd
    ON ol.Cono = vd.Cono
   AND vd.VendId = ol.VendId
INNER JOIN dbo.Product AS p
    ON ol.Cono = p.Cono
   AND ol.prodid = p.ProdId
   AND p.ProdCatId IN (@pcat)
INNER JOIN Product_Warehouse AS pw
    ON ol.ProdId = pw.prodid
   AND ol.WhseId = pw.whseid
   AND ol.cono = pw.cono
INNER JOIN Customer AS c
    ON ol.custId = c.CustId
   AND ol.Cono = c.Cono
-- LEFT JOIN: customers whose SIC has no RetailEnvironment row stay in the result.
LEFT JOIN MDData.dbo.RetailEnvironment AS re
    ON c.SIC = re.SIC
-- Same date window as the current-year query, shifted back one year.
WHERE ol.InvoiceDate BETWEEN DATEADD(YEAR, -1, @FStartDate) AND DATEADD(YEAR, -1, @EndDate)
  AND ol.Cono = 1
  AND ol.VendId IN (@Vendid)
  AND ol.prodcatid NOT LIKE 'GP%'
GROUP BY re.sic, ol.VendId, vd.Name, re.SIC_Desc

GROUP BY in SQL Server in complex query

I need to group this by T.TopicID to only receive the last result.
Whatever I try, I get errors saying that the other T. columns aren't included in the GROUP BY clause or an aggregate function.
-- Returns the topics followed by the given user, joined to their active
-- messages created before the last-visit time, with a row number over the
-- whole result set.
-- NOTE(review): "#id", "#UserGroupId" and "#lastvisit" are presumably the
-- parameters @id, @UserGroupId and @lastvisit ("@" garbled to "#" in this
-- copy) - confirm. The UserGroupId parameter is not referenced in the body.
ALTER PROCEDURE [dbo].[SPGetFollowingTopics]
#id int = null
,#UserGroupId int = null
,#lastvisit DateTime = null
AS
SELECT *
FROM
(SELECT
-- No PARTITION BY: rows are numbered across all topics, ordered by TopicOrder
-- and then by the later of message date and topic creation date.
ROW_NUMBER() OVER (ORDER BY TopicOrder DESC,
(CASE
WHEN M.MessageCreationDate > T.TopicCreationDate
THEN M.MessageCreationDate
ELSE T.TopicCreationDate
END) DESC) AS RowNumber,
T.TopicId, T.TopicTitle, T.TopicShortName,
T.TopicDescription, T.TopicCreationDate, T.TopicViews,
T.TopicReplies, T.UserId, T.TopicTags, T.TopicIsClose,
T.TopicOrder, T.LastMessageId, U.UserName,
M.MessageCreationDate, T.ReadAccessGroupId,
T.PostAccessGroupId, TF.userid AS Expr1, U.UserGroupId,
U.UserPhoto, U.UserFullName, M.UserId AS MessageUserId,
MU.UserName AS MessageUserName
FROM
Topics AS T
-- The message filters sit in the ON clause, so topics without a qualifying
-- message are kept (with NULL message columns); a topic with several
-- qualifying messages appears once per message.
LEFT OUTER JOIN
Messages AS M ON M.TopicId = T.TopicId AND M.Active = 1 AND M.MessageCreationDate < #lastvisit
INNER JOIN
topicfollows AS TF ON T.TopicId = TF.topicid
-- U is the topic's author, MU the author of the joined message.
INNER JOIN
Users AS U ON U.UserId = T.UserId
LEFT JOIN
Users MU ON MU.UserId = M.UserId
WHERE
(TF.userid = #id)
) T
It isn't clear what the requirement is (in my view) but I think you are seeking:
"the latest message"
PER TOPIC
for a given user
In this situation ROW_NUMBER() is a good option but I believe you need to PARTITION the ROW_NUMBER as well as ordering it.
-- Latest activity per followed topic for user @id: the joined rows of each
-- topic are numbered newest-first and only row 1 is returned.
SELECT
    *
FROM (
    SELECT
        -- Restart the numbering for every (follower, topic) pair; the sort key
        -- is the message date when it is later than the topic's creation date,
        -- otherwise the creation date (also used when there is no message).
        ROW_NUMBER() OVER (PARTITION BY TF.userid, T.TopicId
                           ORDER BY
                               (CASE
                                    WHEN M.MessageCreationDate > T.TopicCreationDate THEN M.MessageCreationDate
                                    ELSE T.TopicCreationDate
                                END) DESC) AS ROWNUMBER
        , T.TopicId, T.TopicTitle, T.TopicShortName, T.TopicDescription
        , T.TopicCreationDate, T.TopicViews, T.TopicReplies, T.UserId
        , T.TopicTags, T.TopicIsClose, T.TopicOrder, T.LastMessageId
        , U.UserName, M.MessageCreationDate, T.ReadAccessGroupId
        , T.PostAccessGroupId, TF.userid AS EXPR1
        , U.UserGroupId, U.UserPhoto, U.UserFullName
        , M.UserId AS MESSAGEUSERID, MU.UserName AS MESSAGEUSERNAME
    FROM Topics AS T
    -- Message filters stay in the ON clause so topics without a qualifying
    -- message are still returned.
    LEFT JOIN Messages AS M
        ON M.TopicId = T.TopicId
       AND M.Active = 1
       AND M.MessageCreationDate < @lastvisit
    INNER JOIN topicfollows AS TF
        ON T.TopicId = TF.topicid
    INNER JOIN Users AS U
        ON U.UserId = T.UserId
    LEFT JOIN Users AS MU
        ON MU.UserId = M.UserId
    WHERE TF.userid = @id
) T
WHERE ROWNUMBER = 1
You could change your left join to an outer apply, and add TOP 1:
-- Fragment: replaces the LEFT JOIN to Messages. For every topic, the applied
-- subquery returns at most one row - the newest active message created before
-- @lastvisit. OUTER APPLY keeps topics that have no such message.
SELECT ...
FROM
    Topics AS T
OUTER APPLY
    (   SELECT TOP 1 M.MessageCreationDate, M.UserId
        FROM Messages AS M
        WHERE M.TopicId = T.TopicId
          AND M.Active = 1
          AND M.MessageCreationDate < @lastvisit
        -- The ORDER BY is what makes TOP 1 pick the latest message.
        ORDER BY M.MessageCreationDate DESC
    ) AS m
This allows you to use TOP 1 and still get one row per topicID
Alternatively you can use ROW_NUMBER() OVER(PARTITION BY m.TopicID ORDER BY M.MessageCreationDate DESC)
-- Fragment: alternative to OUTER APPLY. Active messages created before
-- @lastvisit are numbered newest-first within each topic, and only row 1 of
-- each topic is joined.
SELECT ...
FROM
    Topics AS T
LEFT JOIN
    (   SELECT M.TopicId,
               M.MessageCreationDate,
               M.UserId,
               RowNum = ROW_NUMBER() OVER (PARTITION BY M.TopicId ORDER BY M.MessageCreationDate DESC)
        FROM Messages AS M
        WHERE M.Active = 1
          AND M.MessageCreationDate < @lastvisit
    ) AS m
        ON m.TopicId = T.TopicId
       -- Kept in the ON clause so topics without any message are still returned.
       AND m.RowNum = 1
I would test both methods and see which one works best for you.

Resources