SQL Server - Query with MAX(CASE ..) and GROUP BY runs too long - sql-server

I added to my query max() and group by to exclude duplicates and now it's running very long, 56 minutes.
But before adding max() and group by it was running 7-8 min. It's ok, I'm using aggregate view in join and it takes time.
Without max() and group by I had duplicates of customers who opted-in and not opted-in. Where I wanted just to put value "1" if I see that customer has OptIn as "1".
Here is my code:
WITH
cteURC AS (
SELECT
distinct scvid
,MAX(CASE WHEN who = 'urc' AND OptIn = '1' THEN 1 ELSE 0 END) AS URC_OptIn
FROM
scv.OptIn
where who = 'urc'
),
cteSponsor AS (
SELECT
distinct scvid
,MAX(CASE WHEN who = 'sponsor/3rd party' AND OptIn = '1' THEN 1 ELSE 0 END) AS Partner_OptIn
FROM
scv.OptIn
where who = 'sponsor/3rd party'
GROUP BY scvid
),
cteLeinster AS (
SELECT
distinct scvid
,MAX(CASE WHEN who = 'leinster rugby' AND OptIn = '1' THEN 1 ELSE 0 END) AS Leinster_OptIn
FROM
scv.OptIn
where who = 'leinster rugby'
)
SELECT distinct(c.[ScvId])
,Title
,FirstName
,LastName
,EmailAddress
,DateOfBirth
,Address1
,City
,Telephone
,Gender
,URC_OptIn
,Partner_OptIn
,Leinster_OptIn
,MAX(CASE WHEN c.ScvId = T.ScvId AND ProductId = '2019' THEN 1 ELSE 0 END) AS FinalTicketPurchaser_2019
,MAX(CASE WHEN c.ScvId = T.ScvId AND ProductId = '2018' THEN 1 ELSE 0 END) AS FinalTicketPurchaser_2018
FROM scv.vwCustomer c
LEFT JOIN cteURC U ON c.scvid = U.scvid
LEFT JOIN cteSponsor S ON c.ScvId = S.ScvId
LEFT JOIN cteLeinster L on c.ScvId = L.ScvId
LEFT JOIN vwAggTransaction T ON c.ScvId = T.ScvId
group by c.[ScvId]
Title,
FirstName,
LastName,
EmailAddress,
DateOfBirth,
Address1,
City,
Telephone,
Gender,
URC_OptIn,
Partner_OptIn,
Leinster_OptIn,
order by c.scvid
How can I change query to make it run quicker?
Update
I became brave and re-wrote query. I was hesitating as my friend wrote the ctes and I thought that they are necessary.
This version works well and fast. Thank you everyone. I really appreciate your input!
SELECT (c.[ScvId])
,Title
,FirstName
,LastName
,EmailAddress
,DateOfBirth
,Address1
,City
,Telephone
,Gender
,MAX(CASE WHEN who = 'urc' AND OptIn = '1' THEN 1 ELSE 0 END) AS URC_OptIn
,MAX(CASE WHEN who = 'sponsor/3rd party' AND OptIn = '1' THEN 1 ELSE 0 END) AS Partner_OptIn
,MAX(CASE WHEN who = 'leinster rugby' AND OptIn = '1' THEN 1 ELSE 0 END) AS Leinster_OptIn
,MAX(CASE WHEN who = 'connacht rugby' AND OptIn = '1' THEN 1 ELSE 0 END) AS Connacht_OptIn
,MAX(CASE WHEN c.ScvId = T.ScvId AND ProductId = '2019' THEN 1 ELSE 0 END) AS FinalTicketPurchaser_2019
,MAX(CASE WHEN c.ScvId = T.ScvId AND ProductId = '2018' THEN 1 ELSE 0 END) AS FinalTicketPurchaser_2018
FROM vwCustomer c
LEFT JOIN OptIn o ON c.scvid = o.scvid
LEFT JOIN vwAggTransaction T ON c.ScvId = T.ScvId
group by c.[ScvId]
,Title
,FirstName
,LastName
,EmailAddress
,DateOfBirth
,Address1
,City
,Telephone
,Gender
order by c.scvid

I'm taking a stab at it without more information. I converted the CTE into OUTER APPLY. You might need to tweak it a bit for the cases when OptIn=0. Give it a try and see if the performance is any better.
SELECT distinct c.[ScvId]
,Title
,FirstName
,LastName
,EmailAddress
,DateOfBirth
,Address1
,City
,Telephone
,Gender
,URC_OptIn
,Partner_OptIn
,Leinster_OptIn
,MAX(CASE WHEN c.ScvId = T.ScvId AND ProductId = '2019' THEN 1 ELSE 0 END) AS FinalTicketPurchaser_2019
,MAX(CASE WHEN c.ScvId = T.ScvId AND ProductId = '2018' THEN 1 ELSE 0 END) AS FinalTicketPurchaser_2018
FROM scv.vwCustomer c
OUTER APPLY (
SELECT TOP 1 OptIn AS URC_OptIn
FROM scv.OptIn oiu
WHERE oiu.scvid = c.Scvid AND oiu.OptIn = '1' AND oiu.who = 'urc'
) AS U
OUTER APPLY (
SELECT TOP 1 OptIn AS Partner_OptIn
FROM scv.OptIn ois
WHERE ois.scvid = c.Scvid AND ois.OptIn = '1' AND ois.who = 'sponsor/3rd party'
) AS S
OUTER APPLY (
SELECT TOP 1 OptIn AS Leinster_OptIn
FROM scv.OptIn oil
WHERE oil.scvid = c.Scvid AND oil.OptIn = '1' AND oil.who = 'leinster rugby'
) AS L
LEFT JOIN vwAggTransaction T ON c.ScvId = T.ScvId
GROUP BY c.[ScvId]
Title,
FirstName,
LastName,
EmailAddress,
DateOfBirth,
Address1,
City,
Telephone,
Gender,
URC_OptIn,
Partner_OptIn,
Leinster_OptIn,
ORDER BY c.scvid

Related

Better performance for running this query

I want to improve the performance for running the following SQL.
Apparently its taking up 3.28 minutes to run 784 for the following query,
is there a better practice, or better solution to improve the performance of the code? Thanks so much
I've tried with Sum, Count, pivot and looks like Count is faster
with Accounts as
(select ID,displayName AccountName from u3_system.dbo.account with(nolock)
where
ID = '0439b9d9-f2c9-43dc-aa2d-08cdea94db21'
or ID = 'f815d2dd-e118-4930-a67d-08cf28b410e2'
or ID = 'f606fcab-dcbd-4d02-8161-08d635cd8971'
or ID = '7d8f44d4-0f17-41f1-87e2-08cf28b4343b'),
Campaign as
(select C.ID, Accounts.AccountName,C.displayName [Mailout Folder] from Accounts
left join u3_mail.dbo.Campaign C with(nolock)
on C.listID = Accounts.ID),
mo as
(select
campaign.AccountName
,campaign.[Mailout Folder]
,mo.ID [MailOutID]
,mo.displayName [Mailout Name]
,[date] [Send Date]
,Details.value('(//content)[1]/subject[1]', 'varchar(200)') AS [Subject Line]
from campaign
inner join [u3_mail].[dbo].[mailout] mo WITH(NOLOCK)
on mo.campaignID = campaign.ID),
ML as
(select mailoutID,
count(mailoutId) [Total Messages],
count(case when status = 'Delivered' then 1 else null end) Delivered,
count(case when status <> 'Delivered' then 1 else null end) Undelivered,
count(case when ReadDate is not null then 1 else null end) [Read],
count(case when HasClicked = '1' then 1 else null end) [Unique Clicks],
count(case when BounceDate is not null then 1 else null end) [Bounced],
count(case when OptOutDate is not null then 1 else null end) [Opted Out]
from
[u3_data].[data].[MailLog_0439b9d9f2c943dcaa2d08cdea94db21] ML with(nolock)
group by MailoutID
union
select mailoutID,
count(mailoutId) [Total Messages],
count(case when status = 'Delivered' then 1 else null end) Delivered,
count(case when status <> 'Delivered' then 1 else null end) Undelivered,
count(case when ReadDate is not null then 1 else null end) [Read],
count(case when HasClicked = '1' then 1 else null end) [Unique Clicks],
count(case when BounceDate is not null then 1 else null end) [Bounced],
count(case when OptOutDate is not null then 1 else null end) [Opted Out]
from
[u3_data].[data].[MailLog_f815d2dde1184930a67d08cf28b410e2] ML with(nolock)
group by MailoutID
union
select mailoutID,
count(mailoutId) [Total Messages],
count(case when status = 'Delivered' then 1 else null end) Delivered,
count(case when status <> 'Delivered' then 1 else null end) Undelivered,
count(case when ReadDate is not null then 1 else null end) [Read],
count(case when HasClicked = '1' then 1 else null end) [Unique Clicks],
count(case when BounceDate is not null then 1 else null end) [Bounced],
count(case when OptOutDate is not null then 1 else null end) [Opted Out]
from
[u3_data].[data].[MailLog_f606fcabdcbd4d02816108d635cd8971] ML with(nolock)
group by MailoutID
union
select mailoutID,
count(mailoutId) [Total Messages],
count(case when status = 'Delivered' then 1 else null end) Delivered,
count(case when status <> 'Delivered' then 1 else null end) Undelivered,
count(case when ReadDate is not null then 1 else null end) [Read],
count(case when HasClicked = '1' then 1 else null end) [Unique Clicks],
count(case when BounceDate is not null then 1 else null end) [Bounced],
count(case when OptOutDate is not null then 1 else null end) [Opted Out]
from
[u3_data].[data].[MailLog_7d8f44d40f1741f187e208cf28b4343b] ML with(nolock)
group by MailoutID
),
ME as
(select MailoutID,
count(case when [Event] = 'LinkClicked' then 1 else null end) [Total Clicks],
count(case when [Event] = 'AbuseReport' then 1 else null end) [Mark as Spam]
from
[u3_data].[data].[MailEvent_0439b9d9f2c943dcaa2d08cdea94db21] ME with(nolock)
inner join [u3_data].[data].[MailLog_0439b9d9f2c943dcaa2d08cdea94db21] ML with(nolock)
on ML.ID = ME.messageID
where [Event] in ('LinkClicked','AbuseReport')
group by MailoutID
union
select MailoutID,
count(case when [Event] = 'LinkClicked' then 1 else null end) [Total Clicks],
count(case when [Event] = 'AbuseReport' then 1 else null end) [Mark as Spam]
from
[u3_data].[data].[MailEvent_f815d2dde1184930a67d08cf28b410e2] ME with(nolock)
inner join [u3_data].[data].[MailLog_f815d2dde1184930a67d08cf28b410e2] ML with(nolock)
on ML.ID = ME.messageID
where [Event] in ('LinkClicked','AbuseReport')
group by MailoutID
union
select MailoutID,
count(case when [Event] = 'LinkClicked' then 1 else null end) [Total Clicks],
count(case when [Event] = 'AbuseReport' then 1 else null end) [Mark as Spam]
from
[u3_data].[data].[MailEvent_f606fcabdcbd4d02816108d635cd8971] ME with(nolock)
inner join [u3_data].[data].[MailLog_f606fcabdcbd4d02816108d635cd8971] ML with(nolock)
on ML.ID = ME.messageID
where [Event] in ('LinkClicked','AbuseReport')
group by MailoutID
union
select MailoutID,
count(case when [Event] = 'LinkClicked' then 1 else null end) [Total Clicks],
count(case when [Event] = 'AbuseReport' then 1 else null end) [Mark as Spam]
from
[u3_data].[data].[MailEvent_7d8f44d40f1741f187e208cf28b4343b] ME with(nolock)
inner join [u3_data].[data].[MailLog_7d8f44d40f1741f187e208cf28b4343b] ML with(nolock)
on ML.ID = ME.messageID
where [Event] in ('LinkClicked','AbuseReport')
group by MailoutID)
select
mo.AccountName [Account Name]
,mo.[Mailout Folder]
,mo.[Mailout Name]
,mo.[Send Date]
,mo.[Subject line]
,ml.[Total Messages]
,ml.Delivered
,ml.Undelivered
,ml.[Read]
,ml.Delivered - ml.[Read] [Unread]
,ml.[Unique Clicks]
,me.[Total Clicks]
,ml.[Bounced]
,ml.[Opted Out]
,me.[Mark as Spam]
from mo
left join ml
on ml.mailoutID = mo.mailoutID
left join me
on me.mailoutID = ml.mailoutID

Need to add a subquery in a CASE expression that is part of an aggregate function

I need to add the number of records in which the status is set to 'reopened'. But the 'reopened' status has several IDs.
This is the subquery that will Id the 'reopen' statuses:
SELECT (CASE WHEN s.sr_status_recid = 1 THEN 1 ELSE 0 END) AS Reopened
from v_rpt_service s
where vsrv.sr_status_recid in
(select distinct SR_Status_RecID from SR_Status where [Description] like '%Re-opened%'))
This is the main query that the above query needs to be a part:
SELECT DATEPART(WK, vsrv.date_entered) as WkNumber,
COUNT(vsrv.TicketNbr) AS OpenedIssues, --total ticket count
SUM(CASE WHEN vsrv.Closed_Flag = 1 THEN 1 ELSE 0 END) AS ClosedIssues, --sum of tickets with closed_flag = 1
(SELECT SUM(CASE WHEN s.sr_status_recid = 1 THEN 1 ELSE 0 END)
from v_rpt_service s
where vsrv.sr_status_recid in
(select distinct SR_Status_RecID from SR_Status where [Description] like '%Re-opened%')) AS ReopenedIssues,
SUM(CASE WHEN vsrvy.Surveys_Completed = 1 THEN 1 ELSE 0 END) AS SurveysCompletedWithConnectWise, -- Surveys_Completed flag in view is 1
SUM(CASE WHEN Source = 'Portal' THEN 1 ELSE 0 END) AS IssueLoggedPortal,
SUM(CASE WHEN Source = 'Email Connector' THEN 1 ELSE 0 END) AS IssueLoggedEmai
FROM v_rpt_service vsrv LEFT OUTER JOIN v_rpt_SurveysByTicket vsrvy ON vsrv.TicketNbr = Vsrvy.SR_Service_RecID
WHERE vsrv.company_name <> 'XYZ Test Company' AND vsrv.date_entered BETWEEN '01/01/2016' AND '10/07/2016'
GROUP BY DATEPART(WK, vsrv.date_entered)
ORDER BY WkNumber
How can I have a subquery that uses a CASE statement and the CASE statement is aggregated?
You can use CROSS APPLY
SELECT DATEPART(WK, vsrv.date_entered) as WkNumber,
COUNT(vsrv.TicketNbr) AS OpenedIssues, --total ticket count
SUM(CASE WHEN vsrv.Closed_Flag = 1 THEN 1 ELSE 0 END) AS ClosedIssues, --sum of tickets with closed_flag = 1
SUM(CountReopen.YesNo) AS NumberOfReopen,
SUM(CASE WHEN vsrvy.Surveys_Completed = 1 THEN 1 ELSE 0 END) AS SurveysCompletedWithConnectWise, -- Surveys_Completed flag in view is 1
SUM(CASE WHEN Source = 'Portal' THEN 1 ELSE 0 END) AS IssueLoggedPortal,
SUM(CASE WHEN Source = 'Email Connector' THEN 1 ELSE 0 END) AS IssueLoggedEmail
FROM v_rpt_service vsrv
LEFT OUTER JOIN v_rpt_SurveysByTicket vsrvy
ON vsrv.TicketNbr = Vsrvy.SR_Service_RecID
CROSS APPLY (
SELECT IIF(COUNT(*) > 0,1,0) YesNo
FROM SR_Status
where [Description] like '%Re-opened%'
AND SR_Status_ID = vsrv.sr_status_recid
) CountReopen(YesNo)
WHERE vsrv.company_name <> 'XYZ Test Company'
AND vsrv.date_entered BETWEEN '01/01/2016' AND '10/07/2016'
GROUP BY DATEPART(WK, vsrv.date_entered)
ORDER BY WkNumber

Create new columns depending on the columns in a table

I have code in the this following example.
legacy_id phone_type phone_number
1 f 1234567890
1 b 1233854100
1 f 4110256565
2 f 0707070770
2 b 7895120044
I want the data to end up like the following.
legacy_id f_phone_number_1 b_phone_number_1 f_phone_number_2
1 1234567890 1233854100 4110256565
2 0707070770 7895120044
My initial approach works but I was hoping there is a more efficient what of doing this.
Select fill.legacy_id, max(fill.f_phone_number_1),max(fill.b_phone_number_1),max(fill.f_phone_number_2)
from
(
Select
a.legacy_id as legacy_id, a.phone_type as phone_type,
case
when a.phone_type = 'F' then a.phone_number and
dense_rank() over (partition by a.legacy_id, a.phone_type order by a.legacy_id, a.phone_type, a.phone_number) = 1
else null
end as f_phone_number_1,
case
when a.phone_type = 'F' then a.phone_number and
dense_rank() over (partition by a.legacy_id, a.phone_type order by a.legacy_id, a.phone_type, a.phone_number) = 2
else null
end as f_phone_number_2,
case
when a.phone_type = 'b' then a.phone_number and
dense_rank() over (partition by a.legacy_id, a.phone_type order by a.legacy_id, a.phone_type, a.phone_number) = 1
else null
end as b_phone_number_1
from table a
group by a.legacy_id, a.phone_type, a.phone_number
) fill
group by fill.legacy_id
Is there a more efficient way of approaching this?
If you don't need to go dynamic, a conditional aggregation should do the trick
Declare #YourTable table (legacy_id int,phone_type varchar(25),phone_number varchar(25))
Insert Into #YourTable values
(1,'f','1234567890'),
(1,'b','1233854100'),
(1,'f','4110256565'),
(2,'f','0707070770'),
(2,'b','7895120044')
Select legacy_id
,f_phone_number_1 = max(case when phone_type='f' and RowNr=1 Then phone_number else '' end)
,b_phone_number_1 = max(case when phone_type='b' and RowNr=1 Then phone_number else '' end)
,f_phone_number_2 = max(case when phone_type='f' and RowNr=2 Then phone_number else '' end)
,b_phone_number_2 = max(case when phone_type='b' and RowNr=2 Then phone_number else '' end)
From (
Select *
,RowNr=Row_Number() over (Partition By legacy_id,phone_type Order By (Select NULL) )
From #YourTable
) A
Group By legacy_id
Returns
legacy_id f_phone_number_1 b_phone_number_1 f_phone_number_2 b_phone_number_2
1 4110256565 1233854100 1234567890
2 0707070770 7895120044

how to join table group by count

I have query_1:
select id,
count(case when no01 ='B' then 1 END) +
count(case when no02='B' then 1 END) +
count(case when no03='B' then 1 END) as Count_All
From tabel_a
where date ='20150201'
group by id
and also I have query_2:
select id, COUNT (*) from tabel_b
where ids <> 'T' and idt ='C'
group by id
How can I join query_1 with query_2 via id?
You could try using sub selects. Something like
SELECT *
FROM (
select id,
count(case when no01 ='B' then 1 END) + count(case when no02='B' then 1 END) + count(case when no03='B' then 1 END) as Count_All
From tabel_a
where date ='20150201'
group by id
) a INNER JOIN
(
select id,
COUNT (*) cnt
from tabel_b
where ids <> 'T'
and idt ='C'
group by id
) b ON a.ID = b.ID
Seeing as you specified SQL Server, you could also make use of a Common Table Expression.
Something like
;WITH a AS (
select id,
count(case when no01 ='B' then 1 END) + count(case when no02='B' then 1 END) + count(case when no03='B' then 1 END) as Count_All
From tabel_a
where date ='20150201'
group by id
)
, b as (
select id,
COUNT (*) cnt
from tabel_b
where ids <> 'T'
and idt ='C'
group by id
)
SELECT *
FROm a INNER JOIN
b ON a.ID = b.ID

SQL Server: Left outer join on whether ID exists

The first three items from table ProviderValueCard add their respective amount if = 1. I'm also trying to add 50 to my TotalScore if the ProviderID exists in table SubscriptionsTV. GroupID is also from SubscriptionsTV in which the condition needs to be met. I beleive I need to have a left outer join in the 2nd query on ProviderID columns from both tables.
DECLARE #ProviderID int = '1717';
WITH cte as(
SELECT TOP 1 ProviderID, Time_Stamp,
SUM(CASE WHEN [AdditionalReports] = '1' THEN 5 ELSE 0 END) as AdditionalReports,
SUM(CASE WHEN [UniqueReportRequests] = '1' THEN 15 ELSE 0 END) as UniqueReportsRequests,
SUM(CASE WHEN [SurveyCompleted] = '1' THEN 30 ELSE 0 END) as SurveyCompleted
--IF #ProviderID EXISTS SUM(THEN 50 ELSE 0 END)
FROM ProviderValueCard
WHERE ProviderID = #ProviderID
GROUP BY Time_Stamp, ProviderID
ORDER BY Time_Stamp DESC
)
SELECT ProviderID, Time_Stamp, (AdditionalReports + UniqueReportsRequests + SurveyCompleted) AS TotalScore
FROM cte
--WHERE GroupID = 2
returns
ProviderID Time_Stamp TotalScore
----------- ----------------------- -----------
1717 2014-08-28 13:03:30.593 45
ProviderValueCard table
ProviderID AdditionalReports UniqueReportRequests SurveyCompleted Time_Stamp
----------- ----------------- -------------------- --------------- -----------------------
1717 0 1 1 2014-08-28 13:03:30.593
SubscriptionsTV table
ProviderID GroupID
----------- -----------
1717 2
My final result is this:
DECLARE #ProviderID int = '1717';
WITH cte as(
SELECT TOP 1 a.ProviderID, Time_Stamp,
SUM(CASE WHEN [AdditionalReports] = '1' THEN 5 ELSE 0 END) as AdditionalReports,
SUM(CASE WHEN [UniqueReportRequests] = '1' THEN 15 ELSE 0 END) as UniqueReportsRequests,
SUM(CASE WHEN [SurveyCompleted] = '1' THEN 30 ELSE 0 END) as SurveyCompleted,
MAX(CASE WHEN b.ProviderID IS NULL THEN 0 ELSE 50 END) as SubscriptionExists
FROM ProviderValueCard a
LEFT JOIN SubscriptionsTV b
ON a.ProviderID = b.ProviderID
WHERE a.ProviderID = #ProviderID AND GroupID = 2
GROUP BY Time_Stamp, a.ProviderID, event
ORDER BY event DESC, Time_Stamp DESC
)
SELECT ProviderID, Time_Stamp, (AdditionalReports + UniqueReportsRequests + SurveyCompleted + SubscriptionExists) AS TotalScore
FROM cte
You are correct that this can be accomplished with a LEFT JOIN, and I'd use MAX() with a CASE statement:
DECLARE #ProviderID int = '1717';
WITH Subs AS (SELECT DISTINCT ProviderID
FROM SubscriptionsTV
)
,cte AS (SELECT TOP 1 a.ProviderID, Time_Stamp,
SUM(CASE WHEN [AdditionalReports] = '1' THEN 5 ELSE 0 END) as AdditionalReports,
SUM(CASE WHEN [UniqueReportRequests] = '1' THEN 15 ELSE 0 END) as UniqueReportsRequests,
SUM(CASE WHEN [SurveyCompleted] = '1' THEN 30 ELSE 0 END) as SurveyCompleted,
MAX(CASE WHEN b.ProviderID IS NULL THEN 0 ELSE 50 END) as SubscriptionExists
FROM ProviderValueCard a
LEFT JOIN Subs b
ON a.ProviderID = b.ProviderID
WHERE a.ProviderID = #ProviderID
GROUP BY Time_Stamp, ProviderID
ORDER BY Time_Stamp DESC
)
SELECT ProviderID, Time_Stamp, (AdditionalReports + UniqueReportsRequests + SurveyCompleted + SubscriptionExists) AS TotalScore
FROM cte
Update: Since multiple providerID's can exist, need DISTINCT, used a 2nd cte above, could also use a correlated sub-select inside the CASE statement.
Wasn't paying attention, ORDER BY is fine in a cte when TOP is used.

Resources