Update a table with values from a separate query - sql-server

I've got this query which returns a list of id's and int values
-- For each opportunity created fewer than 30 day-boundaries ago, return the
-- number of calls made to the account's phone number (spaces removed) after
-- the opportunity was created. Opportunities without matching calls are
-- filtered out by the cnt IS NOT NULL test.
SELECT
    opp.opportunityid,
    obn.cnt
FROM Opportunity AS opp
LEFT JOIN Account AS acct
    ON opp.AccountId = acct.AccountId
OUTER APPLY (
    SELECT COUNT(dst) AS cnt
    FROM [server].[telecoms].[dbo].[vwOpportunityUpdate]
    WHERE dst COLLATE DATABASE_DEFAULT = REPLACE(acct.Telephone1, ' ', '') COLLATE DATABASE_DEFAULT
      AND calldate > opp.createdon
    GROUP BY dst
) AS obn
WHERE obn.cnt IS NOT NULL
  AND DATEDIFF(dd, opp.CreatedOn, GETDATE()) < 30
All I need to do is update a table based on the values in those results
UPDATE Opportunity SET callcount = (obn.count from previous query)
WHERE OpportunityId = OpportunityId
I'm not sure how to join the two things together.
Thanks

Save the results of the first query into a temporary table, then use that table to join on the other table in your update clause.
-- Step 0: make the script re-runnable. A previous run that failed part-way
-- may have left the temp table behind in this session.
IF OBJECT_ID('tempdb..#CallCounts') IS NOT NULL
    DROP TABLE #CallCounts;

-- Step 1: materialise one call count per qualifying opportunity.
-- CROSS APPLY drops opportunities with no matching calls, which is exactly
-- what OUTER APPLY followed by "cnt IS NOT NULL" did (COUNT never returns
-- NULL for a group that exists).
SELECT
    Opportunity.opportunityid,
    obn.cnt AS cnt
INTO #CallCounts
FROM Opportunity
LEFT JOIN Account
    ON Opportunity.AccountId = Account.AccountId
CROSS APPLY (
    SELECT COUNT(dst) AS cnt
    FROM [server].[telecoms].[dbo].[vwOpportunityUpdate]
    WHERE dst COLLATE DATABASE_DEFAULT = REPLACE(Account.Telephone1, ' ', '') COLLATE DATABASE_DEFAULT
      AND calldate > Opportunity.createdon
    GROUP BY dst
) AS obn
WHERE DATEDIFF(dd, Opportunity.CreatedOn, GETDATE()) < 30;

-- Step 2: copy the counts onto the matching opportunities.
UPDATE o
SET callcount = c.cnt
FROM Opportunity AS o
INNER JOIN #CallCounts AS c
    ON c.opportunityID = o.opportunityID;

-- Step 3: clean up.
DROP TABLE #CallCounts;

Related

How to change index scan to index seek in this case?

I have the following query on a sql server 2008 r2:
-- Rebuild #temp from the linked MySQL server, then join it to the local view.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
/*Then it exists*/
DROP TABLE #temp
-- INTO #temp added: without it the pass-through result is only returned to
-- the client and the SELECT from #temp below has no table to read.
Select *
INTO #temp
FROM
openquery(mysqlserver, 'select
id,street,name,customer,customerId
from table1 as t1
left join table2 as t2
on t2.id=t1.id
left join table3 as t3
on t3.id=t1.id
left join table4 as t4
on t4.id=t1.producto_id
left join table5 as t5
on t1.id = t5.id
where t1.type=0
and t3.service=''X''
and t1.check is null
and t1.date > date_sub(CURDATE(),INTERVAL 5 DAY)
')
-- Join on customerId, forcing the view's collation on the temp-table side.
SELECT * FROM #temp as t
left join View as v on v.customerId=t.customerId collate
SQL_Latin1_General_CP1_CI_AI
The execution plan for the above statement shows one index scan that costs 27% and another that costs 26%. Both are related to the left join against one of the tables inside the view.
I thought to add index in the temp table with this:
-- Nonclustered index on the temp table's join column.
CREATE NONCLUSTERED INDEX ix_temp_customerId
    ON #temp (customerId);
The plan was still the same, so I tried applying the collation to the column before creating the index:
-- Re-declare the join column with an explicit collation.
ALTER TABLE #temp
    ALTER COLUMN customerId VARCHAR(30) COLLATE SQL_Latin1_General_CP1_CI_AI;
Then the last query of the first statement was:
-- Join the temp table to the view with no COLLATE clause on the predicate.
SELECT *
FROM #temp AS t
LEFT JOIN View AS v
    ON v.customerId = t.customerId;
This raises a collation error even though the view uses this collation and the tables inside the view's query use it too. So I added the collate clause back to the query.
The current statement is:
-- Load the MySQL pass-through result into #temp.
-- INTO #temp added: the ALTER TABLE, CREATE INDEX and final SELECT below all
-- operate on #temp, which this statement otherwise never creates.
Select *
INTO #temp
FROM
openquery(mysqlserver, 'select
id,street,name,customer,customerId
from table1 as t1
left join table2 as t2
on t2.id=t1.id
left join table3 as t3
on t3.id=t1.id
left join table4 as t4
on t4.id=t1.producto_id
left join table5 as t5
on t1.id = t5.id
where t1.type=0
and t3.service=''X''
and t1.check is null
and t1.date > date_sub(CURDATE(),INTERVAL 5 DAY)
')
-- Give the join column an explicit collation, then index it.
ALTER TABLE #temp alter COLUMN customerId varchar(30) collate SQL_Latin1_General_CP1_CI_AI
CREATE NONCLUSTERED INDEX ix_temp_customerId on #temp(customerId)
-- Join to the view; the COLLATE clause is still applied to the temp-table side.
SELECT * FROM #temp as t
left join View as v on v.customerId=t.customerId collate
SQL_Latin1_General_CP1_CI_AI
This execution plan still shows an index scan. I would like to turn it into an index seek, but I have not managed to.
Any suggestion to achieve better performance?
Thank you
Share your execution plans using Paste The Plan @ brentozar.com; here are the instructions: How to Use Paste the Plan.
I would try explicitly creating your #temp table with the correct datatype, size, and collation (match the size of customerId to the varchar() size of View.customerId).
I would also consider including the columns in the index, since you are using select *.
-- Drop any leftover copy so the script can be re-run in the same session.
if object_id('tempdb..#temp') is not null
    drop table #temp;

-- Create the temp table explicitly so every string column, and customerId in
-- particular, is declared with the collation used on the join below instead
-- of whatever SELECT ... INTO would infer from the linked server.
create table #temp (
    id int
  , street varchar(128) collate SQL_Latin1_General_CP1_CI_AI
  , name varchar(128) collate SQL_Latin1_General_CP1_CI_AI
  , customer varchar(128) collate SQL_Latin1_General_CP1_CI_AI
  , customerId varchar(30) collate SQL_Latin1_General_CP1_CI_AI
);

-- Explicit column lists on both sides: the load no longer depends on the
-- column order of the table or of the remote result set.
insert into #temp (id, street, name, customer, customerId)
select id, street, name, customer, customerId
from openquery(mysqlserver, '
select
id,street,name,customer,customerId
from table1 as t1
left join table2 as t2
on t2.id=t1.id
left join table3 as t3
on t3.id=t1.id
left join table4 as t4
on t4.id=t1.producto_id
left join table5 as t5
on t1.id = t5.id
where t1.type=0
and t3.service=''X''
and t1.check is null
and t1.date > date_sub(CURDATE(),INTERVAL 5 DAY)
');

-- Index keyed on the join column; the remaining columns are included because
-- the final query selects every column of #temp.
create nonclustered index ix_temp_customerId
    on #temp (customerId)
    include (id, street, name, customer);

-- No COLLATE clause on the predicate: the column itself carries the collation.
select *
from #temp as t
left join View as v
    on v.customerId = t.customerId;

How to improve the performance when select get few records from three millions records in sql

I have nearly 3 million records in a table. When I select records using the query below, it takes at least 45 seconds.
-- Payments of one user with a non-zero premium, each annotated with
--   noofpaymnt   : that user's payment count on the same paid_date
--   noofpaymonth : that user's payment count in the same calendar month
-- @user_id is the user to report on. It must be a T-SQL variable or parameter;
-- "#user_id" would be parsed as an identifier rather than a value.
SELECT a.user_id,
       b.NAME,
       a.paid_date,
       (SELECT Count(user_id)
        FROM tbl_payment_master
        WHERE user_id = a.user_id
          AND paid_date = a.paid_date
        GROUP BY paid_date) AS noofpaymnt,
       (SELECT Count(user_id)
        FROM tbl_payment_master
        WHERE user_id = a.user_id
          AND Month(paid_date) = Month(a.paid_date)
          AND Year(paid_date) = Year(a.paid_date)) AS noofpaymonth
FROM tbl_payment_master AS a
INNER JOIN tbl_user_registration AS b
    ON a.user_id = b.user_id
WHERE a.premium_amount != 0
  AND a.user_id = @user_id
My table structure
-- Users. The table is named tbl_user_registration so that it matches the
-- foreign key declared on tbl_payment_master below.
create table tbl_user_registration (
    user_id bigint primary key identity,
    name nvarchar(100)
);

-- Payments; each row belongs to one registered user.
create table tbl_payment_master (
    payment_id bigint primary key identity,
    paid_date datetime,
    amount float,
    user_id bigint foreign key references tbl_user_registration(user_id)
);
How could I improve the performance?
Is there any alternative way to retrieve the data?
Not knowing anything else, I did a mechanical refactoring of the correlated subqueries into joins. This MAY or MAY NOT be helpful depending on your setup.
The reason this may work is because with a join, the dataset is calculated generally once, but for a correlated subquery, it is running the sub query once for each record in the return.
This technique demonstrates something called a derived table.
Also, the userid filter in the derived table may or may not be needed or useful.
-- One row per non-zero-premium payment of @user_id, with the number of that
-- user's payments on the same date and in the same calendar month.
-- Each count is computed once in a derived table and joined back, instead of
-- being recomputed by a correlated subquery for every output row.
-- @user_id : the user to report on.
SELECT a.user_id
    ,b.NAME
    ,a.paid_date
    ,paymentDateCount = noofpaymnt.userCount
    ,PaymentMonthCount = noofpaymonth.userCount
FROM tbl_payment_master AS a
INNER JOIN tbl_user_registration AS b ON a.user_id = b.user_id
INNER JOIN (
    -- Payments per user per exact paid_date.
    -- No premium_amount filter here: the outer alias "a" is not visible inside
    -- a derived table, and the correlated subqueries this replaces counted all
    -- of the user's payments regardless of premium.
    SELECT user_id
        ,paid_date
        ,userCount = Count(user_id)
    FROM tbl_payment_master
    WHERE user_id = @user_id
    GROUP BY user_id
        ,paid_date
    ) noofpaymnt ON a.user_id = noofpaymnt.user_id
    AND a.paid_date = noofpaymnt.paid_date
INNER JOIN (
    -- Payments per user per calendar month.
    SELECT user_id
        ,paymentmonth = month(paid_date)
        ,paymentyear = year(paid_date)
        ,userCount = Count(user_id)
    FROM tbl_payment_master
    WHERE user_id = @user_id
    GROUP BY user_id
        ,month(paid_date)
        ,year(paid_date)
    ) noofpaymonth ON a.user_id = noofpaymonth.user_id
    AND month(a.paid_date) = noofpaymonth.paymentmonth
    AND year(a.paid_date) = noofpaymonth.paymentyear
WHERE a.premium_amount != 0
    AND a.user_id = @user_id
I'm very sure there's a better way to write this, but I think you want something in the lines of this:
-- One row per non-zero-premium payment of @user_id, with
--   noofpaymnt   : that user's payment count on the same paid_date
--   noofpaymonth : that user's payment count in the same calendar month
-- The derived tables are self-contained: a derived table cannot reference the
-- outer alias "a", so the matching is done only in the ON clauses.
-- @user_id : the user to report on.
select
    a.user_id
  , b.name
  , aggr.usr_cnt as noofpaymnt
  , aggr2.usr_cnt as noofpaymonth
from tbl_payment_master as a
inner join tbl_user_registration as b
    on a.user_id = b.user_id
left join (
    -- payments per user per exact paid_date
    select
        count(aa.user_id) usr_cnt
      , aa.user_id
      , aa.paid_date
    from tbl_payment_master aa
    where aa.user_id = @user_id
    group by aa.paid_date, aa.user_id
) aggr
    on aggr.user_id = a.user_id
   and aggr.paid_date = a.paid_date
left join (
    -- payments per user per calendar month
    select
        count(aa.user_id) usr_cnt
      , aa.user_id
      , month(aa.paid_date) paid_date_month
      , year(aa.paid_date) paid_date_year
    from tbl_payment_master aa
    where aa.user_id = @user_id
    group by aa.user_id
      , month(aa.paid_date)
      , year(aa.paid_date)
) aggr2
    on aggr2.user_id = a.user_id
   and aggr2.paid_date_month = month(a.paid_date)
   and aggr2.paid_date_year = year(a.paid_date)
where a.premium_amount <> 0
  and a.user_id = @user_id;
PS: It would be much easier to test and improve this query on real data
Index all fields that are read by the database.
This should speed up performance.

Max date sql server

I have a table with data and I am trying to find max date verified
-- Staging table for verification records.
-- ID is widened from varchar(5): the sample rows carry 8-character IDs
-- (e.g. 42851775), which a 5-character column cannot store.
Create table staging (
    ID varchar(10) not null,
    Name varchar(200) not null,
    dateverified datetime not null,
    dateinserted datetime not null
)
ID,Name,DateVerified,DateInserted
42851775,384,2014-05-24 08:48:20.000,2014-05-28 14:28:10.000
42851775,384,2014-05-28 13:13:07.000,2014-05-28 14:36:12.000
42851775,a1d,2014-05-28 09:17:22.000,2014-05-28 14:36:12.000
42851775,a1d,2014-05-28 09:17:22.000,2014-05-28 14:28:10.000
42851775,a1d,2014-05-28 09:17:22.000,2014-05-28 14:29:08.000
42851775,bc5,2014-05-28 09:17:21.000,2014-05-28 14:29:08.000
42851775,bc5,2014-05-28 09:17:21.000,2014-05-28 14:28:10.000
42851775,bc5,2014-05-28 09:17:21.000,2014-05-28 14:36:12.000
I want to display max dateverified for each keyid i.e.
42851775,384,2014-05-28 13:13:07.000,2014-05-28 14:36:12.000
42851775,a1d,2014-05-28 09:17:22.000,2014-05-28 14:36:12.000
42851775,bc5,2014-05-28 09:17:21.000,2014-05-28 14:29:08.000
-- Attempt: find the latest dateverified per (id, name) in derived table r,
-- then join back to staging (alias i) to pick up dateinserted.
-- i.name and i.dateinserted are selected but do not appear in the GROUP BY.
-- NOTE(review): the join and the GROUP BY use jobpostingurl, a column that r
-- does not select and the staging DDL does not declare; presumably it is the
-- real name of the "name" column. Confirm.
SELECT i.[ID],i.name,i.dateinserted,r.maxdate
FROM (select id,name,max(dateverified) as maxdate from
[dbo].[staging] where id=42851775 group by id,name) r
inner join
[dbo].[staging] i
on r.id=i.id and r.jobpostingurl=i.jobpostingurl and r.maxdate=i.dateverified
group by i.id,i.jobpostingurl,r.maxdate
I get an error: dateinserted is invalid because it is not contained in the GROUP BY clause. But if I add it to the GROUP BY clause I get all 8 records. How do I handle this?
Thanks
R
-- Latest yourDate for every KeyID.
SELECT s.KeyID, MAX(s.yourDate)
FROM Staging AS s
GROUP BY s.KeyID
If you want additional information join this to another table for instance:
-- Latest date per key, joined to the detail table on KeyID to pick up
-- dateinserted.
WITH latest AS (
    SELECT KeyID, MAX(yourDate) AS TheDate
    FROM Staging
    GROUP BY KeyID
)
SELECT
    latest.KeyID,
    src.dateinserted,
    latest.TheDate
FROM YourTable AS src
INNER JOIN latest
    ON latest.KeyID = src.KeyID
If you need to get the dateinserted you can use a cte and join it back to the original table:
-- One row per (ID, name): the latest dateverified plus a dateinserted.
-- The CTE finds the latest dateverified per (ID, name); joining it back to
-- staging picks up dateinserted. Several rows can share that latest
-- dateverified (a1d and bc5 in the sample data each have three), so the join
-- alone would return all of them. The outer GROUP BY collapses those ties to
-- a single row, keeping the most recent dateinserted.
WITH cte
AS ( SELECT [ID] ,
            name ,
            MAX(dateverified) AS dateverified
     FROM [dbo].[staging]
     GROUP BY ID ,
              name
   )
SELECT cte.[ID] ,
       cte.NAME ,
       cte.dateverified ,
       MAX(s.Dateinserted) AS Dateinserted
FROM cte
INNER JOIN dbo.staging s ON cte.[ID] = s.[ID]
                        AND cte.NAME = s.NAME
                        AND cte.dateverified = s.dateverified
GROUP BY cte.[ID] ,
         cte.NAME ,
         cte.dateverified

Going through tables and replacing IDs resulting from duplicates in dimension table

I have a dimension Users table that unfortunately has a bunch of duplicate records. See screenshot.
I have thousands of users and 5 tables referencing the duplicates. I want to delete records with "bad" UserIDs. I want to go through the 5 dependencies and update bad UserIds with "good" (circled in red).
What would be a good approach to this?
Here's what I did to get the above screenshot...
-- Lists every DimUsers row that belongs to a duplicate group, with the size
-- of its group.
WITH userTable AS (
    -- A duplicate group is a set of rows that agree on all four of FirstName,
    -- LastName, UserName and Email; requiring all four raises confidence.
    SELECT FirstName
        ,LastName
        ,UserName
        ,Email
        ,COUNT(*) AS DupCount
    FROM dbo.DimUsers
    GROUP BY FirstName
        ,LastName
        ,UserName
        ,Email
    HAVING COUNT(*) > 1          -- more than one row in the group means a dupe
),
dupTable AS (
    -- Drop groups whose names are the literal text NULL.
    SELECT FirstName
        ,LastName
        ,Email
        ,UserName
        ,DupCount
    FROM userTable
    WHERE LastName NOT LIKE 'NULL'
        AND FirstName NOT LIKE 'NULL'
)
SELECT UserID
    ,userIds.FirstName
    ,userIds.LastName
    ,dupTable.Email
    ,dupTable.Username
    ,dupTable.DupCount
FROM dbo.DimUsers AS userIds
-- Left join back to the base table on the same four fields to recover the
-- UserIDs of the duplicated rows.
LEFT OUTER JOIN dupTable
    ON dupTable.FirstName = userIds.FirstName
    AND dupTable.LastName = userIds.LastName
    AND dupTable.Email = userIds.Email
    AND dupTable.Username = userIds.Username
-- Rows with no duplicate group have a NULL DupCount and are not dupes.
WHERE DupCount IS NOT NULL
This code should work, created for 1 dependency table but you can use the same logic to update other 4 tables.
-- Re-point one child table at the surviving user of each duplicate group,
-- then delete the redundant DimUsers rows. The survivor is the row with the
-- smallest UserID among rows sharing FirstName, LastName, UserName and Email.
-- Both steps run in one transaction so a failure cannot leave child rows
-- pointing at users that were only half cleaned up.
set xact_abort on;
begin transaction;

update t
set UserID = MinUserID.UserID
from
    DimUsersChild1 t
    inner join DimUsers on DimUsers.UserID = t.UserID
    inner join (
        select min(UserID) UserID, FirstName, LastName, UserName, Email
        from DimUsers
        group by
            FirstName, LastName, UserName, Email
    ) MinUserID on
        MinUserID.FirstName = DimUsers.FirstName and
        MinUserID.LastName = DimUsers.LastName and
        MinUserID.UserName = DimUsers.UserName and
        MinUserID.Email = DimUsers.Email
where
    -- skip rows that already point at the surviving user
    t.UserID <> MinUserID.UserID and
    -- rows whose names are the literal text NULL are not treated as dupes
    DimUsers.FirstName not like 'NULL' and
    DimUsers.LastName not like 'NULL';

select * from DimUsersChild1;

-- Delete every row for which a matching row with a smaller UserID exists.
delete t1
from
    DimUsers t
    inner join DimUsers t1 on t1.FirstName = t.FirstName and
        t1.LastName = t.LastName and
        t1.UserName = t.UserName and
        t1.Email = t.Email
where
    t.UserID < t1.UserID and
    -- same exclusion as in the update above, so no referenced user is removed
    t1.FirstName not like 'NULL' and
    t1.LastName not like 'NULL';

select * from DimUsers;

commit transaction;
Here is a working demo

Acquiring Row index with identity column in Join Query

I need to add the results from a Left Join query to a table that does not have its index set to be an identity, but the int must still be unique. My insert query looks like this:
INSERT INTO Table1 (S.SubjectID, S.Subject, S.SubjectDescription, S.Status)
SELECT (Select MAX(SubjectID) FROM Table1) + ???? , N.Code, N.Literal, N.Trans
FROM Table2 N LEFT JOIN Table1 S ON N.Code = S.Subject
WHERE (N.Code IS NULL OR S.Subject IS NULL OR N.Trans = 'D')
Where I have the ???? is where I need some incrementing value, so that the IDs will be unique when inserting into Table1.
I am not allowed to change the table's structure; I just need something that can calculate this on the fly.
As always help, tips and references are much appreciated.
In most databases, you can use row_number() for this purpose. Here is an example with SQL Server syntax:
-- New SubjectIDs continue from the current maximum: max + 1, max + 2, ...
-- COALESCE covers an empty Table1, where MAX() is NULL and NULL + n would
-- produce NULL ids.
-- The target column list is unqualified: alias S belongs to the SELECT below
-- and has no meaning in the INSERT column list.
-- NOTE(review): max + row_number is not safe if other sessions insert into
-- Table1 at the same time; serialise writers if that can happen.
INSERT INTO Table1 (SubjectID, Subject, SubjectDescription, Status)
SELECT COALESCE((SELECT MAX(SubjectID) FROM Table1), 0)
           + ROW_NUMBER() OVER (ORDER BY (SELECT NULL)),
       N.Code, N.Literal, N.Trans
FROM Table2 N
LEFT JOIN Table1 S ON N.Code = S.Subject
WHERE (N.Code IS NULL OR S.Subject IS NULL OR N.Trans = 'D')

Resources