SQL Get MODE Instead of AVG - sql-server

I have a stored procedure which returns me the average issue price of products. There will be multiple rows for each issue, and so I group by barcode, and also the number of stores that have inserted a row for this issue, and get the average of IssuePrice as this comes from user input:
-- Reports unknown barcodes for one status and store type, one row per
-- (Barcode, matched Product).
--   @Status      : UnknownBarcodeStatusID to report on; it also selects the sort
--                  order (1 or 2 = highest store count first, 3 = latest
--                  DateInserted first).
--   @StoreTypeID : StoreTypeID to report on.
-- Result columns: [# Stores], IssueName, IssuePrice (average), Barcode and the
-- matched product code / name / category (NULL when no product matches).
ALTER PROCEDURE [dbo].[GetUnknownBarcodeReport]
    @Status INT,
    @StoreTypeID INT
AS
BEGIN
    SELECT COUNT(StoreCode) AS [# Stores]
          ,MAX(IssueName) AS IssueName
          ,AVG(IssuePrice) AS IssuePrice
          ,Barcode
          ,Product.EAN13 AS [Matched Product Code]
          ,Product.Name AS [Matched Product Name]
          ,Product.MainCatagory AS [Product Catagory]
    FROM UnknownBarcodes
    -- Only the first 13 characters of the reported barcode are compared with EAN13.
    LEFT JOIN Product ON LEFT(UnknownBarcodes.Barcode, 13) = Product.EAN13
    WHERE UnknownBarcodeStatusID = @Status
      AND LEN(Barcode) >= 10
      AND StoreTypeID = @StoreTypeID
    GROUP BY Barcode, Product.EAN13, Product.Name, Product.MainCatagory
    -- One CASE per sort key. A single CASE that returns COUNT (int) in some
    -- branches and MAX(DateInserted) (datetime) in another is typed as datetime,
    -- which pushes the count through an implicit int -> datetime conversion.
    ORDER BY
        CASE WHEN @Status IN (1, 2) THEN COUNT(StoreCode) END DESC
       ,CASE WHEN @Status = 3 THEN MAX(DateInserted) END DESC
END
The same product can be reported multiple times and therefore have multiple rows, but sometimes users will enter different prices. If ten users all enter the same barcode and say the price is 100 then the AVG(IssuePrice) is also 100. However, if 9 enter 100, and the last row is entered as 1 then the AVG changes to 90.1
I would like to replace the AVG with the mathematical equivalent of MODE so that the above example would still return 100 because there are more 100s reported than any other value. Is this possible in SQL?
To help re-create this, the script for the table is below:
-- Session options emitted by the script generator; kept as-is for each table.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
-- Barcode reports; ID is the identity primary key, every other column is nullable.
CREATE TABLE dbo.UnknownBarcodes
(
    ID                      int IDENTITY(1,1) NOT NULL,
    StoreCode               int               NULL,
    DateInserted            datetime          NULL,
    StoreTypeID             int               NULL,
    Barcode                 varchar(100)      NULL,
    UnknownBarcodeStatusID  int               NULL,
    StatusDescription       varchar(1000)     NULL,
    IssueName               varchar(100)      NULL,
    IssuePrice              int               NULL,
    AutoReported            bit               NULL,
    CONSTRAINT PK_UnknownBarcodes PRIMARY KEY CLUSTERED (ID ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
)
GO
SET ANSI_PADDING OFF
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
-- Product catalogue keyed by EAN13.
CREATE TABLE dbo.Product
(
    EAN13         bigint        NOT NULL,
    Name          varchar(250)  NULL,
    MainCatagory  varchar(100)  NULL,
    CONSTRAINT PK_Product PRIMARY KEY CLUSTERED (EAN13 ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
)
GO
SET ANSI_PADDING OFF
GO
-- Sample data: ten reports of the same barcode, one priced at 200 and nine at 10.
-- The DateInserted value must be a quoted literal; the ISO 8601 form with 'T'
-- is read the same way regardless of the session's language/DATEFORMAT.
INSERT INTO [dbo].[UnknownBarcodes]
    ([StoreCode],[DateInserted],[StoreTypeID],[Barcode],[UnknownBarcodeStatusID],[StatusDescription],[IssueName],[IssuePrice],[AutoReported])
VALUES
    (412,  '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 200, 1),
    (843,  '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 10, 1),
    (860,  '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 10, 1),
    (864,  '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 10, 1),
    (964,  '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 10, 1),
    (1061, '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 10, 1),
    (1350, '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 10, 1),
    (1375, '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 10, 1),
    (1489, '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 10, 1),
    (1531, '2015-07-15T08:01:03.817', 1, '977096171301112', 3, 'Scanning issues regarding the reported unknown item', 'ASIAN TRADER FREE TRADE', 10, 1)
You can see that the MODE of the above set should be 10 even though one row has a value of 200 which skews the AVG

From: http://blogs.lessthandot.com/index.php/datamgmt/datadesign/calculating-mean-median-and-mode-with-sq/
MODE
To calculate the mode with SQL Server, we first need to get the count for each value in the set. Then we keep only the value (or values, when there is a tie) whose count equals the highest count.
-- Mode example: return the value(s) that occur most often in @Temp.Data.
DECLARE @Temp TABLE (Id INT IDENTITY(1,1), Data DECIMAL(10,5))

INSERT INTO @Temp (Data)
VALUES (1), (2), (5), (5), (5), (6), (6), (6), (7), (9), (10), (NULL)

-- Count each distinct non-NULL value and keep the most frequent one.
-- WITH TIES returns every value that shares the top count (here both 5 and 6).
SELECT TOP 1 WITH TIES Data
FROM @Temp
WHERE Data IS NOT NULL
GROUP BY Data
ORDER BY COUNT(*) DESC
In your example the functional sql could be accomplished by a subquery:
-- Scalar subquery for the SELECT list of the report query (outer alias: bar).
-- It returns the most frequent IssuePrice among all UnknownBarcodes rows that share the outer row's Barcode.
-- It is filtered by Barcode only, and TOP 1 has no tie-breaker, so when two prices are equally frequent either may be returned.
(select top 1 IssuePrice from UnknownBarcodes barx where barx.BarCode = bar.Barcode group by barx.IssuePrice order by count(*) DESC ) IssuePrice_MODE
Total query:
-- Report query with both the average and the mode of IssuePrice per barcode.
-- Variable names use one casing throughout so the script also runs on a
-- case-sensitive server collation.
DECLARE @Status INT, @StoreTypeID INT;
SET @Status = 3;
SET @StoreTypeID = 1;

SELECT
    COUNT(bar.StoreCode) AS [# Stores]
    ,MAX(bar.IssueName) AS IssueName
    ,AVG(bar.IssuePrice) AS IssuePrice
    -- Mode: most frequent non-NULL price among the same rows the outer query
    -- aggregates (same barcode, status and store type). Ties go to the lowest
    -- price so the result is deterministic.
    ,(SELECT TOP 1 barx.IssuePrice
      FROM UnknownBarcodes barx
      WHERE barx.Barcode = bar.Barcode
        AND barx.UnknownBarcodeStatusID = @Status
        AND barx.StoreTypeID = @StoreTypeID
        AND barx.IssuePrice IS NOT NULL
      GROUP BY barx.IssuePrice
      ORDER BY COUNT(*) DESC, barx.IssuePrice ASC) AS IssuePrice_MODE
    ,bar.Barcode
    ,Product.EAN13 AS [Matched Product Code]
    ,Product.Name AS [Matched Product Name]
    ,Product.MainCatagory AS [Product Catagory]
FROM UnknownBarcodes bar
LEFT JOIN Product ON LEFT(bar.Barcode, 13) = Product.EAN13
WHERE bar.UnknownBarcodeStatusID = @Status
  AND LEN(bar.Barcode) >= 10
  AND bar.StoreTypeID = @StoreTypeID
GROUP BY bar.Barcode, Product.EAN13, Product.Name, Product.MainCatagory
-- Separate sort keys avoid mixing int and datetime in a single CASE.
ORDER BY
    CASE WHEN @Status IN (1, 2) THEN COUNT(bar.StoreCode) END DESC
   ,CASE WHEN @Status = 3 THEN MAX(bar.DateInserted) END DESC
The query below might be a bit more efficient on large datasets since it restricts the results that have to be iterated
-- Same report, with the filtered and joined rows computed once in a CTE so the
-- mode subquery only looks at rows that passed the filters.
DECLARE @Status INT, @StoreTypeID INT;
SET @Status = 3;
SET @StoreTypeID = 1;

WITH FirstQuery AS (
    -- Explicit column list: SELECT * over a join breaks the CTE as soon as both
    -- tables gain a column with the same name.
    SELECT bar.StoreCode
          ,bar.IssueName
          ,bar.IssuePrice
          ,bar.Barcode
          ,bar.DateInserted
          ,Product.EAN13
          ,Product.Name
          ,Product.MainCatagory
    FROM UnknownBarcodes bar
    LEFT JOIN Product ON LEFT(bar.Barcode, 13) = Product.EAN13
    WHERE bar.UnknownBarcodeStatusID = @Status
      AND LEN(bar.Barcode) >= 10
      AND bar.StoreTypeID = @StoreTypeID
)
SELECT COUNT(fq.StoreCode) AS [# Stores]
      ,MAX(fq.IssueName) AS IssueName
      ,AVG(fq.IssuePrice) AS IssuePrice
      ,fq.Barcode
      -- Mode: most frequent non-NULL price for this barcode; ties go to the
      -- lowest price so the result is deterministic.
      ,(SELECT TOP 1 barx.IssuePrice
        FROM FirstQuery barx
        WHERE barx.Barcode = fq.Barcode
          AND barx.IssuePrice IS NOT NULL
        GROUP BY barx.IssuePrice
        ORDER BY COUNT(*) DESC, barx.IssuePrice ASC) AS IssuePrice_MODE
      ,fq.EAN13 AS [Matched Product Code]
      ,fq.Name AS [Matched Product Name]
      ,fq.MainCatagory AS [Product Catagory]
FROM FirstQuery fq
GROUP BY fq.Barcode, fq.EAN13, fq.Name, fq.MainCatagory
-- Separate sort keys avoid mixing int and datetime in a single CASE.
ORDER BY
    CASE WHEN @Status IN (1, 2) THEN COUNT(fq.StoreCode) END DESC
   ,CASE WHEN @Status = 3 THEN MAX(fq.DateInserted) END DESC

I have used an inner query to calculate the MODE.
-- Report for status 3 / store type 1 with the mode of IssuePrice per barcode.
SELECT COUNT(ub.StoreCode) AS [# Stores]
      ,MAX(ub.IssueName) AS IssueName
      ,AVG(ub.IssuePrice) AS IssuePrice
      -- The inner table needs its own alias: with the same unaliased table name
      -- inside and outside, "ID = UnknownBarcodes.id" compares the inner row with
      -- itself and the subquery is not correlated at all. The mode is the price
      -- with the highest row count for the outer barcode (ties: lowest price).
      ,(SELECT TOP 1 m.IssuePrice
        FROM UnknownBarcodes m
        WHERE m.Barcode = ub.Barcode
          AND m.UnknownBarcodeStatusID = 3
          AND m.StoreTypeID = 1
          AND m.IssuePrice IS NOT NULL
        GROUP BY m.IssuePrice
        ORDER BY COUNT(*) DESC, m.IssuePrice ASC) AS issuePrice_MODE
      ,ub.Barcode
      ,Product.EAN13 AS [Matched Product Code]
      ,Product.Name AS [Matched Product Name]
      ,Product.MainCatagory AS [Product Catagory]
FROM UnknownBarcodes ub
LEFT JOIN Product ON LEFT(ub.Barcode, 13) = Product.EAN13
WHERE ub.UnknownBarcodeStatusID = 3
  AND LEN(ub.Barcode) >= 10
  AND ub.StoreTypeID = 1
GROUP BY ub.Barcode, Product.EAN13, Product.Name, Product.MainCatagory

How does this work for what you are after? No idea how well it will perform on larger datasets though:
-- Mode via window functions: group the filtered rows by price, rank the price
-- groups by size inside each (IssueName, product) partition and keep rank 1.
-- @Status and @StoreTypeID are the report procedure's parameters.
;with cte as
(
    select
        -- Window functions run after GROUP BY, so a plain count(1) over (...)
        -- would count price groups. Summing the per-group row counts gives the
        -- number of reports in the partition.
        sum(count(1)) over (partition by b.IssueName
                                        ,p.EAN13
                                        ,p.Name
                                        ,p.MainCatagory
                           ) as [# Stores]
       ,b.IssueName
       ,b.IssuePrice
        -- 1 = most frequent price; ties go to the lowest price so the pick is
        -- deterministic.
       ,row_number() over (partition by b.IssueName
                                       ,p.EAN13
                                       ,p.Name
                                       ,p.MainCatagory
                           order by count(1) desc, b.IssuePrice asc
                          ) as IssuePriceSort
       ,b.Barcode
       ,p.EAN13
       ,p.Name
       ,p.MainCatagory
    from UnknownBarcodes as b
    left join Product as p
        on left(b.Barcode, 13) = p.EAN13
    where b.UnknownBarcodeStatusID = @Status
      and len(b.Barcode) >= 10
      and b.StoreTypeID = @StoreTypeID
    group by b.Barcode
            ,b.IssueName
            ,b.IssuePrice
            ,p.EAN13
            ,p.Name
            ,p.MainCatagory
)
select [# Stores]
      ,[IssueName]
      ,[IssuePrice]
      ,[IssuePriceSort]
      ,[Barcode]
      ,[EAN13]
      ,[Name]
      ,[MainCatagory]
from cte
where IssuePriceSort = 1

Related

Actual execution plan not updated after change in Azure SQL Database stored procedure

The actual execution plan for my Azure SQL Database stored procedure indicated the following warning:
So I went and added a persisted calculated column in dbo.Interest_rate_changes like this:
[Effective_date] AS (CONVERT([date],CONVERT([nchar](8),[Effective_date_int]),(112))) PERSISTED NOT NULL
I then replaced all references to Effective_date_int in the stored procedure with references to Effective_date.
I ran the query again but the same warning appeared in the actual execution plan, even though no reference to Effective_date_int now exists in the stored procedure.
I tried to clear the cache with DBCC FREEPROCCACHE but Azure SQL Database will not allow that.
What am I doing wrong?
In response to comments, the actual execution plan is here.
The query is this:
EXEC TEST_PopulateCachedDailyInterest 11,'2017-12-31'
The stored procedure is this (apologies I am not a pro):
-- Rebuilds TEST_CachedDailyInterest: the table is truncated and then refilled with
-- one row per entity / instrument / accrual date produced by the CTE below.
-- Parameters: #entityid (default 0) selects the entity; #enddate caps the date range.
-- NOTE(review): the '#' prefix on the parameter names looks like a transcription of '@';
-- T-SQL parameter names must start with '@' -- confirm against the deployed procedure.
CREATE PROCEDURE [dbo].[TEST_PopulateCachedDailyInterest] (#entityid int = 0, #enddate date)
AS
SET NOCOUNT ON;
TRUNCATE TABLE TEST_CachedDailyInterest;
-- CTE A: accrual numerator and denominator per entity, instrument and calendar date.
WITH A AS
(
SELECT
MyEntity AS Entity,
MyInstrument AS Instrument,
Accrual_date AS AccrualDate,
Balance_x_Par_value_x_Effective_rate AS AccrualNumerator,
Yearfrac_reciprocal AS AccrualDenominator
FROM
(SELECT
MyEntity,
MyInstrument,
Instrument_currency,
Interest_convention,
Yearfrac_date_shift,
Calendar_date AS Accrual_date,
-- Numerator = (sum of Units whose shifted Transaction_date is before the calendar date) * Par_value
--             * (Interest_rate of the latest rate change whose shifted Effective_date is on or before the calendar date).
-- Both correlated subqueries run once per (instrument, calendar date) row.
(SELECT SUM(Units) FROM Unit_transactions_indexed WITH (NOEXPAND) WHERE Entity = MyEntity AND Instrument = MyInstrument AND DATEADD(day,Opening_balance_date_shift,Transaction_date) < Calendar_date) * Par_value *
(SELECT TOP 1 Interest_rate FROM Interest_rate_changes WHERE Instrument = MyInstrument AND DATEADD(day,Interest_date_shift, Effective_date) <= Calendar_date ORDER BY Effective_date DESC) AS Balance_x_Par_value_x_Effective_rate
FROM
(SELECT MyEntity, MyInstrument, Min_date, Max_date, Opening_balance_date_shift, Interest_date_shift, Interest_convention, Yearfrac_date_shift, Par_value, Instrument_currency FROM
-- Date range per instrument: Min_date is 3 days before the first transaction; Max_date is #enddate when the
-- units sum is non-zero, otherwise the earlier of #enddate and 3 days after the last transaction.
(SELECT MyEntity, MyInstrument, DATEADD(day,-3,MIN(Transaction_date)) AS Min_date, IIF(SUM(Units)<>0, #enddate, IIF(DATEADD(day,3,MAX(Transaction_date))>=#enddate, #enddate, DATEADD(day,3,MAX(Transaction_date)))) AS Max_date
FROM
(SELECT MyEntity, MyInstrument, Transaction_date, Units FROM
-- Non-zero unit transactions of the requested entity ...
(SELECT Entity AS MyEntity, Instrument AS MyInstrument, Transaction_date, Units FROM Unit_transactions_indexed WITH (NOEXPAND)
WHERE Entity = #entityid AND Units <> 0 AND Units IS NOT NULL) AS A
INNER JOIN
-- ... restricted to instruments with Interest_type > 0.
(SELECT ID FROM Instruments WHERE Interest_type>0) B
ON A.MyInstrument = B.ID) C
GROUP BY MyEntity, MyInstrument) D
INNER JOIN Instruments ON D.MyInstrument = Instruments.ID
INNER JOIN Interest_types ON Interest_type = Interest_types.ID) AS D
-- One row per calendar date inside [Min_date, Max_date] for every instrument.
CROSS JOIN Calendar_dates WHERE Calendar_date BETWEEN Min_date AND Max_date) AS F
-- Attach the year-fraction reciprocal for the shifted accrual date and the instrument's interest convention;
-- rows with a zero or NULL numerator, or a NULL reciprocal, are dropped by the join condition.
INNER JOIN Yearfracs_reciprocal WITH (NOEXPAND) ON Balance_x_Par_value_x_Effective_rate<>0 AND
Balance_x_Par_value_x_Effective_rate IS NOT NULL AND
Yearfrac_reciprocal IS NOT NULL AND
DATEADD(day,-F.Yearfrac_date_shift,F.Accrual_date) = Yearfracs_reciprocal.Calendar_date AND
Interest_convention = Convention_ID
)
-- NOTE(review): no column list on the INSERT, so the target's column order must match this SELECT -- confirm.
INSERT INTO TEST_CachedDailyInterest
SELECT
Entity AS EntityId,
Instrument AS InstrumentId,
AccrualDate,
AccrualNumerator,
AccrualDenominator
FROM A
WHERE ISNULL(AccrualNumerator,0)<>0
The table schema for dbo.Interest_rate_changes is this:
-- Interest rate per instrument and effective date.
-- Effective_date is a persisted computed column: Effective_date_int is rendered
-- as an 8-character string and converted with style 112 (yyyymmdd).
CREATE TABLE dbo.Interest_rate_changes
(
    Instrument          int            NOT NULL,
    Effective_date_int  int            NOT NULL,
    Interest_rate       decimal(9, 7)  NOT NULL,
    Effective_date      AS (CONVERT([date],CONVERT([nchar](8),[Effective_date_int]),(112))) PERSISTED NOT NULL,
    CONSTRAINT PK_Instrument_Date PRIMARY KEY CLUSTERED
    (
        Instrument ASC,
        Effective_date ASC
    )
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
          ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
)

How to speedUp Random selecting in SQL Server

I have a table for phone numbers like this :
ID PhoneNumber Enabled GrupID CountryID
----------- -------------------- ------- ------ -----------
10444 ***001000999 1 NULL 1
10445 ***001000998 1 NULL 1
10446 ***001000994 1 NULL 1
10447 ***001000990 1 NULL 1
10448 ***001000989 1 NULL 1
This table has 68992507 rows.
I want to select some random phone number from it.
I can get my random number query by this stored procedure:
here I select random numbers, insert to a #table and then update the selected numbers .
-- Picks @count phone numbers of country @countryid that have no group yet,
-- marks them with GrupID = 1 and returns the picked rows as they were before
-- the update.
--   @countryid : CountryID to pick from
--   @count     : number of rows to pick
CREATE proc [dbo].[Mysp_GetRandom]
    @countryid int,
    @count int
as
declare @tbl table([ID] [int] ,
    [PhoneNumber] [nchar](20) NOT NULL,
    [Enabled] [bit] NULL,
    [GrupID] [tinyint] NULL,
    [CountryID] [int] NULL);

-- Pick and mark in a single statement: the UPDATE runs against the CTE and
-- OUTPUT captures the pre-update rows, so there is no window between a separate
-- SELECT and UPDATE, and the table is not searched a second time by ID.
WITH picked AS
(
    SELECT TOP (@count) ID, PhoneNumber, Enabled, GrupID, CountryID
    FROM tblPhoneNumber
    WHERE CountryID = @countryid
      AND GrupID IS NULL
    ORDER BY binary_checksum(ID * rand())
)
UPDATE picked
SET GrupID = 1
OUTPUT deleted.ID, deleted.PhoneNumber, deleted.Enabled, deleted.GrupID, deleted.CountryID
INTO @tbl (ID, PhoneNumber, Enabled, GrupID, CountryID);

SELECT ID, PhoneNumber, Enabled, GrupID, CountryID FROM @tbl
The problem is that it takes a long time for the query to run. For example this query takes 12:30 minutes ...
-- Example call: three numbers for country 14; the procedure's return code is
-- shown as a second result set.
DECLARE @return_value int
EXEC @return_value = [dbo].[Mysp_GetRandom]
    @countryid = 14,
    @count = 3
SELECT 'Return Value' = @return_value
and I have an index on this table:
-- Nonclustered index on tblPhoneNumber keyed by CountryID only.
CREATE NONCLUSTERED INDEX [NonClusteredIndex-20150415-172433]
    ON dbo.tblPhoneNumber (CountryID ASC)
    WITH
    (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
Execution plan is as below :
Thanks ...
Add grupID to index key column and add other required columns in include clause of your NC index NonClusteredIndex-20150415-172433.
Execution plan is already giving you the same hint on adding missing index.
P.S Mark it as answer if it helped you.
Look at your query plan, the first INSERT statement takes almost 100% of the time, and 70% of it is sorting. There's not much you can do with it since you already using BINARY_CHECKSUM. May be the table was filled in random enough manner to get off with taking consecutive rows starting from random offset, like this:
-- Take @count consecutive eligible rows (by ID) starting at a random offset.
-- The offset is derived from the number of rows that pass the same filter as
-- the query and is never negative: basing it on the whole table's row count
-- can skip past every eligible row, and a small RAND() value can make
-- "count - @count - 1" negative, which OFFSET rejects.
DECLARE @eligible INT =
    (SELECT COUNT(*) FROM tblPhoneNumber WHERE CountryID = @countryid AND GrupID IS NULL);
DECLARE @skip INT =
    CASE WHEN @eligible > @count
         THEN CONVERT(INT, RAND() * (@eligible - @count + 1))  -- 0 .. @eligible - @count
         ELSE 0
    END;

SELECT ID
FROM tblPhoneNumber
WHERE CountryID = @countryid
  AND GrupID IS NULL
ORDER BY ID
OFFSET @skip ROWS
FETCH NEXT @count ROWS ONLY
You should replace your order by clause.
You could create fairly random ids:
-- Generates @count rows (id = 1..@count), each with a random number built from
-- the last 6 hex digits of a fresh NEWID().
DECLARE @count INT = 100;

WITH ids (id, hex) AS
(
    SELECT 1, CONVERT(bigint, CONVERT(varbinary, '0x' + RIGHT(NEWID(), 6), 1))
    UNION ALL
    SELECT id + 1, CONVERT(bigint, CONVERT(varbinary, '0x' + RIGHT(NEWID(), 6), 1))
    FROM ids
    WHERE id + 1 <= @count
)
SELECT id, hex
FROM ids
OPTION (MAXRECURSION 0)  -- lift the default recursion limit so @count can exceed 100
Then you can join this generated table with your own table on ID.
You should also review your indexes (as mentioned by others) and add an index on the phone number ID.

How to DISPLAY horizontal data into vertical columns in SQL?

I have tables Employees, Documents, Transactions, from_to table.
Employee can send a document to other employee and send a copy of the document to other employee.
data will be displayed in transaction table as follow:
TransId - Document Id - EmployeeId - from_toId
1 1 5 1 (From)
2 1 6 2 (To)
3 1 10 2 (CC)
Now; I want the data above to be displayed as follow:
DocId From To CC
1 Jo(5) Fo(6) Do(10)
I know that we need something to do with "Pivot Table". BUT I DON'T KNOW HOW.
Waiting for your feedback.
I have a solution with a Cursor which is dynamic:
-- Pivot the per-employee transaction rows into one row per document with
-- From / To / CC columns.
CREATE TABLE #PRERESULT(
    [TransId] int,
    DocumentID int,
    EmployeeId int,
    from_toId [nvarchar](10)
)
INSERT INTO #PRERESULT ([TransId],DocumentID,EmployeeId,from_toId)
VALUES
    (1,1,5,'1 (From)'),
    (2,1,6,'2 (To)'),
    (3,1,10,'2 (CC)')

CREATE TABLE #RESULT (
    DocID int,
    [From] nvarchar(15),
    [To] nvarchar(15),
    CC nvarchar(15))

-- One set-based statement replaces the cursor and the dynamic UPDATE. The role
-- name is the text between '(' and the final character of from_toId, for
-- example '1 (From)' gives 'From'. The role is read from the same row as the
-- employee, so an employee who appears in several rows is handled (the
-- per-employee scalar lookup failed in that case).
INSERT INTO #RESULT (DocID, [From], [To], CC)
SELECT roles.DocumentID
      ,MAX(CASE WHEN roles.RoleName = N'From' THEN CONVERT(nvarchar(15), roles.EmployeeId) END)
      ,MAX(CASE WHEN roles.RoleName = N'To'   THEN CONVERT(nvarchar(15), roles.EmployeeId) END)
      ,MAX(CASE WHEN roles.RoleName = N'CC'   THEN CONVERT(nvarchar(15), roles.EmployeeId) END)
FROM
(
    SELECT DocumentID
          ,EmployeeId
          ,SUBSTRING(from_toId
                    ,PATINDEX('%(%', from_toId) + 1
                    ,LEN(from_toId) - PATINDEX('%(%', from_toId) - 1) AS RoleName
    FROM #PRERESULT
) AS roles
GROUP BY roles.DocumentID

SELECT DocID, [From], [To], CC FROM #RESULT

DROP TABLE #PRERESULT
DROP TABLE #RESULT
Gives you this:
DocID |From |To |CC
1 |5 |6 |10
Hope this will help you
Have a nice day & Greets from Switzerland
Etienne
I think this reflects your table, although I am using names instead of IDs for the last two columns:
-- Demo table: one row per (document, employee, role); names stand in for ids.
CREATE TABLE dbo.[Transaction]
(
    TransId   int           NOT NULL,
    DocId     int           NOT NULL,
    EmpId     nvarchar(10)  NOT NULL,
    FromToId  nchar(10)     NOT NULL
) ON [PRIMARY]

-- Three documents; document 3 has no CC row.
INSERT INTO [Transaction] (TransId, DocId, EmpId, FromToId)
VALUES
    (1, 1, 'Jo', 'From'),
    (2, 1, 'Fo', 'To'),
    (3, 1, 'Do', 'CC'),
    (4, 2, 'Jo', 'From'),
    (5, 2, 'Bo', 'To'),
    (6, 2, 'Zo', 'CC'),
    (7, 3, 'Bo', 'From'),
    (8, 3, 'Go', 'To')
Then this query will give you the results you requested:
-- One row per document with its From / To / Cc employee (NULL when the role is
-- missing). Each role is a self-join restricted to that role; DISTINCT collapses
-- the duplicate rows the driving table produces for each document.
-- The role literal is spelled 'CC' exactly as stored: comparing against 'Cc'
-- returns no match on a case-sensitive collation.
SELECT DISTINCT
     t.DocId
    ,x.EmpId AS [From]
    ,y.EmpId AS [To]
    ,z.EmpId AS Cc
FROM [Transaction] t
LEFT JOIN [Transaction] x
    ON x.DocId = t.DocId AND x.FromToId = 'From'
LEFT JOIN [Transaction] y
    ON y.DocId = t.DocId AND y.FromToId = 'To'
LEFT JOIN [Transaction] z
    ON z.DocId = t.DocId AND z.FromToId = 'CC'
DocId From To Cc
1 Jo Fo Do
2 Jo Bo Zo
3 Bo Go NULL

Need to speed up SQL Server SP that uses system metadata

Let me apologize in advance for the length of this question. I don't see how to ask it without giving all the definitions.
I've inherited a SQL Server 2005 database that includes a homegrown implementation of change tracking. Through triggers, changes to virtually every field in the database are stored in a set of three tables. In the application for this database, the user can request the history of various items, and what's returned is not just changes to the item itself, but also changes in related tables. The problem is that in some cases, it's painfully slow, and in some cases, the request eventually crashes the application. The client has also reported other users having problems when someone requests history.
The tables that store the change data are as follows:
-- One row per change event on a table; obj_id holds the OBJECT_ID() of the
-- changed table.
CREATE TABLE [dbo].[tblSYSChangeHistory](
    [id] [bigint] IDENTITY(1,1) NOT NULL,
    [date] [datetime] NULL,
    [obj_id] [int] NULL,
    [uid] [varchar](50) NULL
)
This table tracks the tables that have been changed. Obj_id is the value that Object_ID() returns.
-- One row per changed item: h_id points at tblSYSChangeHistory.id, item_id is
-- the changed row's primary key in its own table, action marks
-- insert / delete / change.
CREATE TABLE [dbo].[tblSYSChangeHistory_Items](
    [id] [bigint] IDENTITY(1,1) NOT NULL,
    [h_id] [bigint] NOT NULL,
    [item_id] [int] NULL,
    [action] [tinyint] NULL
)
This table tracks the items that have been changed. h_id is a foreign key to tblSYSChangeHistory. item_id is the PK of the changed item in the specified table. action indicates insert, delete or change.
-- One row per changed column: i_id points at tblSYSChangeHistory_Items.id,
-- col_id identifies the column, prev_val / new_val hold the old and new values
-- as text.
CREATE TABLE [dbo].[tblSYSChangeHistory_Details](
    [id] [bigint] IDENTITY(1,1) NOT NULL,
    [i_id] [bigint] NOT NULL,
    [col_id] [int] NOT NULL,
    [prev_val] [varchar](max) NULL,
    [new_val] [varchar](max) NULL
)
This table tracks the individual changes. i_id is a foreign key to tblSYSChangeHistory_Items. col_id indicates which column was changed, and prev_val and new_val indicate the original and new values for that field.
There's actually a fourth table that supports this architecture. tblSYSChangeHistory_Objects maps plain English descriptions of operations to particular tables in the database.
The code to look up the history for an item is incredibly convoluted. It's one branch of a very long SP. Relevant parameters are as follows:
-- Parameters of the history procedure that matter for this branch. Every parameter except #action has a default.
-- NOTE(review): the '#' prefix looks like a transcription of '@' -- confirm against the real procedure.
-- NOTE(review): the datetime parameters default to '' -- confirm what date the callers expect that to mean.
#action varchar(50),
#obj_id bigint = 0,
#uid varchar(50) = '',
#prev_val varchar(MAX) = '',
#new_val varchar(MAX) = '',
#start_date datetime = '',
#end_date datetime = ''
I'm storing them to local variables right away (because I was able to significantly speed up another SP by doing so):
-- Local copies of the procedure parameters; the rest of the branch works on these copies
-- (the author reports that the same change sped up another procedure).
-- NOTE(review): the '#' prefix looks like a transcription of '@' -- confirm against the real procedure.
declare #iObj_id bigint,
#cUID varchar(50),
#cPrev_val varchar(max),
#cNew_val varchar(max),
#tStart_date datetime,
#tEnd_date datetime
-- Copy each parameter into its local counterpart.
set #iObj_id = #obj_id
set #cUID = #uid
set #cPrev_val = #prev_val
set #cNew_val = #new_val
set #tStart_date = #start_date
set #tEnd_date = #end_date
And here's the code from that branch of the SP:
-- History lookup branch: collects the requested item plus related items (up to 4 levels) into #r,
-- loads their change history into #tCH_R, fills in missing item descriptions, and returns the result.
-- NOTE(review): #r and #tCH_R are temp tables; the other '#'-prefixed names (#i, #cnt, #iObj_id, ##rowcount, ...)
-- look like transcriptions of '@' / '@@' -- confirm against the real procedure.
-- #r: working set of (obj_id, item_id) pairs; l records the loop level at which the pair was added.
create table #r (obj_id int, item_id int, l tinyint)
create clustered index #ri on #r (obj_id, item_id)
-- Seed (level 0): the requested item id paired with every user table ('U') whose description equals #cPrev_val.
insert into #r
select object_id(obj_name), #iObj_id, 0
from dbo.tblSYSChangeHistory_Objects
where obj_type = 'U' and descr = cast(#cPrev_val AS varchar(150))
declare #i tinyint, #cnt int
set #i = 1
-- Expand the working set level by level, at most 4 levels.
while #i <= 4
begin
-- Add items whose related item was added at the previous level and that are not in #r yet.
insert into #r
select obj_id, item_id, #i
from dbo.vSYSChangeHistoryFK a with (nolock)
where exists (select null from #r where obj_id = a.rel_obj_id and item_id = a.rel_item_id and l = #i - 1)
and not exists (select null from #r where obj_id = a.obj_id and item_id = a.item_id)
-- Remember how many rows the first insert added; ##rowcount is overwritten by the next statement.
set #cnt = ##rowcount
-- Add the related items of items added at the previous or current level, skipping the listed tables.
insert into #r
select rel_obj_id, rel_item_id, #i
from dbo.vSYSChangeHistoryFK a with (nolock)
where object_name(obj_id) not in (<this is a list of particular tables in the database>)
and exists (select null from #r where obj_id = a.obj_id and item_id = a.item_id and l between #i - 1 and #i)
and not exists (select null from #r where obj_id = a.rel_obj_id and item_id = a.rel_item_id)
-- Stop early (by jumping past the loop bound) when neither insert added a row at this level.
set #i = case #cnt + ##rowcount when 0 then 100 else #i + 1 end
end
-- Load the history rows of every collected item; the uid and action filters apply only when supplied (non-empty).
select date, obj_name, item, [uid], [action],
pkey, item_id, id, key_obj_id into #tCH_R
from dbo.vSYSChangeHistory a with (nolock)
where exists (select null from #r where obj_id = a.obj_id and item_id = a.item_id)
and (#cUID = '' or uid = #cUID)
and (#cNew_val = '' or [action] = #cNew_val)
-- For rows with an empty item but a non-empty pkey, look the value up in the source table.
-- The variables are reused here: #cPrev_val receives pkey, #iObj_id receives key_obj_id, #iCol_id receives item_id.
declare ch_item_cursor cursor for
select distinct pkey, key_obj_id, item_id
from #tCH_R
where item = '' and pkey <> ''
open ch_item_cursor
fetch next from ch_item_cursor
into #cPrev_val, #iObj_id, #iCol_id
while ##fetch_status = 0
begin
-- Builds: select <pkey> from <table of key_obj_id> where id = <item_id>; the result is returned in #cNew_val.
-- NOTE(review): pkey and the table name are concatenated into the statement without QUOTENAME -- confirm both come from trusted metadata.
set #SQLStr = 'select #val = ' + #cPrev_val +
' from ' + object_name(#iObj_id) + ' with (nolock)' +
' where id = #id'
exec sp_executesql #SQLStr,
N'#val varchar(max) output, #id int',
#cNew_val output, #iCol_id
-- Write the looked-up value back to every history row of that item.
update #tCH_R
set item = #cNew_val
where key_obj_id = #iObj_id
and item_id = #iCol_id
fetch next from ch_item_cursor
into #cPrev_val, #iObj_id, #iCol_id
end
close ch_item_cursor
deallocate ch_item_cursor
-- Final result set, ordered by history id; item is cut to 254 characters and id is cast to int.
select date, obj_name,
cast(item AS varchar(254)) AS item,
uid, [action],
cast(id AS int) AS id
from #tCH_R
order by id
return
As you can see, the code uses a view. Here's that definition:
-- Pairs a changed item (obj_id, item_id) with a changed item of a table that references it through a
-- foreign key (rel_obj_id, rel_item_id). Only the first column of each foreign key is considered
-- (constraint_column_id = 1). Both branches join vSYSChangeHistoryItemsD to itself through
-- sys.foreign_key_columns on object ids alone; the value comparison in the Details join narrows the pairs.
ALTER VIEW [dbo].[vSYSChangeHistoryFK]
AS
-- Branch 1: the value recorded for the referenced column in the item's first change row (new_val, else
-- prev_val, else '') equals the value recorded for the referencing column in the related item's first change row.
SELECT i.obj_id, i.item_id, c1.parent_object_id AS rel_obj_id, i2.item_id AS rel_item_id
FROM dbo.vSYSChangeHistoryItemsD AS i INNER JOIN
sys.foreign_key_columns AS c1 ON c1.referenced_object_id = i.obj_id AND c1.constraint_column_id = 1 INNER JOIN
dbo.vSYSChangeHistoryItemsD AS i2 ON c1.parent_object_id = i2.obj_id INNER JOIN
dbo.tblSYSChangeHistory_Details AS d1 ON d1.i_id = i.min_id AND d1.col_id = c1.referenced_column_id INNER JOIN
dbo.tblSYSChangeHistory_Details AS d1k ON d1k.i_id = i2.min_id AND d1k.col_id = c1.parent_column_id AND ISNULL(d1.new_val,
ISNULL(d1.prev_val, '')) = ISNULL(d1k.new_val, ISNULL(d1k.prev_val, '')) --LEFT OUTER JOIN
UNION ALL
-- Branch 2: foreign keys whose referenced column is named 'ID'. The value recorded for the referencing
-- column (new_val, else prev_val) is compared with the item id itself, cast to text.
SELECT i0.obj_id, i0.item_id, c01.parent_object_id AS rel_obj_id, i02.item_id AS rel_item_id
FROM dbo.vSYSChangeHistoryItemsD AS i0 INNER JOIN
sys.foreign_key_columns AS c01 ON c01.referenced_object_id = i0.obj_id AND c01.constraint_column_id = 1 AND col_name(c01.referenced_object_id,
c01.referenced_column_id) = 'ID' INNER JOIN
dbo.vSYSChangeHistoryItemsD AS i02 ON c01.parent_object_id = i02.obj_id INNER JOIN
dbo.tblSYSChangeHistory_Details AS d01k ON i02.min_id = d01k.i_id AND d01k.col_id = c01.parent_column_id AND ISNULL(d01k.new_val,
d01k.prev_val) = CAST(i0.item_id AS varchar(max))
And finally, that view uses one more view:
-- For every changed item (table object id + item id), returns the smallest
-- tblSYSChangeHistory_Items.id recorded for it.
ALTER VIEW [dbo].[vSYSChangeHistoryItemsD]
AS
SELECT
    hist.obj_id,
    item.item_id,
    MIN(item.id) AS min_id
FROM dbo.tblSYSChangeHistory_Items AS item
INNER JOIN dbo.tblSYSChangeHistory AS hist
    ON hist.id = item.h_id
GROUP BY
    hist.obj_id,
    item.item_id
Working with the Profiler, it appears that view vSYSChangeHistoryFK is the big culprit, and my testing suggests that the particular problem is in the join between the two copies of vSYSChangeHistoryItemsD and the foreign_key_columns table.
I'm looking for any ideas on how to give acceptable performance here. The client reports sometimes waiting as much as 15 minutes without getting results. I've tested up to nearly 10 minutes with no result in at least one case.
If there were new language elements in 2008 or later that would solve this, I think the client would be willing to upgrade.
Thanks.
Wow, that's a mess. Your big gain should be in removing the cursor. I see 'where exists' - that's nice and efficient because it stops as soon as it finds one match. And I see 'where not exists' - by definition that has to scan everything. Is it finding the top 4? You can do better by using ROW_NUMBER() OVER (PARTITION BY [whatever makes it unique] ORDER BY [whatever your id is]). It's hard to tell. select object_id(obj_name), #iObj_id, 0 makes it seem like only the #i=1 loop actually does anything (?)
If that is what it's doing, you could write it as
-- First four related items (highest item_id first) per object, for objects
-- whose description matches @cPrev_val.
-- obj_type and descr are columns of tblSYSChangeHistory_Objects, not of the
-- view, so the view is joined to that table on the object id.
-- NOTE(review): assumes one tblSYSChangeHistory_Objects row per object; several
-- rows for the same object would repeat items -- confirm.
SELECT paged.[Row], paged.obj_id, paged.item_id
FROM
(
    SELECT ROW_NUMBER() OVER (PARTITION BY a.obj_id ORDER BY a.item_id DESC) AS [Row],
           a.obj_id,
           a.item_id
    FROM dbo.vSYSChangeHistoryFK a WITH (NOLOCK)
    INNER JOIN dbo.tblSYSChangeHistory_Objects o
        ON OBJECT_ID(o.obj_name) = a.obj_id
    WHERE o.obj_type = 'U'
      AND o.descr = CAST(@cPrev_val AS varchar(150))
) paged
WHERE paged.[Row] BETWEEN 1 AND 4
ORDER BY paged.[Row]
A DBA level change that could help would be to set up a partitioning scheme based on date. Roll over to a new partition every so often. Put the old partitions on different disks. Most queries may only need to hit the recent partition, which will be say 1/5th the size that it used to be, making it much faster without changing anything else.
Not a full answer, sorry. That mess would take hours to parse

SQL Server 2008 Running trigger after Insert, Update locks original table

I have a serious performance problem.
I have a database with (related to this problem), 2 tables.
1 Table contains strings with some global information. The second table contains the string stripped down to each individual word. So the string is like indexed in the second table, word by word.
The validity of the data in the second table is of less important then the validity of the data in the first table.
Since the first table can grow like towards 1*10^6 records and the second table having an average of like 10 words for 1 string can grow like 1*10^7 records, i use a nolock in order to read the second this leaves me free for inserting new records without locking it (Expect many reads on both tables).
I have a script which keeps on adding and updating rows to the first table in a MERGE statement. On average, the data beeing merged are like 20 strings a time and the scripts runs like ones every 5 seconds.
On the first table, I have a trigger which is invoked on an insert or update. It takes the newly inserted or updated data and calls a stored procedure on it, which makes sure the data is indexed in the second table. (This takes a significant amount of time.)
The problem is that with the trigger disabled, reading the first table takes a few ms. However, with the trigger enabled, if you are unlucky enough to read the first table while it is being updated, our webserver gives you a timeout after 10 seconds (which is way too long anyway).
I can guess from this that while the trigger is running, the first table is kept (partially) locked until the trigger has completed.
What do you think, if i'm right, is there a easy way around this?
Thanks in advance!
As requested:
-- Re-indexes the title and description of every FeedItems row touched by an
-- INSERT or UPDATE.
-- The trigger fires once per statement, not once per row: a MERGE that writes
-- 20 rows puts 20 rows into INSERTED. Reading INSERTED into scalar variables
-- keeps only one of them, so every row is processed here.
-- The indexing procedure runs inside the transaction of the statement that
-- fired the trigger, so locks taken by that statement are held until all rows
-- have been indexed.
ALTER TRIGGER [dbo].[OnFeedItemsChanged]
ON [dbo].[FeedItems]
AFTER INSERT, UPDATE
AS
BEGIN
    -- SET NOCOUNT ON added to prevent extra result sets from
    -- interfering with SELECT statements.
    SET NOCOUNT ON;

    DECLARE @id int,
            @title nvarchar(MAX),
            @description nvarchar(MAX);

    -- The indexing procedure takes one item at a time, so walk the changed rows.
    DECLARE changed_items CURSOR LOCAL FAST_FORWARD FOR
        SELECT ID,
               dbo.RemoveNonAlphaCharacters(Title),
               dbo.RemoveNonAlphaCharacters([Description])
        FROM INSERTED;

    OPEN changed_items;
    FETCH NEXT FROM changed_items INTO @id, @title, @description;

    WHILE @@FETCH_STATUS = 0
    BEGIN
        EXEC dbo.usp_index_itemstring @id, @title;
        EXEC dbo.usp_index_itemstring @id, @description;

        FETCH NEXT FROM changed_items INTO @id, @title, @description;
    END

    CLOSE changed_items;
    DEALLOCATE changed_items;
END
The FeedItems table is populated by this query:
-- Upsert the staged rows into FeedItems, matching on (Service, GUID):
-- matched rows get their Title, Description, Uri and Readers refreshed,
-- unmatched rows are inserted.
MERGE INTO FeedItems AS i
USING #newitems AS d
    ON  i.Service = d.Service
    AND i.GUID = d.GUID
WHEN MATCHED THEN
    UPDATE SET
        i.Title       = d.Title,
        i.Description = d.Description,
        i.Uri         = d.Uri,
        i.Readers     = d.Readers
WHEN NOT MATCHED THEN
    INSERT (Service, Title, Uri, GUID, Description, Readers)
    VALUES (d.Service, d.Title, d.Uri, d.GUID, d.Description, d.Readers);
The sproc usp_index_itemstring populates the second table, and executing it does indeed take its time. The problem is that while this trigger is executing, queries against the FeedItems table mostly time out (even queries that don't use the second table).
First table:
USE [ICI]
GO
/****** Object: Table [dbo].[FeedItems] Script Date: 04/09/2010 15:03:31 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Feed items; ID is the identity primary key.
CREATE TABLE dbo.FeedItems
(
    ID           int IDENTITY(1,1)  NOT NULL,
    Service      int                NOT NULL,
    Title        nvarchar(max)      NULL,
    Uri          nvarchar(max)      NULL,
    Description  nvarchar(max)      NULL,
    GUID         nvarchar(255)      NULL,
    Inserted     smalldatetime      NOT NULL,
    Readers      int                NOT NULL,
    CONSTRAINT PK_FeedItems PRIMARY KEY CLUSTERED (ID ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Service references FeedServices.ID; deleting a service deletes its items.
ALTER TABLE dbo.FeedItems WITH CHECK
    ADD CONSTRAINT FK_FeedItems_FeedServices
    FOREIGN KEY (Service) REFERENCES dbo.FeedServices (ID)
    ON DELETE CASCADE
GO
ALTER TABLE dbo.FeedItems CHECK CONSTRAINT FK_FeedItems_FeedServices
GO
-- Inserted defaults to the current date and time.
ALTER TABLE dbo.FeedItems
    ADD CONSTRAINT DF_FeedItems_Inserted DEFAULT (getdate()) FOR Inserted
GO
Second table:
USE [ICI]
GO
/****** Object: Table [dbo].[FeedItemPhrases] Script Date: 04/09/2010 15:04:47 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Link table between feed items and phrases, with a count per pair.
CREATE TABLE dbo.FeedItemPhrases
(
    FeedItem  int       NOT NULL,
    Phrase    int       NOT NULL,
    [Count]   smallint  NOT NULL,
    CONSTRAINT PK_FeedItemPhrases PRIMARY KEY CLUSTERED
    (
        FeedItem ASC,
        Phrase ASC
    )
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
          ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
-- FeedItem references FeedItems.ID; updates and deletes cascade.
ALTER TABLE dbo.FeedItemPhrases WITH CHECK
    ADD CONSTRAINT FK_FeedItemPhrases_FeedItems
    FOREIGN KEY (FeedItem) REFERENCES dbo.FeedItems (ID)
    ON UPDATE CASCADE
    ON DELETE CASCADE
GO
ALTER TABLE dbo.FeedItemPhrases CHECK CONSTRAINT FK_FeedItemPhrases_FeedItems
GO
-- Phrase references Phrases.ID; updates and deletes cascade.
ALTER TABLE dbo.FeedItemPhrases WITH CHECK
    ADD CONSTRAINT FK_FeedItemPhrases_Phrases
    FOREIGN KEY (Phrase) REFERENCES dbo.Phrases (ID)
    ON UPDATE CASCADE
    ON DELETE CASCADE
GO
ALTER TABLE dbo.FeedItemPhrases CHECK CONSTRAINT FK_FeedItemPhrases_Phrases
GO
And more:
-- Indexes the words and multi-word phrases of @text for feed item @item:
--   1. splits @text on single spaces into words,
--   2. builds every run of 2, 3 and 4 consecutive words,
--   3. removes phrases that are listed in ExcludedPhrases,
--   4. adds phrases that are not yet present to Phrases,
--   5. upserts the per-item phrase counts into FeedItemPhrases.
-- Parameters:
--   @item  value written to FeedItemPhrases.FeedItem (references FeedItems.ID)
--   @text  space-separated text to index; NULL is treated as "nothing to index"
-- NOTE(review): consecutive spaces produce empty-string "words", and a phrase
-- longer than 256 characters does not fit the table variable - confirm that
-- callers normalise the text first.
ALTER PROCEDURE [dbo].[usp_index_itemstring]
    @item INT,
    @text NVARCHAR(MAX)
AS
BEGIN
    -- SET NOCOUNT ON added to prevent extra result sets from
    -- interfering with SELECT statements.
    SET NOCOUNT ON;

    -- Without this guard a NULL text yields a single NULL phrase, which the
    -- MERGE below would try to insert into Phrases (NULL never matches).
    IF @text IS NULL
        RETURN;

    -- Working set: single words (Index = position in the text, starting at 1)
    -- and, later, the combined phrases (Index = 0).
    DECLARE @tempPhrases TABLE
    (
        [Index]  INT,
        [Phrase] NVARCHAR(256)
    );

    -- Split the text on spaces. Each recursion step yields the next word's
    -- start position and the position of the space that ends it
    -- ([stop] = 0 means "no further space": the last word runs to the end).
    WITH Pieces (pn, start, [stop]) AS
    (
        SELECT 1, 1, CHARINDEX(' ', @text)
        UNION ALL
        SELECT pn + 1, CAST([stop] + 1 AS INT), CHARINDEX(' ', @text, [stop] + 1)
        FROM Pieces
        WHERE [stop] > 0
    )
    INSERT INTO @tempPhrases ([Index], [Phrase])
    SELECT
        pn,
        SUBSTRING(@text, start, CASE WHEN [stop] > 0 THEN [stop] - start ELSE LEN(@text) END)
    FROM Pieces
    OPTION (MAXRECURSION 0);  -- lift the default recursion limit of 100 words

    -- Build the 2-, 3- and 4-word phrases from adjacent words and add them to
    -- the working set. They get Index 0; [Index] is not read after this point.
    WITH CombinedPhrases ([Phrase]) AS
    (
        -- all 2-word combinations
        SELECT w1.[Phrase] + ' ' + w2.[Phrase]
        FROM @tempPhrases AS w1
        INNER JOIN @tempPhrases AS w2 ON w1.[Index] + 1 = w2.[Index]

        UNION ALL

        -- all 3-word combinations
        SELECT w1.[Phrase] + ' ' + w2.[Phrase] + ' ' + w3.[Phrase]
        FROM @tempPhrases AS w1
        INNER JOIN @tempPhrases AS w2 ON w1.[Index] + 1 = w2.[Index]
        INNER JOIN @tempPhrases AS w3 ON w1.[Index] + 2 = w3.[Index]

        UNION ALL

        -- all 4-word combinations
        SELECT w1.[Phrase] + ' ' + w2.[Phrase] + ' ' + w3.[Phrase] + ' ' + w4.[Phrase]
        FROM @tempPhrases AS w1
        INNER JOIN @tempPhrases AS w2 ON w1.[Index] + 1 = w2.[Index]
        INNER JOIN @tempPhrases AS w3 ON w1.[Index] + 2 = w3.[Index]
        INNER JOIN @tempPhrases AS w4 ON w1.[Index] + 3 = w4.[Index]
    )
    INSERT INTO @tempPhrases ([Index], [Phrase])
    SELECT 0, [Phrase]
    FROM CombinedPhrases;

    -- Drop every phrase whose Phrases row is flagged in ExcludedPhrases.
    DELETE FROM @tempPhrases
    WHERE [Phrase] IN
    (
        SELECT ph.[Text]
        FROM Phrases AS ph
        INNER JOIN ExcludedPhrases AS ex ON ex.ID = ph.ID
    );

    -- Add only the phrases that do not exist in Phrases yet.
    -- The column list assumes [Text] is the single insertable column, which is
    -- what the original positional one-value INSERT required - TODO confirm.
    MERGE INTO Phrases AS p
    USING
    (
        SELECT DISTINCT Phrase FROM @tempPhrases
    ) AS t
    ON p.[Text] = t.Phrase
    WHEN NOT MATCHED THEN
        INSERT ([Text]) VALUES (t.Phrase);

    -- Finally create/refresh the relations between the phrases and the feed
    -- item: one row per phrase with the number of times it occurs in @text.
    -- NOTE(review): NOLOCK reads uncommitted Phrases rows; presumably kept to
    -- avoid blocking on concurrent indexing - confirm this is intended.
    MERGE INTO FeedItemPhrases AS p
    USING
    (
        SELECT
            @item              AS [Item],
            MIN(ph.[ID])       AS Phrase,
            COUNT(tp.[Phrase]) AS [Count]
        FROM Phrases AS ph WITH (NOLOCK)
        INNER JOIN @tempPhrases AS tp ON ph.[Text] = tp.[Phrase]
        GROUP BY tp.[Phrase]
    ) AS t
    ON  p.FeedItem = t.Item
    AND p.Phrase   = t.Phrase
    WHEN MATCHED THEN
        UPDATE SET p.[Count] = t.[Count]
    WHEN NOT MATCHED THEN
        INSERT ([FeedItem], [Phrase], [Count])
        VALUES (t.[Item], t.Phrase, t.[Count]);
END
and more:
-- Returns @Temp with every character removed that is not a letter in the
-- range a-z or a space. URL-encoded spaces ('%20') are turned into real
-- spaces first so that they are kept.
-- Whether upper-case or accented letters match [a-z] depends on the collation
-- in effect. NULL input returns NULL (PATINDEX yields NULL, so the loop is
-- skipped).
ALTER FUNCTION [dbo].[RemoveNonAlphaCharacters](@Temp NVARCHAR(MAX))
RETURNS NVARCHAR(MAX)
AS
BEGIN
    -- Position of the next character to strip; 0 when none is left.
    -- BIGINT because PATINDEX returns bigint for MAX-typed input.
    DECLARE @BadPos BIGINT;

    SET @Temp = REPLACE(@Temp, '%20', ' ');

    SET @BadPos = PATINDEX('%[^a-z ]%', @Temp);
    WHILE @BadPos > 0
    BEGIN
        -- Delete the single offending character, then look for the next one.
        SET @Temp   = STUFF(@Temp, @BadPos, 1, '');
        SET @BadPos = PATINDEX('%[^a-z ]%', @Temp);
    END;

    RETURN @Temp;
END
I looked around on the internet, but I couldn't find any way of making the trigger fire without it claiming a lock. Therefore I chose to do the inserts via a stored procedure, which in turn performs the logic previously found in the trigger. This allowed me to execute the former trigger logic in a transaction AFTER the actual data had been inserted and the insertion lock had been released.
Hope this helps!

Resources