Selective XML index query plan - sql-server

I am currently exploring SQL Server XML columns and selective XML indexes for our needs. To do so, I created a table called Incidents and created a selective XML index and secondary selective XML indexes (scripts below).
When I run the following query it does use the selective index, but the query plan applies an IS NOT NULL predicate to the Severity column data and then sorts on it. This degrades the performance of the query significantly when the table is large: with 4 million rows in the table, the following query takes ~20 seconds to complete.
Am I missing anything here?
-- First 100 incidents whose Severity is 1, ordered by OwningTenantId.
-- Every value is extracted from the XML column with value().
SELECT TOP 100
    Data.value('(/Incident/Severity)[1]', 'int') AS Severity,
    Data.value('(/Incident/OwningTenantId)[1]', 'VARCHAR(800)') AS OwningTenantId,
    Data.value('(/Incident/OwningTeamId)[1]', 'NVARCHAR(800)') AS OwningTeamId
FROM Incidents
WHERE Data.value('(/Incident/Severity)[1]', 'int') = 1
ORDER BY Data.value('(/Incident/OwningTenantId)[1]', 'NVARCHAR(800)')
Index:
-- Incidents: one XML document per row, keyed by a GUID.
CREATE TABLE [dbo].[Incidents]
(
    [id]   [uniqueidentifier] NOT NULL,
    [Data] [xml]              NOT NULL,
    CONSTRAINT [PK_Incidents] PRIMARY KEY CLUSTERED ([id] ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
        ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
-- Selective XML index on Incidents.Data promoting four /Incident child paths,
-- each declared with a SQL type and as a SINGLETON.
CREATE SELECTIVE XML INDEX sxi_Incident_Data
    ON Incidents (Data)
FOR
(
    Severity       = '/Incident/Severity'       AS SQL INT           SINGLETON,
    OwningTeamId   = '/Incident/OwningTeamId'   AS SQL NVARCHAR(400) SINGLETON,
    OwningTenantId = '/Incident/OwningTenantId' AS SQL NVARCHAR(400) SINGLETON,
    id             = '/Incident/_id'            AS SQL BIGINT        SINGLETON
)
GO
-- Secondary selective XML indexes: one per promoted path of sxi_Incident_Data.
CREATE XML INDEX sxi_secondary_severity
    ON Incidents (Data)
    USING XML INDEX sxi_Incident_Data
    FOR (Severity);
GO
CREATE XML INDEX sxi_secondary_OwningTeamId
    ON Incidents (Data)
    USING XML INDEX sxi_Incident_Data
    FOR (OwningTeamId);
GO
CREATE XML INDEX sxi_secondary_OwningTenantId
    ON Incidents (Data)
    USING XML INDEX sxi_Incident_Data
    FOR (OwningTenantId);
GO
CREATE XML INDEX sxi_secondary_Id
    ON Incidents (Data)
    USING XML INDEX sxi_Incident_Data
    FOR (id);
GO
Sample XML:
<Incident>
<_id>123</_id>
<Severity>3</Severity>
<IncidentStatus>RESOLVED</IncidentStatus>
<CreateDate>2014-05-04 05:43:58.317</CreateDate>
<LastUpdateDate>2014-05-06 18:47:39.037</LastUpdateDate>
<AlertSourceLocalId>20070</AlertSourceLocalId>
<SourceIncidentId>35d0bfe4-ccb9-491f-a30c-ea7685ffe8c0</SourceIncidentId>
<SourceCreateDate>2014-05-04 02:51:14.000</SourceCreateDate>
<SourceCreatedBy>Someone</SourceCreatedBy>
<SourceModifiedDate>2014-05-04 05:43:57.797</SourceModifiedDate>
<SourceOrigin>Some Origin</SourceOrigin>
<CorrelationId>correlatioid</CorrelationId>
<RoutingId>Route123</RoutingId>
<Datacenter>Unknown</Datacenter>
<Environment>INT</Environment>
<DeviceGroup>Devicegroup</DeviceGroup>
<DeviceName>DeviceName</DeviceName>
<RaisingEnvironment>PROD</RaisingEnvironment>
<RaisingDatacenter>Unknown</RaisingDatacenter>
<RaisingDeviceGroup>DEviceGroup</RaisingDeviceGroup>
<RaisingDeviceName>FakeDevice</RaisingDeviceName>
<PrimaryIncidentId>1234</PrimaryIncidentId>
<RelatedLinksCount>0</RelatedLinksCount>
<ExternalLinksCount>0</ExternalLinksCount>
<HitCount>0</HitCount>
<ChildCount>0</ChildCount>
<Title>Some Title</Title>
<ReproSteps></ReproSteps>
<OwningTenantId>564</OwningTenantId>
<OwningTeamId>123</OwningTeamId>
<ResolveDate>2014-05-06 18:47:39.037</ResolveDate>
<ResolvedBy>SomeOne</ResolvedBy>
<MitigateDate>2014-05-06 18:45:55.403</MitigateDate>
<MitigatedBy>Someone</MitigatedBy>
<Mitigation>N/A</Mitigation>
<IsNoise>0</IsNoise>
<IsSecurityRisk>0</IsSecurityRisk>
<IsCustomerImpacting>0</IsCustomerImpacting>
<OriginatingTenantId>10066</OriginatingTenantId>
<ImpactStartDate>2014-05-01 23:31:22.000</ImpactStartDate>
<RootCauseNeedsInvestigation>0</RootCauseNeedsInvestigation>
<ConnectorTenantId>10066</ConnectorTenantId>
<RelationshipId>1852546</RelationshipId>
<SuppressAutoUpdate>0</SuppressAutoUpdate>
</Incident>
Repro:
Create Table indices
-- Create Table
-- Drop any previous copy first so the repro can be re-run. The existence check
-- is schema-qualified so it matches exactly the table that is recreated below.
IF OBJECT_ID(N'dbo.XmlTable', N'U') IS NOT NULL
BEGIN
    DROP TABLE [dbo].[XmlTable]
END
-- The primary key is declared as a table-level constraint on [id]
-- (note the comma that ends the [Data] column definition).
CREATE TABLE [dbo].[XmlTable](
    [id] [uniqueidentifier] NOT NULL,
    [Data] [xml] NULL,
    PRIMARY KEY CLUSTERED
    (
        [id] ASC
    )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
-- Populate Data
-- Inserts 500 small <Incident> documents. Severity, team, tenant and status
-- are all derived from (@i % 3), so each takes one of three values.
DECLARE @i INT = 0
DECLARE @XML NVARCHAR(MAX),
        @Severity INT,
        @OwningTeamId VARCHAR(400),
        @OwningTenantId VARCHAR(400),
        @IncidentStatus VARCHAR(100),
        @Mod SMALLINT
WHILE @i < 500
BEGIN
    SET @i = @i + 1
    SET @Mod = @i % 3
    -- Explicit VARCHAR lengths: wide enough for any INT, including the sign.
    SELECT @Severity = @Mod + 1,
           @OwningTeamId = 'OwningTeam' + CAST(@Mod AS VARCHAR(11)),
           @OwningTenantId = 'OwningTenantId' + CAST(@Mod AS VARCHAR(11)),
           @IncidentStatus = CASE @Mod
                                 WHEN 0 THEN 'Active'
                                 WHEN 1 THEN 'Resolved'
                                 WHEN 2 THEN 'Closed'
                             END
    SET @XML =
        '<Incident>' +
        '<_id>' + CAST(@i AS VARCHAR(11)) + '</_id>' +
        '<Severity>' + CAST(@Severity AS VARCHAR(11)) + '</Severity>' +
        '<OwningTeamId>' + @OwningTeamId + '</OwningTeamId>' +
        '<OwningTenantId>' + @OwningTenantId + '</OwningTenantId>' +
        '<IncidentStatus>' + @IncidentStatus + '</IncidentStatus>' +
        '</Incident>'
    -- Name the target columns so the insert does not depend on column order.
    INSERT INTO [dbo].[XmlTable] ([id], [Data])
    SELECT NEWID(), @XML
END
-- Create Indices
-- Selective XML index on XmlTable.Data: five promoted /Incident child paths,
-- each typed and declared SINGLETON.
CREATE SELECTIVE XML INDEX [sxi_Data]
    ON [dbo].[XmlTable] ([Data])
FOR
(
    [Severity]       = '/Incident/Severity'       AS SQL [int]           SINGLETON,
    [OwningTeamId]   = '/Incident/OwningTeamId'   AS SQL [nvarchar](400) SINGLETON,
    [OwningTenantId] = '/Incident/OwningTenantId' AS SQL [nvarchar](400) SINGLETON,
    [id]             = '/Incident/_id'            AS SQL [bigint]        SINGLETON,
    [TicketStatus]   = '/Incident/IncidentStatus' AS SQL [nvarchar](100) SINGLETON
)
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF,
      ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
-- Secondary selective XML indexes over [sxi_Data], one per promoted path
-- ([id], [OwningTeamId], [OwningTenantId], [Severity]).
CREATE XML INDEX [sxi_secondary_Id]
    ON [dbo].[XmlTable] ([Data])
    USING XML INDEX [sxi_Data] FOR ([id])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF,
          ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
CREATE XML INDEX [sxi_secondary_OwningTeamId]
    ON [dbo].[XmlTable] ([Data])
    USING XML INDEX [sxi_Data] FOR ([OwningTeamId])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF,
          ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
USE [XMLDocuemntStore]
GO
CREATE XML INDEX [sxi_secondary_OwningTenantId]
    ON [dbo].[XmlTable] ([Data])
    USING XML INDEX [sxi_Data] FOR ([OwningTenantId])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF,
          ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
USE [XMLDocuemntStore]
GO
CREATE XML INDEX [sxi_secondary_severity]
    ON [dbo].[XmlTable] ([Data])
    USING XML INDEX [sxi_Data] FOR ([Severity])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF,
          ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
Sample Query: Check the query plan on right sides.
-- Severity of the first 100 severity-1 rows, ordered by OwningTenantId.
SELECT TOP 100
    Data.value('(/Incident/Severity)[1]', 'int') AS Severity
FROM XmlTable
WHERE Data.value('(/Incident/Severity)[1]', 'int') = 1
ORDER BY Data.value('(/Incident/OwningTenantId)[1]', 'NVARCHAR(800)')

The SORT TOP N is needed because of the [1] in your XPath query. To get rid of it, you need to assure SQL Server that the required XML element occurs only once within an Incident element. For that you need to strongly type your XML using an XSD document. You can create one like so:
-- Registers the XML schema collection Incident_XSD. The schema declares an
-- Incident element whose content is a sequence of _id, Severity, OwningTeamId,
-- OwningTenantId and IncidentStatus child elements.
CREATE XML SCHEMA COLLECTION Incident_XSD AS
N'<?xml version="1.0" encoding="UTF-16"?>
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="Incident">
<xs:complexType>
<xs:sequence>
<xs:element type="xs:int" name="_id" />
<xs:element type="xs:int" name="Severity" />
<xs:element type="xs:string" name="OwningTeamId" />
<xs:element type="xs:string" name="OwningTenantId" />
<xs:element type="xs:string" name="IncidentStatus"/>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>' ;
GO
Use it in your table definition like so
[Data] [xml](Incident_XSD) NULL
Now the following query is valid
-- Same query against the typed XML column: the [1] is applied to /Incident
-- rather than to the whole path expression.
SELECT TOP 100
    Data.value('/Incident[1]/Severity', 'int') AS Severity
FROM XmlTable
WHERE Data.value('/Incident[1]/Severity', 'int') = 1
ORDER BY Data.value('/Incident[1]/OwningTenantId', 'NVARCHAR(800)')
This returns within a second or two with a million rows in the table.
PS: You might want to reconsider using GUIDs as primary key

Related

How to fix, The operating system returned the error '0xc0000033(Object Name invalid.)', attempting to insert image data in FILESTREAM database

I've created a database with a FILESTREAM file group along with a table to store images. When trying to insert data...
USE Images
GO
-- Insert a single test row; the text literal is converted to varbinary(max)
-- for the ImageData column.
INSERT INTO ReceivedImages
    (ReceiveID, ImageData, URowID)
VALUES
    (23, CAST('My image data' AS varbinary(max)), NEWID())
GO
I receive the following error message:
The operating system returned the error '0xc0000033(Object Name invalid.)' while attempting 'NtCreateFile' on 'E:\Microsoft SQL Server\MSSQL15.MSSQLSERVER\MSSQL\DATA\TPImages\fdfbf99c-3b4b-4a03-87ae-15b3086639c8\345191cc-a1d2-4db3-8e69-5da238c7e641\00000025-00000302-0003'.
If you know how this issue can be resolved, please post your answer. Thank you!
My dev environment: Windows Server 2016 Standard vm, 16Gb RAM, 64-bit. SqlServer 2019 (15.0.2080).
Scripts used to create the database and the table follow...
USE [master]
GO
-- Creates the Images database next to master.mdf: a primary data file, a
-- FILESTREAM filegroup (FSTPImages) and a log file.
-- Get the SQL Server data path: everything in master's physical file name up
-- to, but not including, 'master.mdf' - so the value keeps its trailing '\'.
DECLARE @data_path nvarchar(256);
SET @data_path = (SELECT SUBSTRING(physical_name, 1, CHARINDEX(N'master.mdf', LOWER(physical_name)) - 1)
FROM master.sys.master_files
WHERE database_id = 1 AND file_id = 1);
-- Execute the CREATE DATABASE statement.
-- @data_path already ends with a path separator, so every file or container
-- name is appended to it directly. Prefixing the FILESTREAM container with
-- another '\' would put a doubled separator into its path.
EXECUTE ('CREATE DATABASE Images
ON PRIMARY
(
NAME = Images_data
,FILENAME = ''' + @data_path + 'Images_data.mdf''
,SIZE = 7MB
,MAXSIZE = 2750MB
,FILEGROWTH = 10%
),
FILEGROUP FSTPImages CONTAINS FILESTREAM
(
NAME = TPIMAGES
, FILENAME = ''' + @data_path + 'TPImages''
, MAXSIZE = 10000 MB
-- SIZE and FILEGROWTH should not be specified here.
-- If they are specified an error will be raised.
)
LOG ON
(
NAME = Images_log
,FILENAME = ''' + @data_path + 'Images_log.ldf''
,SIZE = 5MB
,MAXSIZE = 25MB
,FILEGROWTH = 5MB
)'
);
GO
-- Database option, session settings, then a fresh ReceivedImages table.
ALTER DATABASE [Images] SET AUTO_SHRINK ON
GO
USE [Images]
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
DROP TABLE IF EXISTS dbo.ReceivedImages
GO
-- ImageData is the FILESTREAM column; URowID is its ROWGUIDCOL companion.
CREATE TABLE [dbo].[ReceivedImages]
(
    ImageID    BIGINT IDENTITY(6999, 1) NOT NULL,
    ReceiveID  INT NOT NULL,
    ExpiryDate DATETIME NOT NULL
        CONSTRAINT [DF_ReceivedImages_ExpiryDate] DEFAULT (DATEADD(d, 32, CURRENT_TIMESTAMP)),
    ImageData  VARBINARY(MAX) FILESTREAM NULL,
    -- must have indexed rowguidcol to accompany FILESTREAM column.
    URowID     UNIQUEIDENTIFIER ROWGUIDCOL NOT NULL
        CONSTRAINT [UX_ReceivedImages_URowID] UNIQUE WITH FILLFACTOR = 70,
    CONSTRAINT [PK_ReceivedImages] PRIMARY KEY NONCLUSTERED (ImageID DESC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
        ON [PRIMARY]
)
GO
-- Clustered index on ReceiveID.
-- NOTE(review): the DROP names [IX_ReceivedImages_ReceiveID_Incl_ImageData],
-- which is not the index created right after it - confirm this is intended.
DROP INDEX IF EXISTS [IX_ReceivedImages_ReceiveID_Incl_ImageData]
    ON [dbo].[ReceivedImages]
GO
CREATE CLUSTERED INDEX [IX_ReceivedImages_ReceiveID]
    ON [dbo].[ReceivedImages] ([ReceiveID] ASC)
    WITH (ONLINE = OFF, PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF,
          DROP_EXISTING = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
    ON [PRIMARY]
GO
-- Nonclustered index on ExpiryDate.
DROP INDEX IF EXISTS [IX_ReceivedImages_ExpiryDate]
    ON [dbo].[ReceivedImages]
GO
CREATE NONCLUSTERED INDEX [IX_ReceivedImages_ExpiryDate]
    ON [dbo].[ReceivedImages] ([ExpiryDate])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF,
          ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
    ON [PRIMARY]
GO
ALTER AUTHORIZATION ON [dbo].[ReceivedImages] TO SCHEMA OWNER
GO

Why is there a table scan even though I have index on IGroupes table?

This is my query:
-- Parameterised paging query run through sp_executesql.
-- The CTE numbers Documents rows with DocumentStatusID = 9 by
-- LastStatusChangedDateTime descending (peta_rn); the outer query returns the
-- rows with @7 < peta_rn <= @8.
-- Parameters @0..@6 are declared and bound but not referenced in the statement text.
exec sp_executesql N'set arithabort off;set statistics time on; set transaction isolation level read uncommitted;With cte as (Select peta_rn = ROW_NUMBER() OVER (ORDER BY d.LastStatusChangedDateTime desc )
, d.DocumentID,
d.IsReEfiled, d.IGroupID, d.ITypeID, d.RecordingDateTime, d.CreatedByAccountID, d.JurisdictionID,
d.LastStatusChangedDateTime as LastStatusChangedDateTime
, d.IDate, d.InstrumentID, d.DocumentStatusID
, u.Username
, it.Abbreviation AS ITypeAbbreviation
, ig.Abbreviation AS IGroupAbbreviation,
d.DocumentDate
From Documents d
Inner Join ITypes it on it.ITypeID = d.ITypeID
Inner Join Users u on d.UserID = u.UserID Inner Join IGroupes ig on ig.IGroupID = d.IGroupID
Where 1=1 And ( d.DocumentStatusID = 9 ) ) Select cte.DocumentID,
cte.IsReEfiled, cte.IGroupID, cte.ITypeID, cte.RecordingDateTime, cte.CreatedByAccountID, cte.JurisdictionID,
cte.LastStatusChangedDateTime as LastStatusChangedDateTime
, cte.IDate, cte.InstrumentID, cte.DocumentStatusID,cte.IGroupAbbreviation, cte.Username, j.JDAbbreviation, inf.DocumentName,
cte.ITypeAbbreviation, cte.DocumentDate, ds.Abbreviation as DocumentStatusAbbreviation, ds.Name as DocumentStatusName,
( SELECT CAST(CASE WHEN cte.DocumentID = (
SELECT TOP 1 doc.DocumentID
FROM Documents doc
WHERE doc.JurisdictionID = cte.JurisdictionID
AND doc.DocumentStatusID = cte.DocumentStatusID
ORDER BY LastStatusChangedDateTime)
THEN 1
ELSE 0
END AS BIT)
) AS CanChangeStatus ,
Upper((Select Top 1 Stuff( (Select ''='' + dbo.GetDocumentNameFromParamsWithPartyType(Business, FirstName, MiddleName, LastName, t.Abbreviation, NameTypeID, pt.Abbreviation, IsGrantor, IsGrantee) From DocumentNames dn
Left Join Titles t
on dn.TitleID = t.TitleID
Left Join PartyTypes pt
On pt.PartyTypeID = dn.PartyTypeID
Where DocumentID = cte.DocumentID
For XML PATH('''')),1,1,''''))) as FlatDocumentName, (SELECT COUNT(*) FROM CTE) AS TotalRecords
FROM cte Left Join DocumentStatuses ds On
cte.DocumentStatusID = ds.DocumentStatusID Left Join InstrumentFiles inf On cte.DocumentID = inf.DocumentID
Left Join Jurisdictions j on j.JurisdictionID = cte.JurisdictionID Where 1=1 And
peta_rn>@7 AND peta_rn<=@8 Order by peta_rn set statistics time off; ',N'@0 int,@1 int,@2 int,@3 int,@4 int,@5 int,@6 int,@7 int,@8 int',
@0=1,@1=5,@2=9,@3=1,@4=5,@5=9,@6=1,@7=97500,@8=97550
And this is my IGroupes table definition:
-- IGroupes lookup table; the primary key on IGroupID is NONCLUSTERED.
CREATE TABLE [dbo].[IGroupes]
(
    [IGroupID]       [int] IDENTITY(1,1) NOT NULL,
    [Name]           [varchar](64)       NOT NULL,
    [JurisdictionID] [int]               NOT NULL,
    [Abbreviation]   [varchar](12)       NOT NULL,
    CONSTRAINT [PK_IGroupes] PRIMARY KEY NONCLUSTERED ([IGroupID] ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
        ON [PRIMARY]
) ON [PRIMARY]
GO
-- Three nonclustered indexes on IGroupes, keyed on Abbreviation,
-- JurisdictionID and Name respectively; each INCLUDEs the other three columns.
SET ANSI_PADDING OFF
GO
SET ANSI_PADDING ON
GO
/* Index [IX_IGroupes_Abbreviation] - scripted 10/11/2013 4:21:46 AM */
CREATE NONCLUSTERED INDEX [IX_IGroupes_Abbreviation]
    ON [dbo].[IGroupes] ([Abbreviation] ASC)
    INCLUDE ([IGroupID], [Name], [JurisdictionID])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF,
          ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
    ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/* Index [IX_IGroupes_JurisdictionID] - scripted 10/11/2013 4:21:46 AM */
CREATE NONCLUSTERED INDEX [IX_IGroupes_JurisdictionID]
    ON [dbo].[IGroupes] ([JurisdictionID] ASC)
    INCLUDE ([IGroupID], [Name], [Abbreviation])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF,
          ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
    ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/* Index [IX_IGroupes_Name] - scripted 10/11/2013 4:21:46 AM */
CREATE NONCLUSTERED INDEX [IX_IGroupes_Name]
    ON [dbo].[IGroupes] ([Name] ASC)
    INCLUDE ([IGroupID], [JurisdictionID], [Abbreviation])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF,
          ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 90)
    ON [PRIMARY]
GO
Yet please see it is using table scan. This operation is costing me too much. IGroupes table just has 7 rows and Documents table has approximately 98K records. Yet when I join on d.IGroupID = ig.IGroupID it shows actual number of rows above 600K! That is the problem. Please see the attached screenshot:
In case anybody is interested in the full query plan xml, here it is:
https://www.dropbox.com/s/kldx24x3j8vndpe/plan.xml
Any help is appreciated. Thanks!
None of the 3 indexes (other than the PK) you have on IGroupes are going to help this query because you are not using any of those fields in a where or join clause. Unless you need those indexes for other queries, I would delete them. They are just going to give the query optimizer more choices to test (and reject).
The index on the Primary Key PK_IGroupes should be clustered. That will allow it to do an index seek (or bookmark lookup). If it can't be clustered for some other reason, try creating an index on IGroupID and Abbreviation, in that order (or including the Abbreviation column in the existing PK index).
If it still doesn't pick up the right index, you can use a hint such as WITH(INDEX(0)) or WITH(INDEX('index-name')).
The 600k rows does come from the fact that it is doing a nested loop join on 98k rows multiplied by the 7 rows. If the index above doesn't work, you can try replacing the INNER JOIN iGroupes with INNER HASH JOIN IGroupes.
Probably in this case table scan is more efficient than using any of the indexes you have on the IGroupes table.
If you think table scan operation is bottleneck in this query (though with 3% cost I'm not sure it is) either you may try modifying PK_IGroupes to become clustered index or you may try index like
-- Unique index keyed on IGroupID that also carries Abbreviation.
CREATE UNIQUE NONCLUSTERED INDEX [IX_IGroupes_IGroupID]
    ON [dbo].[IGroupes] ([IGroupID])
    INCLUDE ([Abbreviation])

sp_executesql not using index?

I'm using NHibernate in my web application and I have a problem with index usage in the generated sp_executesql calls. My table has 210 million records and the query is very slow.
At first, there was a problem with the type generated for the column 'kolumna1': in the database the column is varchar, but NHibernate generated an nvarchar parameter. I worked around this by putting a special attribute in the code that forces varchar. After that trick sp_executesql started using indexes and everything was correct. Now the problem is back: sp_executesql takes 10 minutes to finish. When I ran the plain query (without sp_executesql) it took only 1 s. I checked the execution plans for both: sp_executesql was not using the index and the plain query was. Without changing the index, I switched varchar back to nvarchar and sp_executesql finished in 1 s (it used the index). Does anyone have an idea where I made a mistake? Why is the execution plan different for such a small change? And how can I fix it?
Here i attached more code. Just in case if someone need it.
sp_executesql with varchar(8000)
-- Variant 1: the LIKE pattern parameter @p4 is declared as varchar(8000).
exec sp_executesql N'SELECT count(*) as y0_ FROM tabela1 this_ WHERE ((this_.kolumna2 >= @p0 and this_.kolumna2 <= @p1)) and
(this_.kolumna3 in (@p2, @p3) and this_.kolumna1 like @p4)',N'@p0 datetime,@p1 datetime,@p2 int,@p3 int,@p4 varchar(8000)',
@p0='2013-01-08 14:38:00' ,@p1='2013-02-08 14:38:00',@p2=341,@p3=342,@p4='%501096109%'
sp_executesql with nvarchar(4000)
-- Variant 2: same statement, but @p4 is declared as nvarchar(4000).
exec sp_executesql N'SELECT count(*) as y0_ FROM tabela1 this_ WHERE ((this_.kolumna2 >= @p0 and this_.kolumna2 <= @p1)) and
(this_.kolumna3 in (@p2, @p3) and this_.kolumna1 like @p4)',N'@p0 datetime,@p1 datetime,@p2 int,@p3 int,@p4 nvarchar(4000)',
@p0='2013-01-08 14:38:00' ,@p1='2013-02-08 14:38:00',@p2=341,@p3=342,@p4='%501096109%'
The funny part is that in SQL Profiler both queries give the same result:
-- Statement as captured in the profiler (@p4 declared as varchar(8000)).
exec sp_executesql N'SELECT count(*) as y0_ FROM tabela1 this_
WHERE this_.kolumna3 in (@p2, @p3) and ((this_.kolumna2 >= @p0 and this_.kolumna2 <= @p1))
and ( this_.kolumna1 like @p4)',N'@p0 datetime,@p1 datetime,@p2 int,@p3 int,@p4 varchar(8000)',
@p0='2013-01-08 14:38:00' ,@p1='2013-02-08 14:38:00',@p2=341,@p3=342,@p4='%501096109%'
-- Equivalent plain (non-sp_executesql) form using local variables, kept commented out:
--Declare @p0 datetime
--set @p0 = '2013-01-08 14:38:00'
--Declare @p1 datetime
--set @p1 = '2013-02-08 14:38:00'
--Declare @p2 int
--set @p2 = 341
--Declare @p3 int
--set @p3 = 342
--Declare @p4 varchar(8000)
--set @p4 = '%501096109%'
--SELECT count(*) as y0_
--FROM tabela1 this_
--WHERE ((this_.kolumna2 >= @p0 and
--this_.kolumna2 <= @p1)) and
--(this_.kolumna3 in (@p2, @p3) and this_.kolumna1 like @p4)
Here are indexes:
-- tabela1: clustered primary key on id; kolumna1 is varchar(128).
CREATE TABLE [dbo].[tabela1]
(
    [id]       [bigint]       NOT NULL,
    [kolumna1] [varchar](128) NOT NULL,
    [kolumna2] [datetime]     NOT NULL,
    [kolumna3] [int]          NOT NULL,
    CONSTRAINT [PK__tabela1__4F7CD00D] PRIMARY KEY CLUSTERED ([id] ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
        ON [PRIMARY]
) ON [PRIMARY]
-- Nonclustered indexes on [dbo].[tabela1].
-- Every statement targets [dbo].[tabela1]: a bracketed name with a leading
-- space ([ tabela1]) would refer to a different object. Index names are kept
-- exactly as given.
CREATE NONCLUSTERED INDEX [ind_tabela1_ kolumna2] ON [dbo].[tabela1]
(
    [kolumna2] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [ind_ tabela1_ kolumna3] ON [dbo].[tabela1]
(
    [kolumna3] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IX_ tabela1_ kolumna1] ON [dbo].[tabela1]
(
    [kolumna1] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
-- Composite index: kolumna2, then kolumna3.
CREATE NONCLUSTERED INDEX [IX_ tabela1_ kolumna2_ kolumna3] ON [dbo].[tabela1]
(
    [kolumna2] ASC,
    [kolumna3] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
-- Composite index containing all four columns, led by kolumna3.
CREATE NONCLUSTERED INDEX [IX_ tabela1_ kolumna3_ kolumna2_id_ kolumna1] ON [dbo].[tabela1]
(
    [kolumna3] ASC,
    [kolumna2] ASC,
    [id] ASC,
    [kolumna1] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
Below execution plan for query: select count(*) from [dbo].[tabela1] where [kolumna1] like N'%501096109%'
Sql Server query optimizer can choose to use index seek when:
There are another filter predicates besides LIKE. It should be a precise search or at least SARGable predicate
Table is very large (millions of rows)
But seek operation cannot be done when explicit type conversion is used - different collation/datatype.
Another thing is that you cannot control this behavior, and query plans can vary for different predicate sets. To force a seek, you need to use the FORCESEEK hint (version 2008+). You can find information here:
http://msdn.microsoft.com/en-us/library/ms187373%28v=sql.100%29.aspx
Could you try this:
(1) Run the following SQL:
-- Every cached plan together with the SQL text behind its plan_handle.
SELECT *
FROM sys.dm_exec_cached_plans
CROSS APPLY sys.dm_exec_sql_text(plan_handle) t
(2) Use the last column to find the SQL for the first query. It will not contain sp_executesql, but will start with your list of parameters, the last one being a varchar. Get the plan_handle, and use it in the following statement:
dbcc freeproccache (<your_plan_handle>)
Then retry query 1.

How much data to optimize our queries

I have one table with this structure and these indexes:
-- Report4: identity primary key; every other column is nullable, and all but
-- HeaderId (bigint) are stored as nvarchar.
CREATE TABLE [dbo].[Report4]
(
    [Id]          [int] IDENTITY(1,1) NOT NULL,
    [Mesc]        [nvarchar](50)  NULL,
    [Line]        [nvarchar](5)   NULL,
    [Unit]        [nvarchar](5)   NULL,
    [Description] [nvarchar](500) NULL,
    [ST_CODE]     [nvarchar](5)   NULL,
    [PbsNo]       [nvarchar](50)  NULL,
    [PbsDate]     [nvarchar](10)  NULL,
    [PbsQty]      [nvarchar](10)  NULL,
    [PbsQtyRec]   [nvarchar](10)  NULL,
    [QtyConsum1]  [nvarchar](10)  NULL,
    [QtyConsum2]  [nvarchar](10)  NULL,
    [QtyConsum3]  [nvarchar](10)  NULL,
    [QtyConsum4]  [nvarchar](10)  NULL,
    [QtyConsum5]  [nvarchar](10)  NULL,
    [Type]        [nvarchar](20)  NULL,
    [InvQty]      [nvarchar](10)  NULL,
    [TypeRequest] [nvarchar](50)  NULL,
    [HeaderId]    [bigint]        NULL,
    [LOCATION]    [nvarchar](10)  NULL,
    CONSTRAINT [PK_Report4] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
        ON [PRIMARY]
) ON [PRIMARY]
GO
-- Index keyed on HeaderId that INCLUDEs every other column except LOCATION.
CREATE NONCLUSTERED INDEX [HeaderId]
    ON [dbo].[Report4] ([HeaderId] ASC)
    INCLUDE ([Id], [Mesc], [Line], [Unit], [Description], [ST_CODE],
             [PbsNo], [PbsDate], [PbsQty], [PbsQtyRec],
             [QtyConsum1], [QtyConsum2], [QtyConsum3], [QtyConsum4], [QtyConsum5],
             [Type], [InvQty], [TypeRequest])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
          DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
    ON [PRIMARY]
GO
-- NOTE(review): same key column and same INCLUDE list as [HeaderId] above;
-- only the index name differs - confirm both are really needed.
CREATE NONCLUSTERED INDEX [HeaderIdRAll]
    ON [dbo].[Report4] ([HeaderId] ASC)
    INCLUDE ([Id], [Mesc], [Line], [Unit], [Description], [ST_CODE],
             [PbsNo], [PbsDate], [PbsQty], [PbsQtyRec],
             [QtyConsum1], [QtyConsum2], [QtyConsum3], [QtyConsum4], [QtyConsum5],
             [Type], [InvQty], [TypeRequest])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
          DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
    ON [PRIMARY]
GO
-- Filtered indexes keyed on Line.
-- Rows with line = 'I', carrying HeaderId.
CREATE NONCLUSTERED INDEX [LineNOIRHeaderId]
    ON [dbo].[Report4] ([Line] ASC)
    INCLUDE ([HeaderId])
    WHERE ([line]='I')
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
          DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
    ON [PRIMARY]
GO
-- Rows with line = 'H', carrying HeaderId.
CREATE NONCLUSTERED INDEX [LineNoRHeaderId]
    ON [dbo].[Report4] ([Line] ASC)
    INCLUDE ([HeaderId])
    WHERE ([line]='H')
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
          DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
    ON [PRIMARY]
GO
-- Rows with line = 'I', carrying Mesc.
CREATE NONCLUSTERED INDEX [LineNoRMesc]
    ON [dbo].[Report4] ([Line] ASC)
    INCLUDE ([Mesc])
    WHERE ([line]='I')
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
          DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
    ON [PRIMARY]
GO
-- Index keyed on Mesc that INCLUDEs every other column except LOCATION.
CREATE NONCLUSTERED INDEX [MescRAll]
    ON [dbo].[Report4] ([Mesc] ASC)
    INCLUDE ([Id], [Line], [Unit], [Description], [ST_CODE],
             [PbsNo], [PbsDate], [PbsQty], [PbsQtyRec],
             [QtyConsum1], [QtyConsum2], [QtyConsum3], [QtyConsum4], [QtyConsum5],
             [Type], [InvQty], [TypeRequest], [HeaderId])
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
          DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
    ON [PRIMARY]
GO
I want to run this query against this table:
-- Returns one page of report rows by building and executing a dynamic query.
--   @StringWhereParameter  predicate text appended verbatim after each inner
--                          "line = ..." filter of the dynamic query
--   @PageIndex             1-based page number
--   @PageSize              number of rows per page
-- The page is the set of rows with ROW_NUM between @from and @to, where ROW_NUM
-- is ROW_NUMBER() OVER (ORDER BY Id) over the UNION of three lookups.
-- NOTE(review): @StringWhereParameter is concatenated into the statement text
-- unescaped, so it must never carry untrusted input (SQL injection risk).
ALTER PROCEDURE [dbo].[SPSelectReport2] (@StringWhereParameter nvarchar(4000),@PageIndex int,@PageSize int )
AS
BEGIN
SET NOCOUNT ON;
-- Begin Of Transaction
begin tran
-- First and last row number (both inclusive) of the requested page.
declare @from int=(@PageSize*(@PageIndex-1))+1
declare @to int=(@PageIndex*@PageSize)
declare @Query nvarchar(max)
set @Query=' select
distinct id, [Mesc], [Line]
,[Unit]
,[Discription]
,[InvQty]
,[LastDateNil]
,[StCode]
,[PlanCode]
,[MIN]
,[MAX]
,[LastDateConsum]
,[PbsNo]
,[PbsDate]
,[PbsQty]
,[PbsQtyRec]
,[DateDelay]
,[TypeRequest]
,[HeaderId]
,LOCATION
from (
SELECT *, ROW_NUMBER() OVER(ORDER BY Id) ROW_NUM
FROM(
((SELECT Id,[Mesc]
,[Line]
,[Unit]
,[Discription]
,[InvQty]
,[LastDateNil]
,[StCode]
,[PlanCode]
,[MIN]
,[MAX]
,[LastDateConsum]
,[PbsNo]
,[PbsDate]
,[PbsQty]
,[PbsQtyRec]
,[DateDelay]
,[TypeRequest]
,[HeaderId]
,LOCATION
FROM [MyMaterialDB].[dbo].[Report2]
WHERE headerid IN(SELECT HeaderId FROM [MyMaterialDB].[dbo].[Report2] WHERE line=''H'''+ @StringWhereParameter+'))
UNION
(
(SELECT Id,[Mesc]
,[Line]
,[Unit]
,[Discription]
,[InvQty]
,[LastDateNil]
,[StCode]
,[PlanCode]
,[MIN]
,[MAX]
,[LastDateConsum]
,[PbsNo]
,[PbsDate]
,[PbsQty]
,[PbsQtyRec]
,[DateDelay]
,[TypeRequest]
,[HeaderId]
,LOCATION
FROM [MyMaterialDB].[dbo].[Report2]
WHERE mesc IN(SELECT mesc FROM [MyMaterialDB].[dbo].[Report2] WHERE line=''I''' +@StringWhereParameter+'))
UNION
(SELECT Id, [Mesc]
,[Line]
,[Unit]
,[Discription]
,[InvQty]
,[LastDateNil]
,[StCode]
,[PlanCode]
,[MIN]
,[MAX]
,[LastDateConsum]
,[PbsNo]
,[PbsDate]
,[PbsQty]
,[PbsQtyRec]
,[DateDelay]
,[TypeRequest]
,[HeaderId]
,LOCATION
FROM [MyMaterialDB].[dbo].[Report2]
WHERE mesc IN(SELECT HeaderId FROM [MyMaterialDB].[dbo].[Report2] WHERE line=''I'''+@StringWhereParameter+')
)))) a)b where b.ROW_NUM between '+CAST(@from as varchar(10))+' and '+CAST(@to as varchar(10))
-- Order by Mesc,Line,unit
exec(@Query)
--print @Query
--
-- Commit when the dynamic statement reported no error; otherwise roll back.
if @@error = 0
Commit Tran
Else
rollback tran
End
This table holds more than 1,000,000 records, and when I run this stored procedure it takes ten minutes or more. How do I optimize the structure or the query? Please help me. Thanks, all.
For starters you MUST do the following (I think only those actions can take your query execution time down to much less than a minute):
remove every in clause. select ... from ... where a in (select ...) is a) very very very slow, b)dangerous as it can run your sql server out of memory -especially for large datasets like yours-. You must change all in clauses to left outer joins, it is very easy to do this.
Remove the DISTINCT clause. It forces your query to sort behind the scenes and is generally very slow too. Try to rephrase your query so that the records are already distinct.
As long as you have distinct data you must also change union to union all. Union is same as distinct union which goes as my previous statement. If you use union all you will see great improvement to your query... as long as you can bring your data to not needed to be distinct...
If you manage to make at least 2 of the 3 previous changes, i think you will see great improvement. If you can handle all of them, you will have a very fast query.
Also, do not bother changing your dynamic SQL exec into a 'precompiled' stored procedure. I don't think you will gain any significant improvement by doing this (I see you have a dynamic WHERE clause, so this wouldn't be applicable in any case).
Please, tell me if that worked.

SQL Server 2008: Collation ignored in unique index?

I've created a compound unique index on my table:
-- SearchIndexWord: identity primary key plus a unique index over (Word, CatalogID).
CREATE TABLE [dbo].[SearchIndexWord]
(
    [ID]        [int] IDENTITY(1,1) NOT NULL,
    [CatalogID] [int]               NOT NULL,
    [Word]      [nvarchar](100)     NOT NULL,
    CONSTRAINT [PK_SearchIndexWord] PRIMARY KEY CLUSTERED ([ID] ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF,
              ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
        ON [PRIMARY]
) ON [PRIMARY]
CREATE UNIQUE NONCLUSTERED INDEX [IX_SearchIndexWord]
    ON [dbo].[SearchIndexWord] ([Word] ASC, [CatalogID] ASC)
    WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
          DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
    ON [PRIMARY]
The collation for the entire database is set to SQL_Latin1_General_CP1_CI_AS. When I run the following T-SQL, it prints 'Does not equal':
-- Compare the two literals exactly as written (no N prefix, no explicit cast).
IF 'm3/h' <> 'm³/h'
    PRINT 'Does not equal'
ELSE
    PRINT 'Equals'
Then, if I try the following insert statement:
-- Two rows with the same CatalogID whose words differ only in '3' versus '³'.
INSERT INTO [SearchIndexWord] (Word, CatalogID)
VALUES
    ('m3/h', 1),
    ('m³/h', 1)
I get the following error message:
Msg 2601, Level 14, State 1, Line 1
Cannot insert duplicate key row in object 'dbo.SearchIndexWord' with unique index 'IX_SearchIndexWord'.
Why is this? I couldn't find it in the docs, but I assume the condition of two keys being duplicate is examined using the configured collation.
I've checked the table, column and index collation by the way, and they're all equal to the database collation.
Try this:
-- Same comparison, with both operands cast to NVARCHAR(100) first.
IF CAST('m3/h' AS NVARCHAR(100)) <> CAST('m³/h' AS NVARCHAR(100))
    PRINT 'Does not equal'
ELSE
    PRINT 'Equals'
For me, this returns 'Equals' which explains why you're getting the duplicate key row error.
I suspect the values in the code IF 'm3/h' = 'm³/h' are created as VARCHAR.

Resources