T-SQL for finding Redundant Indexes - sql-server

Is anyone aware of a T-SQL script that can detect redundant indexes across an entire database? An example of a redundant index in a table would be as follows:
Index 1: 'ColumnA', 'ColumnB', 'ColumnC'
Index 2: 'ColumnA', 'ColumnB'
Ignoring other considerations, such as the width of columns and covering indexes, Index 2 would be redundant.
Thanks.

There are situations where the redundancy doesn't hold. For example, say ColumnC was a huge field, but you'd sometimes have to retrieve it quickly. Your index 1 would not require a key lookup for:
-- ColumnC is part of Index 1 (ColumnA, ColumnB, ColumnC), so it can be read from the index
select ColumnC from YourTable where ColumnA = 12
On the other hand index 2 is much smaller, so it can be read in memory for queries that require an index scan:
-- A leading-wildcard LIKE cannot seek on ColumnA, so an index has to be scanned
select * from YourTable where ColumnA like '%hello%'
So they're not really redundant.
If you're not convinced by my above argument, you can find "redundant" indexes like:
-- Finds "redundant" indexes: pairs of indexes on the same table where the
-- column list of one (RedundantIndex) is a strict leading prefix of the
-- column list of the other (FatherIndex).
;with ind as (
    -- One row per index with its column ids as a delimited string, e.g. '1,2,3,'.
    -- The trailing comma stops column id 1 from matching as a prefix of id 12.
    select  a.object_id
    ,       a.index_id
    ,       cast(col_list.list as varchar(max)) as list
    from    (
            select  distinct object_id
            ,       index_id
            from    sys.index_columns
            ) a
    cross apply
            (
            select  cast(b.column_id as varchar(16)) + ',' as [text()]
            from    sys.index_columns b
            where   a.object_id = b.object_id
                    and a.index_id = b.index_id
            -- Key columns first, in key order; non-key columns (key_ordinal = 0)
            -- afterwards. Without an ORDER BY the concatenation order is undefined,
            -- which makes the prefix comparison below unreliable.
            order by case when b.key_ordinal > 0 then 0 else 1 end
            ,       b.key_ordinal
            ,       b.index_column_id
            for xml path(''), type
            ) col_list (list)
)
select  object_name(a.object_id) as TableName
,       asi.name as FatherIndex
,       bsi.name as RedundantIndex
from    ind a
join    sys.indexes asi
on      asi.object_id = a.object_id
        and asi.index_id = a.index_id
join    ind b
on      a.object_id = b.object_id
        -- never pair an index with itself
        and a.index_id <> b.index_id
        -- b's column list must be a strictly shorter leading prefix of a's
        and len(a.list) > len(b.list)
        and left(a.list, len(b.list)) = b.list
join    sys.indexes bsi
on      bsi.object_id = b.object_id
        and bsi.index_id = b.index_id
Bring cake for your users in case performance decreases "unexpectedly" :-)

Inspired by Paul Nielsen, I wrote this query to find/distinguish:
Duplicates (ignoring include order)
Redundant (different include columns)
Overlapping (different index columns)
And also record their usage
(One might also want to use is_descending_key, but I don't need it.)
-- Pairs up indexes on the same table and classifies each pair as
--   'Exact duplicate'     : same key columns and same included columns
--   'Different includes'  : same key columns, Index1 has the longer include list
--   'Overlapping columns' : Index2's key columns are a leading prefix of Index1's
-- Usage/update counters come from sys.dm_db_index_usage_stats and are NULL when
-- the DMV has no row for the index.
WITH IndexColumns AS
(
    SELECT I.object_id AS TableObjectId
        , OBJECT_SCHEMA_NAME(I.object_id) + '.' + OBJECT_NAME(I.object_id) AS TableName
        , I.index_id AS IndexId
        , I.name AS IndexName
        , (IndexUsage.user_seeks + IndexUsage.user_scans + IndexUsage.user_lookups) AS IndexUsage
        , IndexUsage.user_updates AS IndexUpdates
        -- [data()] + FOR XML PATH('') concatenates the values separated by spaces;
        -- included columns yield NULL here and are therefore left out of the key list
        , (SELECT CASE is_included_column WHEN 1 THEN NULL ELSE column_id END AS [data()]
           FROM sys.index_columns AS IndexColumns
           WHERE IndexColumns.object_id = I.object_id
             AND IndexColumns.index_id = I.index_id
           ORDER BY index_column_id, column_id
           FOR XML PATH('')
          ) AS ConcIndexColumnNrs
        , (SELECT CASE is_included_column WHEN 1 THEN NULL ELSE COL_NAME(I.object_id, column_id) END AS [data()]
           FROM sys.index_columns AS IndexColumns
           WHERE IndexColumns.object_id = I.object_id
             AND IndexColumns.index_id = I.index_id
           ORDER BY index_column_id, column_id
           FOR XML PATH('')
          ) AS ConcIndexColumnNames
        -- include lists are ordered by column_id so that include order is ignored
        , (SELECT CASE is_included_column WHEN 1 THEN column_id ELSE NULL END AS [data()]
           FROM sys.index_columns AS IndexColumns
           WHERE IndexColumns.object_id = I.object_id
             AND IndexColumns.index_id = I.index_id
           ORDER BY column_id
           FOR XML PATH('')
          ) AS ConcIncludeColumnNrs
        , (SELECT CASE is_included_column WHEN 1 THEN COL_NAME(I.object_id, column_id) ELSE NULL END AS [data()]
           FROM sys.index_columns AS IndexColumns
           WHERE IndexColumns.object_id = I.object_id
             AND IndexColumns.index_id = I.index_id
           ORDER BY column_id
           FOR XML PATH('')
          ) AS ConcIncludeColumnNames
    FROM sys.indexes AS I
    LEFT OUTER JOIN sys.dm_db_index_usage_stats AS IndexUsage
        ON IndexUsage.object_id = I.object_id
       AND IndexUsage.index_id = I.index_id
       AND IndexUsage.Database_id = db_id()
)
SELECT
      C1.TableName
    , C1.IndexName AS [Index1]
    , C2.IndexName AS [Index2]
    , CASE WHEN (C1.ConcIndexColumnNrs = C2.ConcIndexColumnNrs) AND (C1.ConcIncludeColumnNrs = C2.ConcIncludeColumnNrs) THEN 'Exact duplicate'
           WHEN (C1.ConcIndexColumnNrs = C2.ConcIndexColumnNrs) THEN 'Different includes'
           ELSE 'Overlapping columns' END AS MatchType
--  , C1.ConcIndexColumnNrs
--  , C2.ConcIndexColumnNrs
    , C1.ConcIndexColumnNames
    , C2.ConcIndexColumnNames
--  , C1.ConcIncludeColumnNrs
--  , C2.ConcIncludeColumnNrs
    , C1.ConcIncludeColumnNames
    , C2.ConcIncludeColumnNames
    , C1.IndexUsage
    , C2.IndexUsage
    , C1.IndexUpdates
    , C2.IndexUpdates
    -- QUOTENAME each identifier part so the generated statements stay valid for
    -- names containing spaces, brackets or reserved words
    , 'DROP INDEX ' + QUOTENAME(C2.IndexName) + ' ON '
        + QUOTENAME(OBJECT_SCHEMA_NAME(C2.TableObjectId)) + '.' + QUOTENAME(OBJECT_NAME(C2.TableObjectId)) AS Drop2
    , 'DROP INDEX ' + QUOTENAME(C1.IndexName) + ' ON '
        + QUOTENAME(OBJECT_SCHEMA_NAME(C1.TableObjectId)) + '.' + QUOTENAME(OBJECT_NAME(C1.TableObjectId)) AS Drop1
FROM IndexColumns AS C1
INNER JOIN IndexColumns AS C2
    ON (C1.TableObjectId = C2.TableObjectId)
   AND (
        -- exact: show lower IndexId as 1
        (C1.IndexId < C2.IndexId
            AND C1.ConcIndexColumnNrs = C2.ConcIndexColumnNrs
            AND C1.ConcIncludeColumnNrs = C2.ConcIncludeColumnNrs)
        -- different includes: show longer include as 1
        OR (C1.ConcIndexColumnNrs = C2.ConcIndexColumnNrs
            AND LEN(C1.ConcIncludeColumnNrs) > LEN(C2.ConcIncludeColumnNrs))
        -- overlapping: show longer index as 1
        OR (C1.IndexId <> C2.IndexId
            AND C1.ConcIndexColumnNrs <> C2.ConcIndexColumnNrs
            AND C1.ConcIndexColumnNrs like C2.ConcIndexColumnNrs + ' %')
       )
ORDER BY C1.TableName, C1.ConcIndexColumnNrs

I created the following query that gives me a lot of good information to identify duplicate and near-duplicate indexes. It also includes other information like how many pages of memory an index takes, which allows me to give a higher priority to larger indexes. It shows what columns are indexed and what columns are included, so I can see if there are two indexes that are almost identical with only slight variations in the included columns.
-- Lists indexes on user tables that share their key-column list with at least
-- one other index on the same table, together with included columns, page
-- count, size and fragmentation from sys.dm_db_index_physical_stats.
WITH IndexSummary AS
(
    -- One row per index. The join to sys.index_columns multiplies rows per
    -- index column; DISTINCT collapses them again.
    SELECT DISTINCT
           o.name AS [Table Name],
           i.name AS [Index Name],
           -- Key columns in key order, so (A, B) and (B, A) are NOT reported as
           -- the same index. STUFF strips the leading ', ' separator.
           STUFF((SELECT ', ' + c.name AS [text()]
                  FROM sys.columns AS c
                  INNER JOIN sys.index_columns AS kc
                      ON kc.column_id = c.column_id
                     AND kc.object_id = c.object_id
                  WHERE kc.index_id = i.index_id
                    AND kc.object_id = i.object_id
                    AND kc.is_included_column = 0
                  ORDER BY CASE WHEN kc.key_ordinal > 0 THEN 0 ELSE 1 END,
                           kc.key_ordinal,
                           c.name
                  FOR XML PATH('')), 1, 2, '') AS [Indexed Column Names],
           -- Included columns are sorted by name; '' when the index has none.
           ISNULL(STUFF((SELECT ', ' + c.name AS [text()]
                         FROM sys.columns AS c
                         INNER JOIN sys.index_columns AS nc
                             ON nc.column_id = c.column_id
                            AND nc.object_id = c.object_id
                         WHERE nc.index_id = i.index_id
                           AND nc.object_id = i.object_id
                           AND nc.is_included_column = 1
                         ORDER BY c.name
                         FOR XML PATH('')), 1, 2, ''), '') AS [Included Column Names],
           i.index_id,
           i.object_id
    FROM sys.indexes AS i
    INNER JOIN sys.index_columns AS ic
        ON i.index_id = ic.index_id
       AND i.object_id = ic.object_id
    INNER JOIN sys.objects AS o
        ON o.object_id = i.object_id
    WHERE o.type = 'U'
)
SELECT cur.[Table Name],
       cur.[Index Name],
       cur.[Indexed Column Names],
       cur.[Included Column Names],
       PhysicalStats.page_count AS [Page Count],
       -- pages are 8 KB each
       CONVERT(decimal(18,2), PhysicalStats.page_count * 8 / 1024.0) AS [Size (MB)],
       CONVERT(decimal(18,2), PhysicalStats.avg_fragmentation_in_percent) AS [Fragment %]
FROM IndexSummary AS cur
INNER JOIN sys.dm_db_index_physical_stats (DB_ID(), NULL, NULL, NULL, NULL)
    AS PhysicalStats
    ON PhysicalStats.index_id = cur.index_id
   AND PhysicalStats.object_id = cur.object_id
-- Keep only indexes that have a sibling with the same key list. Siblings are
-- matched on object_id, so same-named tables in different schemas are not mixed up.
WHERE EXISTS (SELECT *
              FROM IndexSummary AS other
              WHERE other.object_id = cur.object_id
                AND other.index_id <> cur.index_id
                AND other.[Indexed Column Names] = cur.[Indexed Column Names])
ORDER BY [Table Name], [Index Name], [Indexed Column Names], [Included Column Names]
Results of the query look like this:
Table Name Index Indexed Cols Included Cols Pages Size (MB) Frag %
My_Table Indx_1 Col1 Col2, Col3 123 0.96 8.94
My_Table Indx_2 Col1 Col2, Col3 123 0.96 8.94
Complete Description
For the complete explanation see Identifying Duplicate or Redundant Indexes in SQL Server.

Try the script below to show Unused Indexes, hope it helps
/****************************************************************
Description: Script to show Unused Indexes using DMVs

Lists the 100 least-read nonclustered indexes on user tables in the
current database (primary keys and unique constraints excluded),
with a ready-made DROP INDEX statement for each.

The query is driven from sys.indexes and LEFT JOINs the usage DMV,
so an index that has no row in sys.dm_db_index_usage_stats is still
listed, with all counters reported as 0.
****************************************************************/
SELECT TOP 100
      o.name AS ObjectName
    , i.name AS IndexName
    , i.index_id AS IndexID
    , COALESCE(dm_ius.user_seeks, 0) AS UserSeek
    , COALESCE(dm_ius.user_scans, 0) AS UserScans
    , COALESCE(dm_ius.user_lookups, 0) AS UserLookups
    , COALESCE(dm_ius.user_updates, 0) AS UserUpdates
    , p.TableRows
    , 'DROP INDEX ' + QUOTENAME(i.name)
      + ' ON ' + QUOTENAME(s.name) + '.' + QUOTENAME(o.name) AS [drop statement]
FROM sys.indexes i
INNER JOIN sys.objects o ON o.object_id = i.object_id
INNER JOIN sys.schemas s ON s.schema_id = o.schema_id
-- row count per index, summed over all of its partitions
INNER JOIN (SELECT SUM(p.rows) TableRows, p.index_id, p.object_id
            FROM sys.partitions p GROUP BY p.index_id, p.object_id) p
    ON p.index_id = i.index_id AND p.object_id = i.object_id
-- the database filter lives in the ON clause so the join stays an outer join
LEFT JOIN sys.dm_db_index_usage_stats dm_ius
    ON dm_ius.object_id = i.object_id
   AND dm_ius.index_id = i.index_id
   AND dm_ius.database_id = DB_ID()
WHERE OBJECTPROPERTY(i.object_id,'IsUserTable') = 1
AND i.type_desc = 'nonclustered'
AND i.is_primary_key = 0
AND i.is_unique_constraint = 0
ORDER BY (COALESCE(dm_ius.user_seeks, 0) + COALESCE(dm_ius.user_scans, 0) + COALESCE(dm_ius.user_lookups, 0)) ASC
    , o.name
    , i.name
GO

I was just reading some MSDN blogs, noticed a script to do this and remembered this question.
I haven't bothered testing it side by side with Andomar's to see if one has any particular benefit over the other.
One amendment I would likely make to both though would be to take into account the size of both indexes when assessing redundancy.
Edit:
Also see Kimberly Tripp's post on Removing duplicate indexes

Related

SQL Server Indexes - verify naming convention

I'm looking for a T-SQL script, which can verify that my indexes are named correctly.
PK_ for primary keys
UK_ for unique keys
IX_ for non clustered non unique indexes
UX_ for unique indexes
and furthermore, the naming of the index (the tricky part):
IX_Column1_Column2 - instead of IX_CrazyIndexWhichFixesPerformance
Someone out there with this type of script?
It is quite easily accomplished with use of the Object Catalog Views (sys.*).
It depends how you want to approach this - you can write a full blown procedure to check all these things. I don't have code that will do exactly the above but this should set you on the right track.
For the Primary Key Check, you can find primary keys that don't fit your naming scheme with:
-- Primary-key indexes whose name does not start with the literal prefix 'PK_'.
SELECT
    i.name AS index_name,
    i.object_id
FROM
    sys.indexes i
WHERE
    i.is_primary_key = 1
    -- '_' on its own is a single-character wildcard in LIKE; [_] matches a literal underscore
    AND i.name NOT LIKE 'PK[_]%'
It will be a question of using the sys.objects table to filter for the particular key constraint.
For columns, join through sys.index_columns and sys.columns:
-- One row per (column, index) combination: the columns each index is built on.
SELECT DISTINCT
    col.name AS column_name,
    idx.name AS index_name,
    idx.object_id
FROM sys.indexes AS idx
INNER JOIN sys.index_columns AS idx_col
    ON idx_col.object_id = idx.object_id
   AND idx_col.index_id = idx.index_id
INNER JOIN sys.columns AS col
    ON col.object_id = idx_col.object_id
   AND col.column_id = idx_col.column_id
As you will want to concatenate column names into one field, you will need to look into FOR XML PATH('')
This information plus careful manipulation of the naming conventions you require should allow you to perform these checks.
I ended up with this - it may not be pretty but gets the job done for now :-)
-- Reports non-unique, non-PK indexes whose current name differs from the
-- convention IX_<Column1>_<Column2>..., built from the index's columns.
WITH indexesCTE
AS
(
    -- One row per index column of every non-unique, non-constraint index on
    -- tables that are not Microsoft-shipped.
    SELECT
          t.name AS TableName
        , i.name AS IndexName
        , i.index_id AS IndexId
        , ic.index_column_id AS ColumnId
        , t.object_id AS TableId
        -- underscores are removed from column names because '_' is the separator in the proposal
        , REPLACE(c.name, '_', '') AS ColumnName
        , i.object_id AS IndexObjectId
    FROM sys.indexes i
    INNER JOIN sys.index_columns ic ON i.object_id = ic.object_id AND i.index_id = ic.index_id
    INNER JOIN sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id
    INNER JOIN sys.tables t ON i.object_id = t.object_id
    WHERE
        i.is_primary_key = 0
        AND i.is_unique = 0
        AND i.is_unique_constraint = 0
        AND t.is_ms_shipped = 0
)
, indexNameProposal
AS
(
    SELECT i.TableName
        , i.IndexName AS CurrentIndexName
        -- ORDER BY makes the proposed name deterministic; without it the
        -- concatenation order of the columns is undefined
        , 'IX' + (SELECT '_' + i2.ColumnName
                  FROM indexesCTE i2
                  WHERE i2.IndexId = i.IndexId
                    AND i2.TableId = i.TableId
                  ORDER BY i2.ColumnId
                  FOR XML PATH('')) AS IndexNameProposal
    FROM indexesCTE i
    GROUP BY i.TableName, i.IndexName, i.TableId, i.IndexId
)
SELECT
      i.TableName
    , i.CurrentIndexName
    , i.IndexNameProposal
FROM indexNameProposal i
WHERE i.CurrentIndexName <> i.IndexNameProposal

I found this script that goes through the current database only and identifies unused indexes, but I want to go through all the databases

I found this script that gives you unused indexes; however, it runs against only one database at a time. How can I make it run through all the databases on the server?
-- Nonclustered, non-constraint indexes on user tables with more than 10000
-- rows in the current database, ranked by how rarely they are read relative
-- to how often they are written.
SELECT  o.name,
        i.name AS indexname,
        i.index_id,
        s.user_seeks + s.user_scans + s.user_lookups AS reads,
        s.user_updates AS writes,
        part.row_total AS [rows],
        -- an index that was never written gets the fixed ratio 100
        CASE WHEN s.user_updates < 1 THEN 100
             ELSE 1.00 * ( s.user_seeks + s.user_scans + s.user_lookups )
                  / s.user_updates
        END AS reads_per_write,
        'DROP INDEX ' + QUOTENAME(i.name) + ' ON ' + QUOTENAME(c.name) + '.'
            + QUOTENAME(OBJECT_NAME(s.object_id)) AS [drop statement]
FROM    sys.dm_db_index_usage_stats AS s
        INNER JOIN sys.indexes AS i
            ON i.index_id = s.index_id
           AND i.object_id = s.object_id
        INNER JOIN sys.objects AS o
            ON o.object_id = s.object_id
        INNER JOIN sys.schemas AS c
            ON c.schema_id = o.schema_id
        -- total rows of the index across its partitions, computed once and
        -- used both in the select list and in the filter
        CROSS APPLY ( SELECT SUM(p.rows) AS row_total
                      FROM   sys.partitions AS p
                      WHERE  p.index_id = s.index_id
                             AND p.object_id = s.object_id
                    ) AS part
WHERE   OBJECTPROPERTY(s.object_id, 'IsUserTable') = 1
        AND s.database_id = DB_ID()
        AND i.type_desc = 'nonclustered'
        AND i.is_primary_key = 0
        AND i.is_unique_constraint = 0
        AND part.row_total > 10000
ORDER BY reads_per_write ASC;
There's a line in your code that is restricting the records to the current database:
AND s.database_id = DB_ID()
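in your WHERE clause. If you remove that, the usage DMV will return rows for all of the databases you have access to. Note, however, that sys.indexes, sys.objects, sys.schemas and sys.partitions describe only the current database, so rows belonging to other databases would be matched by object id against the current database's objects. To get correct names for every database, run the query once per database (for example with dynamic SQL in a loop over sys.databases).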
you may want to also include the database name if you remove that line:
SELECT
db_name(database_id) DATABASE_NAME,
...

Query to get columns metadata, key and index

How to get column metadata for a table or view in following format
column_name column_data_type column_data_length index key
-----------------------------------------------------------------------------
column1 int 4 CLUSTERED PRIMARYKEY
column2 int 4 NONCLUSTERED FOREIGNKEY
column3 varchar 20 NULL NULL
-- Column metadata for one table or view: data type, character length, key role
-- and the type of an index that contains the column.
-- @tableName: schema-qualified object name (e.g. 'dbo.MyTable'); it is expected
-- to be declared and set by the surrounding batch or procedure.
SELECT
    C.TABLE_SCHEMA + '.' + C.TABLE_NAME AS TableName,
    C.COLUMN_NAME AS ColumnName,
    C.DATA_TYPE AS ColumnDataType,
    C.CHARACTER_MAXIMUM_LENGTH AS ColumnDataTypeLength,
    -- key role comes from the declared constraint type, not from a naming convention
    CASE TC.CONSTRAINT_TYPE
        WHEN 'PRIMARY KEY' THEN 'PRIMARYKEY'
        WHEN 'FOREIGN KEY' THEN 'FOREIGNKEY'
        ELSE NULL
    END AS [Keys],
    -- A column can belong to several indexes; TOP (1) keeps the scalar subquery
    -- from failing, and ordering by index_id prefers the clustered index (index_id 1).
    (SELECT TOP (1)
        i.[type_desc]
     FROM
        [sys].[indexes] AS i
     INNER JOIN
        [sys].[index_columns] AS ic
            ON i.[object_id] = ic.[object_id]
           AND i.index_id = ic.index_id
     INNER JOIN
        [sys].[columns] AS co
            ON ic.column_id = co.column_id
           AND ic.[object_id] = co.[object_id]
     WHERE
        i.[object_id] = OBJECT_ID(QUOTENAME(C.TABLE_SCHEMA) + '.' + QUOTENAME(C.TABLE_NAME))
        AND co.name = C.COLUMN_NAME
     ORDER BY
        i.index_id
    ) AS [Indexes]
FROM
    INFORMATION_SCHEMA.COLUMNS AS C
    -- matched on schema as well, so same-named tables in other schemas do not leak in
    LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS K
        ON K.TABLE_SCHEMA = C.TABLE_SCHEMA
       AND K.TABLE_NAME = C.TABLE_NAME
       AND K.COLUMN_NAME = C.COLUMN_NAME
    LEFT JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS TC
        ON TC.CONSTRAINT_SCHEMA = K.CONSTRAINT_SCHEMA
       AND TC.CONSTRAINT_NAME = K.CONSTRAINT_NAME
WHERE C.TABLE_SCHEMA + '.' + C.TABLE_NAME = @tableName
ORDER BY C.ORDINAL_POSITION ASC

How to get all the columns of the MS SQL including view columns as well

I'm trying to write a SQL query that will output all the columns in tables and views.
The query below returns just table columns and doesn't include view columns
-- Every column of every table in sys.tables, with the owning schema's name.
SELECT
    tbl.name AS table_name,
    SCHEMA_NAME(tbl.schema_id) AS schema_name,
    col.name AS column_name
FROM sys.columns AS col
INNER JOIN sys.tables AS tbl
    ON tbl.object_id = col.object_id
ORDER BY schema_name, table_name;
Try this one (fast) -
-- Columns of all user tables ('U') and views ('V'), read straight from sys.objects.
SELECT
    sch.name + '.' + obj.name AS [object_name],
    col.name AS column_name,
    obj.type_desc
FROM sys.columns AS col
INNER JOIN sys.objects AS obj
    ON obj.[object_id] = col.[object_id]
INNER JOIN sys.schemas AS sch
    ON sch.[schema_id] = obj.[schema_id]
WHERE obj.[type] = 'U'
   OR obj.[type] = 'V'
ORDER BY [object_name]
Or try this (slow) -
-- Same listing, but built by stacking sys.tables and sys.views instead of
-- filtering sys.objects.
SELECT
    sch.name + '.' + src.name AS [object_name],
    col.name AS column_name
FROM (
    SELECT tbl.name, tbl.[object_id], tbl.[schema_id]
    FROM sys.tables AS tbl
    UNION ALL
    SELECT vw.name, vw.[object_id], vw.[schema_id]
    FROM sys.views AS vw
) AS src
INNER JOIN sys.columns AS col
    ON col.[object_id] = src.[object_id]
INNER JOIN sys.schemas AS sch
    ON sch.[schema_id] = src.[schema_id]
ORDER BY [object_name]
Inside sys.tables & sys.views:
-- Definition shown for illustration: sys.views exposes the rows of
-- sys.objects$ whose type is 'V', with no further joins.
ALTER VIEW sys.views AS
SELECT *
FROM sys.objects$
WHERE type = 'V'
-- Definition shown for illustration: sys.tables takes the sys.objects$ rows of
-- type 'U' and adds four LEFT JOINs to internal system tables.
ALTER VIEW sys.tables AS
SELECT *
FROM sys.objects$ o
LEFT JOIN sys.sysidxstats lob ON lob.id = o.object_id AND lob.indid <= 1
LEFT JOIN sys.syssingleobjrefs ds ON ds.depid = o.object_id AND ds.class = 8 AND ds.depsubid <= 1 -- SRC_INDEXTOLOBDS
LEFT JOIN sys.syssingleobjrefs rfs ON rfs.depid = o.object_id AND rfs.class = 42 AND rfs.depsubid = 0 -- SRC_OBJTOFSDS
LEFT JOIN sys.syspalvalues ts ON ts.class = 'LEOP' AND ts.value = o.lock_escalation_option
WHERE o.type = 'U'
This will solve your problems
-- All columns exposed by the INFORMATION_SCHEMA.COLUMNS view.
SELECT cols.*
FROM INFORMATION_SCHEMA.COLUMNS AS cols
This gives you access to the column information of all tables and views.

Get the creation date of a table's column

I found a script to get the column information:
-- Column information for the user table 'TableNa', read from the legacy
-- compatibility views. crdate/refdate are selected from sysobjects, i.e. they
-- belong to the table, not to the individual column.
SELECT
    o.crdate AS thisFieldIsNotTheCreationOfTheField1,
    o.refdate AS thisFieldIsNotTheCreationOfTheField2,
    o.name AS [TableName],
    o.type,
    c.name AS [ColName],
    s.name AS [ColType],
    c.prec,
    c.scale,
    c.isnullable
FROM
    dbo.sysobjects AS o
INNER JOIN
    dbo.syscolumns AS c ON c.id = o.id
INNER JOIN
    -- join on xusertype, not xtype: several type names can share one xtype
    -- (e.g. nvarchar and sysname), which would duplicate each such column
    dbo.systypes AS s ON s.xusertype = c.xusertype
WHERE
    o.type = 'U' AND o.name = 'TableNa'
ORDER BY
    o.crdate
The column creation datetime isn't found.
How can this be done?
This is what you want :
-- Returns create_date of the object that owns the given column.
-- NOTE: create_date is read from sys.objects, so it is the creation date of
-- the table (or other owning object), not of the column itself.
-- @columnName / @tableName are expected to be declared by the surrounding batch.
SELECT obj.create_date
FROM sys.objects AS obj
INNER JOIN sys.columns AS col
    ON obj.object_id = col.object_id
WHERE col.name = @columnName
  AND obj.name = @tableName

Resources