I have a table with records which has 100 columns, I need to get the count of distinct values of all the columns from this table based on some condition (where clause).
The query below works fine, but I'm not able to use a where clause with it, so it returns the result for all the records of the table. I want it to be based on some condition, let's say file_id = 1. My question is how to use a where clause with the query below, or whether there is an alternative way to solve this problem.
-- Builds one "select ... union all select ..." statement with a row per column of
-- the target table (distinct count and non-NULL count), then executes it.
-- No where clause is applied, so every row of the table is counted.
declare @SQL nvarchar(max)
set @SQL = ''
;with cols as (
select Table_Schema, Table_Name, Column_Name, Row_Number() over(partition by Table_Schema, Table_Name
order by ORDINAL_POSITION) as RowNum
from INFORMATION_SCHEMA.COLUMNS
)
-- RowNum = 1 marks the first column of the table, the only one not preceded by "union all".
select @SQL = @SQL + case when RowNum = 1 then '' else ' union all ' end
+ ' select ''' + Column_Name + ''' as Column_Name, count(distinct ' + quotename (Column_Name) + ' ) As DistinctCountValue,
count( '+ quotename (Column_Name) + ') as CountValue FROM ' + quotename (Table_Schema) + '.' + quotename (Table_Name)
from cols
where Table_Name = 'table_name' --print @SQL
execute (@SQL)
I am using the dynamic query because I need to reuse this query for other tables also.
First get the columns and use stuff to generate the select in this way:
-- Shape of the generated select list: one aggregate per column, aliased with the column's own name.
SELECT COUNT(ColumnA) AS ColumnA, COUNT(ColumnB) AS ColumnB, COUNT(ColumnC) AS ColumnC....
That way you only select from your table once to get all the counts. After that, use CROSS APPLY to "unpivot" those columns and return the output as one row per column:
-- Turns the single row of per-column counts into one (ID, ColumnName, DistinctCountValue)
-- row per column. The derived table must be given an alias (B here).
CROSS APPLY(
VALUES(1, 'ColumnA', ColumnA), (2, 'ColumnB', ColumnB), (3, 'ColumnC', ColumnC)
) B (ID, ColumnName, DistinctCountValue)
For the filter, use sp_executesql and send the file_id as parameter
-- The filter value is bound as the @FID parameter instead of being concatenated into @SQL.
exec sp_executesql @SQL, N'@FID INT', @FID = @FileID
Since you are using all columns of the table Row_Number() over(partition by Table_Schema, Table_Name order by ORDINAL_POSITION) as RowNum becomes redundant, ORDINAL_POSITION already has the value that you are looking for
-- Returns one row per column of @SchemaName.@TableName holding COUNT(DISTINCT column),
-- restricted to the rows where File_ID = @FileID.
-- The generated statement aggregates every column in a single SELECT over the table
-- (inside a CTE) and then unpivots that one row with CROSS APPLY (VALUES ...).
declare @SchemaName sysname = N'dbo'           -- schema that owns the table; adjust as needed
declare @TableName  sysname = N'MyTestTable'
declare @FileID     int     = 1
declare @SQL        nvarchar(max)

;with cols as (
    -- Filter on schema as well as name so same-named tables in other schemas are not mixed in.
    select COLUMN_NAME, ORDINAL_POSITION
    from INFORMATION_SCHEMA.COLUMNS
    where TABLE_SCHEMA = @SchemaName
      and TABLE_NAME = @TableName
)
select @SQL = N';WITH CTE AS (SELECT
' +
    -- ", COUNT(DISTINCT [c]) AS [c]" per column; STUFF drops the leading comma.
    -- TYPE + .value() keeps characters such as & < > in column names from being XML-encoded.
    STUFF((
        SELECT N', COUNT(DISTINCT ' + QUOTENAME(COLUMN_NAME) + N') AS ' + QUOTENAME(COLUMN_NAME)
        FROM cols
        ORDER BY ORDINAL_POSITION
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 1, N'')
+ N'
FROM ' + QUOTENAME(@SchemaName) + N'.' + QUOTENAME(@TableName) + N'
WHERE File_ID = @FID
)
SELECT B.*
FROM CTE
CROSS APPLY (
VALUES ' +
    -- ",( ordinal,'c',[c])" per column; STUFF drops the leading comma.
    STUFF((
        SELECT N',( ' + CAST(ORDINAL_POSITION AS nvarchar(10)) + N',' + QUOTENAME(COLUMN_NAME, '''') + N',' + QUOTENAME(COLUMN_NAME) + N')'
        FROM cols
        ORDER BY ORDINAL_POSITION
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 1, N'') + N'
)B (ID,ColumnName,DistinctCountValue)
'

-- No matching columns makes the concatenation NULL; fail loudly instead of running nothing.
if @SQL is null
begin
    raiserror(N'No columns found for %s.%s', 16, 1, @SchemaName, @TableName)
    return
end

exec sp_executesql @SQL, N'@FID INT', @FID = @FileID
The query below creates a table of all the column names and uses a WHILE loop to select the count for whatever WHERE clause you want to use. This should be pretty flexible for any table; just update the variables at the top. Note that rows where the column's value is NULL are not counted. You can add a CASE expression to the @Query string if you want them counted. Since it processes each column individually, I added a temp table so you only hit the source table once.
-- Copies @Schema.@Table into the global temp table ##SourceValues once, then loops over
-- the table's columns and records COUNT(DISTINCT column) for the rows matching
-- @WhereClause. NULL values are not counted.
-- NOTE(review): @WhereClause is concatenated into dynamic SQL as-is; only pass trusted text.
-- NOTE(review): COUNT(DISTINCT ...) is presumably rejected for text/ntext/image/xml
-- columns; confirm and exclude such columns if the table has any.
IF OBJECT_ID('tempdb..##SourceValues') IS NOT NULL
    DROP TABLE ##SourceValues

DECLARE @Schema sysname = N'SomeSchema'
DECLARE @Table sysname = N'SomeTable'
DECLARE @WhereClause NVARCHAR(MAX) = N' Some WHERE clause'
DECLARE @ColumnName sysname
DECLARE @ProcessedRows TABLE(ColumnName sysname, DistinctCount INT)
DECLARE @Columns TABLE(RowNumber INT, ColumnName sysname)

-- Number the columns so the loop can walk them one by one.
INSERT INTO @Columns (RowNumber, ColumnName)
SELECT ROW_NUMBER() OVER(ORDER BY COLUMN_NAME DESC), COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = @Schema
  AND TABLE_NAME = @Table

DECLARE @Count INT = (SELECT MAX(RowNumber) FROM @Columns)
DECLARE @Counter INT = 0
DECLARE @DistinctCount INT
DECLARE @Query NVARCHAR(MAX)

-- A global temp table is used because a local one created inside EXEC would not
-- outlive that EXEC. The copy is taken from the schema-qualified source table.
SET @Query = N'SELECT * INTO ##SourceValues FROM ' + QUOTENAME(@Schema) + N'.' + QUOTENAME(@Table) + N' WITH (NOLOCK)'
EXEC sp_executesql @Query

WHILE @Counter < @Count
BEGIN
    SET @Counter += 1
    SET @ColumnName = (SELECT ColumnName FROM @Columns WHERE RowNumber = @Counter)
    -- ##SourceValues lives in tempdb, so it takes no schema prefix from the source table.
    SET @Query = N'SELECT @Output = COUNT(DISTINCT ' + QUOTENAME(@ColumnName) + N') FROM ##SourceValues ' + @WhereClause
    EXECUTE sp_executesql @Query, N'@Output INT OUT', @Output = @DistinctCount OUT
    INSERT INTO @ProcessedRows(ColumnName, DistinctCount) VALUES (@ColumnName, @DistinctCount)
END

SELECT ColumnName, DistinctCount FROM @ProcessedRows

-- Do not leave a copy of the data behind in tempdb.
IF OBJECT_ID('tempdb..##SourceValues') IS NOT NULL
    DROP TABLE ##SourceValues
Let's try a different approach.
Get all values unpivoted as Param/Value:
1) Collect list of tables and columns to be used in dynamic SQL:
-- Builds #Base: one row per table name from sys.tables with two comma-separated fragments
-- for later dynamic SQL:
--   SelectClause  : CONVERT(NVARCHAR(MAX),[col]) AS [col], ...
--   UnpivotClause : [col], ...
-- Both lists are ordered by column_id so the generated SQL is deterministic.
-- NOTE(review): tables are keyed by name only; same-named tables in different schemas
-- would be merged into one row.
DROP TABLE IF EXISTS #Base;
;WITH SchemaData AS (
    SELECT t.name AS [TableName], c.name AS [ColumnName], c.column_id AS [ColumnOrderID]
    FROM sys.tables t
    INNER JOIN sys.columns c ON c.object_id = t.object_id
)
SELECT t.TableName
    ,STUFF((SELECT ',CONVERT(NVARCHAR(MAX),' + QUOTENAME(a.[ColumnName]) + ') AS ' + QUOTENAME(a.[ColumnName])
            FROM SchemaData a
            WHERE (a.TableName = t.TableName)
            ORDER BY a.ColumnOrderID
            FOR XML PATH(''),TYPE).value('(./text())[1]','NVARCHAR(MAX)'),1,1,'') AS [SelectClause]
    ,STUFF((SELECT ',' + QUOTENAME(a.[ColumnName])
            FROM SchemaData a
            WHERE (a.TableName = t.TableName)
            ORDER BY a.ColumnOrderID
            FOR XML PATH(''),TYPE).value('(./text())[1]','NVARCHAR(MAX)'),1,1,'') AS [UnpivotClause]
INTO #Base
FROM SchemaData t
GROUP BY t.TableName
;
2) Get all data inside a temp table
-- Fills #Result with one (TableName, ColumnName, Value) row per non-NULL cell of every
-- table listed in #Base. Each table is read with its columns converted to NVARCHAR(MAX)
-- and then UNPIVOTed; UNPIVOT drops NULL values, so NULL cells produce no row.
DROP TABLE IF EXISTS #Result;
CREATE TABLE #Result(TableName NVARCHAR(255),ColumnName NVARCHAR(255),[Value] NVARCHAR(MAX));
DECLARE @TableName NVARCHAR(255),@SelectClause NVARCHAR(MAX),@UnpivotClause NVARCHAR(MAX);
DECLARE @dSql NVARCHAR(MAX);
DECLARE crPopulateResult CURSOR LOCAL FAST_FORWARD READ_ONLY FOR SELECT b.TableName,b.SelectClause,b.UnpivotClause FROM #Base b;
OPEN crPopulateResult;
FETCH NEXT FROM crPopulateResult INTO @TableName,@SelectClause,@UnpivotClause;
WHILE @@FETCH_STATUS = 0
BEGIN
    -- QUOTENAME(..., '''') emits the table name as a properly escaped string literal;
    -- QUOTENAME(...) emits it as a bracketed identifier.
    SET @dSql = N' INSERT INTO #Result(TableName,[ColumnName],[Value])
SELECT up.TableName,up.Param AS [ColumnName],up.[Value]
FROM (
SELECT N' + QUOTENAME(@TableName, '''') + N' AS [TableName]
,' + @SelectClause + N'
FROM ' + QUOTENAME(@TableName) + N'
) a
UNPIVOT(Value FOR Param IN (' + @UnpivotClause + N')) up
';
    EXEC sp_executesql @stmt = @dSql;
    FETCH NEXT FROM crPopulateResult INTO @TableName,@SelectClause,@UnpivotClause;
END
CLOSE crPopulateResult;
DEALLOCATE crPopulateResult;
3) Any filters can be applied to #Result, including table names, column names, data filters, etc.:
-- Per table and column: number of collected values and number of distinct values.
SELECT
    r.TableName,
    r.ColumnName,
    COUNT(*)                  AS [CountValue],
    COUNT(DISTINCT r.[Value]) AS [DistinctCountValue]
FROM #Result AS r
-- Optional filter, for example:
-- WHERE r.ColumnName = 'file_id' AND r.[Value] = '1'
GROUP BY
    r.TableName,
    r.ColumnName
ORDER BY
    r.TableName,
    r.ColumnName;
To use a where clause with this query, you just have to append it to the constructed string right after the table name. For example, to filter on file_id = '1' you would have:
-- The appended literal starts with a space so "where" is separated from the table name.
FROM ' + quotename (Table_Schema) + '.' + quotename (Table_Name) + ' where file_id = ''1'' '
You can add a @where variable and concatenate it with your big union construction (as part of your select ... from cols). For example:
-- Builds one "select ... union all select ..." statement with a row per column of
-- the target table (distinct count and non-NULL count), each branch filtered by @where,
-- then executes it.
-- NOTE(review): @where is concatenated into the statement as-is; only use trusted text.
declare @SQL nvarchar(max)
declare @where nvarchar(max) = N' where file_id = 1'

;with cols as (
    select Table_Schema, Table_Name, Column_Name, ORDINAL_POSITION
    from INFORMATION_SCHEMA.COLUMNS
    where Table_Name = 'table_name'
)
-- FOR XML PATH concatenation with an explicit ORDER BY replaces "select @SQL = @SQL + ...",
-- whose row order is not guaranteed and could misplace the first "union all".
-- Every branch is prefixed with ' union all ' (11 characters); STUFF removes the first one.
select @SQL = stuff((
        select N' union all select ' + quotename(Column_Name, '''') + N' as Column_Name, count(distinct '
             + quotename(Column_Name) + N') As DistinctCountValue, count(' + quotename(Column_Name)
             + N') as CountValue FROM ' + quotename(Table_Schema) + N'.' + quotename(Table_Name)
             + @where
        from cols
        order by Table_Schema, Table_Name, ORDINAL_POSITION
        for xml path(''), type
    ).value('.', 'nvarchar(max)'), 1, 11, N'')
--print @SQL
execute (@SQL)
Note that you'll need to escape single quotes in @where if you're searching for a string. For example, declare @where nvarchar(max) = ' where state = ''CT'''.
I have a database with ~420 tables in it. All the tables have Audit columns: field1, field2, field3.
How do I write a query to check if there are any NULL values in any of these audit columns across the entire database?
The script below should do the trick for you; play around with it if you need different output.
-- Lists every [schema].[table].[column] among the audit columns field1/field2/field3
-- that contains at least one NULL, across all user base tables in the database.
-- ColumnValue is always NULL in the output; it is kept so the result shape is unchanged.
CREATE TABLE #Results (ColumnName nvarchar(370), ColumnValue nvarchar(3630))
SET NOCOUNT ON
-- Sized for quoted names: QUOTENAME of a sysname can be up to 258 characters.
DECLARE @TableName nvarchar(517) = ''
DECLARE @ColumnName nvarchar(258)
DECLARE @Sql nvarchar(max)
WHILE @TableName IS NOT NULL
BEGIN
    SET @ColumnName = ''
    -- Next user table in alphabetical order of its quoted two-part name; NULL ends the loop.
    SET @TableName =
    (
        SELECT MIN(QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME))
        FROM INFORMATION_SCHEMA.TABLES
        WHERE TABLE_TYPE = 'BASE TABLE'
        AND QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME) > @TableName
        AND OBJECTPROPERTY(OBJECT_ID(QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME)), 'IsMSShipped') = 0
    )
    -- Walk the audit columns present on this table.
    WHILE (@TableName IS NOT NULL) AND (@ColumnName IS NOT NULL)
    BEGIN
        SET @ColumnName =
        (
            SELECT MIN(QUOTENAME(COLUMN_NAME))
            FROM INFORMATION_SCHEMA.COLUMNS
            WHERE TABLE_SCHEMA = PARSENAME(@TableName, 2)
            AND TABLE_NAME = PARSENAME(@TableName, 1)
            AND COLUMN_NAME IN ('field1', 'field2', 'field3')
            AND QUOTENAME(COLUMN_NAME) > @ColumnName
        )
        IF @ColumnName IS NOT NULL
        BEGIN
            -- TOP (1) emits a single row as soon as one NULL is found. The value is
            -- selected as a typed NULL, since only NULL rows match the filter.
            SET @Sql = N'SELECT TOP (1) N''' + REPLACE(@TableName + N'.' + @ColumnName, N'''', N'''''')
                     + N''', CAST(NULL AS nvarchar(3630)) FROM ' + @TableName
                     + N' WITH (NOLOCK) WHERE ' + @ColumnName + N' IS NULL'
            INSERT INTO #Results (ColumnName, ColumnValue) EXEC (@Sql)
        END
    END
END
-- return results
SELECT ColumnName, ColumnValue FROM #Results
-- drop temp table
DROP TABLE #Results
I have a script that will search a database for a string and list the table name and column. I need to get it to list not just the column but the whole record so I can compare accounts in text. How would I change the script to show the entire record and not just the column?
-- Searches every char/varchar/nchar/nvarchar/int/decimal column of every user base table
-- for @SearchStr (substring match) and returns the column name with the matching value.
USE powercampustest
DECLARE @SearchStr nvarchar(100) = '93335' --Five,Test
DECLARE @Results TABLE (ColumnName nvarchar(370), ColumnValue nvarchar(3630))
SET NOCOUNT ON
DECLARE @TableName nvarchar(256), @ColumnName nvarchar(128), @SearchStr2 nvarchar(110)
SET @TableName = ''
-- Pre-quoted LIKE pattern: '%<search string>%' with embedded quotes escaped.
SET @SearchStr2 = QUOTENAME('%' + @SearchStr + '%','''')
WHILE @TableName IS NOT NULL
BEGIN
SET @ColumnName = ''
-- Next user table in alphabetical order of its quoted two-part name; NULL ends the loop.
SET @TableName =
(
SELECT MIN(QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME))
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
AND QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME) > @TableName
AND OBJECTPROPERTY(
OBJECT_ID(
QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME)
), 'IsMSShipped'
) = 0
)
-- Walk the searchable columns of the current table.
WHILE (@TableName IS NOT NULL) AND (@ColumnName IS NOT NULL)
BEGIN
SET @ColumnName =
(
SELECT MIN(QUOTENAME(COLUMN_NAME))
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = PARSENAME(@TableName, 2)
AND TABLE_NAME = PARSENAME(@TableName, 1)
AND DATA_TYPE IN ('char', 'varchar', 'nchar', 'nvarchar', 'int', 'decimal')
AND QUOTENAME(COLUMN_NAME) > @ColumnName
)
IF @ColumnName IS NOT NULL
BEGIN
INSERT INTO @Results
EXEC
(
'SELECT ''' + @TableName + '.' + @ColumnName + ''', LEFT(' + @ColumnName + ', 3630)
FROM ' + @TableName + ' (NOLOCK) ' +
' WHERE ' + @ColumnName + ' LIKE ' + @SearchStr2
)
END
END
END
SELECT ColumnName, ColumnValue FROM @Results
/*
Sample output:
ColumnName ColumnValue
[dbo].[ADDRESS].[PEOPLE_ORG_CODE_ID] P000092436
[dbo].[ADDRESS].[PEOPLE_ORG_ID] 000092436
[dbo].[ADDRESSHIERARCHYUNIQUE].[PEOPLE_ORG_CODE_ID] P000092436
[dbo].[ADDRESSSCHEDULE].[PEOPLE_ORG_CODE_ID] P000092436
[dbo].[ADDRESSSCHEDULE].[PEOPLE_ORG_CODE_ID] P000092436
[dbo].[ADDRESSSCHEDULE].[PEOPLE_ORG_CODE_ID] P000092436
[dbo].[ADDRESSSCHEDULE].[PEOPLE_ORG_CODE_ID] P000092436
[dbo].[ADDRESSSCHEDULE].[PEOPLE_ORG_ID] 000092436
[dbo].[ADDRESSSCHEDULE].[PEOPLE_ORG_ID] 000092436
[dbo].[ADDRESSSCHEDULE].[PEOPLE_ORG_ID] 000092436
*/
First, the script you currently have is, frankly, not a good one.
Loops are almost never the correct approach in SQL, and nested loops are even worse.
Second, since you are searching all the tables in the database, your best option to get the entire row content is as XML. This will enable you to have a single table with only 3 columns to keep all the information you need: ColumnName, ColumnValue, and RowContent.
It will also enable you to go with a set-based approach and lose the loops.
So first, you declare the search parameter and the variable that will hold your query:
-- @SearchStr: text to look for; @SQL: accumulates the generated search statement.
DECLARE @SearchStr nvarchar(100) = N'P000092436',
        @SQL nvarchar(max) = N''
Then, you use the built in views of Information_schema.Columns and Information_schema.Tables to create your query:
-- Appends one "UNION <CR><LF> SELECT ..." branch to @SQL for every searchable column of
-- every base table. Each branch returns the full column name, the search string, and the
-- matching rows of that table as XML text.
-- The prefix is built as 'UNION' + CR + LF (exactly 7 characters) so that a later
-- STUFF(@SQL, 1, 7, '') removes it regardless of how this script's line endings were saved.
-- Embedded single quotes in @SearchStr are doubled so it cannot break out of the literal.
-- NOTE(review): LIKE wildcards (%, _, [) inside @SearchStr are not escaped.
DECLARE @SearchLiteral nvarchar(200) = REPLACE(@SearchStr, N'''', N'''''')

SELECT @SQL = @SQL +
    N'UNION' + NCHAR(13) + NCHAR(10) +
    -- full three-part column name, emitted as an escaped string literal
    N'SELECT N''' + REPLACE(QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME) + N'.' + QUOTENAME(c.COLUMN_NAME), N'''', N'''''') + N''' As ColumnName,
N''' + @SearchLiteral + N''' As ColumnValue,
(SELECT * FROM ' + QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME) + N' WHERE ' + QUOTENAME(c.COLUMN_NAME) + N' LIKE N''%' + @SearchLiteral + N'%'' FOR XML AUTO)
FROM ' + QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME) + N'
WHERE ' + QUOTENAME(c.COLUMN_NAME) + N' LIKE N''%' + @SearchLiteral + N'%''
'
FROM INFORMATION_SCHEMA.COLUMNS c
INNER JOIN INFORMATION_SCHEMA.TABLES t
    ON t.TABLE_SCHEMA = c.TABLE_SCHEMA   -- join on schema too, or same-named tables cross-match
   AND t.TABLE_NAME = c.TABLE_NAME
WHERE t.TABLE_TYPE = 'BASE TABLE'
  -- same searchable types as the original looping script
  AND c.DATA_TYPE IN ('char', 'varchar', 'nchar', 'nvarchar', 'int', 'decimal')
Then, use STUFF to remove the first UNION:
-- Drops the first 7 characters: the leading "UNION" plus the line break that follows it
-- (assumes a two-character CR+LF line break; confirm for your editor settings).
SET @SQL = STUFF(@SQL, 1, 7, '')
Now, the query this will create is something like this:
-- Illustrative shape of the generated statement: one SELECT per column, combined with UNION.
-- Each branch returns the column's full name, the search string, and the matching rows as XML.
SELECT '[schema].[table1].[column1]' As ColumnName,
'P000092436' As ColumnValue,
(SELECT * FROM [table1] WHERE [column1] LIKE '%P000092436%' FOR XML AUTO)
FROM [table1]
WHERE [column1] LIKE '%P000092436%'
UNION
SELECT '[schema].[table1].[column2]' As ColumnName,
'P000092436' As ColumnValue,
(SELECT * FROM [table1] WHERE [column2] LIKE '%P000092436%' FOR XML AUTO)
FROM [table1]
WHERE [column2] LIKE '%P000092436%'
UNION....
Including all tables and all columns.
The next step is to create the temporary table to hold the results:
-- Temporary holding table for the search output: the column that matched, the value
-- searched for, and the matching rows as XML.
CREATE TABLE #SearchResults (
    ColumnName  nvarchar(500),
    ColumnValue nvarchar(100),
    RowContent  xml
);
and insert the result of this query into it:
-- Runs the generated statement and stores its rows; the explicit column list keeps the
-- insert independent of the temp table's column order.
INSERT INTO #SearchResults (ColumnName, ColumnValue, RowContent)
EXEC (@SQL)
You can see a live demo on Rextester.
When I try to create the following stored procedure, I get this error message:
what am I doing wrong?
-- uspUpdateWithNewSSN
-- Replaces @OldSSN with @NewSSN in every SSN-like column of every base table.
--   @OldSSN : value to look for
--   @NewSSN : replacement value
-- uspCheckNewSSN is run first; if it returns any row, the message
-- 'social security number already used' is printed and nothing is updated.
--
-- A GO inside the body is not allowed: GO is a client-side batch separator, so it ends
-- the CREATE PROCEDURE statement in the middle of a BEGIN...END block. The only GO
-- belongs after the final END.
-- NOTE(review): NewSSNEXIST is a permanent scratch table shared by all sessions;
-- concurrent calls can see each other's rows.
-- NOTE(review): in LIKE, "_" matches any single character, so '%_ssn%' and '_ocsecno'
-- are wildcard patterns, not literal underscores; confirm that is intended.
Create Procedure uspUpdateWithNewSSN
(
    @OldSSN VARCHAR(9)
   ,@NewSSN VARCHAR(9)
)
AS
BEGIN
    SET NOCOUNT ON;

    -- Create the scratch table on first use.
    If OBJECT_ID(N'NewSSNEXIST', N'U') IS NULL
    Begin
        Create Table NewSSNEXIST
        (
            NewSSN nvarchar(9) NOT NULL
        )
    End

    -- Capture whatever uspCheckNewSSN returns for the new value.
    Insert into NewSSNEXIST (NewSSN)
    Exec uspCheckNewSSN @NewSSN;

    If Exists (Select 1 From NewSSNEXIST)
    Begin
        Print 'social security number already used';
        Truncate Table NewSSNEXIST;
        Return;
    End

    -- One UPDATE per matching column. Names are bracketed with QUOTENAME and the SSN
    -- values are passed as parameters, so they are compared as strings (leading zeros
    -- are preserved) and cannot alter the statement text.
    Declare @cmd NVARCHAR(MAX) = N'';

    Select @cmd = @cmd
        + N'UPDATE ' + QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME)
        + N' SET ' + QUOTENAME(c.COLUMN_NAME) + N' = @NewSSN'
        + N' WHERE ' + QUOTENAME(c.COLUMN_NAME) + N' = @OldSSN;'
        + NCHAR(13) + NCHAR(10)
    From INFORMATION_SCHEMA.COLUMNS c
    Inner Join INFORMATION_SCHEMA.TABLES t
        On t.TABLE_SCHEMA = c.TABLE_SCHEMA
       And t.TABLE_NAME = c.TABLE_NAME
    Where t.TABLE_TYPE = 'BASE TABLE'       -- skip views
      And (   c.COLUMN_NAME Like 'SSN%'
           Or c.COLUMN_NAME Like 'ssn%'
           Or c.COLUMN_NAME Like 'Ssn%'
           Or c.COLUMN_NAME Like '%_ssn%'
           Or c.COLUMN_NAME Like '_ocsecno');

    --Select @cmd
    If @cmd <> N''
        Exec sp_executesql @cmd
            ,N'@OldSSN VARCHAR(9), @NewSSN VARCHAR(9)'
            ,@OldSSN = @OldSSN
            ,@NewSSN = @NewSSN;
END
GO
Apart from the GO keyword inside your procedure, you should also look at your dynamic SQL and fix it as follows:
-- Builds one parameterized UPDATE per SSN-like column and runs them all in one call.
-- @cmd must be NVARCHAR: sp_executesql does not accept a VARCHAR statement.
-- The statements are accumulated (@cmd = @cmd + ...); a plain assignment would keep
-- only the UPDATE for the last matching column.
Declare @cmd NVARCHAR(MAX) = N''
Select @cmd = @cmd
 + N' UPDATE ' + QUOTENAME(s.name) + N'.' + QUOTENAME(t.name)
 + N' SET ' + QUOTENAME(c.name) + N' = @NewSSN '
 + N' WHERE ' + QUOTENAME(c.name) + N' = @OldSSN;'
From sys.tables t
Inner join sys.columns c ON t.object_id = c.object_id
Inner join sys.schemas s on t.schema_id = s.schema_id
Where c.name like 'SSN%'
OR c.name LIKE 'ssn%'
OR c.name LIKE 'Ssn%'
OR c.name LIKE '%_ssn%'
OR c.name LIKE '_ocsecno';
--Select @cmd
-- Nothing to run when no column matched.
If @cmd <> N''
EXEC sp_executesql @cmd
,N' @OldSSN VARCHAR(9) ,@NewSSN VARCHAR(9)'
,@OldSSN = @OldSSN
,@NewSSN = @NewSSN
I found a similar entry that was only looking for the length of varchar columns. The answer provided was:
-- For every varchar column of TableSRC, reports the declared maximum length next to the
-- longest value actually stored (MAX(LEN(column)); 0 when the column has no values).
-- NOTE(review): the filter is on table name only; if TableSRC exists in more than one
-- schema, ORDINAL_POSITION values collide. Add a TABLE_SCHEMA filter in that case.
SELECT COLUMN_NAME, CHARACTER_MAXIMUM_LENGTH AS DefinitionMaxLength
, N'SELECT @resultOUT = MAX(LEN(' + QUOTENAME(COLUMN_NAME) + N')) FROM '
  + QUOTENAME(COL.TABLE_SCHEMA) + N'.' + QUOTENAME(COL.TABLE_NAME) AS [query]
, ORDINAL_POSITION
, 0 AS [ActualMaxLength]
INTO #tmp
FROM INFORMATION_SCHEMA.COLUMNS as COL
WHERE COL.DATA_TYPE ='varchar' AND COL.TABLE_NAME='TableSRC';
DECLARE
@pos int = (SELECT MIN(ORDINAL_POSITION) FROM #tmp),
@result int,
@query NVARCHAR(MAX) = N'',
@param_def NVARCHAR(100) = N'@resultOUT int OUTPUT';
-- Loop until there is no next position. Testing "EXISTS (... ORDINAL_POSITION > @pos)"
-- here instead would stop one iteration early and never measure the last column.
WHILE @pos IS NOT NULL
BEGIN
SELECT @query = [query] FROM #tmp WHERE ORDINAL_POSITION = @pos;
EXECUTE sp_executesql @query, @param_def, @resultOUT = @result OUTPUT;
UPDATE #tmp SET [ActualMaxLength] = ISNULL(@result, 0) WHERE ORDINAL_POSITION = @pos;
-- Advance to the next varchar column; becomes NULL after the last one.
SET @pos = (SELECT MIN(ORDINAL_POSITION) FROM #tmp WHERE ORDINAL_POSITION > @pos);
END
SELECT COLUMN_NAME, DefinitionMaxLength, ActualMaxLength FROM #tmp;
DROP TABLE #tmp;
How do I modify this to get the length of the longest entry in all columns, irrespective of format?
Is this what you're looking for? (If I've given you an answer and it's not what you're looking for, please update the question with some data samples.)
NOTE: the query generated may be really slow. You've been warned.
-- Generates (and prints) one statement that reports, for every varchar/nvarchar column
-- of every table and view, the declared maximum length and the longest stored value.
-- Output columns: TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, CHARACTER_MAXIMUM_LENGTH
-- (as text, -1 for MAX types), MaxDataLength.
DECLARE @Tsql NVARCHAR(MAX)
-- Every branch is prefixed with this separator; the first one is removed afterwards.
DECLARE @Separator NVARCHAR(20) = N'UNION ALL' + NCHAR(13) + NCHAR(10)
SET @Tsql = N''
SELECT @Tsql = @Tsql + @Separator +
    -- QUOTENAME(..., '''') emits names as escaped string literals,
    -- QUOTENAME(...) as bracketed identifiers.
    N'SELECT N' + QUOTENAME(c.TABLE_SCHEMA, '''') + N' AS TABLE_SCHEMA, ' +
    N'N' + QUOTENAME(c.TABLE_NAME, '''') + N' AS TABLE_NAME, ' +
    -- COLUMN_NAME identifies which column each row describes.
    N'N' + QUOTENAME(c.COLUMN_NAME, '''') + N' AS COLUMN_NAME, ' +
    N'''' + CAST(c.CHARACTER_MAXIMUM_LENGTH AS NVARCHAR(11)) + N''' AS CHARACTER_MAXIMUM_LENGTH, ' +
    N'MAX(LEN(' + QUOTENAME(c.COLUMN_NAME) + N')) AS MaxDataLength ' +
    N'FROM ' + QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME) + NCHAR(13) + NCHAR(10)
FROM INFORMATION_SCHEMA.COLUMNS c
JOIN INFORMATION_SCHEMA.TABLES t
ON t.TABLE_SCHEMA = c.TABLE_SCHEMA
AND t.TABLE_NAME = c.TABLE_NAME
--Include/exclude views as desired
WHERE t.TABLE_TYPE IN ('BASE TABLE', 'VIEW')
--Include/exclude nvarchar data type as desired
AND c.DATA_TYPE IN ('nvarchar', 'varchar')
--Remove the leading separator. STUFF yields NULL when no column matched, in which case
--nothing is printed or executed.
SET @Tsql = STUFF(@Tsql, 1, LEN(@Separator), N'')
--PRINT truncates long strings; use SELECT @Tsql to inspect the full text.
PRINT @Tsql
--Uncomment when ready to proceed
--EXEC (@Tsql)