I am having problems with an ETL script that is syntactically correct but does not run; the problem is in the WHERE-clause variable, which is built from data in a metadata table. I've redacted all but the relevant code:
-- ETL helper: builds two dynamic SQL fragments from dbo.metadata —
-- #w_clause (OR-joined COALESCE terms over non-key columns) and
-- #j_clause (AND-joined prd/stg equality terms over key columns).
-- NOTE(review): '#' appears here where T-SQL requires '@' for local
-- variables — presumably a transcription artifact; confirm against the
-- original script (temp tables #temp0/#temp1 correctly use '#').
DECLARE #table VARCHAR(200) = 'MyTable'
DECLARE #w_clause NVARCHAR(MAX)
DECLARE #j_clause NVARCHAR(MAX)
-- Get non-primary key column names
DROP TABLE IF EXISTS #temp0
SELECT [Name], FieldType
INTO #temp0
FROM dbo.metadata
WHERE [Table] = #table AND
ETL_Active = 1 AND
[Name] IS NOT NULL AND
PrimaryKey <> 1
-- Get primary key column names
DROP TABLE IF EXISTS #temp1
SELECT [Name]
INTO #temp1
FROM dbo.metadata
WHERE [Table] = #table AND
ETL_Active = 1 AND
[Name] IS NOT NULL AND
PrimaryKey = 1
-- BUG (source of Msg 245): the CASE's ELSE branch yields the int 0, so the
-- whole CASE expression is typed int; '+' then tries to convert the ')'
-- string to int. Both branches must be strings (ELSE '0').
SELECT #w_clause = COALESCE(#w_clause+' OR ','') + 'COALESCE(prd.'+ [Name] +',' + CASE WHEN FieldType IN('char','varchar', 'nvarchar', 'nchar') THEN '''' ELSE 0 END +')' FROM #temp0
PRINT #w_clause
-- Join clause builds without error: every operand here is already a string.
SELECT #j_clause = COALESCE(#j_clause+' AND ','') + 'prd.'+ [Name] + ' = ' + 'stg.' + [Name] FROM #temp1
PRINT #j_clause
The error I'm getting:
Msg 245, Level 16, State 1, Line xx
Conversion failed when converting the varchar value ')' to data type int.
This occurs when I attempt the COALESCE(prd… dynamic SQL assignment to the variable for the WHERE clause. This has me stumped, so any help is appreciated. Thanks.
You should use CONCAT instead of + for string concatenation. With + you have to cast both arguments to string, and replace NULL with ''. CONCAT does both for you.
That should be:
-- Fixed: both CASE branches must be strings ('0', not the int 0) so the '+'
-- concatenation never forces an int conversion (the cause of Msg 245).
-- Also fixed: six quotes ('''''') are needed so the generated SQL contains
-- an empty-string literal ''; four quotes ('''') emit a single, unbalanced
-- quote character. FROM #temp0 added to make the statement complete.
SELECT #w_clause = COALESCE(#w_clause+' OR ','') + 'COALESCE(prd.'+ [Name] +',' + CASE
WHEN FieldType IN('char','varchar', 'nvarchar', 'nchar') THEN '''''' ELSE '0' END +')'
FROM #temp0
or
-- Fixed: CONCAT arguments must be comma-separated — the original had
-- "CASE ... END ')'" with no comma before the closing ')' literal, which is
-- a syntax error. Six quotes ('''''') emit an empty-string literal in the
-- generated SQL; FROM #temp0 added to make the statement complete.
SELECT #w_clause = CONCAT(COALESCE(#w_clause+' OR ',''), 'COALESCE(prd.', [Name], ',', CASE WHEN FieldType IN('char','varchar', 'nvarchar', 'nchar') THEN '''''' ELSE '0' END, ')') FROM #temp0
Related
I have this sqlserver query to generate a query text that I can use to update all columns types from nvarchar/ nchar to varchar/char.
-- Generate one ALTER statement per nchar/nvarchar column, converting it to
-- the corresponding char/varchar type while preserving declared length and
-- nullability.
SELECT
    AlterSql = CONCAT(
        'ALTER TABLE ', TABLE_SCHEMA, '.', TABLE_NAME,
        ' ALTER COLUMN ', COLUMN_NAME, ' ',
        SUBSTRING(DATA_TYPE, 2, LEN(DATA_TYPE)),   -- strip the leading 'n'
        '(',
        -- Fixed: CHARACTER_MAXIMUM_LENGTH is -1 for nvarchar(MAX)/nchar
        -- columns; emitting '(-1)' is invalid DDL, so translate it to 'MAX'.
        CASE WHEN CHARACTER_MAXIMUM_LENGTH = -1
             THEN 'MAX'
             ELSE CAST(CHARACTER_MAXIMUM_LENGTH AS varchar(10))
        END,
        ') ',
        CASE IS_NULLABLE WHEN 'YES' THEN 'NULL' WHEN 'NO' THEN 'NOT NULL' ELSE 'ERROR' END,
        CHAR(13), CHAR(10), 'GO')
    , *
FROM INFORMATION_SCHEMA.COLUMNS
WHERE DATA_TYPE IN ('nchar', 'nvarchar')
The issue is that when I run it, an error is displayed saying it can't convert the columns that exist in indexes.
So I should test whether each column exists in an index; if it does, I need to drop the index, convert the column, and then recreate the index.
How can I modify my script to do this?
Thanks a lot
First, you should check whether the CONSTRAINT exists; if it does, drop it as follows
-- Drop the named default constraint (type 'D') only if it exists, so the
-- script is safe to re-run.
-- Improvement: query sys.objects instead of dbo.sysobjects — the sysobjects
-- compatibility view has been deprecated since SQL Server 2005.
IF EXISTS (
    SELECT 1 FROM sys.objects
    WHERE [object_id] = OBJECT_ID(N'[CONSTRAINTNAME]') AND [type] = 'D'
)
BEGIN
    ALTER TABLE [dbo].[_COMPANY] DROP CONSTRAINT [CONSTRAINTNAME]
END
Then use your statement
-- Build one ALTER COLUMN statement per Unicode text column, converting the
-- n-prefixed type to its single-byte counterpart (nvarchar -> varchar,
-- nchar -> char) while keeping the declared length and nullability.
SELECT AlterSql = CONCAT(
           'ALTER TABLE ', TABLE_SCHEMA, '.', TABLE_NAME,
           ' ALTER COLUMN ', COLUMN_NAME, ' ',
           SUBSTRING(DATA_TYPE, 2, LEN(DATA_TYPE)),
           '(', CHARACTER_MAXIMUM_LENGTH, ') ',
           CASE IS_NULLABLE
               WHEN 'YES' THEN 'NULL'
               WHEN 'NO' THEN 'NOT NULL'
               ELSE 'ERROR'
           END,
           CHAR(13), CHAR(10), 'GO'),
       *
FROM INFORMATION_SCHEMA.COLUMNS
WHERE DATA_TYPE IN ('nchar', 'nvarchar')
The procedure sp_helpindex does what you want; you can capture its output into an in-memory table with INSERT ... EXEC and proceed
-- sp_helpindex returns one row per index on the given table; INSERT ... EXEC
-- captures that result set into a table variable for further processing.
DECLARE #indexInfo TABLE
(
    index_name        sysname COLLATE database_default NOT NULL,
    index_description nvarchar(512),
    index_keys        sysname
)

INSERT INTO #indexInfo
EXEC sp_helpIndex 'items'

-- Work with the captured index metadata here.
SELECT * FROM #indexInfo
And I second Sean Lange's question: why do you need this?
I tried something like this, which did not work:
SELECT COLUMN_NAME, COUNT(COLUMN_NAME) AS COUNT_NOT_NULL FROM myDB.INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 'myTable';
Obviously, the "information-meta-table" doesn't have the data from myTable to count. So do I need some sort of JOIN? Do I need to declare some variable and use some iteration?
The weird part is that this solution works for some tables, but not for others (it counts NULLs): https://stackoverflow.com/a/24411738/8055476
First I created a table with some sample data
IF OBJECT_ID('COuntNotNull') IS NOT NULL
Drop Table COuntNotNull
;With cte(Column1 , Column2 , Column3 , Column4 )
AS
(
SELECT 'X' , ' X' , NULL , 'X' Union all
SELECT NULL , NULL , 'X' , 'X' Union all
SELECT NULL , NULL , NULL , NULL
)
SELECT * INTO COUNTNOTNULL FROM cte
The below code get the column names dynamically For given table and get count of non null values
-- Build and run a dynamic SELECT returning every column of the target table
-- plus a per-row count of its non-NULL columns.
-- NOTE(review): '#' appears where T-SQL requires '@' for variables —
-- presumably a transcription artifact; verify against the original script.
DECLARE #DynamicColms NVARCHAR(max)
,#CaseDynamicColms NVARCHAR(max)
,#Sql NVARCHAR(max)
,#TableName VARCHAR(100) = 'COuntNotNull'--Here give Your TableName
-- Comma-separated column list, built via FOR XML PATH('') concatenation;
-- STUFF(..., 1, 1, '') strips the leading separator.
SELECT #DynamicColms = STUFF((
SELECT ', ' + COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = #TableName
FOR XML PATH('')
), 1, 1, '')
--SELECT #DynamicColms
-- '+'-joined CASE expressions: each contributes 1 when its column is
-- non-NULL, so their sum is the per-row count of populated columns.
SELECT #CaseDynamicColms = STUFF((
SELECT '+ ' + 'CASE WHEN ' + COLUMN_NAME + ' IS NOT NULL THEN 1 ELSE 0 END'
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = #TableName
FOR XML PATH('')
), 1, 1, '')
--SELECT #CaseDynamicColms
-- Assemble, show, and execute the final statement.
SET #Sql = 'SELECT ' + #DynamicColms + ',' + #CaseDynamicColms + CHAR(13) + CHAR(10) + ' AS COUNT_NOT_NULL FROM ' + #TableName
PRINT #Sql
EXECUTE (#Sql)
Result
Column1 Column2 Column3 Column4 COUNT_NOT_NULL
-------------------------------------------------
X X NULL X 3
NULL NULL X X 2
NULL NULL NULL NULL 0
Note: this is NOT asking
how to select a string where the column name is known.
how to select a string in ALL tables (all google results relate to this one)
This is asking search in only ONE table.
SQL returns error info conversion failed when converting the nvarchar value S3N2V5.
I want to locate the column name where S3N2V5 exists.
No manual methods please. There are 1000000 columns.
Input S3N2V5
Output columnname1ofthistable
Assuming I understand the question, here is one way to get a list of all columns from a single table that contain the search value, using CASE:
Create and populate sample table (Please save us this step in your future questions)
-- Fixture: three char(3) columns plus an int column; the search value 'nop'
-- appears in COL3 of the first row and COL2 of the second.
CREATE TABLE T
(
    COL1 char(3),
    COL2 char(3),
    COL3 char(3),
    COL4 int
)

INSERT INTO T
    (COL1, COL2, COL3, COL4)
VALUES
    ('abc', 'def', 'nop', 1),
    ('klm', 'nop', 'qrs', 2),
    ('tuv', 'wzy', 'zab', 3)
Build your dynamic sql:
-- Search value; bound as a parameter of the dynamic SQL below, not
-- concatenated into it.
DECLARE #Search nvarchar(5) = 'nop'
-- Seed the dynamic statement: one CASE over the search value...
DECLARE #SQL nvarchar(max) = 'SELECT CASE #Search'
-- ...with one WHEN per character-typed column of T, mapping a matching
-- column's value back to that column's name.
SELECT #SQL = #SQL +' WHEN '+ COLUMN_NAME + ' THEN '''+ COLUMN_NAME +''''
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'T'
AND LOWER(DATA_TYPE) LIKE '%char%' -- only search char, varchar, nchar and nvarchar columns
-- Wrap the CASE so only rows where some column matched are returned.
-- NOTE(review): a simple CASE reports only the FIRST matching column per
-- row, so a row matching in two columns yields one name.
SELECT #SQL = 'SELECT ColumnName FROM (' +
#SQL + ' END As ColumnName FROM T) x WHERE ColumnName IS NOT NULL'
Execute: (Note that using sp_executeSQL is SQL Injection safe, since we do not concatenate the search parameter into the query, but using it as a parameter)
-- Execute with #Search bound as a typed parameter (SQL-injection safe).
EXEC sp_executeSQL #SQL, N'#Search nvarchar(5)', #Search
Results:
ColumnName
COL3
COL2
-- Brute-force search: for every table in the database, build and run a
-- SELECT returning the rows where ANY column, cast to NVarChar(4000),
-- equals the search string.
DECLARE #MyValue NVarChar(4000) = 'searchstring';
-- Work list of every schema-qualified table name.
SELECT S.name SchemaName, T.name TableName
INTO #T
FROM sys.schemas S INNER JOIN
sys.tables T ON S.schema_id = T.schema_id;
-- Process (and delete) one table per iteration until the work list is empty.
WHILE (EXISTS (SELECT * FROM #T)) BEGIN
-- '(0 = 1)' seed lets every per-column predicate be appended with 'OR'.
DECLARE #SQL NVarChar(4000) = 'SELECT * FROM $$TableName WHERE (0 = 1) ';
-- NOTE(review): TOP 1 without ORDER BY — table pick order is arbitrary.
DECLARE #TableName NVarChar(1000) = (
SELECT TOP 1 SchemaName + '.' + TableName FROM #T
);
SELECT #SQL = REPLACE(#SQL, '$$TableName', #TableName);
DECLARE #Cols NVarChar(4000) = '';
-- One "OR CONVERT(col) = CONVERT('value')" predicate per column.
-- NOTE(review): #Cols starts at '' (never NULL), so this COALESCE is inert;
-- the build works only because every fragment begins with 'OR' after the
-- seeded '(0 = 1)'.
SELECT
#Cols = COALESCE(#Cols + 'OR CONVERT(NVarChar(4000), ', '') + C.name + ') = CONVERT(NVarChar(4000), ''$$MyValue'') '
FROM sys.columns C
WHERE C.object_id = OBJECT_ID(#TableName);
-- NOTE(review): the search value is spliced into the SQL text rather than
-- bound as a parameter — unsafe if #MyValue ever comes from untrusted input.
SELECT #Cols = REPLACE(#Cols, '$$MyValue', #MyValue);
SELECT #SQL = #SQL + #Cols;
-- Crude label: slice the table name back out of the generated SQL text.
select substring(#SQL,charindex('.',#SQL)+1,charindex('(',#SQL)-charindex('.',#SQL)-8) as 'TableName'
EXECUTE(#SQL);
-- Remove the processed table from the work list.
DELETE FROM #T
WHERE SchemaName + '.' + TableName = #TableName;
END;
DROP TABLE #T;
This will give you table Name and the entire row from the table which contains the searchstring.
Apart from the answers mentioned in this post: Older Post
1) (using column name) SELECT table_name,table_schema FROM INFORMATION_SCHEMA.COLUMNS WHERE column_name='sort_method';
I hope better you can take dump ( in.sql format ) and you can easily search the content using IDEs like N++.
This is my code.
-- Intended behavior: collect the column names common to table 'result' and
-- the table named by #tablename, then MERGE the two on TXN_KEY using that
-- column list. Several defects are flagged inline.
Create Procedure Merge_tables
#tablename varchar(20)
As
create table temp1 ( column_name varchar(20) )
insert into temp1 (column_name)
select Column_Name
from INFORMATION_SCHEMA.COLUMNS
where TABLE_NAME = 'result'
intersect
select Column_Name
from INFORMATION_SCHEMA.COLUMNS
-- BUG: '#tablename' is a string literal, not the parameter's value, so this
-- side of the INTERSECT matches nothing and temp1 stays empty.
where TABLE_NAME = '#tablename'
Declare #name varchar(max)
Declare #concat varchar(max)
set #concat = ''
-- Row-at-a-time loop: pop one column name, append it to #concat, delete it.
while (select COUNT(*) from temp1)>0
Begin
-- NOTE(review): TOP 1 with no ORDER BY — the column order in #concat is
-- arbitrary.
set #name = (select top 1 * from temp1)
set #concat = #concat + #name + ','
select #concat as combined
delete temp1 where temp1.column_name = #name
End
-- BUG (source of Msg 207): in a static MERGE statement, '+#concat+' is
-- taken literally as a column name; variables cannot be spliced into
-- identifier positions. The MERGE text must be built as a string and run
-- with EXEC / sp_executesql.
Merge result as T
using #tablename as S on T.TXN_KEY = S.TXN_KEY
when not matched then
insert ('+#concat+') values ('+#concat+')
when matched then
update set T.TXN_KEY = S.TXN_KEY(?)
Table temp1 stores the common column names. The only table-specific thing is the key to be matched on, which is TXN_KEY; everything else is generic. By the end of the while loop, #concat holds the combined column names separated by commas.
The error I get in the merge statement is:
Msg 207, Level 16, State 1, Line 17
Invalid column name '+#concat+'
Also, for update statement to work #concat string needs to be split to set values for individual columns. I have been trying to crack this for a while now.
Thanks in advance.
Ok you have to pass your target table and source table, to define Primary keys. This works, I have tested it a lot.
-- Generic MERGE of #tablenameSource into #tablenameTarget: discovers each
-- table's primary key and shared non-key column list from
-- INFORMATION_SCHEMA, then builds and executes a dynamic MERGE
-- (update matched rows, insert missing ones, delete rows absent from the
-- source).
CREATE PROCEDURE Merge_Tables
(
#tablenameTarget VARCHAR(128),
#tablenameSource VARCHAR(128)
)
AS
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED
SET NOCOUNT ON
--variables
DECLARE #targetPK VARCHAR(128),
#sourcePK VARCHAR(128),
#columns VARCHAR(MAX),
#sql VARCHAR(8000)
--temp table for the primary keys
CREATE TABLE #tableMapping
(
TargetPK VARCHAR(128),
SourcePK VARCHAR(128),
Columns VARCHAR(MAX)
)
--temp table for the comma delimted columns
CREATE TABLE #Columns
(
ColumnsUpdate VARCHAR(MAX)
)
--get the primary keys for both target and source tables. so we make sure we dont update or insert them
INSERT INTO #tableMapping
SELECT cu.COLUMN_NAME,
sourcePK.COLUMN_NAME,
data.columns
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS ta
INNER JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE cu
ON cu.Constraint_name = ta.CONSTRAINT_NAME
-- Source-table PK lookup.
-- NOTE(review): this subquery joins and filters on ta.CONSTRAINT_NAME /
-- ta.CONSTRAINT_TYPE (the OUTER table's aliases) rather than tas/cus —
-- presumably it should use the inner aliases; verify it really returns the
-- source table's primary key.
OUTER APPLY
(
SELECT cus.COLUMN_NAME
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tas
INNER JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE cus
ON cus.Constraint_name = ta.CONSTRAINT_NAME
WHERE tas.Table_Name = #tablenameSource
AND ta.CONSTRAINT_TYPE = 'Primary Key'
) AS sourcePK
-- Comma-joined list of the target's non-PK column names.
OUTER APPLY
(
SELECT STUFF(
(
SELECT ',' + Column_Name
FROM INFORMATION_SCHEMA.Columns Columns
WHERE ta.Table_Name = Columns.Table_Name
AND Columns.Column_Name <> cu.COLUMN_NAME --dont get the primary key
ORDER BY Column_Name
FOR XML PATH ('')
), 1, 1, '') columns
) AS data
WHERE ta.Table_Name = #tablenameTarget
AND ta.CONSTRAINT_TYPE = 'Primary Key'
--populate the variables so we can use it in our dynamic merge statement
SELECT #targetPK = TargetPK,
#sourcePK = SourcePK,
#columns = Columns
FROM #tableMapping
--make sure the rows match from the source and target tables, and make it in a comma delimted string
-- Build "TRGT.col = SRCE.col" assignment pairs for every shared non-PK
-- column; the INTERSECT keeps only lists identical for both tables.
INSERT INTO #Columns
SELECT
STUFF(
(
SELECT ',' + 'TRGT.' + Column_Name + ' = SRCE.' + COLUMN_NAME
FROM INFORMATION_SCHEMA.Columns Columns
WHERE t.Table_Name = Columns.Table_Name
AND Column_Name <> #targetPK
ORDER BY Column_Name
FOR XML PATH ('')
), 1, 1, ''
)Columns
FROM INFORMATION_SCHEMA.Columns t
INNER JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS ta
ON ta.TABLE_NAME = t.TABLE_NAME
INNER JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE ccu
ON ccu.Constraint_name = ta.CONSTRAINT_NAME
-- NOTE(review): the '' + ... + '' concatenations are no-ops.
WHERE t.Table_Name = '' + #tablenameTarget + ''
INTERSECT
SELECT
STUFF(
(
SELECT ',' + 'TRGT.' + Column_Name + ' = SRCE.' + COLUMN_NAME
FROM INFORMATION_SCHEMA.Columns Columns
WHERE t.Table_Name = Columns.Table_Name
AND Column_Name <> #sourcePK
ORDER BY Column_Name
FOR XML PATH ('')
), 1, 1, ''
)Columns
FROM INFORMATION_SCHEMA.Columns t
WHERE t.Table_Name = '' + #tablenameSource + ''
--use dynamic sql for our merge statement
SET #sql = 'MERGE ' + #tablenameTarget + ' AS TRGT
USING ' + #tablenameSource + ' AS SRCE
ON SRCE.' + #sourcePK + ' = TRGT.' + #targetPK + '
WHEN MATCHED THEN UPDATE SET ' + (SELECT ColumnsUpdate FROM #Columns)+ '
WHEN NOT MATCHED BY TARGET THEN
INSERT (' + (SELECT #Columns)+ ')
VALUES (' + (SELECT 'SRCE.' + REPLACE(#columns, ',',',SRCE.')) + ')
WHEN NOT MATCHED BY SOURCE THEN
DELETE;'
EXEC (#sql)
DROP TABLE #Columns
DROP TABLE #tableMapping
1) Firstly why do you need the INTERSECT?
2) Secondly nothing will be inserted into temp1, because you say WHERE
TABLE_NAME = '#tablename'. There will never be a table name #tablename.
Change it to WHERE TABLE_NAME = '' + #tablename + ''
3)Also '+#concat+' needs to be '' + #concat + ''
4) I really think the merge statement will need to be in Dynamic SQL, for you to split the #concat columns.
I have a large table with 500 columns and 100M rows. Based on a small sample, I believe only about 50 of the columns contain any values, and the other 450 contain only NULL values. I want to list the columns that contain no data.
On my current hardware, it would take about 24 hours to query every column (select count(1) from tab where col_n is not null)
Is there a less expensive way to determine that a column is completely empty/NULL?
What about this:
-- Count non-NULL values per column in a single scan.
-- Fixed: each CASE expression was missing its closing END keyword.
SELECT
SUM(CASE WHEN column_1 IS NOT NULL THEN 1 ELSE 0 END) column_1_count,
SUM(CASE WHEN column_2 IS NOT NULL THEN 1 ELSE 0 END) column_2_count,
...
FROM table_name
?
You can easily create this query if you use INFORMATION_SCHEMA.COLUMNS table.
EDIT:
Another idea:
SELECT MAX(column_1), MAX(column_2),..... FROM table_name
If result contains value, column is populated. It should require one table scan.
Try this one -
DDL:
-- Sample table: NOT NULL ID/Name, nullable IsCitizen and Age. Age is NULL
-- in every row; IsCitizen is NULL in exactly one.
IF OBJECT_ID ('dbo.test2') IS NOT NULL
    DROP TABLE dbo.test2

CREATE TABLE dbo.test2
(
    ID BIGINT IDENTITY(1,1) PRIMARY KEY,
    Name VARCHAR(10) NOT NULL,
    IsCitizen BIT NULL,
    Age INT NULL
)

INSERT INTO dbo.test2 (Name, IsCitizen, Age)
VALUES ('1', 1, NULL),
       ('2', 0, NULL),
       ('3', NULL, NULL)
Query 1:
-- Query 1: for each column of #TableName, emit 1 if the column is entirely
-- NULL, else 0. NOT NULL columns short-circuit to a constant 0; nullable
-- columns compare the table's row count against their per-column NULL count.
DECLARE
#TableName SYSNAME
, #ObjectID INT
, #SQL NVARCHAR(MAX)
SELECT
#TableName = 'dbo.test2'
, #ObjectID = OBJECT_ID(#TableName)
-- One select-list entry per column, concatenated via FOR XML PATH('');
-- STUFF(..., 1, 2, ' ') strips the leading newline + comma.
SELECT #SQL = 'SELECT' + CHAR(13) + STUFF((
SELECT CHAR(13) + ', [' + c.name + '] = ' +
CASE WHEN c.is_nullable = 0
THEN '0'
ELSE 'CASE WHEN ' + totalrows +
' = SUM(CASE WHEN [' + c.name + '] IS NULL THEN 1 ELSE 0 END) THEN 1 ELSE 0 END'
END
FROM sys.columns c WITH (NOWAIT)
-- Row count taken from sys.partitions (heap index 0 or clustered index 1).
-- NOTE(review): partition row counts can lag under concurrent DML.
CROSS JOIN (
SELECT totalrows = CAST(MIN(p.[rows]) AS VARCHAR(50))
FROM sys.partitions p
WHERE p.[object_id] = #ObjectID
AND p.index_id IN (0, 1)
) r
WHERE c.[object_id] = #ObjectID
FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 2, ' ') + CHAR(13) + 'FROM ' + #TableName
PRINT #SQL
EXEC sys.sp_executesql #SQL
Output 1:
SELECT
[ID] = 0
, [Name] = 0
, [IsCitizen] = CASE WHEN 3 = SUM(CASE WHEN [IsCitizen] IS NULL THEN 1 ELSE 0 END) THEN 1 ELSE 0 END
, [Age] = CASE WHEN 3 = SUM(CASE WHEN [Age] IS NULL THEN 1 ELSE 0 END) THEN 1 ELSE 0 END
FROM dbo.test2
Query 2:
-- Query 2: same per-column "entirely NULL?" flags via a single aggregation:
-- MAX ignores NULL inputs, so MAX(col) IS NULL exactly when every value is
-- NULL. The CAST to CHAR(1) lets MAX apply uniformly to types such as bit;
-- only NULL-ness matters here, not the value itself.
DECLARE
#TableName SYSNAME
, #SQL NVARCHAR(MAX)
SELECT #TableName = 'dbo.test2'
-- Build one select-list entry per column (constant 0 for NOT NULL columns).
SELECT #SQL = 'SELECT' + CHAR(13) + STUFF((
SELECT CHAR(13) + ', [' + c.name + '] = ' +
CASE WHEN c.is_nullable = 0
THEN '0'
ELSE 'CASE WHEN '+
'MAX(CAST([' + c.name + '] AS CHAR(1))) IS NULL THEN 1 ELSE 0 END'
END
FROM sys.columns c WITH (NOWAIT)
WHERE c.[object_id] = OBJECT_ID(#TableName)
FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 2, ' ') + CHAR(13) + 'FROM ' + #TableName
PRINT #SQL
EXEC sys.sp_executesql #SQL
Output 2:
SELECT
[ID] = 0
, [Name] = 0
, [IsCitizen] = CASE WHEN MAX(CAST([IsCitizen] AS CHAR(1))) IS NULL THEN 1 ELSE 0 END
, [Age] = CASE WHEN MAX(CAST([Age] AS CHAR(1))) IS NULL THEN 1 ELSE 0 END
FROM dbo.test2
Results:
ID Name IsCitizen Age
----------- ----------- ----------- -----------
0 0 0 1
Could you check if colums idexing will help you reach some performance improve
-- Filtered index covering only the non-NULL values of ColumnName.
-- NOTE(review): UNIQUE makes this double as a uniqueness constraint over
-- the non-NULL values — if ColumnName can repeat, the CREATE will fail;
-- drop the UNIQUE keyword if only the performance benefit is wanted.
CREATE UNIQUE NONCLUSTERED INDEX IndexName ON dbo.TableName(ColumnName)
WHERE ColumnName IS NOT NULL;
GO
SQL server query to get the list of columns in a table along with Data types, NOT NULL, and PRIMARY KEY constraints
Run SQL in best answer of above questions and generate a new query like below.
Select ISNULL(column1,1), ISNULL(column2,1), ISNULL(column3,1) from table
You would not need to 'count' all of the 100M records. When you simply back out of the query with a TOP 1 as soon as you hit a column with a not-null value, would save a lot of time while providing the same information.
500 Columns?!
Ok, the right answer to your question is: normalize your table.
Here's what happening for the time being:
You don't have an index on that column so SQL Server has to do a full scan of your humongous table.
SQL Server will certainly fully read every row (it means every columns even if you're only interested in one).
And since your row are most likely over 8kb... http://msdn.microsoft.com/en-us/library/ms186981%28v=sql.105%29.aspx
Seriously, normalize your table and if needed split it horizontally (put "theme grouped" columns inside separate table, to only read them when you need them).
EDIT: You can rewrite your query like this
select count(col_n) from tab
and if you want to get all columns at once (better):
SELECT
COUNT(column_1) column_1_count,
COUNT(column_2) column_2_count,
...
FROM table_name
If most records are not null maybe you can mix some of the approach suggested (for example check only nullable fields) with this:
if exists (select * from table where field is not null)
this should speed up the search because exists stops the search as soon as condition is met, in this example a single not null record is enough to decide the status of the field.
If the field has an index this should be almost instant.
Normally adding top 1 to this query is not needed because the query optimizer knows that you do not need to retrieve all the matching records.
You can use this stored procedure to do the trick. You need to provide the name of the table you wish to query; note that if you pass the procedure the #exec parameter = 1, it will execute the SELECT query.
-- SP_SELECT_NON_NULL_COLUMNS: builds (and, with #exec = 1, runs) a SELECT
-- of only those columns of #tablename that contain at least one non-NULL
-- value. Stage 1 generates one "IF EXISTS(...) INSERT column name" probe
-- per column; stage 2 assembles the surviving names into the final SELECT.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [dbo].[SP_SELECT_NON_NULL_COLUMNS] ( #tablename varchar (100)=null, #exec int =0)
AS BEGIN
SET NOCOUNT ON
IF #tablename IS NULL
RAISERROR('CANT EXECUTE THE PROC, TABLE NAME IS MISSING',16 ,1)
ELSE
BEGIN
IF OBJECT_ID('tempdb..#table') IS NOT NULL DROP TABLE #table
DECLARE #i VARCHAR (max)=''
DECLARE #sentence VARCHAR (max)=''
DECLARE #SELECT VARCHAR (max)
-- Bracket the table name to survive spaces and reserved words.
-- NOTE(review): the name is spliced into dynamic SQL unescaped — do not
-- expose this procedure to untrusted input.
DECLARE #LocalTableName VARCHAR(50) = '['+#tablename+']'
CREATE TABLE #table (ColumnName VARCHAR (max))
-- One probe per column; EXISTS stops scanning at the first non-NULL hit.
SELECT #i+=
' IF EXISTS ( SELECT TOP 1 '+column_name+' FROM ' +#LocalTableName+' WHERE ' +column_name+
' '+'IS NOT NULL) INSERT INTO #table VALUES ('''+column_name+''');'
FROM INFORMATION_SCHEMA.COLUMNS WHERE table_name=#tablename
-- The probe batch itself INSERTs the surviving names into #table.
INSERT INTO #table
EXEC (#i)
-- Fold the surviving names into " col1 , col2 ,"; the trailing comma is
-- removed below via LEFT/NULLIF.
SELECT #sentence = #sentence+' '+columnname+' ,' FROM #table
DROP TABLE #table
IF #exec=0
BEGIN
-- #exec = 0: just return the generated SELECT text.
SELECT 'SELECT '+ LTRIM (left (#sentence,NULLIF(LEN (#sentence)-1,-1)))+
+' FROM ' +#LocalTableName
END
ELSE
BEGIN
-- #exec <> 0: run the generated SELECT.
SELECT #SELECT= 'SELECT '+ LTRIM (left (#sentence,NULLIF(LEN (#sentence)-1,-1)))+
+' FROM '+#LocalTableName
EXEC (#SELECT)
END
END
END
Use it like this:
-- Example: build and (because #exec = 1) execute the SELECT of non-NULL
-- columns for YourTableName.
EXEC [dbo].[SP_SELECT_NON_NULL_COLUMNS] 'YourTableName' , 1