I'm trying to insert a dataframe into a snowflake table using the pandas connector and am getting permission issues, but using the "normal" snowflake connector works fine.
import snowflake.connector as snow
from snowflake.connector.pandas_tools import *
cur = self.conn.cursor()
my_schema = "my_schema"
my_table = "my_table"
cur.execute(f"""INSERT INTO {my_schema}.{my_table}(load_date, some_id)
values (current_timestamp, 'xxx')""")
write_pandas(self.conn, daily_epc_df, table_name=my_table, schema=my_schema)
But I'm getting
File "/Users/abohr/virtualenv/peak38/lib/python3.8/site-packages/snowflake/connector/errors.py", line 85, in default_errorhandler
raise error_class(
snowflake.connector.errors.ProgrammingError: 001757 (42601): SQL compilation error:
Table '"my_schema"."my_table"' does not exist
the same connection can insert and then doesn't work on the same table.
I also tried
df.to_sql(..., method=pd_writer)
and get
pandas.io.sql.DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting
Why is it talking about sqlite_master if I'm trying to connect to Snowflake? Do the pandas functions require different connections?
my libs:
name = "snowflake-connector-python"
version = "2.3.3"
description = "Snowflake Connector for Python"
category = "main"
optional = false
python-versions = ">=3.5"
[[package]]
name = "pandas"
version = "1.0.5"
description = "Powerful data structures for data analysis, time series, and statistics"
category = "main"
optional = false
python-versions = ">=3.6.1"```
on Python 3.8
I found my issue - I added quote_identifiers=False to
write_pandas(self.conn, daily_epc_df, table_name=my_table, schema=my_schema, quote_identifiers=False)
and now it works.
But seems very wrong that the default behavior would break, I'm still suspicious.
Hmm I've done something superior I think to pandas ...
Tell me if you agree ...
CREATE OR REPLACE PROCEDURE DEMO_DB.PUBLIC.SNOWBALL(
db_name STRING,
schema_name STRING,
snowball_table STRING,
max_age_days FLOAT,
limit FLOAT
)
RETURNS VARIANT
LANGUAGE JAVASCRIPT
COMMENT = 'Collects table and column stats.'
EXECUTE AS OWNER
AS
$$
var validLimit = Math.max(LIMIT, 0); // prevent SQL syntax error caused by negative numbers
var sqlGenerateInserts = `
WITH snowball_tables AS (
SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.TABLES
),
snowball_columns AS (
SELECT CONCAT_WS('.', table_catalog, table_schema, table_name) AS full_table_name, *
FROM IDENTIFIER(?) -- <<DB_NAME>>.INFORMATION_SCHEMA.COLUMNS
),
snowball AS (
SELECT table_name, MAX(stats_run_date_time) AS stats_run_date_time
FROM IDENTIFIER(?) -- <<SNOWBALL_TABLE>> table
GROUP BY table_name
)
SELECT full_table_name, aprox_row_count,
CONCAT (
'INSERT INTO IDENTIFIER(''', ?, ''') ', -- SNOWBALL table
'(table_name,total_rows,table_last_altered,table_created,table_bytes,col_name,',
'col_data_type,col_hll,col_avg_length,col_null_cnt,col_min,col_max,col_top,col_mode,col_avg,stats_run_date_time)',
'SELECT ''', full_table_name, ''' AS table_name, ',
table_stats_sql,
', ARRAY_CONSTRUCT( ', col_name, ') AS col_name',
', ARRAY_CONSTRUCT( ', col_data_type, ') AS col_data_type',
', ARRAY_CONSTRUCT( ', col_hll, ') AS col_hll',
', ARRAY_CONSTRUCT( ', col_avg_length, ') AS col_avg_length',
', ARRAY_CONSTRUCT( ', col_null_cnt, ') AS col_null_cnt',
', ARRAY_CONSTRUCT( ', col_min, ') AS col_min',
', ARRAY_CONSTRUCT( ', col_max, ') AS col_max',
', ARRAY_CONSTRUCT( ', col_top, ') AS col_top',
', ARRAY_CONSTRUCT( ', col_MODE, ') AS col_MODE',
', ARRAY_CONSTRUCT( ', col_AVG, ') AS col_AVG',
', CURRENT_TIMESTAMP() AS stats_run_date_time ',
' FROM ', quoted_table_name
) AS insert_sql
FROM (
SELECT
tbl.full_table_name,
tbl.row_count AS aprox_row_count,
CONCAT ( '"', col.table_catalog, '"."', col.table_schema, '"."', col.table_name, '"' ) AS quoted_table_name,
CONCAT (
'COUNT(1) AS total_rows,''',
IFNULL( tbl.last_altered::VARCHAR, 'NULL'), ''' AS table_last_altered,''',
IFNULL( tbl.created::VARCHAR, 'NULL'), ''' AS table_created,',
IFNULL( tbl.bytes::VARCHAR, 'NULL'), ' AS table_bytes' ) AS table_stats_sql,
LISTAGG (
CONCAT ('''', col.full_table_name, '.', col.column_name, '''' ), ', '
) AS col_name,
LISTAGG ( CONCAT('''', col.data_type, '''' ), ', ' ) AS col_data_type,
LISTAGG ( CONCAT( ' HLL(', '"', col.column_name, '"',') ' ), ', ' ) AS col_hll,
LISTAGG ( CONCAT( ' AVG(ZEROIFNULL(LENGTH(', '"', col.column_name, '"','))) ' ), ', ' ) AS col_avg_length,
LISTAGG ( CONCAT( ' SUM( IFF( ', '"', col.column_name, '"',' IS NULL, 1, 0) ) ' ), ', ') AS col_null_cnt,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MODE(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_MODE,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MIN(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_min,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' MAX(', '"', col.column_name, '"', ') ' ), 'NULL' ), ', ' ) AS col_max,
LISTAGG ( IFF ( col.data_type = 'NUMBER', CONCAT ( ' AVG(', '"', col.column_name,'"',') ' ), 'NULL' ), ', ' ) AS col_AVG,
LISTAGG ( CONCAT ( ' APPROX_TOP_K(', '"', col.column_name, '"', ', 100, 10000)' ), ', ' ) AS col_top
FROM snowball_tables tbl JOIN snowball_columns col ON col.full_table_name = tbl.full_table_name
LEFT OUTER JOIN snowball sb ON sb.table_name = tbl.full_table_name
WHERE (tbl.table_catalog, tbl.table_schema) = (?, ?)
AND ( sb.table_name IS NULL OR sb.stats_run_date_time < TIMESTAMPADD(DAY, - FLOOR(?), CURRENT_TIMESTAMP()) )
--AND tbl.row_count > 0 -- NB: also excludes views (table_type = 'VIEW')
GROUP BY tbl.full_table_name, aprox_row_count, quoted_table_name, table_stats_sql, stats_run_date_time
ORDER BY stats_run_date_time NULLS FIRST )
LIMIT ` + validLimit;
var tablesAnalysed = [];
var currentSql;
try {
currentSql = sqlGenerateInserts;
var generateInserts = snowflake.createStatement( {
sqlText: currentSql,
binds: [
`"${DB_NAME}".information_schema.tables`,
`"${DB_NAME}".information_schema.columns`,
SNOWBALL_TABLE, SNOWBALL_TABLE,
DB_NAME, SCHEMA_NAME, MAX_AGE_DAYS, LIMIT
]
} );
var insertStatements = generateInserts.execute();
// loop over generated INSERT statements and execute them
while (insertStatements.next()) {
var tableName = insertStatements.getColumnValue('FULL_TABLE_NAME');
currentSql = insertStatements.getColumnValue('INSERT_SQL');
var insertStatement = snowflake.createStatement( {
sqlText: currentSql,
binds: [ SNOWBALL_TABLE ]
} );
var insertResult = insertStatement.execute();
tablesAnalysed.push(tableName);
}
return { result: "SUCCESS", analysedTables: tablesAnalysed };
}
catch (err) {
return {
error: err,
analysedTables: tablesAnalysed,
sql: currentSql
};
}
$$;
I have the following details to prepare string as shown below in the expected result.
Given:
DECLARE #VValue VARCHAR(MAX) = 'John Dee|Mak Don'
DECLARE #VColumns VARCHAR(MAX) = 'FName|LName'
DECLARE #VCondition VARCHAR(500) = 'OR'
DECLARE #VPattern VARCHAR(MAX) = 'Start|Exact'
DECLARE #VCheck VARCHAR(MAX) = '1|0'
DECLARE #String VARCHAR(MAX) = ''
Expected result:
(
ISNULL(PATINDEX(''John%'', FName), ''0'') > 0 AND
ISNULL(PATINDEX(''Dee%'', FName), ''0'') > 0
)
OR
(
ISNULL(PATINDEX(''Mak'', LName), ''0'') +
ISNULL(PATINDEX(''Don'', LName), ''0'') > 0
)
My attempt:
SET #VCondition = '|'+#VCondition;
SET #String = REPLACE('('+REPLACE(STUFF((SELECT N') '+DS3.Item+' (' + NCHAR(10) +
STUFF((SELECT
CASE WHEN DS4.Item = 'Start' AND DS5.Item = 0
THEN
N' + ' +NCHAR(10)+
N'ISNULL(PATINDEX('''''+DSn.Item +'%'''''+ N',' + DSc.Item + N'),''''0'''')'
WHEN DS4.Item = 'Start' AND DS5.Item = 1
THEN
N' > 0 AND ' +NCHAR(10)+
N'ISNULL(PATINDEX('''''+DSn.Item +'%'''''+ N',' + DSc.Item + N'),''''0'''')'
ELSE
''
END
+
CASE WHEN DS4.Item = 'Exact' AND DS5.Item = 0
THEN
N' + ' +NCHAR(10)+
N'ISNULL(PATINDEX('''''+DSn.Item +''''''+ N',' + DSc.Item + N'),''''0'''')'
WHEN DS4.Item = 'Exact' AND DS5.Item = 1
THEN
N' > 0 AND ' +NCHAR(10)+
N'ISNULL(PATINDEX('''''+DSn.Item +''''''+ N',' + DSc.Item + N'),''''0'''')'
ELSE ''
END
FROM dbo.f_split(DS1.Item,' ') DSn
CROSS APPLY dbo.f_split(DS2.Item,',') DSc
ORDER BY DSc.id, DSn.id
FOR XML PATH(N'')),1,4,N'')
FROM dbo.udf_split (#VValue,'|') DS1
CROSS APPLY dbo.udf_split (#VColumns,'|') DS2
CROSS APPLY dbo.udf_split (#VCondition,'|') DS3
CROSS APPLY dbo.udf_split (#VPattern,'|') DS4
CROSS APPLY dbo.udf_split (#VCheck, '|') DS5
WHERE DS1.id = DS2.id AND DS2.ID = DS3.ID AND DS3.ID = DS4.ID AND DS4.ID = DS5.ID
ORDER BY DS1.id
FOR XML PATH(N'')),1,9,N''),'>','>') + N') )','>','>');
PRINT(#String);
But I'm getting the following result:
( AND
ISNULL(PATINDEX(''John%'', FName), ''0'') > 0 AND
ISNULL(PATINDEX(''Dee%'', FName), ''0'')) OR (
ISNULL(PATINDEX(''Mak'', LName), ''0'') +
ISNULL(PATINDEX(''Don'', LName), ''0'')) )
After many attempt, following does the job done.
Query:
DECLARE #VValue VARCHAR(MAX) = 'John Dee|Mak Don'
DECLARE #VColumns VARCHAR(MAX) = 'FName|LName'
DECLARE #VCondition VARCHAR(500) = 'OR'
DECLARE #VPattern VARCHAR(MAX) = 'Start|Exact'
DECLARE #VCheck VARCHAR(MAX) = '1|0'
DECLARE #String VARCHAR(MAX) = ''
SET #VCondition = '|'+#VCondition;
SET #String = REPLACE('('+STUFF((SELECT N' > 0 ) '+DS3.Item+' (' + NCHAR(10) +
REPLACE(STUFF((SELECT
CASE WHEN DS4.Item = 'Start' AND DS5.Item = 0
THEN
N' + ' +NCHAR(10)+
N'ISNULL(PATINDEX('''''+DSn.Item +'%'''''+ N',' + DSc.Item + N'),''''0'''')'
WHEN DS4.Item = 'Start' AND DS5.Item = 1
THEN
N' > 0 AND ' +NCHAR(10)+
N'ISNULL(PATINDEX('''''+DSn.Item +'%'''''+ N',' + DSc.Item + N'),''''0'''')'
ELSE
''
END
+
CASE WHEN DS4.Item = 'Exact' AND DS5.Item = 0
THEN
N' + ' +NCHAR(10)+
N'ISNULL(PATINDEX('''''+DSn.Item +''''''+ N',' + DSc.Item + N'),''''0'''')'
WHEN DS4.Item = 'Exact' AND DS5.Item = 1
THEN
N' > 0 AND ' +NCHAR(10)+
N'ISNULL(PATINDEX('''''+DSn.Item +''''''+ N',' + DSc.Item + N'),''''0'''')'
ELSE ''
END
FROM dbo.f_split(DS1.Item,' ') DSn
CROSS APPLY dbo.f_split(DS2.Item,',') DSc
ORDER BY DSc.id, DSn.id
FOR XML PATH(N'')),1,11,N''),'>','>')
FROM dbo.udf_split (#VValue,'|') DS1
CROSS APPLY dbo.udf_split (#VColumns,'|') DS2
CROSS APPLY dbo.udf_split (#VCondition,'|') DS3
CROSS APPLY dbo.udf_split (#VPattern,'|') DS4
CROSS APPLY dbo.udf_split (#VCheck, '|') DS5
WHERE DS1.id = DS2.id AND DS2.ID = DS3.ID AND DS3.ID = DS4.ID AND DS4.ID = DS5.ID
ORDER BY DS1.id
FOR XML PATH(N'')),1,13,N''),'>','>') + N' > 0 ) ';
PRINT(#String);
Output:
(
ISNULL(PATINDEX(''John%'',FName),''0'') > 0 AND
ISNULL(PATINDEX(''Dee%'',FName),''0'') > 0
)
OR
(
ISNULL(PATINDEX(''Mak'',LName),''0'') +
ISNULL(PATINDEX(''Don'',LName),''0'') > 0
)
I wanted to write an sql query to fetch data as:
1. when param = 'all' it should list data across the table
2. when param = 'yes' it should list data where invoicenumber is not empty.
3. when param = 'no' it should list data where invoicenumber is empty.
i tried below query for yes and no
declare #invoiced as nvarchar(10) = 'no'
select * from OrderSummary
where
((#invoiced = 'yes') or (InvoiceNumber = ''))
and
((#invoiced = 'no') or (InvoiceNumber <> ''))
now i also want to incorporate all condition, could anyone suggest how could i achieve that
declare #invoiced as nvarchar(10) = 'no'
select * from OrderSummary
where
#invoiced = 'all'
OR
(#invoiced = 'yes' AND InvoiceNumber <> '')
OR
(#invoiced = 'no' AND InvoiceNumber = '')
Try this
declare #invoiced as nvarchar(10) = 'no'
select
*
from OrderSummary
where
(
#invoiced = 'all'
OR
(
#invoiced = 'yes'
AND
InvoiceNumber <> ''
)
OR
(
#invoiced = 'no'
AND
InvoiceNumber = ''
)
)
It should fulfill your requirement.
declare #invoiced as nvarchar(10) = 'no'
select * from OrderSummary
where
((#invoiced in ('all','no')) OR (#invoiced = 'yes' AND InvoiceNumber <> ''))
and
((#invoiced in ('all','yes')) OR (#invoiced = 'no' AND InvoiceNumber = ''))
and
(#invoiced in ('no','yes'))
declare #invoiced as nvarchar(10) = 'no'
select * from OrderSummary
where
((#invoiced = 'yes') and (InvoiceNumber <> '') )
or
((#invoiced = 'no') and ( (InvoiceNumber = '') or (InvoiceNumber = null)))
or (#invoiced = 'all')
Please update this query with above query.