How to extract data from a snowflake table into CSV using Python - snowflake-cloud-data-platform

import snowflake.connector
import sys
import logging
import logging.config
class opMetadataExt:
def __init__(self):
#self.logger = logging.getLogger('csv_to_json')
self.sfExtract()
def sfExtract(self):
#pwd = input("Password: ")
sqlSnowflake = snowflake.connector.connect(
user=
password=
account=
database=
role=
warehouse=
#warehouse=
schema=
)
sqlSnowflake.cursor().execute("USE WAREHOUSE ")
sqlSnowflake.cursor().execute("USE DATABASE ")
sqlSnowflake = sqlSnowflake.cursor()
test = "copy into '#/extractFile/' from (select * from information_schema.tables limit 1) file_format=(format_name=(type='csv'))"
try:
sqlSnowflake.execute(test)
rows = sqlSnowflake.fetchall() print(f'Data - {rows}')
except Exception as e:
print(e)
#sqlSnowflake.rollback()
finally:
sqlSnowflake.close()
But getting the below error:
001012 (42601): 0195a241-01fd-1365-0000-54ad01f956da: SQL compilation error:
missing stage name in URL: #/extractFile/

Related

Export all Tables and Views from a database (data dump)

I understood that you could not do a full snowflake data dump and need to use the COPY command to unload data from a table into an internal (i.e. Snowflake) stage.
To automate the process, I thought to do it with Python. Do you think that is the best method?
import traceback
import snowflake.connector
import pandas as pd
from snowflake.sqlalchemy import URL
from sqlalchemy import create_engine
url = URL(
user='??????',
password='????????',
account='??????-??????',
database='SNOWFLAKE',
role = 'ACCOUNTADMIN'
)
out_put_string = ""
try:
engine = create_engine(url)
connection = engine.connect()
# Get all the views from the SNOWFLAKE database
query = '''
show views in database SNOWFLAKE
'''
df = pd.read_sql(query, connection)
# Loop over all the views
df = df.reset_index() # make sure indexes pair with number of rows
for index, row in df.iterrows():
out_put_string += "VIEW:----------" + row['schema_name'] + "." + row['name'] + "----------\n"
df_view = pd.read_sql('select * from ' + row['schema_name'] + "." + row['name'], connection)
df_view.to_csv("/Temp/Output_CVS/" + row['schema_name'] + "-" + row['name'] + ".csv")
out_put_string += df_view.to_string() + "\n"
except:
print("ERROR:")
traceback.print_exc()
connection.close()
#Export all the Views in one file
text_file = open("/Temp/Output_CVS/AllViewsData.txt", "w")
text_file.write(out_put_string)
text_file.close()

getting Table 'NM_TEMP_STAGING_1100952600' does not exist using aws glue and snowflake

I am using glue job to write data pipeline. I took code from community, which is as following
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from awsglue.job import Job
from py4j.java_gateway import java_import
SNOWFLAKE_SOURCE_NAME = "net.snowflake.spark.snowflake"
#args = getResolvedOptions(sys.argv, ['JOB_NAME'])
args = getResolvedOptions(sys.argv, ['JOB_NAME', 'URL', 'ACCOUNT', 'WAREHOUSE', 'DB', 'SCHEMA', 'USERNAME', 'PASSWORD', 'ROLE'])
sparkContext = SparkContext()
glueContext = GlueContext(sparkContext)
sparkSession = glueContext.spark_session
glueJob = Job(glueContext)
glueJob.init(args['JOB_NAME'], args)
##Use the CData JDBC driver to read Snowflake data from the Products table into a DataFrame
##Note the populated JDBC URL and driver class name
java_import(sparkSession._jvm, SNOWFLAKE_SOURCE_NAME)
sparkSession._jvm.net.snowflake.spark.snowflake.SnowflakeConnectorUtils.enablePushdownSession(sparkSession._jvm.org.apache.spark.sql.SparkSession.builder().getOrCreate())
tmp_dir=args["TempDir"]
sfOptions = {
"sfURL" : args['URL'],
"sfAccount" : args['ACCOUNT'],
"sfUser" : args['USERNAME'],
"sfPassword" : args['PASSWORD'],
"sfDatabase" : args['DB'],
"sfSchema" : args['SCHEMA'],
"sfRole" : args['ROLE'],
"sfWarehouse" : args['WAREHOUSE'],
"preactions" : "USE DATABASE dev_lz;",
}
#"tempDir" : tmp_dir,
print('=========DB Connection details ================== ', sfOptions)
datasource0 = glueContext.create_dynamic_frame.from_catalog(database = "aws-nonprod-datalake-glue-catalog", table_name = "nm_s_amaster", transformation_ctx = "datasource0")
applymapping1 = ApplyMapping.apply(frame = datasource0, mappings = [ mappings], transformation_ctx = "applymapping1")
selectfields2 = SelectFields.apply(frame = applymapping1, paths = [columns], transformation_ctx = "selectfields2")
resolvechoice3 = ResolveChoice.apply(frame = selectfields2, choice = "MATCH_CATALOG", database = "aws-nonprod-datalake-glue-catalog", table_name = "NM_TEMP", transformation_ctx = "resolvechoice3")
resolvechoice4 = ResolveChoice.apply(frame = resolvechoice3, choice = "make_cols", transformation_ctx = "resolvechoice4")
##Convert DataFrames to AWS Glue's DynamicFrames Object
resolvechoice4.toDF().write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("preactions","USE DATABASE dev_lz").option("dbtable", "nm_temp").mode("overwrite").save()
glueJob.commit()
But after running code i am getting
net.snowflake.client.jdbc.SnowflakeSQLException: SQL compilation error: Table 'NM_TEMP_STAGING_1100952600' does not exist
please let me know if I am missing anything.
I have permission for create, select stage, create, select table and create future tables.
above code I have removed columns and mappings. but original code it is available.
resolvechoice4.toDF().write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("preactions","USE DATABASE dev_lz").option("dbtable", "nm_temp").mode("overwrite").save()
Added following in above dbtable option it started working,
.option("preactions","USE ROLE DEVELOPER;USE DATABASE dev_db;USE SCHEMA aws_test")
as following
resolvechoice4.toDF().write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("preactions","USE DATABASE dev_lz").option("preactions","USE ROLE DEVELOPER;USE DATABASE dev_db;USE SCHEMA aws_test").option("dbtable", "nm_temp").mode("overwrite").save()

Displaying SQLite data in a Tkinter GUI

So I have a very simply Tkinter GUI which takes an input parameter and inputs it into a SQLite database. I'm looking to create a secondary GUI which will extract this parameter from the SQLite database and display it on the secondary GUI. Can you please help on how to do this? Preferably I want to display this data from the DB on a text field or something of the like.
from Tkinter import *
from PIL import Image, ImageTk
import sqlite3
root = Tk()
root.wm_attributes('-fullscreen','true')
root.title("My Test GUI")
Fullname=StringVar()
conn = sqlite3.connect('Form.db')
cursor=conn.cursor()
def database():
name1=Fullname.get()
cursor.execute('CREATE TABLE IF NOT EXISTS Student (Fullname TEXT)')
cursor.execute('INSERT INTO Student (FullName) VALUES(?)',(name1,))
conn.commit()
def error():
root1 = Toplevel(root)
root1.geometry("150x90")
root1.title("Warning")
Label(root1, text = "All fields required", fg = "red").pack()
def read_from_db():
cursor.execute('SELECT * FROM Student')
data = cursor.fetchall()
print(data)
label_0 = Label(root, text="My Test GUI",width=20,font=("bold", 20))
label_0.place(x=650,y=53)
label_1 = Label(root, text="Name",width=20,font=("bold", 10))
label_1.place(x=550,y=130)
entry_1 = Entry(root,textvar=Fullname)
entry_1.place(x=700,y=130)
Button(root, text='Submit',width=20,bg='brown',fg='white', command=database).place(x=650,y=380)
root.mainloop()
read_from_db()
Within your read_from_db function, instead of printing the value of data you can make a label out of it:
def read_from_db():
cursor.execute("SELECT *, oid FROM Student")
data = c.fetchall()
showData = ''
for data in Student:
showData += str(data) + "\n"
dataLabel = Label(master, text=showData)
playerLabel.grid(row=0, column=0)
conn.commit()
conn.close()

Operational error at cursor when creating function in SQLite

I am trying to insert data into my SQLite database, it goes fine until I get the error while creating a function for DB. It sends OperationalError at cursor.
I couldn't find solution for my problem.
Code I'm using:
import sqlite3
from sqlite3 import *
SQL_CREATE_STATEMENT = '''CREATE TABLE password
(id integer PRIMARY KEY NOT NULL,username text, password text, source text)'''
SQL_INSERT_STATEMENT = '''INSERT INTO password (username, password, source)VALUES({},{},{});'''
DATABASE_PATH = 'home/taha/lessons/projects/passStorage/passDB.db'
DATA = dict()
def create_connection(db_file):
try:
conn = sqlite3.connect(db_file)
return conn
except Error as e:
return e
def create_table(connection, sql_commands):
c = connection.cursor()
c.execute(sql_commands)
print('done')
def get_input():
USERNAME = input('username: ')
PASSWORD = input('password: ')
SOURCE = input('source: ')
return USERNAME,PASSWORD,SOURCE
def insert_date(connection, data):
c = connection.cursor()
c.execute(SQL_INSERT_STATEMENT.format(data.values))
def main():
conn = create_connection(DATABASE_PATH)
create_table(conn, SQL_CREATE_STATEMENT)
user_info = get_input()
DATA['username'], DATA['password'], DATA['SOURCE'] = user_info
insert_date(conn, DATA)
if __name__ == '__main__':
main()
I expect no error but it sends this:
c = connection.cursor()
AttributeError: 'OperationalError' object has no attribute 'cursor'
def create_connection(db_file):
try:
conn = sqlite3.connect(db_file)
return conn
except Error as e:
return e # <-- here you return OperationalError instance
AttributeError: 'OperationalError' object has no attribute 'cursor'
Shows that OperationalError has no attribute cursor
Add additional logic that check connection here.
I believe the core of your problem is wrong file path:
DATABASE_PATH = 'home/taha/lessons/projects/passStorage/passDB.db'
But I believe should be
DATABASE_PATH = '/home/taha/lessons/projects/passStorage/passDB.db'

SQLAlchemy and QThread: Session handling in multi-thread

I have a problem: I don't know how to work with session, in combination with multi-thread, efficiently. Well you can see that I use scoped_session. It will transparently creates a thread-local session - so its threadsafe.
My below and executable example works, as long as the interval of QTimeris 1000, but if you set the value to 1, then there are some problems. On GUI-site, there a 8 QComboBox()-objects, and I will start 8 threads. In this example, I work with one table. When I run this program, not all QComboBox()-objects are filled. Sometimes there is only one QComboBox()-object blank or more. In addition, it happens once in a while that I am told by SQLAlchemy that the connection was closed. For this I have an error message:
Traceback (most recent call last):
File "D:\Dan\Python\Xarphus\xarphus\subclass_master_data_load_data_item.py", line 151, in populate_item
self.populate_item_signal.emit(next(self._element))
File "D:\Dan\Python\Xarphus\xarphus\core\manage_data_manipulation_master_data.py", line 232, in select_all
yield record.id, record.relationship
File "D:\Dan\Python\Xarphus\xarphus\core\manage_db_connection.py", line 245, in __exit__
self.session.commit()
File "C:\Python27\lib\site-packages\sqlalchemy\orm\session.py", line 906, in commit
self.transaction.commit()
File "C:\Python27\lib\site-packages\sqlalchemy\orm\session.py", line 465, in commit
t[1].commit()
File "C:\Python27\lib\site-packages\sqlalchemy\engine\base.py", line 1632, in commit
self._do_commit()
File "C:\Python27\lib\site-packages\sqlalchemy\engine\base.py", line 1663, in _do_commit
self.connection._commit_impl()
File "C:\Python27\lib\site-packages\sqlalchemy\engine\base.py", line 726, in _commit_impl
self.connection._reset_agent is self.__transaction:
File "C:\Python27\lib\site-packages\sqlalchemy\engine\base.py", line 351, in connection
self._handle_dbapi_exception(e, None, None, None, None)
File "C:\Python27\lib\site-packages\sqlalchemy\engine\base.py", line 1405, in _handle_dbapi_exception
util.reraise(*exc_info)
File "C:\Python27\lib\site-packages\sqlalchemy\engine\base.py", line 349, in connection
return self._revalidate_connection()
File "C:\Python27\lib\site-packages\sqlalchemy\engine\base.py", line 429, in _revalidate_connection
raise exc.ResourceClosedError("This Connection is closed")
ResourceClosedError: This Connection is closed
And, if I run my program several times, I get, among other things sometimes the following message:
Traceback (most recent call last):
File "C:\Users\Sophus\Desktop\ver_2_simple_problem.py", line 83, in init_object
self._element = self.master_data_manipulation.select_all()
File "C:\Users\Sophus\Desktop\ver_2_simple_problem.py", line 178, in __exit__
self.session.commit()
File "C:\Python27\lib\site-packages\sqlalchemy\orm\session.py", line 906, in commit
self.transaction.commit()
File "C:\Python27\lib\site-packages\sqlalchemy\orm\session.py", line 459, in commit
self._assert_active(prepared_ok=True)
File "C:\Python27\lib\site-packages\sqlalchemy\orm\session.py", line 258, in _assert_active
"This session is in 'committed' state; no further "
sqlalchemy.exc.InvalidRequestError: This session is in 'committed' state; no further SQL can be emitted within this transaction.
Here is my example code: Is there a better and more elegant source code on how to work with SQLAlchemy in multiple threads? Becaouse I'm not sure if this is the right way.
from PyQt4.QtCore import QObject, QThread, pyqtSignal, pyqtSlot, QTimer
from PyQt4.QtGui import QApplication, QPushButton, QVBoxLayout, QDialog, \
QComboBox, QLabel
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy import Table, Column, Integer, String, MetaData
from traceback import format_exc
from sys import exc_info
''' setting up root class for declarative declaration '''
Base = declarative_base()
class PERSON_SALUTATION(Base):
__tablename__ = "person_salutation"
id = Column(Integer, primary_key=True)
salutation = Column(String(50), nullable=False, unique=True)
class MasterDataManipulation(object):
def __init__(self, session_object=None):
self._session_scope = session_object
def select_all(self):
try:
with self._session_scope as session:
for record in session.query(PERSON_SALUTATION):
yield record.id, record.salutation
except AttributeError:
print "select all, desired_trace", format_exc(exc_info())
return
class Worker(QObject):
finish_progress = pyqtSignal()
populate_item_signal = pyqtSignal(object, object)
def __init__(self,
combo_box=None,
query_data=None,
parent=None):
QObject.__init__(self, parent)
self.query_data = query_data
self.combo_box=combo_box
''' Create attributes '''
self._run_semaphore = 1
def init_object(self):
self._element = self.query_data()
self.timer = QTimer()
self.timer.setSingleShot(False)
self.timer.setInterval(1)
self.timer.timeout.connect(self.populate_item)
self.timer.start()
def populate_item(self):
try:
if self._run_semaphore == 0:
self._run_semaphore = 1
raise StopIteration
else:
self.populate_item_signal.emit(next(self._element), self.combo_box)
except StopIteration:
print "StopIteration is raised"
self.timer.stop()
def stop(self):
self._run_semaphore == 0
self.timer.stop()
class SessionScope(object):
def __init__(self, dbms=None, dbdriver=None,
dbuser=None, dbuser_pwd=None,
db_server_host=None, dbport=None, db_name=None,
admin_database=None):
self.dbms = dbms
self.dbdriver = dbdriver
self.dbuser = dbuser
self.dbuser_pwd = dbuser_pwd
self.db_server_host = db_server_host
self.dbport = dbport
self.db_name = db_name
self.admin_database = admin_database
url = '{}+{}://{}:{}#{}:{}/{}'.format(
self.dbms, self.dbdriver, self.dbuser, self.dbuser_pwd, self.db_server_host, self.dbport, self.db_name)
self._Engine = create_engine(url, encoding='utf8', echo=True)
self.session = None
self._session_factory = sessionmaker(bind=self._Engine)
self._Session = scoped_session(sessionmaker(bind=self._Engine, expire_on_commit=False))
''' create tables '''
Base.metadata.create_all(self._Engine)
def __enter__(self):
self.session = self._Session()
return self.session
def __exit__(self, exception, exc_value, traceback):
try:
if exception:
self.session.rollback()
else:
self.session.commit()
finally:
self.session.close()
class MyCustomDialog(QDialog):
finish = pyqtSignal()
def __init__(self, scoped_session=None, parent=None):
QDialog.__init__(self, parent)
self._session_scope = scoped_session
self._list_threads = []
self.init_ui()
self.start_all_selection()
def init_ui(self):
layout = QVBoxLayout(self)
self.combo_person_title = QComboBox(self)
self.combo_person_salutation = QComboBox(self)
self.combo_person_gender = QComboBox(self)
self.combo_person_religion = QComboBox(self)
self.combo_person_relationship_status = QComboBox(self)
self.combo_person_nationality = QComboBox(self)
self.combo_person_eye_color = QComboBox(self)
self.combo_person_hair_color = QComboBox(self)
self.pushButton_populate_combo = QPushButton("Re-populate", self)
self.pushButton_stopp = QPushButton("Stopp", self)
self.pushButton_close = QPushButton("Close", self)
layout.addWidget(self.combo_person_title)
layout.addWidget(self.combo_person_salutation)
layout.addWidget(self.combo_person_gender)
layout.addWidget(self.combo_person_religion)
layout.addWidget(self.combo_person_nationality)
layout.addWidget(self.combo_person_relationship_status)
layout.addWidget(self.combo_person_eye_color)
layout.addWidget(self.combo_person_hair_color)
layout.addWidget(self.pushButton_populate_combo)
layout.addWidget(self.pushButton_stopp)
layout.addWidget(self.pushButton_close)
self.pushButton_stopp.clicked.connect(self.on_finish)
self.pushButton_populate_combo.clicked.connect(self.start_all_selection)
self.pushButton_close.clicked.connect(self.close)
def start_all_selection(self):
list_comboxes = self.findChildren(QComboBox)
for combo_box in list_comboxes:
combo_box.clear()
self.start_thread(combo_box=combo_box)
def fill_combo_boxt(self, item, combo_box):
id, text = item
combo_box.addItem(text)
def on_label(self, i):
self.label.setText("Result: {}".format(i))
def start_thread(self, combo_box=None):
master_data_manipulation = MasterDataManipulation(session_object=self._session_scope)
query_data=master_data_manipulation.select_all
task_thread = QThread(self)
task_thread.work = Worker(query_data=query_data,
combo_box=combo_box,)
''' We need to store threads '''
self._list_threads.append(task_thread)
task_thread.work.moveToThread(task_thread)
task_thread.work.populate_item_signal.connect(self.fill_combo_boxt)
self.finish.connect(task_thread.work.stop)
task_thread.started.connect(task_thread.work.init_object)
task_thread.finished.connect(task_thread.deleteLater)
''' This will emit 'started' and start thread's event loop '''
task_thread.start()
#pyqtSlot()
def abort_workers(self):
self.finish.emit()
for thread in self._list_threads:
''' this will quit **as soon as thread event loop unblocks** '''
thread.quit()
''' so you need to wait for it to *actually* quit'''
thread.wait()
def on_finish(self):
self.finish.emit()
def closeEvent(self, event):
''' Re-implementaate to handle with created threads '''
self.abort_workers()
sys.exit()
def populate_database(sess=None):
try:
with sess as session:
salutations = [PERSON_SALUTATION(salutation="Mister"),
PERSON_SALUTATION(salutation="Miss"),
PERSON_SALUTATION(salutation="Lady"),
PERSON_SALUTATION(salutation="Ma'am"),
PERSON_SALUTATION(salutation="Sir"),
PERSON_SALUTATION(salutation="Queen"),
PERSON_SALUTATION(salutation="Grandma"),]
session.add_all(salutations)
session.commit()
except SQLAlchemyError:
print "SQLAlchemyError", format_exc(exc_info())
def main():
dbms = raw_input('Enter database type: ')
dbdriver = raw_input('Enter database driver: ')
dbuser = raw_input('Enter user name: ')
dbuser_pwd = raw_input('Enter user password: ')
db_server_host = raw_input('Enter server host: ')
dbport = raw_input('Enter port: ')
db_name = raw_input('Enter database name: ')
try:
''' create_engine and scoped_session once per process (per database). '''
session_scope = SessionScope(dbms = dbms,
dbdriver = dbdriver,
dbuser = dbuser,
dbuser_pwd = dbuser_pwd,
db_server_host = db_server_host,
dbport = dbport,
db_name = db_name)
answer = raw_input('Do you want to populate database? Type yes or no: ')
if answer.lower() == 'yes':
populate_database(sess=session_scope)
app = QApplication(sys.argv)
window = MyCustomDialog(scoped_session = session_scope)
window.show()
sys.exit(app.exec_())
except TypeError:
print "ERROR", format_exc(exc_info())
if __name__ == "__main__":
main()

Resources