I understand that you cannot do a full Snowflake data dump and instead need to use the COPY INTO command to unload data from a table into an internal (i.e. Snowflake-managed) stage.
To automate the process, I thought I would do it with Python. Do you think that is the best method?
import traceback
import snowflake.connector
import pandas as pd
from snowflake.sqlalchemy import URL
from sqlalchemy import create_engine

url = URL(
    user='??????',
    password='????????',
    account='??????-??????',
    database='SNOWFLAKE',
    role='ACCOUNTADMIN'
)

out_put_string = ""

try:
    engine = create_engine(url)
    connection = engine.connect()

    # Get all the views from the SNOWFLAKE database
    query = '''
    show views in database SNOWFLAKE
    '''
    df = pd.read_sql(query, connection)

    # Loop over all the views
    df = df.reset_index()  # make sure indexes pair with number of rows
    for index, row in df.iterrows():
        out_put_string += "VIEW:----------" + row['schema_name'] + "." + row['name'] + "----------\n"
        df_view = pd.read_sql('select * from ' + row['schema_name'] + "." + row['name'], connection)
        df_view.to_csv("/Temp/Output_CVS/" + row['schema_name'] + "-" + row['name'] + ".csv")
        out_put_string += df_view.to_string() + "\n"
except:
    print("ERROR:")
    traceback.print_exc()

connection.close()

# Export all the Views in one file
text_file = open("/Temp/Output_CVS/AllViewsData.txt", "w")
text_file.write(out_put_string)
text_file.close()
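For comparison, a COPY INTO unload to an internal stage can be issued over the same SQLAlchemy connection. The sketch below is only illustrative: the named internal stage my_unload_stage and the views/ path prefix are placeholders I am introducing, not objects from the script above.

from sqlalchemy import text

# Assumes a named internal stage created beforehand, e.g.
#   CREATE STAGE IF NOT EXISTS my_unload_stage;
unload_sql = """
copy into @my_unload_stage/views/account_usage_views/
from (select * from SNOWFLAKE.ACCOUNT_USAGE.VIEWS)
file_format = (type = csv field_optionally_enclosed_by = '"')
header = true
overwrite = true
"""
connection.execute(text(unload_sql))

The unloaded files can then be pulled down from the stage with a GET command, or left in place for a later COPY INTO a table.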
I am converting some legacy Java code written for Flink version 1.5 to Flink version 1.13.1. Specifically, I'm working with the Table API. I have to read data from a CSV file, perform some basic SQL, and then write the results back to a file.
For Flink version 1.5, I used the following code to perform the above actions:
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

TableSource tableSrc = CsvTableSource.builder()
        .path("<CSV_PATH>")
        .fieldDelimiter(",")
        .field("date", Types.STRING)
        .field("month", Types.STRING)
        ...
        .build();

tableEnv.registerTableSource("CatalogTable", tableSrc);

String sql = "...";
Table result = tableEnv.sqlQuery(sql);

DataSet<Row1> resultSet = tableEnv.toDataSet(result, Row1.class);
resultSet.writeAsText("<OUT_PATH>");

env.execute("Flink Table-Sql Example");
In order to convert the above code to Flink version 1.13.1, I wrote the following code:
import org.apache.flink.table.api.Table;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.BatchTableEnvironment;

EnvironmentSettings settings = EnvironmentSettings
        .newInstance()
        .inBatchMode()
        .build();

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
TableEnvironment tableEnv = TableEnvironment.create(settings);

final String tableDDL = "CREATE TEMPORARY TABLE CatalogTable (" +
        "date STRING, " +
        "month STRING, " +
        "..." +
        ") WITH (" +
        "'connector' = 'filesystem', " +
        "'path' = 'file:///CSV_PATH', " +
        "'format' = 'csv'" +
        ")";

tableEnv.executeSql(tableDDL);

String sql = "...";
Table result = tableEnv.sqlQuery(sql);

// DEPRECATED - BatchTableEnvironment required to convert Table to Dataset
BatchTableEnvironment bTableEnv = BatchTableEnvironment.create(env);
DataSet<Row1> resultSet = bTableEnv.toDataSet(result, Row1.class);
resultSet.writeAsText("<OUT_PATH>");

env.execute("Flink Table-Sql Example");
However, BatchTableEnvironment is marked as deprecated in Flink version 1.13. Is there any alternative for converting a Table to a DataSet, or a way to write a Table directly to a file?
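In recent Table API versions the usual replacement for toDataSet is to declare a second filesystem table as a sink and write to it with executeInsert (or an INSERT INTO statement), which removes the need for BatchTableEnvironment. To keep the code samples in this write-up in one language, the sketch below shows that pattern in PyFlink rather than Java; the column list, paths, and the OutputTable name are placeholders.

from pyflink.table import EnvironmentSettings, TableEnvironment

# Batch-mode Table environment (unified API, no DataSet bridge needed).
settings = EnvironmentSettings.new_instance().in_batch_mode().build()
t_env = TableEnvironment.create(settings)

# Source table over the CSV file (columns are placeholders).
t_env.execute_sql("""
    CREATE TEMPORARY TABLE CatalogTable (
        `date` STRING,
        `month` STRING
    ) WITH (
        'connector' = 'filesystem',
        'path' = 'file:///CSV_PATH',
        'format' = 'csv'
    )
""")

# Filesystem sink table instead of DataSet#writeAsText.
t_env.execute_sql("""
    CREATE TEMPORARY TABLE OutputTable (
        `date` STRING,
        `month` STRING
    ) WITH (
        'connector' = 'filesystem',
        'path' = 'file:///OUT_PATH',
        'format' = 'csv'
    )
""")

result = t_env.sql_query("SELECT `date`, `month` FROM CatalogTable")
# execute_insert submits the job itself; no separate env.execute() call.
result.execute_insert("OutputTable").wait()

The Java version has the same shape: create the sink table with tableEnv.executeSql(...) and call result.executeInsert("OutputTable"); since executeInsert submits the job, the final env.execute(...) call is no longer needed.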
import snowflake.connector
import sys
import logging
import logging.config
class opMetadataExt:
    def __init__(self):
        #self.logger = logging.getLogger('csv_to_json')
        self.sfExtract()

    def sfExtract(self):
        #pwd = input("Password: ")
        sqlSnowflake = snowflake.connector.connect(
            user=
            password=
            account=
            database=
            role=
            warehouse=
            #warehouse=
            schema=
        )
        sqlSnowflake.cursor().execute("USE WAREHOUSE ")
        sqlSnowflake.cursor().execute("USE DATABASE ")
        sqlSnowflake = sqlSnowflake.cursor()
        test = "copy into '#/extractFile/' from (select * from information_schema.tables limit 1) file_format=(format_name=(type='csv'))"
        try:
            sqlSnowflake.execute(test)
            rows = sqlSnowflake.fetchall()
            print(f'Data - {rows}')
        except Exception as e:
            print(e)
            #sqlSnowflake.rollback()
        finally:
            sqlSnowflake.close()
But I am getting the below error:
001012 (42601): 0195a241-01fd-1365-0000-54ad01f956da: SQL compilation error:
missing stage name in URL: #/extractFile/
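The immediate cause is the stage reference: internal stage locations in COPY INTO start with @ and are not written as quoted string literals, and the file_format clause takes either a named format via format_name or inline options such as type='csv', not both nested together. A rough corrected form, assuming the files should land under an extractFile/ prefix in the current user's stage, would be:

# Sketch only: unload to the user stage (@~) under an extractFile/ prefix.
# A named internal stage (@my_stage/extractFile/) or a table stage (@%my_table)
# works the same way; the prefix here is just an assumption.
test = (
    "copy into @~/extractFile/ "
    "from (select * from information_schema.tables limit 1) "
    "file_format = (type = 'csv') "
    "overwrite = true"
)
sqlSnowflake.execute(test)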
I'm dynamically generating queries for 11 different tables in SQL Server and storing the results as CSV files in S3.
However, when I write nullable integer fields to CSV they get converted to float, so when I run the COPY command it returns an error.
I really need to avoid that. Is there an option for that?
for object in table_list:
    if args.load_type == "full":
        query_load = object["query_full"]
    else:
        query_load = object["query_active"]

    df = pd.read_sql_query(query_load, sql_server_conn)
    df = df.replace(",", " ", regex=True)
    df = df.replace("\n", " ", regex=True)
    #print(df)
    #df = df * 1
    #print(df.dtypes)
    #print(df.info())
    df = df.assign(extraction_dttm=currentdate)

    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)

    folder_name = "{}".format(object["lake_table_name"])
    file_name = "{}_{}.csv".format(object["lake_table_name"], currentdate.strftime("%Y%m%d"))
    full_path_to_file = DATALAKE_PATH + "/" + folder_name + "/" + file_name

    # print("{} - Storing files in {} ... ".format(dt.utcnow(), datalake_bucket))
    s3_resource.Object(datalake_bucket, full_path_to_file).put(Body=csv_buffer.getvalue())
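One way to keep nullable integer columns from being written as floats is pandas' nullable integer dtype: cast the affected columns to "Int64" (capital I) before calling to_csv, so NULLs become empty cells and the remaining values are written without a decimal point. A minimal sketch, assuming each table entry lists its integer columns (the int_columns key below is my assumption, not part of the original config):

# Cast integer columns to the nullable Int64 dtype so NULLs no longer
# force the whole column to float.
for col in object.get("int_columns", []):
    df[col] = df[col].astype("Int64")

# Alternatively, let pandas infer nullable dtypes across the whole frame:
# df = df.convert_dtypes()

df.to_csv(csv_buffer, index=False)  # ints now serialize as 1, 2, ... instead of 1.0

This casting step would go just before the existing to_csv call inside the loop.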
So I have a very simple Tkinter GUI which takes an input parameter and inserts it into an SQLite database. I'm looking to create a secondary GUI which will extract this parameter from the SQLite database and display it on the secondary GUI. Can you please help with how to do this? Preferably I want to display this data from the DB in a text field or something of the like.
from Tkinter import *
from PIL import Image, ImageTk
import sqlite3

root = Tk()
root.wm_attributes('-fullscreen', 'true')
root.title("My Test GUI")

Fullname = StringVar()

conn = sqlite3.connect('Form.db')
cursor = conn.cursor()

def database():
    name1 = Fullname.get()
    cursor.execute('CREATE TABLE IF NOT EXISTS Student (Fullname TEXT)')
    cursor.execute('INSERT INTO Student (FullName) VALUES(?)', (name1,))
    conn.commit()

def error():
    root1 = Toplevel(root)
    root1.geometry("150x90")
    root1.title("Warning")
    Label(root1, text="All fields required", fg="red").pack()

def read_from_db():
    cursor.execute('SELECT * FROM Student')
    data = cursor.fetchall()
    print(data)

label_0 = Label(root, text="My Test GUI", width=20, font=("bold", 20))
label_0.place(x=650, y=53)

label_1 = Label(root, text="Name", width=20, font=("bold", 10))
label_1.place(x=550, y=130)
entry_1 = Entry(root, textvar=Fullname)
entry_1.place(x=700, y=130)

Button(root, text='Submit', width=20, bg='brown', fg='white', command=database).place(x=650, y=380)

root.mainloop()

read_from_db()
Within your read_from_db function, instead of printing the value of data you can make a label out of it:
def read_from_db():
    cursor.execute("SELECT *, oid FROM Student")
    data = cursor.fetchall()

    showData = ''
    for row in data:
        showData += str(row) + "\n"

    dataLabel = Label(root, text=showData)
    dataLabel.grid(row=0, column=0)

    conn.commit()
    conn.close()  # close only once you are done querying the database
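Since the question asks for a secondary GUI with the value in a text field, another option is to open a Toplevel window and insert the fetched rows into a Text widget. The sketch below reuses the conn/cursor from the question; the button coordinates are arbitrary placeholders.

def show_in_secondary_gui():
    # Secondary window that displays the stored names in a Text widget.
    cursor.execute('SELECT Fullname FROM Student')
    rows = cursor.fetchall()

    second = Toplevel(root)
    second.title("Stored names")

    output = Text(second, width=40, height=10)
    output.pack(padx=10, pady=10)
    for (fullname,) in rows:
        output.insert(END, fullname + "\n")

# A button on the main window to open the secondary GUI:
Button(root, text='Show data', command=show_in_secondary_gui).place(x=650, y=420)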
For my application on Openshift, I am trying to write a pre_build script that accesses the database. The goal is to have migration scripts between database versions that are executed when the code is deployed. The script would compare the current database version with the version needed by the application code and then run the correct script to migrate the database.
Now the problem is that the pre_build script is apparently executed on Jenkins and not on the destination cartridge, and therefore the environment variables with the database connection arguments are not available.
This is the pre_build script that I've written so far:
#!/usr/bin/env python

print "*** Database migration script ***"

# get goal version
import os

homedir = os.environ["OPENSHIFT_HOMEDIR"]
migration_scripts_dir = homedir + "app-root/runtime/repo/.openshift/action_hooks/migration-scripts/"

f = open(migration_scripts_dir + "db-version.txt")
goal = int(f.read())
f.close()
print "I need database version " + str(goal)

# get database connection details
# TODO: find a solution of not hard coding the connection details here!!!
# Maybe by using jenkins environment variables like OPENSHIFT_APP_NAME and JOB_NAME
db_host = "..."
db_port = "..."
db_user = "..."
db_password = "..."
db_name = "..."

import psycopg2

try:
    conn = psycopg2.connect("dbname='" + db_name + "' user='" + db_user + "' host='" + db_host + "' password='" + db_password + "' port='" + db_port + "'")
    print "Successfully connected to the database"
except:
    print "I am unable to connect to the database"

cur = conn.cursor()

def get_current_version(cur):
    try:
        cur.execute("""SELECT * from db_version""")
    except:
        conn.set_isolation_level(0)
        cur.execute("""CREATE TABLE db_version (db_version bigint NOT NULL)""")
        cur.execute("""INSERT INTO db_version VALUES (0)""")
        cur.execute("""SELECT * from db_version""")
    current_version = cur.fetchone()[0]
    print "The current database version is " + str(current_version)
    return current_version

def recursive_execute_migration(cursor):
    current_version = get_current_version(cursor)
    if (current_version == goal):
        print "Database is on the correct version"
        return
    elif (current_version < goal):
        sql_filename = "upgrade" + str(current_version) + "-" + str(current_version + 1) + ".sql"
        print "Upgrading database with " + sql_filename
        cursor.execute(open(migration_scripts_dir + sql_filename, "r").read())
        recursive_execute_migration(cursor)
    else:
        sql_filename = "downgrade" + str(current_version) + "-" + str(current_version - 1) + ".sql"
        print "Downgrading database with " + sql_filename
        cursor.execute(open(migration_scripts_dir + sql_filename, "r").read())
        recursive_execute_migration(cursor)

conn.set_isolation_level(0)
recursive_execute_migration(cur)

cur.close()
conn.close()
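On the hard-coded connection details: when the script runs on the application gear itself (for example from a deploy or post_deploy action hook rather than pre_build), the PostgreSQL cartridge exposes the connection settings as environment variables, so the placeholders above could be filled from the environment. The variable names below are the usual OPENSHIFT_POSTGRESQL_* ones; treat them as an assumption and confirm with env | grep POSTGRESQL on the gear.

# Assumed cartridge variable names; verify them on the gear before relying on this.
db_host = os.environ.get("OPENSHIFT_POSTGRESQL_DB_HOST", "...")
db_port = os.environ.get("OPENSHIFT_POSTGRESQL_DB_PORT", "...")
db_user = os.environ.get("OPENSHIFT_POSTGRESQL_DB_USERNAME", "...")
db_password = os.environ.get("OPENSHIFT_POSTGRESQL_DB_PASSWORD", "...")
db_name = os.environ.get("OPENSHIFT_APP_NAME", "...")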
Is there another way of doing automatic database migrations?
Thanks for your help.