I am trying to use the asyncio package to execute concurrent calls from one SQL Server to another in order to extract data. I'm hitting an issue at the myLoop.run_until_complete(cors) line, where it tells me that the event loop is already running. I will admit that I am new to this package and may be overlooking something simple.
import pyodbc
import sqlalchemy
import pandas
import asyncio
import time
async def getEngine(startString):
    sourceList = str.split(startString, '=')
    server = str.split(sourceList[1], ';')[0]
    database = str.split(sourceList[2], ';')[0]
    user = str.split(sourceList[3], ';')[0]
    password = str.split(sourceList[4], ';')[0]
    returnEngine = sqlalchemy.create_engine("mssql+pyodbc://"+user+":"+password+"@"+server+"/"+database+"?driver=SQL+Server+Native+Client+11.0")
    return returnEngine
async def getConnString(startString):
    sourceList = str.split(startString, '=')
    server = str.split(sourceList[1], ';')[0]
    database = str.split(sourceList[2], ';')[0]
    user = str.split(sourceList[3], ';')[0]
    password = str.split(sourceList[4], ';')[0]
    return "Driver={SQL Server Native Client 11.0};Server="+server+";Database="+database+";Uid="+user+";Pwd="+password+";"
async def executePackage(source, destination, query, sourceTable, destTable, lastmodifiedDate, basedOnStation):
    sourceConnString = getConnString(source)
    destEngine = getEngine(destination)
    sourceConn = pyodbc.connect(sourceConnString)
    newQuery = str.replace(query, 'dateTest', str(lastmodifiedDate))
    df = pandas.read_sql(newQuery, sourceConn)
    print('Started '+sourceTable+'->'+destTable)
    tic = time.perf_counter()
    await df.to_sql(destTable, destEngine, index=False, if_exists="append")
    toc = time.perf_counter()
    secondsToFinish = toc - tic
    print('Finished '+sourceTable+'->'+destTable+' in '+str(secondsToFinish)+' seconds')
async def main():
    connString = "Driver={SQL Server Native Client 11.0};Server=myServer;Trusted_Connection=yes;"
    myConn = pyodbc.connect(connString)
    cursor = myConn.cursor()
    df = pandas.read_sql('exec mySql_stored_proc', myConn)
    if len(df.index) > 0:
        tasks = [executePackage(df.iloc[i,10], df.iloc[i,11], df.iloc[i,7], df.iloc[i,8], df.iloc[i,9], df.iloc[i,5], df.iloc[i,17]) for i in range(len(df))]
        myLoop = asyncio.get_event_loop()
        cors = asyncio.wait(tasks)
        myLoop.run_until_complete(cors)

if __name__ == "__main__":
    asyncio.run(main())
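The error comes from starting the loop twice: asyncio.run(main()) is already running a loop when main() calls myLoop.run_until_complete(cors). Inside a coroutine you await the tasks instead. Below is a minimal sketch of that pattern, with the blocking pyodbc/pandas work pushed onto worker threads via run_in_executor, since neither library is async. The copy_table/blocking_copy helpers and the jobs list are illustrative, not from the original code; note the original executePackage also awaits the non-awaitable to_sql and calls the async helpers without await, which this sketch avoids by keeping all database work plain synchronous code.

import asyncio

jobs = [('source1', 'dest1'), ('source2', 'dest2')]  # illustrative stand-in for the stored-proc rows

def blocking_copy(source, destination):
    # plain synchronous pyodbc.connect / pandas.read_sql / DataFrame.to_sql goes here
    ...

async def copy_table(source, destination):
    loop = asyncio.get_running_loop()
    # run the blocking extract/load on a worker thread so other copies can overlap
    await loop.run_in_executor(None, blocking_copy, source, destination)

async def main():
    tasks = [copy_table(s, d) for s, d in jobs]
    await asyncio.gather(*tasks)  # no nested run_until_complete()

if __name__ == "__main__":
    asyncio.run(main())  # the only place a loop is started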
It's possible to get the Snowflake Query Id when using the snowflake-connector-python, i.e. the sfqid attribute from the cursor object.
Is it possible to get that attribute when using Snowflake's SQLAlchemy Toolkit? The doc page doesn't mention it - https://docs.snowflake.com/en/user-guide/sqlalchemy.html.
Thanks,
Eric
You can use SQLAlchemy's execute method and get a reference to the SnowflakeCursor like this:
import os
from sqlalchemy import create_engine
from dotenv import load_dotenv
load_dotenv()
snowflake_username = os.getenv('SNOWFLAKE_USERNAME')
snowflake_password = os.getenv('SNOWFLAKE_PASSWORD')
snowflake_account = os.getenv('SNOWFLAKE_ACCOUNT')
snowflake_warehouse = os.getenv('SNOWFLAKE_WAREHOUSE')
snowflake_database = 'simon_db'
snowflake_schema = 'public'
if __name__ == '__main__':
    engine = create_engine(
        'snowflake://{user}:{password}@{account}/{db}/{schema}?warehouse={warehouse}'.format(
            user=snowflake_username,
            password=snowflake_password,
            account=snowflake_account,
            db=snowflake_database,
            schema=snowflake_schema,
            warehouse=snowflake_warehouse,
        )
    )
    connection = engine.connect()
    results = connection.execute("SELECT * FROM TEST_TABLE")
    queryId = results.cursor.sfqid
    print(f"queryId = {queryId}")
    print(f"results: {results.fetchone()}")
    connection.close()
    engine.dispose()
This prints out:
queryId = 019edbef-0000-114f-0000-0f9500612j23
results: ('n/a', '2021-01-01')
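Note that on SQLAlchemy 1.4+ executing a plain string is deprecated (and removed in 2.0); wrap the statement in text(). A sketch of the same lookup under that API, reusing the connection from the example above and assuming the DBAPI cursor is still exposed as result.cursor:

from sqlalchemy import text

result = connection.execute(text("SELECT * FROM TEST_TABLE"))
query_id = result.cursor.sfqid  # same snowflake-connector-python cursor attribute
print(f"queryId = {query_id}")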
One way I found was using the function LAST_QUERY_ID, something like this:
results = connection.execute("SELECT * FROM CITIBIKE_TRIPS LIMIT 1").fetchone()
query_id = connection.execute("SELECT LAST_QUERY_ID()").fetchone()
print(query_id)
I get back something like:
$ python test_sqlalchemy.py
('019edb4b-0502-8a31-0000-16490cd95072',)
Might not be the ideal way. Note that LAST_QUERY_ID() is scoped to the session, so both statements must run on the same connection for it to return the right ID.
I am using an AWS Glue job to write a data pipeline. I took the code from the community; it is as follows:
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from awsglue.job import Job
from py4j.java_gateway import java_import
SNOWFLAKE_SOURCE_NAME = "net.snowflake.spark.snowflake"
#args = getResolvedOptions(sys.argv, ['JOB_NAME'])
args = getResolvedOptions(sys.argv, ['JOB_NAME', 'URL', 'ACCOUNT', 'WAREHOUSE', 'DB', 'SCHEMA', 'USERNAME', 'PASSWORD', 'ROLE'])
sparkContext = SparkContext()
glueContext = GlueContext(sparkContext)
sparkSession = glueContext.spark_session
glueJob = Job(glueContext)
glueJob.init(args['JOB_NAME'], args)
##Use the CData JDBC driver to read Snowflake data from the Products table into a DataFrame
##Note the populated JDBC URL and driver class name
java_import(sparkSession._jvm, SNOWFLAKE_SOURCE_NAME)
sparkSession._jvm.net.snowflake.spark.snowflake.SnowflakeConnectorUtils.enablePushdownSession(sparkSession._jvm.org.apache.spark.sql.SparkSession.builder().getOrCreate())
tmp_dir=args["TempDir"]
sfOptions = {
    "sfURL": args['URL'],
    "sfAccount": args['ACCOUNT'],
    "sfUser": args['USERNAME'],
    "sfPassword": args['PASSWORD'],
    "sfDatabase": args['DB'],
    "sfSchema": args['SCHEMA'],
    "sfRole": args['ROLE'],
    "sfWarehouse": args['WAREHOUSE'],
    "preactions": "USE DATABASE dev_lz;",
    # "tempDir": tmp_dir,
}
print('=========DB Connection details ================== ', sfOptions)
datasource0 = glueContext.create_dynamic_frame.from_catalog(database = "aws-nonprod-datalake-glue-catalog", table_name = "nm_s_amaster", transformation_ctx = "datasource0")
applymapping1 = ApplyMapping.apply(frame = datasource0, mappings = [ mappings], transformation_ctx = "applymapping1")
selectfields2 = SelectFields.apply(frame = applymapping1, paths = [columns], transformation_ctx = "selectfields2")
resolvechoice3 = ResolveChoice.apply(frame = selectfields2, choice = "MATCH_CATALOG", database = "aws-nonprod-datalake-glue-catalog", table_name = "NM_TEMP", transformation_ctx = "resolvechoice3")
resolvechoice4 = ResolveChoice.apply(frame = resolvechoice3, choice = "make_cols", transformation_ctx = "resolvechoice4")
##Convert DataFrames to AWS Glue's DynamicFrames Object
resolvechoice4.toDF().write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("preactions","USE DATABASE dev_lz").option("dbtable", "nm_temp").mode("overwrite").save()
glueJob.commit()
But after running the code I am getting:
net.snowflake.client.jdbc.SnowflakeSQLException: SQL compilation error: Table 'NM_TEMP_STAGING_1100952600' does not exist
Please let me know if I am missing anything.
I have permissions for CREATE and SELECT on stages, CREATE and SELECT on tables, and CREATE on future tables.
In the code above I have removed the actual columns and mappings, but they are present in the original code.
resolvechoice4.toDF().write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("preactions","USE DATABASE dev_lz").option("dbtable", "nm_temp").mode("overwrite").save()
After adding the following preactions option next to the dbtable option, it started working:
.option("preactions", "USE ROLE DEVELOPER;USE DATABASE dev_db;USE SCHEMA aws_test")
as follows:
resolvechoice4.toDF().write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("preactions", "USE DATABASE dev_lz").option("preactions", "USE ROLE DEVELOPER;USE DATABASE dev_db;USE SCHEMA aws_test").option("dbtable", "nm_temp").mode("overwrite").save()
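Since DataFrameWriter keeps options in a map keyed by name, the second preactions call replaces both the first one and the preactions entry in sfOptions, so only the USE ROLE/USE DATABASE/USE SCHEMA string actually runs. The working call can therefore be written with a single preactions option (the same call as above, just reformatted):

resolvechoice4.toDF().write \
    .format(SNOWFLAKE_SOURCE_NAME) \
    .options(**sfOptions) \
    .option("preactions", "USE ROLE DEVELOPER;USE DATABASE dev_db;USE SCHEMA aws_test") \
    .option("dbtable", "nm_temp") \
    .mode("overwrite") \
    .save()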
I am trying to insert data into my SQLite database. It goes fine until I hit an error in the function that uses the DB: it raises an OperationalError at the cursor call. I couldn't find a solution to my problem.
The code I'm using:
import sqlite3
from sqlite3 import *
SQL_CREATE_STATEMENT = '''CREATE TABLE password
(id integer PRIMARY KEY NOT NULL,username text, password text, source text)'''
SQL_INSERT_STATEMENT = '''INSERT INTO password (username, password, source)VALUES({},{},{});'''
DATABASE_PATH = 'home/taha/lessons/projects/passStorage/passDB.db'
DATA = dict()
def create_connection(db_file):
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        return e

def create_table(connection, sql_commands):
    c = connection.cursor()
    c.execute(sql_commands)
    print('done')

def get_input():
    USERNAME = input('username: ')
    PASSWORD = input('password: ')
    SOURCE = input('source: ')
    return USERNAME, PASSWORD, SOURCE

def insert_date(connection, data):
    c = connection.cursor()
    c.execute(SQL_INSERT_STATEMENT.format(data.values))

def main():
    conn = create_connection(DATABASE_PATH)
    create_table(conn, SQL_CREATE_STATEMENT)
    user_info = get_input()
    DATA['username'], DATA['password'], DATA['SOURCE'] = user_info
    insert_date(conn, DATA)

if __name__ == '__main__':
    main()
I expect no error, but it raises this:
c = connection.cursor()
AttributeError: 'OperationalError' object has no attribute 'cursor'
def create_connection(db_file):
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        return e  # <-- here you return the OperationalError instance
The traceback line
AttributeError: 'OperationalError' object has no attribute 'cursor'
shows that the OperationalError instance has no cursor attribute. Add logic here that checks the connection actually succeeded before using it.
I believe the core of your problem is the wrong file path:
DATABASE_PATH = 'home/taha/lessons/projects/passStorage/passDB.db'
but I believe it should be:
DATABASE_PATH = '/home/taha/lessons/projects/passStorage/passDB.db'
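For reference, a minimal sketch of both fixes together: use the absolute path, let a connect failure raise instead of being returned, and use ? placeholders, which also supply the quoting the original .format() insert was missing (the function names mirror the question's code):

import sqlite3

DATABASE_PATH = '/home/taha/lessons/projects/passStorage/passDB.db'  # absolute path

def create_connection(db_file):
    # let a failure propagate so callers never receive an
    # OperationalError where a Connection object is expected
    return sqlite3.connect(db_file)

def insert_date(connection, data):
    # parameterized query: the ? placeholders quote the values,
    # which the .format() version did not
    connection.execute(
        'INSERT INTO password (username, password, source) VALUES (?, ?, ?)',
        (data['username'], data['password'], data['SOURCE']),
    )
    connection.commit()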
The local port 127.0.0.1:4321 cannot be bound; please see the code below for details. Note that I am using lport = 4321. Do I need to use something different? If yes, how can I find one?
package mypackage
import groovy.sql.Sql
import java.sql.*
import com.jcraft.jsch.JSch
import com.jcraft.jsch.Session
// ssh login
String sshHost = 'abc.com'
String sshUser = 'test'
String sshPass = 'test'
int sshPort = 22
String boundaddress ="0.0.0.0";
// database login
targetHost = 'localhost'
targetUser = 'test'
targetPass = 'test'
targetPort = 3306
lport = 4321
JSch jsch = new JSch();
Session session = jsch.getSession(sshUser, sshHost, sshPort);
session.setPassword(sshPass);
session.setConfig("StrictHostKeyChecking", "no");
System.out.println("Establishing Connection...");
session.connect();
int assinged_port=session.setPortForwardingL(lport, targetHost, targetPort);
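// If 127.0.0.1:4321 is already bound by another process, this call fails.
// One workaround (a sketch, not from the original post): pass 0 so JSch
// allocates a free local port and returns it, then build the JDBC URL
// from the returned port instead of the hard-coded lport:
//   int assinged_port = session.setPortForwardingL(0, targetHost, targetPort);
//   String connectionString = "jdbc:mariadb://localhost:" + assinged_port + "/";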
Connection con = null;
String driver = "org.mariadb.jdbc.Driver";
String connectionString = "jdbc:mariadb://" + targetHost +":" + lport + "/";
con = DriverManager.getConnection(connectionString, targetUser, targetPass);
Statement st = con.createStatement();
String sql = "select * from company";
st.execute(sql);
I want to retrieve a set of keys from a Cloudant DB. I tried a few ways; however, custom_result performs lightning fast compared to the other methods. Can someone explain why?
from cloudant import cloudant
import json
import time
from cloudant.result import Result,ResultByKey
with open('credentials.json') as f:
    cred = json.load(f)
with cloudant(str(cred['credentials']['username']), str(cred['credentials']['password']), url=str(cred['credentials']['url'])) as client:
    my_database = client['my_database']

    # Using POST
    payload = {"keys": ["012", "345"]}
    end_point = '{0}/{1}'.format(client.server_url, 'my_database/_all_docs')
    params = {'include_docs': 'true'}
    start = time.time()
    response = client.r_session.post(end_point, data=json.dumps(payload), params=params)
    end = time.time()
    print end - start

    # Using custom_result
    start = time.time()
    result = my_database.custom_result(include_docs=True, keys=["012", "345"])
    end = time.time()
    print end - start

    # Using all_docs
    start = time.time()
    result = my_database.all_docs(include_docs=True, keys=["012", "345"])
    end = time.time()
    print end - start

    # Using iteration
    keys = ["012", "345"]
    start = time.time()
    result = []
    result_collection = Result(my_database.all_docs, include_docs=True)
    for i in range(len(keys)):
        result.append(result_collection[ResultByKey(i)])
    end = time.time()
    print end - start
My output is as follows:
0.426064968109
4.10079956055e-05
0.409541845322
0.819295167923
Can someone tell me why custom_result performs so fast?
database.custom_result() only creates the cloudant.result.Result instance; no results are returned from Cloudant at that point. You can test it this way; end - start will then be the whole time it takes to get a response from Cloudant:
start = time.time()
with database.custom_result(include_docs=True) as result:
    data = result[:]
end = time.time()
print end - start
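Applied to the keys lookup from the question, a comparable timing would look like this (a sketch; the slice is what actually triggers the HTTP request):

start = time.time()
with my_database.custom_result(include_docs=True, keys=["012", "345"]) as result:
    data = result[:]  # the round trip to Cloudant happens here, not at creation
end = time.time()
print end - start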