SQLAlchemy and Snowflake Query ID - snowflake-cloud-data-platform

It's possible to get the Snowflake query ID when using snowflake-connector-python, i.e. the sfqid attribute on the cursor object.
Is it possible to get that attribute when using Snowflake's SQLAlchemy toolkit? The doc page doesn't mention it: https://docs.snowflake.com/en/user-guide/sqlalchemy.html
Thanks,
Eric

You can use SQLAlchemy's execute method and get a reference to the SnowflakeCursor like this:

import os
from sqlalchemy import create_engine
from dotenv import load_dotenv

load_dotenv()

snowflake_username = os.getenv('SNOWFLAKE_USERNAME')
snowflake_password = os.getenv('SNOWFLAKE_PASSWORD')
snowflake_account = os.getenv('SNOWFLAKE_ACCOUNT')
snowflake_warehouse = os.getenv('SNOWFLAKE_WAREHOUSE')
snowflake_database = 'simon_db'
snowflake_schema = 'public'

if __name__ == '__main__':
    engine = create_engine(
        'snowflake://{user}:{password}@{account}/{db}/{schema}?warehouse={warehouse}'.format(
            user=snowflake_username,
            password=snowflake_password,
            account=snowflake_account,
            db=snowflake_database,
            schema=snowflake_schema,
            warehouse=snowflake_warehouse,
        )
    )
    connection = engine.connect()
    results = connection.execute("SELECT * FROM TEST_TABLE")
    queryId = results.cursor.sfqid
    print(f"queryId = {queryId}")
    print(f"results: {results.fetchone()}")
    connection.close()
    engine.dispose()
This prints out:
queryId = 019edbef-0000-114f-0000-0f9500612j23
results: ('n/a', '2021-01-01')

One way I found was using the LAST_QUERY_ID() function, something like this:
results = connection.execute("SELECT * FROM CITIBIKE_TRIPS LIMIT 1").fetchone()
query_id = connection.execute("SELECT LAST_QUERY_ID()").fetchone()
print(query_id)
I get back something like:
$ python test_sqlalchemy.py
('019edb4b-0502-8a31-0000-16490cd95072',)
Might not be the ideal way.
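
Another option, if you want sfqid itself without running a second query: the Snowflake SQLAlchemy dialect sits on top of snowflake-connector-python, so you can reach the DBAPI cursor through the engine. This is only a sketch, assuming the engine from the first answer and an existing TEST_TABLE:

# Sketch: engine.raw_connection() returns the pooled DBAPI
# (snowflake-connector-python) connection, whose cursors expose sfqid.
raw_conn = engine.raw_connection()
try:
    cur = raw_conn.cursor()
    cur.execute("SELECT * FROM TEST_TABLE")
    print(f"queryId = {cur.sfqid}")   # query ID of the statement just executed
    print(f"results: {cur.fetchone()}")
finally:
    raw_conn.close()                  # return the connection to the pool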

Related

Asyncio Run Loop Errors in Python 3.7

I am trying to use the asyncio package to execute concurrent calls from one SQL Server to another in order to extract data. I'm hitting an issue at the call to myLoop.run_until_complete(cors), where it tells me that the event loop is already running. I will admit that I am new to this package and may be overlooking something simple.
import pyodbc
import sqlalchemy
import pandas
import asyncio
import time

async def getEngine(startString):
    sourceList = str.split(startString, '=')
    server = str.split(sourceList[1], ';')[0]
    database = str.split(sourceList[2], ';')[0]
    user = str.split(sourceList[3], ';')[0]
    password = str.split(sourceList[4], ';')[0]
    returnEngine = sqlalchemy.create_engine("mssql+pyodbc://" + user + ":" + password + "@" + server + "/" + database + "?driver=SQL+Server+Native+Client+11.0")
    return returnEngine

async def getConnString(startString):
    sourceList = str.split(startString, '=')
    server = str.split(sourceList[1], ';')[0]
    database = str.split(sourceList[2], ';')[0]
    user = str.split(sourceList[3], ';')[0]
    password = str.split(sourceList[4], ';')[0]
    return "Driver={SQL Server Native Client 11.0};Server=" + server + ";Database=" + database + ";Uid=" + user + ";Pwd=" + password + ";"

async def executePackage(source, destination, query, sourceTable, destTable, lastmodifiedDate, basedOnStation):
    sourceConnString = getConnString(source)
    destEngine = getEngine(destination)
    sourceConn = pyodbc.connect(sourceConnString)
    newQuery = str.replace(query, 'dateTest', str(lastmodifiedDate))
    df = pandas.read_sql(newQuery, sourceConn)
    print('Started ' + sourceTable + '->' + destTable)
    tic = time.perf_counter()
    await df.to_sql(destTable, destEngine, index=False, if_exists="append")
    toc = time.perf_counter()
    secondsToFinish = toc - tic
    print('Finished ' + sourceTable + '->' + destTable + ' in ' + str(secondsToFinish) + ' seconds')

async def main():
    connString = "Driver={SQL Server Native Client 11.0};Server=myServer;Trusted_Connection=yes;"
    myConn = pyodbc.connect(connString)
    cursor = myConn.cursor()
    df = pandas.read_sql('exec mySql_stored_proc', myConn)
    if len(df.index) > 0:
        tasks = [executePackage(df.iloc[i, 10], df.iloc[i, 11], df.iloc[i, 7], df.iloc[i, 8], df.iloc[i, 9], df.iloc[i, 5], df.iloc[i, 17]) for i in range(len(df))]
        myLoop = asyncio.get_event_loop()
        cors = asyncio.wait(tasks)
        myLoop.run_until_complete(cors)

if __name__ == "__main__":
    asyncio.run(main())
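
The error follows directly from the structure of the code: asyncio.run(main()) already starts and runs an event loop, so calling myLoop.run_until_complete(cors) inside main() tries to drive the same loop a second time. A minimal sketch of the usual restructuring (my own illustration, not from the original post) is to await the tasks from inside the running loop instead:

import asyncio

async def executePackage(i):
    # Placeholder for the real copy job; anything awaitable goes here.
    await asyncio.sleep(0)
    print('finished task', i)

async def main():
    # Build the coroutines as before, then await them from inside the
    # already-running loop instead of calling run_until_complete().
    tasks = [executePackage(i) for i in range(3)]
    await asyncio.gather(*tasks)
    # Note: blocking calls such as pandas.read_sql/to_sql would still block
    # the loop; overlapping them needs run_in_executor or threads.

if __name__ == "__main__":
    asyncio.run(main())   # asyncio.run owns the event loop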

Getting "Table 'NM_TEMP_STAGING_1100952600' does not exist" using AWS Glue and Snowflake

I am using a Glue job to write a data pipeline. I took the code from the community, which is as follows:
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from awsglue.job import Job
from py4j.java_gateway import java_import

SNOWFLAKE_SOURCE_NAME = "net.snowflake.spark.snowflake"

#args = getResolvedOptions(sys.argv, ['JOB_NAME'])
args = getResolvedOptions(sys.argv, ['JOB_NAME', 'URL', 'ACCOUNT', 'WAREHOUSE', 'DB', 'SCHEMA', 'USERNAME', 'PASSWORD', 'ROLE'])
sparkContext = SparkContext()
glueContext = GlueContext(sparkContext)
sparkSession = glueContext.spark_session
glueJob = Job(glueContext)
glueJob.init(args['JOB_NAME'], args)

##Use the CData JDBC driver to read Snowflake data from the Products table into a DataFrame
##Note the populated JDBC URL and driver class name
java_import(sparkSession._jvm, SNOWFLAKE_SOURCE_NAME)
sparkSession._jvm.net.snowflake.spark.snowflake.SnowflakeConnectorUtils.enablePushdownSession(sparkSession._jvm.org.apache.spark.sql.SparkSession.builder().getOrCreate())

tmp_dir = args["TempDir"]
sfOptions = {
    "sfURL" : args['URL'],
    "sfAccount" : args['ACCOUNT'],
    "sfUser" : args['USERNAME'],
    "sfPassword" : args['PASSWORD'],
    "sfDatabase" : args['DB'],
    "sfSchema" : args['SCHEMA'],
    "sfRole" : args['ROLE'],
    "sfWarehouse" : args['WAREHOUSE'],
    "preactions" : "USE DATABASE dev_lz;",
}
#"tempDir" : tmp_dir,

print('=========DB Connection details ================== ', sfOptions)

datasource0 = glueContext.create_dynamic_frame.from_catalog(database = "aws-nonprod-datalake-glue-catalog", table_name = "nm_s_amaster", transformation_ctx = "datasource0")
applymapping1 = ApplyMapping.apply(frame = datasource0, mappings = [ mappings], transformation_ctx = "applymapping1")
selectfields2 = SelectFields.apply(frame = applymapping1, paths = [columns], transformation_ctx = "selectfields2")
resolvechoice3 = ResolveChoice.apply(frame = selectfields2, choice = "MATCH_CATALOG", database = "aws-nonprod-datalake-glue-catalog", table_name = "NM_TEMP", transformation_ctx = "resolvechoice3")
resolvechoice4 = ResolveChoice.apply(frame = resolvechoice3, choice = "make_cols", transformation_ctx = "resolvechoice4")

##Convert DataFrames to AWS Glue's DynamicFrames Object
resolvechoice4.toDF().write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("preactions","USE DATABASE dev_lz").option("dbtable", "nm_temp").mode("overwrite").save()

glueJob.commit()
But after running the code I am getting
net.snowflake.client.jdbc.SnowflakeSQLException: SQL compilation error: Table 'NM_TEMP_STAGING_1100952600' does not exist
Please let me know if I am missing anything.
I have permissions to create and select stages, create and select tables, and create future tables.
In the code above I have removed the columns and mappings, but they are present in the original code.
resolvechoice4.toDF().write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("preactions","USE DATABASE dev_lz").option("dbtable", "nm_temp").mode("overwrite").save()
Adding the following preactions option alongside the dbtable option above made it start working:
.option("preactions","USE ROLE DEVELOPER;USE DATABASE dev_db;USE SCHEMA aws_test")
as follows:
resolvechoice4.toDF().write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("preactions","USE DATABASE dev_lz").option("preactions","USE ROLE DEVELOPER;USE DATABASE dev_db;USE SCHEMA aws_test").option("dbtable", "nm_temp").mode("overwrite").save()
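
One thing worth noting about that final line (my own observation, not from the original post): calling .option("preactions", ...) twice just replaces the first value with the second, since DataFrameWriter options are keyed by name. A sketch of the same fix with a single preactions entry, assuming the same role, database, and schema names, would be:

# Hypothetical consolidation: one "preactions" value that sets role, database,
# and schema before the connector creates its staging table
# (NM_TEMP_STAGING_<suffix>) in that schema.
sfOptions["preactions"] = "USE ROLE DEVELOPER; USE DATABASE dev_db; USE SCHEMA aws_test;"

resolvechoice4.toDF() \
    .write.format(SNOWFLAKE_SOURCE_NAME) \
    .options(**sfOptions) \
    .option("dbtable", "nm_temp") \
    .mode("overwrite") \
    .save()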

Displaying SQLite data in a Tkinter GUI

So I have a very simple Tkinter GUI which takes an input parameter and inserts it into a SQLite database. I'm looking to create a secondary GUI which will extract this parameter from the SQLite database and display it. Can you please help with how to do this? Preferably I want to display this data from the DB in a text field or something of the like.
from Tkinter import *
from PIL import Image, ImageTk
import sqlite3

root = Tk()
root.wm_attributes('-fullscreen', 'true')
root.title("My Test GUI")

Fullname = StringVar()

conn = sqlite3.connect('Form.db')
cursor = conn.cursor()

def database():
    name1 = Fullname.get()
    cursor.execute('CREATE TABLE IF NOT EXISTS Student (Fullname TEXT)')
    cursor.execute('INSERT INTO Student (FullName) VALUES(?)', (name1,))
    conn.commit()

def error():
    root1 = Toplevel(root)
    root1.geometry("150x90")
    root1.title("Warning")
    Label(root1, text="All fields required", fg="red").pack()

def read_from_db():
    cursor.execute('SELECT * FROM Student')
    data = cursor.fetchall()
    print(data)

label_0 = Label(root, text="My Test GUI", width=20, font=("bold", 20))
label_0.place(x=650, y=53)
label_1 = Label(root, text="Name", width=20, font=("bold", 10))
label_1.place(x=550, y=130)
entry_1 = Entry(root, textvar=Fullname)
entry_1.place(x=700, y=130)
Button(root, text='Submit', width=20, bg='brown', fg='white', command=database).place(x=650, y=380)

root.mainloop()
read_from_db()
Within your read_from_db function, instead of printing the value of data you can make a label out of it:
def read_from_db():
    cursor.execute("SELECT *, oid FROM Student")
    data = cursor.fetchall()
    showData = ''
    for row in data:                      # build one string with a line per stored row
        showData += str(row) + "\n"
    dataLabel = Label(root, text=showData)
    dataLabel.grid(row=0, column=0)
    conn.commit()
    conn.close()
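
Since the question also asks about showing the value in a secondary GUI rather than the main window, here is a minimal sketch of that idea (my own illustration, not part of the original answer), reusing the root, cursor, and Student table from the question:

def show_in_secondary_window():
    # Hypothetical helper: opens a second window and lists the stored names.
    cursor.execute('SELECT Fullname FROM Student')
    rows = cursor.fetchall()
    second = Toplevel(root)               # secondary window on top of the main GUI
    second.title("Stored names")
    text = Text(second, width=40, height=10)
    text.pack()
    for (fullname,) in rows:
        text.insert(END, fullname + "\n")

Button(root, text='Show saved data', command=show_in_secondary_window).place(x=650, y=420)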

WatsonApiException: Error: invalid-api-key, Code: 401

I can't find the Alchemy Language API in IBM Watson.
Can I do this with the natural-language-understanding service, and how?
When I add
from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 \
import Features, EntitiesOptions, KeywordsOptions
it shows an error with the combined keyword:
# In[]:
import tweepy
import re
import time
import math
import pandas as pd
from watson_developer_cloud import AlchemyLanguageV1

def initAlchemy():
    al = AlchemyLanguageV1(api_key='GRYVUMdBbOtJXxNOIs1aopjjaiyOmLG7xJBzkAnvvwLh')
    return al

def initTwitterApi():
    consumer_key = 'OmK1RrZCVJSRmKxIuQqkBExvw'
    consumer_secret = 'VWn6OR4rRgSi7qGnZHCblJMhrSvj1QbJmf0f62uX6ZQWZUUx5q'
    access_token = '4852231552-adGooMpTB3EJYPHvs6oGZ40qlo3d2JbVjqUUWkJ'
    access_token_secret = 'm9hgeM9p0r1nn8IoQWJYBs5qUQu56XmrAhsDSYKjuiVA4'
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    return api

'''This function is implemented to handle tweepy exception errors
because search is rate limited at 180 queries per 15 minute window by twitter'''
def limit(cursor):
    while True:
        try:
            yield cursor.next()
        except tweepy.TweepError as error:
            print(repr(error))
            print("Twitter Request limit error reached sleeping for 15 minutes")
            time.sleep(16*60)
        except tweepy.RateLimitError:
            print("Rate Limit Error occurred Sleeping for 16 minutes")
            time.sleep(16*60)

def retrieveTweets(api, search, lim):
    if(lim == ""):
        lim = math.inf
    else:
        lim = int(lim)
    text = []
    for tweet in limit(tweepy.Cursor(api.search, q=search).items(limit = lim)):
        t = re.sub('\s+', ' ', tweet.text)
        text.append(t)
    data = {"Tweet": text,
            "Sentiment": "",
            "Score": ""}
    dataFrame = pd.DataFrame(data, columns=["Tweet", "Sentiment", "Score"])
    return dataFrame

def analyze(al, dataFrame):
    sentiment = []
    score = []
    for i in range(0, dataFrame["Tweet"].__len__()):
        res = al.combined(text=dataFrame["Tweet"][i],
                          extract="doc-sentiment",
                          sentiment=1)
        sentiment.append(res["docSentiment"]["type"])
        if(res["docSentiment"]["type"] == "neutral"):
            score.append(0)
        else:
            score.append(res["docSentiment"]["score"])
    dataFrame["Sentiment"] = sentiment
    dataFrame["Score"] = score
    return dataFrame

def main():
    #Initialise Twitter Api
    api = initTwitterApi()
    #Retrieve tweets
    dataFrame = retrieveTweets(api, input("Enter the search query (e.g. #hillaryclinton ) : "), input("Enter limit for number of tweets to be searched or else just hit enter : "))
    #Initialise IBM Watson Alchemy Language Api
    al = initAlchemy()
    #Do Document Sentiment analysis
    dataFrame = analyze(al, dataFrame)
    #Save tweets, sentiment, and score data frame in csv file
    dataFrame.to_csv(input("Enter the name of the file (with .csv extension) : "))

if __name__ == '__main__':
    main()
# -*- coding: utf-8 -*-
Watson Natural Language Understanding only has a combined call, but since it is the only call it isn't named combined; it's actually called analyze. The best place to go for details is the API documentation: https://www.ibm.com/watson/developercloud/natural-language-understanding/api/v1/?python#post-analyze
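
For illustration only, a minimal sketch of that analyze call with the watson_developer_cloud SDK; the version string, credentials, and response handling below are assumptions (older SDK releases return a plain dict, newer ones wrap it in a DetailedResponse):

from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 import Features, SentimentOptions

# Hypothetical credentials/version; use the values from your own NLU service instance.
nlu = NaturalLanguageUnderstandingV1(
    version='2018-03-16',
    iam_apikey='YOUR_NLU_API_KEY')

result = nlu.analyze(
    text="I really enjoyed the conference today.",
    features=Features(sentiment=SentimentOptions()))
if hasattr(result, 'get_result'):   # newer SDKs wrap responses in DetailedResponse
    result = result.get_result()

# Document-level sentiment roughly corresponds to the old docSentiment result.
print(result['sentiment']['document']['label'],
      result['sentiment']['document']['score'])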

ValueError: Unknown protobuf attr type <type 'datetime.date'>

I am getting an error when executing the code. I have a Datastore entity which has a property of type Date. An example date property value stored in an entity for a particular row is 2016-01-03 (19:00:00.000) EDT.
The code I am executing filters the entity values on dates greater than 2016-01-01. Any idea what is wrong with the code?
Error
ValueError: Unknown protobuf attr type <type 'datetime.date'>
Code
import pandas as pd
import numpy as np
from datetime import datetime
from google.cloud import datastore
from flask import Flask, Blueprint

app = Flask(__name__)
computation_cron = Blueprint('cron.stock_data_transformation', __name__)

@computation_cron.route('/cron/stock_data_transformation')
def cron():
    ds = datastore.Client(project="earningspredictor-173913")

    query = ds.query(kind='StockPrice')
    query.add_filter('date', '>', datetime.strptime("2016-01-01", '%Y-%m-%d').date())
    dataframe_data = []
    temp_dict = {}
    for q in query.fetch():
        temp_dict["stock_code"] = q["stock_code"]
        temp_dict["date"] = q["date"]
        temp_dict["ex_dividend"] = q["ex_dividend"]
        temp_dict["split_ratio"] = q["split_ratio"]
        temp_dict["adj_open"] = q["adj_open"]
        temp_dict["adj_high"] = q["adj_high"]
        temp_dict["adj_low"] = q["adj_low"]
        temp_dict["adj_close"] = q["adj_close"]
        temp_dict["adj_volume"] = q["adj_volume"]
        dataframe_data.append(temp_dict)
    sph = pd.DataFrame(data=dataframe_data, columns=temp_dict.keys())
    # print sph.to_string()

    query = ds.query(kind='EarningsSurprise')
    query.add_filter('act_rpt_date', '>', datetime.strptime("2016-01-01", '%Y-%m-%d').date())
    dataframe_data = []
    temp_dict = {}
    for q in query.fetch():
        temp_dict["stock_code"] = q["stock_code"]
        temp_dict["eps_amount_diff"] = q["eps_amount_diff"]
        temp_dict["eps_actual"] = q["eps_actual"]
        temp_dict["act_rpt_date"] = q["act_rpt_date"]
        temp_dict["act_rpt_code"] = q["act_rpt_code"]
        temp_dict["eps_percent_diff"] = q["eps_percent_diff"]
        dataframe_data.append(temp_dict)
    es = pd.DataFrame(data=dataframe_data, columns=temp_dict.keys())
You seem to be using the generic google-cloud-datastore client library, not the NDB Client Library.
For google-cloud-datastore all date and/or time properties have the same format. From Date and time:
JSON: field name timestampValue; type string (RFC 3339 formatted, with milliseconds, for instance 2013-05-14T00:01:00.234Z)
Protocol buffer: field name timestamp_value; type Timestamp
Sort order: Chronological
Notes: When stored in Cloud Datastore, precise only to microseconds; any additional precision is rounded down.
So when setting/comparing such properties, try to use strings formatted as specified (or integers for the protobuf Timestamp?), not objects from the datetime module directly (those work with the NDB library). The same might be true for queries as well.
Note: this is based on documentation only; I didn't use the generic library myself.
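
For what it's worth, a workaround I have seen used (my addition, not part of the answer above) is to pass a full datetime.datetime rather than a datetime.date, on the assumption that the client serializes datetime.datetime values to the protobuf Timestamp type; the error above is raised specifically for datetime.date:

from datetime import datetime
from google.cloud import datastore

ds = datastore.Client(project="earningspredictor-173913")  # project name from the question

query = ds.query(kind='StockPrice')
# Assumption: datetime.datetime (unlike datetime.date) maps to Datastore's
# timestamp type; naive values are treated as UTC.
cutoff = datetime.strptime("2016-01-01", '%Y-%m-%d')
query.add_filter('date', '>', cutoff)

for entity in query.fetch():
    print(entity["stock_code"], entity["date"])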
