FastAPI with MQTT client deployed to GAE stops receiving messages after running for a while - google-app-engine

I built a FastAPI app that forwards MQTT messages from a broker to another REST API service using the paho-mqtt library.
After deploying the app to Google App Engine, everything works fine at the beginning (it can send and receive messages normally).
But a few hours later, my app can still publish messages but no longer receives any.
app.yaml
runtime: python310
entrypoint: gunicorn -b :$PORT -w 1 -k uvicorn.workers.UvicornWorker main:app
...
instance_class: F1
inbound_services:
- warmup
automatic_scaling:
  min_instances: 1
  max_instances: 1
  min_idle_instances: 1
mqtt client wrapper
class MqttClient:
    _instance = None
    _lock = threading.Lock()
    _on_msg_callback: Set[Callable] = set()
    _subscribe_topics: Set[str] = set()
    _mqtt_client = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            with cls._lock:
                if not cls._instance:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def init(self, config: MqttConfig):
        broker_host = config.broker_host
        random_id = ''.join(random.choices(string.digits, k=6))
        client_id = f'mqtt-forwarder-{random_id}'
        port = config.port
        keepalive = config.keepalive
        client = mqtt.Client(client_id=client_id, userdata=self)
        client.username_pw_set(
            username=config.username,
            password=config.password
        )
        context = ssl.SSLContext(protocol=ssl.PROTOCOL_TLSv1_2)
        client.tls_set_context(context)
        # set callbacks
        client.on_log = self._on_log
        client.on_connect = self._on_connect
        client.on_disconnect = self._on_disconnect
        client.on_publish = self._on_publish
        client.on_subscribe = self._on_subscribe
        client.on_message = self._on_message
        client.connect(broker_host, port=port, keepalive=keepalive)
        client.loop_start()
        self._mqtt_client = client
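For reference, paho's threaded loop retries the connection on its own after a drop, but subscriptions on a non-persistent session are lost until they are made again; a minimal sketch of connect/disconnect callbacks that re-subscribe (the names mirror the wrapper above, but the bodies are only an illustration, not the original code):

import logging

class MqttClient:  # continued from the wrapper above (sketch only)
    def _on_connect(self, client, userdata, flags, rc):
        # called on the initial connect and on every automatic reconnect;
        # re-subscribing here restores topics lost with a clean session
        if rc == 0:
            for topic in self._subscribe_topics:
                client.subscribe(topic)

    def _on_disconnect(self, client, userdata, rc):
        # rc != 0 means the connection dropped unexpectedly (network issue,
        # broker restart, or the instance being frozen); the thread started
        # by loop_start() keeps retrying in the background
        if rc != 0:
            logging.warning('MQTT disconnected unexpectedly (rc=%s)', rc)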
main.py
import ...

app = FastAPI()

mqtt_client = MqttClient()
mqtt_client.init(
    MqttConfig(
        broker_host='...',
        port=8883,
        keepalive=60,
        username=...,
        password=...
    ))
mqtt_client.subscribe('...')
mqtt_client.subscribe('...')
mqtt_client.register_on_message_callback(mqtt_cb.mqtt_on_msg_callback)


@app.get('/_ah/warmup')
def warmup():
    return Response('', status_code=status.HTTP_200_OK)
After checking the GAE logs, I found that a process was terminated (pid1, 16:23:44.397).
I am not sure whether this is what caused the MQTT client to stop receiving messages, and I am curious why the process was terminated.
GAE log
My main Question:
How can I fix this issue and keep receiving MQTT messages?
Other Question:
Is it a good idea to deploy a FastAPI app on GAE to forward MQTT messages to my other services? Any suggestions are appreciated.
Thank you.

Related

Unable to open multiple simultaneous websockets to the same endpoint when the consumer of the first websocket is busy

I am using Django Channels with React websockets. I am unable to open multiple simultaneous websockets if the consumer of the first websocket channel is busy with some activity. I want to open multiple simultaneous websockets where each consumer is doing something for its own websocket.
The module versions are:
asgiref 3.6.0
daphne 3.0.2
django-cors-headers 3.13.0
djangorestframework 3.14.0
djangorestframework-simplejwt 5.2.2
From the snippet below, once one websocket completes the task, the second websocket can be connected (in a separate tab), but while the 1st websocket is busy (sleeping), the other websockets in new browser tabs fail after the handshake with the following error:
WebSocket HANDSHAKING /ws/socket-server/monitor-rack-kpis/ushoeuhrti/ [127.0.0.1:49228]
django.channels.server INFO WebSocket HANDSHAKING /ws/socket-server/monitor-rack-kpis/ushoeuhrti/ [127.0.0.1:49228]
daphne.http_protocol DEBUG Upgraded connection ['127.0.0.1', 49228] to WebSocket
daphne.ws_protocol DEBUG WebSocket closed for ['127.0.0.1', 49228]
WebSocket DISCONNECT /ws/socket-server/monitor-rack-kpis/ushoeuhrti/ [127.0.0.1:49228]
django.channels.server INFO WebSocket DISCONNECT /ws/socket-server/monitor-rack-kpis/ushoeuhrti/ [127.0.0.1:49228]
daphne.server WARNING Application instance <Task pending name='Task-56' coro=<StaticFilesWrapper.__call__() running at /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/channels/staticfiles.py:44> wait_for=<Future pending cb=[_chain_future.<locals>._call_check_cancel() at /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/futures.py:387, Task.task_wakeup()]>> for connection <WebSocketProtocol client=['127.0.0.1', 49216] path=b'/ws/socket-server/monitor-rack-kpis/vhofoefwmr/'> took too long to shut down and was killed.
I tried using url() in routing instead of re_path(), but that didn't help either.
settings.py
ASGI_APPLICATION = 'backend.asgi.application'
CHANNEL_LAYERS={'default':{'BACKEND':'channels.layers.InMemoryChannelLayer'}}
asgi.py
application = ProtocolTypeRouter({
    'http': get_asgi_application(),
    'websocket': AuthMiddlewareStack(
        URLRouter(logstatus.routing.websocket_urlpatterns)
    )
})
routing.py
websocket_urlpatterns = [
    re_path(r'^ws/socket-server/monitor-rack-kpis/(?P<username>[A-Za-z]+)/', consumers.InfluxWritePromesthusConsumer.as_asgi())
]
consumer.py
class InfluxWritePromesthusConsumer(WebsocketConsumer):
    def ws_connect(message):
        message.reply_channel.send({"accept": True})

    def receive(self, text_data):
        print(f"\nReceiving from: {self.channel_name}\n")
        t = 0
        while t <= 100:
            self.send(text_data=json.dumps({
                'type': "LearnSocket",
                'message': "Received Messages"
            }))
            t += 10
            time.sleep(10)
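For comparison, a consumer that does not block the server's event loop while it sleeps would look roughly like the sketch below; the class name is made up for illustration, and AsyncWebsocketConsumer with asyncio.sleep stands in for the synchronous WebsocketConsumer and time.sleep above:

import asyncio
import json

from channels.generic.websocket import AsyncWebsocketConsumer


class InfluxWriteAsyncConsumer(AsyncWebsocketConsumer):
    async def connect(self):
        # accept the handshake immediately so other sockets are not held up
        await self.accept()

    async def receive(self, text_data=None, bytes_data=None):
        for _ in range(10):
            await self.send(text_data=json.dumps({
                'type': "LearnSocket",
                'message': "Received Messages",
            }))
            # yields control to the event loop instead of blocking the worker,
            # so other websocket connections keep being served
            await asyncio.sleep(10)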
Frontend- React.JS
const randN = generate()
console.log(randN)

const socket = new WebSocket('ws://127.0.0.1:8000/ws/socket-server/monitor-rack-kpis/' + randN + '/');
console.log(socket)

socket.onopen = function (e) {
    console.log(socket.readyState)
    if (user && user.access) {
        socket.send("Hi");
        // Receiving response from WebSocket
        socket.onmessage = function (e) {
            // let data1 = JSON.parse(e.data);
            console.log(e.data, new Date());
            // setOutputResponse(data1['message']);
        }
    }
}

GCP Terraform app_engine_standard_app_version internal error

I have a GCP project utilizing a variety of services: GC Functions, Firestore, VPC Connector, Storage Buckets, App Engine, Compute Engine. I use Terraform for deployment.
I decided to add a message queue using Google Tasks Queue, and run it on App Engine.
There is a nodejs handler function I put into an archive_file before attempting to deploy it to App Engine. I do this for my GC Function and it works great.
From my research it looks like I need Terraform resource app_engine_standard_app_version or app_engine_flexible_app_version to run my handler function on my App Engine.
When I try to create this resource via terraform apply, I get an error. Neither flexible nor standard works.
Code
locals {
  timestamp = formatdate("YYMMDDhhmmss", timestamp())
}

resource "google_cloud_tasks_queue" "task-queue-b" {
  name     = "task-queue-b"
  location = "us-east1"

  rate_limits {
    max_concurrent_dispatches = 3
    max_dispatches_per_second = 2
  }

  retry_config {
    max_attempts       = 5
    max_retry_duration = "4s"
    max_backoff        = "3s"
    min_backoff        = "2s"
    max_doublings      = 1
  }
}

# Compress source code and make archive file
data "archive_file" "task-queue-source" {
  type        = "zip"
  output_path = "/tmp/task-queue-${local.timestamp}.zip"
  source_dir  = "../../../../queue/build/src" # index.js is here with handler func
}

# Create bucket that will host the source code
resource "google_storage_bucket" "task-queue-bucket" {
  name = "${var.project}-task-queue"
}

# Add source code zip to bucket
resource "google_storage_bucket_object" "task-queue-zip" {
  # Append file MD5 to force bucket to be recreated
  name       = "task-queue-source.zip#${data.archive_file.task-queue-source.output_md5}"
  bucket     = google_storage_bucket.task-queue-bucket.name
  source     = data.archive_file.task-queue-source.output_path
  depends_on = [data.archive_file.task-queue-source]
}

resource "google_app_engine_flexible_app_version" "task-queue-flexible-v1" {
  project    = var.project
  service    = "default"
  version_id = "v1"
  runtime    = "nodejs14"

  entrypoint {
    shell = "node ./index.js"
  }

  deployment {
    zip {
      source_url = "https://storage.googleapis.com/task-queue/${google_storage_bucket.task-queue-bucket.name}/${google_storage_bucket_object.task-queue-zip.name}"
    }
  }

  liveness_check {
    path = "."
  }

  readiness_check {
    path = "."
  }

  automatic_scaling {
    cool_down_period = "120s"
    cpu_utilization {
      target_utilization = 0.5
    }
  }

  delete_service_on_destroy = true
}
I receive the following error:
First issue: I don't think this is related...
2021-10-10T12:37:50.296-0400 [WARN] Provider "provider[\"registry.terraform.io/hashicorp/google\"]" produced an unexpected new value for module.analytics.google_cloudfunctions_function.analytics, but we are tolerating it because it is using the legacy plugin SDK.
The following problems may be the cause of any confusing errors from downstream operations:
- .source_archive_object: was cty.StringVal("source.zip#d8d06f1045f9387d72429479c37eb6b3"), but now cty.StringVal("source.zip")
module.analytics.google_cloudfunctions_function.analytics: Modifications complete after 2m16s [id=projects/<proj>/locations/us-east1/functions/analytics]
Main error I can not for the life of me figure out:
╷
│ Error: Error waiting to create FlexibleAppVersion: Error waiting for Creating FlexibleAppVersion: Error code 13, message: An internal error occurred.
│
│ with module.task-queue.google_app_engine_flexible_app_version.task-queue-flexible-v1,
│ on ..\..\modules\taskqueue\main.tf line 74, in resource "google_app_engine_flexible_app_version" "task-queue-flexible-v1":
│ 74: resource "google_app_engine_flexible_app_version" "task-queue-flexible-v1" {
│
╵
2021-10-10T12:37:50.302-0400 [DEBUG] provider.stdio: received EOF, stopping recv loop: err="rpc error: code = Unavailable desc = transport is closing"
Questions
Am I correct with my chosen resources to run my Task Queue handler function (nodejs express app)? Are there any resources I am missing? Do I need an actual google_app_engine_application TFE resource?
Any insight into this unhelpful error?
Thanks so much

What happens to an App Engine request if autoscaling can't create a new instance?

Suppose a request has been sitting in the pending queue long enough, but because of the instance limit App Engine autoscaling can't start a new instance.
What happens to this request? Is it kept in the queue indefinitely, or is it aborted after some time?
It returns the message "Rate exceeded." to the user and logs the following error: "Request was aborted after waiting too long to attempt to service your request."
Here's how I tested it:
I created a class to measure elapsed time, to make sure that I am indeed executing multiple concurrent requests, and a basic Python app that sleeps for 20 seconds.
Then in app.yaml I set max_instances to 1 and max_concurrent_requests to 1.
Then, by simply opening 5 tabs with the app URL and running them at the same time, at least one of them will fail with the errors mentioned above.
Tested on GAE Standard
timer.py:
import time


class TimerError(Exception):
    """A custom exception used to report errors in use of Timer class"""


class Timer:
    def __init__(self):
        self._start_time = None

    def start(self):
        """Start a new timer"""
        if self._start_time is not None:
            raise TimerError(f"Timer is running. Use .stop() to stop it")
        self._start_time = time.perf_counter()

    def stop(self):
        """Stop the timer, and report the elapsed time"""
        if self._start_time is None:
            raise TimerError(f"Timer is not running. Use .start() to start it")
        elapsed_time = time.perf_counter() - self._start_time
        self._start_time = None
        print(f"Elapsed time: {elapsed_time:0.4f} seconds")
main.py:
from flask import Flask

app = Flask(__name__)


@app.route('/')
def hello():
    import time
    from timer import Timer
    t = Timer()
    t.start()
    print('Started')
    time.sleep(20)
    t.stop()
    return 'Hello World!'


if __name__ == '__main__':
    # Used only when running locally; App Engine serves the app itself.
    app.run(host='127.0.0.1', port=8080, debug=True)
requirements.txt:
Flask==1.1.2
codetiming
app.yaml:
service: scaling
runtime: python37
instance_class: F1
automatic_scaling:
  target_cpu_utilization: 0.65
  min_instances: 1
  max_instances: 1
  min_pending_latency: 30ms  # default value
  max_pending_latency: automatic
  max_concurrent_requests: 1
Deploy:
gcloud app deploy
Then: Open 5 tabs with the link of the deployed app at the same time.
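The same test can also be scripted instead of opening tabs by hand; a small sketch (the URL below is a placeholder for the deployed service) that fires five requests concurrently with a thread pool:

from concurrent.futures import ThreadPoolExecutor

import requests

# placeholder URL for the deployed service
URL = 'https://scaling-dot-your-project.appspot.com/'


def hit(i):
    # each request blocks ~20 s on the single instance, so with
    # max_instances=1 and max_concurrent_requests=1 most of these
    # should come back with the "Rate exceeded." body described above
    resp = requests.get(URL)
    return i, resp.status_code, resp.text[:40]


with ThreadPoolExecutor(max_workers=5) as pool:
    for result in pool.map(hit, range(5)):
        print(result)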
Results:
User gets: "Rate exceeded."
GAE logs show: ERROR "Request was aborted after waiting too long to attempt to service your request."

App Engine Python's send_mail not working anymore

My App Engine app has a cron job that calls a URL endpoint whose handler uses mail.send_mail from google.appengine.api. This has been working fine for several months so far.
Today, the email never arrived. I wrote some test code to invoke send_mail, but the email does not get sent. I have adhered to the necessary requirements, like sending from an email address of the form anything@appname.appspotmail.com.
The function is not throwing any exception either. The App Engine logs note that the URL is invoked, but there is no error or exception.
What might be the problem? Thanks.
Editing to add some code as suggested. Note that to actually test this code you'd need an App Engine app; in that case, change myApp etc. in the code below to the actual app name that is used.
Looking forward to any help/insights.
import sys

import webapp2
from google.appengine.api import mail


class TestEmailHandler(webapp2.RequestHandler):
    def get(self):
        mySender = "mySender <mySender@myApp.appspotmail.com>"
        myTo = "myToAddress@example.com"
        mySubject = "Test Subject"
        myBody = "Test Body Text"
        myHtml = "<html><body>Test body</body></html>"
        try:
            mail.send_mail(sender=mySender,
                           to=myTo,
                           subject=mySubject,
                           body=myBody,
                           html=myHtml)
            self.response.headers['Content-Type'] = 'text/plain'
            self.response.write("Sent email. Body: " + myBody)
        except:
            self.response.write("Exception. " + str(sys.exc_info()[0]))


application = webapp2.WSGIApplication([
    ('/', MainPage),
    ('/test_email', TestEmailHandler)
], debug=True)
My app.yaml looks like this:
application: myApp
version: 2
runtime: python27
api_version: 1
threadsafe: true

handlers:
- url: /.*
  script: myApp.application

inbound_services:
- mail
I think the problem's now fixed. I associated my credit card by enabling billing and now an email got sent when I tested it. Who would've thought...

Writing files to Dropbox account from GAE

I am trying to create files in a Dropbox.com folder from a GAE application.
I have done all the steps to register a Dropbox application and installed the Python SDK from Dropbox locally on my development machine (see the dropbox.com API).
It all works perfectly when I use the cli_client.py test script in the Dropbox SDK on my local machine to access Dropbox; I can 'put' files etc.
I now want to start working in GAE environment, so things get a bit tricky.
Some help would be useful.
For those familiar with the Dropbox API code, I had the following issues thus far:
Issue 1
The rest.py Dropbox API module uses pkg_resources to get the certs installed in site-packages of a local machine installation.
I replaced
TRUSTED_CERT_FILE = pkg_resources.resource_filename(__name__, 'trusted-certs.crt')
with
TRUSTED_CERT_FILE = file('trusted-certs.crt')
and placed the cert file in my GAE application directory. Perhaps this is not quite right; see my authentication error code below.
Issue 2
The session.py Dropbox API module uses the oauth module, so I changed the import to App Engine's oauth.
But that raised an exception because GAE's oauth does not have the OAuthConsumer method used by the Dropbox session.py module. So I downloaded oauth 1.0, added it to my application, and now import this instead of GAE's oauth.
Issue 3
GAE ssl module does not seem to have CERT_REQUIRED property.
This is a constant, so I changed
self.cert_reqs = ssl.CERT_REQUIRED
to
self.cert_reqs = 2
This is used when calling
ssl.wrap_socket(sock, cert_reqs=self.cert_reqs, ca_certs=self.ca_certs)
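For what it's worth, ssl.CERT_REQUIRED is the integer 2 on standard CPython builds, so the substitution is equivalent; a slightly more defensive sketch only falls back to the literal when the attribute is missing:

import ssl

# ssl.CERT_REQUIRED == 2 where it is defined; fall back to the literal
# only on runtimes (like the old GAE sandbox) that omit the constant
cert_reqs = getattr(ssl, 'CERT_REQUIRED', 2)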
Authentication Error
But I still can't connect to Dropbox:
Status: 401
Reason: Unauthorized
Body: {"error": "Authentication failed"}
Headers: [('date', 'Sun, 19 Feb 2012 15:11:12 GMT'), ('transfer-encoding', 'chunked'), ('connection', 'keep-alive'), ('content-type', 'application/json'), ('server', 'dbws')]
Here's my patched version of the Dropbox Python SDK 1.4, which works well for me with Python 2.7 GAE: dropbox_python_sdk_gae_patched.7z.base64. No extra third-party libraries are needed, only those provided by the GAE environment.
Only file uploading (put_file) is tested. Here are the setup steps:
1. Unpack the archive to the root folder of the GAE application (if the main app is in the root folder). You can decode the BASE64 using Base64 Encoder/Decoder: base64.exe -d dropbox_python_sdk_gae_patched.7z.base64 dropbox_python_sdk_gae_patched.7z.
2. Set up APP_KEY, APP_SECRET, ACCESS_TYPE, ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET. The first three are configured at Dropbox application creation time. The last two are obtained when granting the application access to a specific Dropbox account; you can get them through cli_client.py (from the DB Python SDK) from the token_store.txt file.
3. Use in the code like this:
import dropbox
# ...

def DropboxUpload(path, data):
    sess = dropbox.session.DropboxSession(APP_KEY, APP_SECRET, ACCESS_TYPE)
    sess.set_token(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET)
    cli = dropbox.client.DropboxClient(sess)
    data_file = StringIO.StringIO(data)
    return cli.put_file(path, data_file)

# ...
import json

class DropboxUploadHandlerExample(webapp2.RequestHandler):
    def get(self):
        url = "http://www.google.com/"
        result = urlfetch.fetch(url)
        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps(DropboxUpload('/fetch_result.dat', result.content)))
I successfully uploaded from Google Appengine to Dropbox with my own patched version
of the Dropbox SDK: https://github.com/cklein/dropbox-client-python
The usage of urllib2 was replaced by huTools.http: https://github.com/hudora/huTools/
This is the code that is called in a request handler:
db_client = dropbox.get_dropbox_client(consumer_key='', consumer_secret='', access_token_key='', access_token_secret='')
fileobj = StringIO.StringIO(data)
path = '/some/path/filename'
resp = db_client.put_file(path, fileobj)
fileobj.close()
As of April 2016, none of the other suggestions work. (Dropbox API version 2, Python SDK version 6.2).
If you only need a few of the SDK functions, I found it easiest to just use the HTTP API directly:
import json
import urllib2


def files_upload(f, path, mode='add', autorename=False, mute=False):
    args = {
        'path': path,
        'mode': mode,
        'autorename': autorename,
        'mute': mute,
    }
    headers = {
        'Authorization': 'Bearer {}'.format(ACCESS_TOKEN),
        'Dropbox-API-Arg': json.dumps(args),
        'Content-Type': 'application/octet-stream',
    }
    request = urllib2.Request('https://content.dropboxapi.com/2/files/upload', f, headers=headers)
    r = urllib2.urlopen(request)
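Called, for example, like this (a hypothetical local file and Dropbox path, with ACCESS_TOKEN defined elsewhere):

with open('report.pdf', 'rb') as fh:
    # the raw bytes become the request body that /2/files/upload expects
    files_upload(fh.read(), '/backups/report.pdf')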
I have patched the Dropbox Python SDK version 2.2 to work on Google App Engine. Please find the relevant code here:
https://github.com/duncanhawthorne/gae-dropbox-python
The relevant code patch (copied from github) for rest.py is here:
import io
import pkg_resources
-import socket
+#import socket
import ssl
import sys
import urllib
+import urllib2
+def mock_urlopen(method,url,body,headers,preload_content):
+ request = urllib2.Request(url, body, headers=headers)
+ r = urllib2.urlopen(request)
+ return r
+
try:
import json
except ImportError:
@@ -23,7 +29,10 @@
SDK_VERSION = "2.2.0"
-TRUSTED_CERT_FILE = pkg_resources.resource_filename(__name__, 'trusted-certs.crt')
+try:
+ TRUSTED_CERT_FILE = pkg_resources.resource_filename(__name__, 'trusted-certs.crt')
+except:
+ TRUSTED_CERT_FILE = file('trusted-certs.crt')
class RESTResponse(io.IOBase):
@@ -125,6 +134,7 @@ def flush(self):
pass
def create_connection(address):
+ return
host, port = address
err = None
for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
@@ -152,7 +162,7 @@ def json_loadb(data):
class RESTClientObject(object):
- def __init__(self, max_reusable_connections=8, mock_urlopen=None):
+ def __init__(self, max_reusable_connections=8, mock_urlopen=mock_urlopen):
"""
Parameters
max_reusable_connections
@@ -206,7 +216,7 @@ def request(self, method, url, post_params=None, body=None, headers=None, raw_re
raise ValueError("headers should not contain newlines (%s: %s)" %
(key, value))
- try:
+ if True:
# Grab a connection from the pool to make the request.
# We return it to the pool when caller close() the response
urlopen = self.mock_urlopen if self.mock_urlopen else self.pool_manager.urlopen
@@ -217,14 +227,14 @@ def request(self, method, url, post_params=None, body=None, headers=None, raw_re
headers=headers,
preload_content=False
)
- r = RESTResponse(r) # wrap up the urllib3 response before proceeding
- except socket.error as e:
- raise RESTSocketError(url, e)
- except urllib3.exceptions.SSLError as e:
- raise RESTSocketError(url, "SSL certificate error: %s" % e)
+ #r = RESTResponse(r) # wrap up the urllib3 response before proceeding
+ #except socket.error as e:
+ # raise RESTSocketError(url, e)
+ #except urllib3.exceptions.SSLError as e:
+ # raise RESTSocketError(url, "SSL certificate error: %s" % e)
- if r.status not in (200, 206):
- raise ErrorResponse(r, r.read())
+ #if r.status not in (200, 206):
+ # raise ErrorResponse(r, r.read())
return self.process_response(r, raw_response)
@@ -321,10 +331,11 @@ def PUT(cls, *n, **kw):
return cls.IMPL.PUT(*n, **kw)
-class RESTSocketError(socket.error):
+class RESTSocketError():
"""A light wrapper for ``socket.error`` that adds some more information."""
def __init__(self, host, e):
+ return
msg = "Error connecting to \"%s\": %s" % (host, str(e))
socket.error.__init__(self, msg)
