Predicting locally with a model trained on Sagemaker - amazon-sagemaker

I have trained a model on AWS SageMaker by using the built-in algorithm Semantic Segmentation. This trained model named as model.tar.gz is stored on S3. So I want to download this file from S3 and then use it to make inference on my local PC without using AWS SageMaker.
Here are the three files:
hyperparams.json: includes the parameters for network architecture, data inputs, and training. Refer to Semantic Segmentation Hyperparameters.
model_algo-1
model_best.params
My code:
import mxnet as mx
from mxnet import image
from gluoncv.data.transforms.presets.segmentation import test_transform
import gluoncv
img = image.imread('./bdd100k/validation/14df900d-c5c145cb.jpg')
img = test_transform(img, ctx)
img = img.astype('float32')
model = gluoncv.model_zoo.PSPNet(2)
# load the trained model
model.load_parameters('./model/model_best.params')
Error:
AssertionError: Parameter 'head.psp.conv1.0.weight' is missing in file './model/model_best.params', which contains parameters: 'layer3.2.bn3.beta', 'layer3.0.conv3.weight', 'conv1.1.running_var', ..., 'layer2.2.bn3.running_mean', 'layer3.4.bn2.running_mean', 'layer4.2.bn3.beta', 'layer3.4.bn3.beta'. Set allow_missing=True to ignore missing parameters.

The following should work after extracting model_algo-1 from the tar.gz file. This will run on local ctx.
import gluoncv
from gluoncv import model_zoo
from gluoncv.data.transforms.presets.segmentation import test_transform
model = model_zoo.DeepLabV3(nclass=2, backbone='resnet50',
pretrained_base=False, height=800, width=1280, crop_size=240)
model.load_parameters("model_algo-1")
img = test_transform(img, ctx)
img = img.astype('float32')
output = model.predict(img)
print(output.shape)
max_predict = mx.nd.squeeze(mx.nd.argmax(output, 1)).asnumpy()
print(max_predict.shape)
prob_mask = mx.nd.squeeze(output).asnumpy()
def NormalizeData(data):
return (data - np.min(data)) / (np.max(data) - np.min(data))
target_cls_id = 1
prob_mat = prob_mask[target_cls_id, :, :]
norm_prob = NormalizeData(prob_mat)
plt.hist(norm_prob.flatten(), bins=50)

Related

sagemaker inference container ModuleNotFoundError: No module named 'model_handler'

I am trying to deploy a model using my own custom inference container on sagemaker. I am following the documentation here https://docs.aws.amazon.com/sagemaker/latest/dg/adapt-inference-container.html
I have an entrypoint file:
from sagemaker_inference import model_server
#HANDLER_SERVICE = "/home/model-server/model_handler.py:handle"
HANDLER_SERVICE = "model_handler.py"
model_server.start_model_server(handler_service=HANDLER_SERVICE)
I have a model_handler.py file:
from sagemaker_inference.default_handler_service import DefaultHandlerService
from sagemaker_inference.transformer import Transformer
from CustomHandler import CustomHandler
class ModelHandler(DefaultHandlerService):
def __init__(self):
transformer = Transformer(default_inference_handler=CustomHandler())
super(HandlerService, self).__init__(transformer=transformer)
And I have my CustomHandler.py file:
import os
import json
import pandas as pd
from joblib import dump, load
from sagemaker_inference import default_inference_handler, decoder, encoder, errors, utils, content_types
class CustomHandler(default_inference_handler.DefaultInferenceHandler):
def model_fn(self, model_dir: str) -> str:
clf = load(os.path.join(model_dir, "model.joblib"))
return clf
def input_fn(self, request_body: str, content_type: str) -> pd.DataFrame:
if content_type == "application/json":
items = json.loads(request_body)
for item in items:
processed_item1 = process_item1(items["item1"])
processed_item2 = process_item2(items["item2])
all_item1 += [processed_item1]
all_item2 += [processed_item2]
return pd.DataFrame({"item1": all_item1, "comments": all_item2})
def predict_fn(self, input_data, model):
return model.predict(input_data)
Once I deploy the model to an endpoint with these files in the image, I get the following error: ml.mms.wlm.WorkerLifeCycle - ModuleNotFoundError: No module named 'model_handler'.
I am really stuck what to do here. I wish there was an example of how to do this in the above way end to end but I don't think there is. Thanks!
This is because of the path mismatch. The entrypoint is trying to look for "model_handler.py" in WORKDIR directory of the container.
To avoid this, always specify absolute path when working with containers.
Moreover your code looks confusing. Please use this sample code as the reference:
import subprocess
from subprocess import CalledProcessError
import model_handler
from retrying import retry
from sagemaker_inference import model_server
import os
def _retry_if_error(exception):
return isinstance(exception, CalledProcessError or OSError)
#retry(stop_max_delay=1000 * 50, retry_on_exception=_retry_if_error)
def _start_mms():
# by default the number of workers per model is 1, but we can configure it through the
# environment variable below if desired.
# os.environ['SAGEMAKER_MODEL_SERVER_WORKERS'] = '2'
print("Starting MMS -> running ", model_handler.__file__)
model_server.start_model_server(handler_service=model_handler.__file__ + ":handle")
def main():
_start_mms()
# prevent docker exit
subprocess.call(["tail", "-f", "/dev/null"])
main()
Further, notice this line - model_server.start_model_server(handler_service=model_handler.__file__ + ":handle")
Here we are starting the server, and telling it to call handle() function in model_handler.py to invoke your custom logic for all incoming requests.
Also remember that Sagemaker BYOC requires model_handler.py to implement another function ping()
So your "model_handler.py" should look like this -
custom_handler = CustomHandler()
# define your own health check for the model over here
def ping():
return "healthy"
def handle(request, context): # context is necessary input otherwise Sagemaker will throw exception
if request is None:
return "SOME DEFAULT OUTPUT"
try:
response = custom_handler.predict_fn(request)
return [response] # Response must be a list otherwise Sagemaker will throw exception
except Exception as e:
logger.error('Prediction failed for request: {}. \n'
.format(request) + 'Error trace :: {} \n'.format(str(e)))

Django with postgres backend attribute error

SOLVED
Currently I am working on my first django project. The DB is modeling the structure of an Abaqus input file. Here is the Code
from django.db import models as m
import django.contrib.postgres as pg
class node(m.Model):
inputfile = m.CharField(max_length = 255)
source_id = m.IntegerField()
source_sim = m.CharField(max_length = 255)
coordinates = pg.fields.ArrayField(m.FloatField(), size = 3)
When I call manage.py makemigrations (Or just python) it gives me the error message:
AttributeError: module 'django.contrib.postgres' has no attribute 'fields'
When I import ArrayField in a testscript, it works:
from django.contrib.postgres.fields import ArrayField
from django.db import models as m
a = ArrayField(m.FloatField(), size=3)
print(a)
>>><django.contrib.postgres.fields.array.ArrayField>
I was able to migrate my classes into a TestDB without the ArrayField.
My Python version 3.7.1, my Django version is 2.1.3
What's my mistake?
edit: Style&formatting. Thanks to suggestions.
Edit: Solved, but cant find how to flag that

Upload to Amazon S3 using Boto3 and return public url

Iam trying to upload files to s3 using Boto3 and make that uploaded file public and return it as a url.
class UtilResource(BaseZMPResource):
class Meta(BaseZMPResource.Meta):
queryset = Configuration.objects.none()
resource_name = 'util_resource'
allowed_methods = ['get']
def post_list(self, request, **kwargs):
fileToUpload = request.FILES
# write code to upload to amazone s3
# see: https://boto3.readthedocs.org/en/latest/reference/services/s3.html
self.session = Session(aws_access_key_id=settings.AWS_KEY_ID,
aws_secret_access_key=settings.AWS_ACCESS_KEY,
region_name=settings.AWS_REGION)
client = self.session.client('s3')
client.upload_file('zango-static','fileToUpload')
url = "some/test/url"
return self.create_response(request, {
'url': url // return's public url of uploaded file
})
I searched whole documentation I couldn't find any links which describes how to do this can someone explain or provide any resource where I can find the soultion?
I'm in the same situation.
Not able to find anything in the Boto3 docs beyond generate_presigned_url which is not what I need in my case since I have public readable S3 Objects.
The best I came up with is:
bucket_location = boto3.client('s3').get_bucket_location(Bucket=s3_bucket_name)
object_url = "https://s3-{0}.amazonaws.com/{1}/{2}".format(
bucket_location['LocationConstraint'],
s3_bucket_name,
key_name)
You might try posting on the boto3 github issues list for a better solution.
I had the same issue.
Assuming you know the bucket name where you want to store your data, you can then use the following:
import boto3
from boto3.s3.transfer import S3Transfer
credentials = {
'aws_access_key_id': aws_access_key_id,
'aws_secret_access_key': aws_secret_access_key
}
client = boto3.client('s3', 'us-west-2', **credentials)
transfer = S3Transfer(client)
transfer.upload_file('/tmp/myfile', bucket, key,
extra_args={'ACL': 'public-read'})
file_url = '%s/%s/%s' % (client.meta.endpoint_url, bucket, key)
The best solution I found is still to use the generate_presigned_url, just that the Client.Config.signature_version needs to be set to botocore.UNSIGNED.
The following returns the public link without the signing stuff.
config = Config(signature_version=botocore.UNSIGNED)
config.signature_version = botocore.UNSIGNED
boto3.client('s3', config=config).generate_presigned_url('get_object', ExpiresIn=0, Params={'Bucket': bucket, 'Key': key})
The relevant discussions on the boto3 repository are:
https://github.com/boto/boto3/issues/110
https://github.com/boto/boto3/issues/169
https://github.com/boto/boto3/issues/1415
Somebody who wants to build up a direct URL for the public accessible object to avoid using generate_presigned_url for some reason.
Please build URL with urllib.parse.quote_plus considering whitespace and special character issue.
My object key: 2018-11-26 16:34:48.351890+09:00.jpg
please note whitespace and ':'
S3 public link in aws console: https://s3.my_region.amazonaws.com/my_bucket_name/2018-11-26+16%3A34%3A48.351890%2B09%3A00.jpg
Below code was OK for me
import boto3
s3_client = boto3.client
bucket_location = s3_client.get_bucket_location(Bucket='my_bucket_name')
url = "https://s3.{0}.amazonaws.com/{1}/{2}".format(bucket_location['LocationConstraint'], 'my_bucket_name', quote_plus('2018-11-26 16:34:48.351890+09:00.jpg')
print(url)
Going through the existing answers and their comments, I did the following and works well for special cases of file names like having whitespaces, having special characters (ASCII), corner cases. E.g. file names of the form: "key=value.txt"
import boto3
import botocore
config = botocore.client.Config(signature_version=botocore.UNSIGNED)
object_url = boto3.client('s3', config=config).generate_presigned_url('get_object', ExpiresIn=0, Params={'Bucket': s3_bucket_name, 'Key': key_name})
print(object_url)
For Django, if you use Django storages with boto3 the code below does exactly what you want:
default_storage.url(name=f.name)
I used an f-string for the same
import boto3
#s3_client = boto3.session.Session(profile_name='sssss').client('s3')
s3_client=boto3.client('s3')
s3_bucket_name = 'xxxxx'
s3_website_URL= f"http://{s3_bucket_name}.s3-website.{s3_client.get_bucket_location(Bucket=s3_bucket_name)['LocationConstraint']}.amazonaws.com"

Loading a blob(Google app Engine) into PIL or NumPy

I'd like to be able to load a blob(image into the Python Image Processing Library or into a numpy array for analysis(such as mean, median, standard deviation) without using the serving url.
Here is my image database
the t_image_url contains the serving url for the blob
from google.appengine.ext import db, blobstore
class ImageModel(db.Model):
t_image = blobstore.BlobReferenceProperty(required=True)
t_imageUrl = db.StringProperty(required = True)
here is a segment of what I tried
import numpy as np
import Image
import ImageOps
class ImageAnalysisHandler(BaseHandler):
def get(self, imageModel_id):
if self.user:
i = ImageModel.get_by_id(int(imageModel_id))
OpenedImage = Image.open(i.t_image)
self.render('imageAnalysis.html', imageD = i)
else:
self.redirect('login')
This obviously didn't work since the Image Module(from the Python Imaging Library) doesn't know how to read blobs. I was wondering if anyone knew how to read in a blob into PIL or a numpy array accurately.
Take a look at the BlobReader class. It let you read a file store in blobstore with a file-like interface.

<class 'google.appengine.runtime.DeadlineExceededError'>: how to get around?

Ok guys I am having tons of problems getting my working dev server to a working production server :). I have a task that will go through and request urls and collect and update data. It takes 30 minutes to run.
I uploaded to production server and going to the url with its corresponding .py script appname.appspot.com/tasks/rrs after 30 seconds I am getting the class google.appengine.runtime.DeadlineExceededError' Is there any way to get around this? Is this a 30 second deadline for a page? This script works fine in development server I go to the url and the associate .py script runs until completion.
import time
import random
import string
import cPickle
from StringIO import StringIO
try:
import json
except ImportError:
import simplejson as json
import urllib
import pprint
import datetime
import sys
sys.path.append("C:\Program Files (x86)\Google\google_appengine")
sys.path.append("C:\Program Files (x86)\Google\google_appengine\lib\yaml\lib")
sys.path.append("C:\Program Files (x86)\Google\google_appengine\lib\webob")
from google.appengine.api import users
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext import db
class SR(db.Model):
name = db.StringProperty()
title = db.StringProperty()
url = db.StringProperty()
##request url and returns JSON_data
def overview(page):
u = urllib.urlopen(page)
bytes = StringIO(u.read())
##print bytes
u.close()
try:
JSON_data = json.load(bytes)
return JSON_data
except ValueError,e:
print e," Couldn't get .json for %s" % page
return None
##specific code to parse particular JSON data and append new SR objects to the given url list
def parse_json(JSON_data,lists):
sr = SR()
sr.name = ##data gathered
sr.title = ##data gathered
sr.url = ##data gathered
lists.append(sr)
return lists
## I want to be able to request lets say 500 pages without timeing out
page = 'someurlpage.com'##starting url
url_list = []
for z in range(0,500):
page = 'someurlpage.com/%s'%z
JSON_data = overview(page)##get json data for a given url page
url_list = parse_json(JSON_data,url_list)##parse the json data and append class objects to a given list
db.put(url_list)##finally add object to gae database
Yes, the App Engine imposes a 30 seconds deadline. One way around it might be a try/except DeadlineExceededError and putting the rest in a taskqueue.
But you can't make your requests run for a longer period.
You can also try Bulkupdate
Example:
class Todo(db.Model):
page = db.StringProperty()
class BulkPageParser(bulkupdate.BulkUpdater):
def get_query(self):
return Todo.all()
def handle_entity(self, entity):
JSON_data = overview(entity.page)
db.put(parse_json(JSON_data, [])
entity.delete()
# Put this in your view code:
for i in range(500):
Todo(page='someurlpage.com/%s' % i).put()
job = BulkPageParser()
job.start()
ok so if I am dynamically adding links as I am parsing the pages, I would add to the todo queue like so I believe.
def handle_entity(self, entity):
JSON_data = overview(entity.page)
data_gathered,new_links = parse_json(JSON_data, [])##like earlier returns the a list of sr objects, and now a list of new links/pages to go to
db.put(data_gathered)
for link in new_links:
Todo(page=link).put()
entity.delete()

Resources