I have trained a model on AWS SageMaker using the built-in Semantic Segmentation algorithm. The trained model, model.tar.gz, is stored on S3. I want to download this file from S3 and use it to run inference on my local PC without using AWS SageMaker.
Here are the three files:
hyperparams.json: includes the parameters for network architecture, data inputs, and training. Refer to Semantic Segmentation Hyperparameters.
My code:
import mxnet as mx
from mxnet import image
from gluoncv.data.transforms.presets.segmentation import test_transform
import gluoncv
# `ctx` was used below without ever being defined; run on the local CPU.
ctx = mx.cpu(0)

# Read the validation image and apply gluoncv's segmentation test preset.
img = image.imread('./bdd100k/validation/14df900d-c5c145cb.jpg')
img = test_transform(img, ctx)
img = img.astype('float32')

# Rebuild the network with 2 output classes.
model = gluoncv.model_zoo.PSPNet(2)
# load the trained model
AssertionError: Parameter 'head.psp.conv1.0.weight' is missing in file './model/model_best.params', which contains parameters: 'layer3.2.bn3.beta', 'layer3.0.conv3.weight', 'conv1.1.running_var', ..., 'layer2.2.bn3.running_mean', 'layer3.4.bn2.running_mean', 'layer4.2.bn3.beta', 'layer3.4.bn3.beta'. Set allow_missing=True to ignore missing parameters.
The following should work after extracting model_algo-1 from the tar.gz file. This will run on local ctx.
import gluoncv
import mxnet as mx
from gluoncv import model_zoo
from gluoncv.data.transforms.presets.segmentation import test_transform
from mxnet import image

# Run everything on the local CPU.
ctx = mx.cpu(0)

# Rebuild the network architecture before loading the trained weights.
model = model_zoo.DeepLabV3(nclass=2, backbone='resnet50',
                            pretrained_base=False, height=800, width=1280,
                            crop_size=240)
# NOTE(review): assumes the extracted weight file is named `model_algo-1`
# and sits in the working directory - adjust the path as needed.
model.load_parameters('model_algo-1', ctx=ctx)

# NOTE(review): the image path is an assumption - point it at your own image.
img = image.imread('./bdd100k/validation/14df900d-c5c145cb.jpg')
img = test_transform(img, ctx)
img = img.astype('float32')

output = model.predict(img)
# argmax over axis 1 picks, for every pixel, the index with the highest score.
max_predict = mx.nd.squeeze(mx.nd.argmax(output, 1)).asnumpy()
# Raw network output with singleton axes removed.
prob_mask = mx.nd.squeeze(output).asnumpy()
def NormalizeData(data):
    """Min-max scale *data* so its smallest value maps to 0 and its largest to 1.

    Args:
        data: numeric array (anything ``np.min``/``np.max`` accept).

    Returns:
        An array of the same shape with values in ``[0, 1]``. When every
        element of *data* is equal, an all-zero array is returned instead of
        dividing by zero.
    """
    lowest = np.min(data)
    span = np.max(data) - lowest
    if span == 0:
        # Constant input: the original expression produced 0/0 (NaN).
        return np.zeros_like(data, dtype=float)
    return (data - lowest) / span
# Index of the class to inspect.
target_cls_id = 1
# Take that class's slice along the first axis of prob_mask.
prob_mat = prob_mask[target_cls_id, :, :]
# Rescale the slice with NormalizeData before plotting.
norm_prob = NormalizeData(prob_mat)
# Histogram of the flattened values, using 50 bins.
plt.hist(norm_prob.flatten(), bins=50)
I am trying to deploy a model using my own custom inference container on sagemaker. I am following the documentation here https://docs.aws.amazon.com/sagemaker/latest/dg/adapt-inference-container.html
I have an entrypoint file:
from sagemaker_inference import model_server
# Earlier variant kept for reference: absolute path plus the ":handle" entry point.
#HANDLER_SERVICE = "/home/model-server/model_handler.py:handle"
# Handler currently in use: a bare file name with no directory component.
HANDLER_SERVICE = "model_handler.py"
I have a model_handler.py file:
from sagemaker_inference.default_handler_service import DefaultHandlerService
from sagemaker_inference.transformer import Transformer
from CustomHandler import CustomHandler
class ModelHandler(DefaultHandlerService):
    """Handler service that routes requests through ``CustomHandler``."""

    def __init__(self):
        """Build a Transformer around CustomHandler and hand it to the base class."""
        transformer = Transformer(default_inference_handler=CustomHandler())
        # The original called super(HandlerService, self), but no name
        # `HandlerService` exists in this module (NameError at construction).
        super().__init__(transformer=transformer)
And I have my CustomHandler.py file:
import os
import json
import pandas as pd
from joblib import dump, load
from sagemaker_inference import default_inference_handler, decoder, encoder, errors, utils, content_types
class CustomHandler(default_inference_handler.DefaultInferenceHandler):
    """Inference handler for a joblib-serialised model fed with JSON requests."""

    def model_fn(self, model_dir: str):
        """Load ``model.joblib`` from *model_dir* and return the deserialised object."""
        return load(os.path.join(model_dir, "model.joblib"))

    def input_fn(self, request_body: str, content_type: str) -> pd.DataFrame:
        """Turn a JSON request body into the DataFrame passed to ``predict_fn``.

        NOTE(review): assumes the body is a JSON array of objects, each with
        "item1" and "item2" keys - confirm against the client payload.

        Raises:
            errors.UnsupportedFormatError: if *content_type* is not
                "application/json" (the original silently returned None).
        """
        if content_type != "application/json":
            raise errors.UnsupportedFormatError(content_type)

        items = json.loads(request_body)
        # The accumulators were never initialised in the original.
        all_item1 = []
        all_item2 = []
        for item in items:
            # Index the current element, not the whole collection.
            all_item1.append(process_item1(item["item1"]))
            all_item2.append(process_item2(item["item2"]))
        return pd.DataFrame({"item1": all_item1, "comments": all_item2})

    def predict_fn(self, input_data, model):
        """Return ``model.predict(input_data)``."""
        return model.predict(input_data)
Once I deploy the model to an endpoint with these files in the image, I get the following error: ml.mms.wlm.WorkerLifeCycle - ModuleNotFoundError: No module named 'model_handler'.
I am really stuck on what to do here. I wish there were an end-to-end example of how to do this in the above way, but I don't think there is one. Thanks!
This is because of the path mismatch. The entrypoint is trying to look for "model_handler.py" in WORKDIR directory of the container.
To avoid this, always specify absolute path when working with containers.
Moreover your code looks confusing. Please use this sample code as the reference:
import subprocess
from subprocess import CalledProcessError
import model_handler
from retrying import retry
from sagemaker_inference import model_server
import os
def _retry_if_error(exception):
    """Return True when *exception* is one that should trigger a retry.

    Retryable errors are ``CalledProcessError`` and ``OSError``. The original
    passed ``CalledProcessError or OSError`` to ``isinstance``; that
    expression evaluates to ``CalledProcessError`` alone, so ``OSError`` was
    never matched. ``isinstance`` needs a tuple to test several types.
    """
    return isinstance(exception, (CalledProcessError, OSError))
# The decorator line appeared as "#retry(...)", which left both `retry` and
# `_retry_if_error` unused; restored as a decorator.
@retry(stop_max_delay=1000 * 50, retry_on_exception=_retry_if_error)
def _start_mms():
    """Start the model server with ``handle`` in model_handler.py as the handler."""
    # by default the number of workers per model is 1, but we can configure it through the
    # environment variable below if desired.
    print("Starting MMS -> running ", model_handler.__file__)
    # model_handler.__file__ is the module's path as imported, so the handler
    # location does not depend on a hard-coded path string.
    model_server.start_model_server(handler_service=model_handler.__file__ + ":handle")
def main():
    """Start the model server, then block so the container keeps running."""
    # NOTE(review): the excerpt never invoked _start_mms(), so no server was
    # started - confirm this call belongs here.
    _start_mms()
    # prevent docker exit
    subprocess.call(["tail", "-f", "/dev/null"])
Further, notice this line - model_server.start_model_server(handler_service=model_handler.__file__ + ":handle")
Here we are starting the server, and telling it to call handle() function in model_handler.py to invoke your custom logic for all incoming requests.
Also remember that Sagemaker BYOC requires model_handler.py to implement another function ping()
So your "model_handler.py" should look like this -
custom_handler = CustomHandler()
# define your own health check for the model over here
def ping():
    """Health-check hook; unconditionally reports "healthy"."""
    return "healthy"
def handle(request, context): # context is necessary input otherwise Sagemaker will throw exception
    """Entry point the model server calls for each request.

    Returns a one-element list holding the prediction, or None when there is
    no request or the prediction fails (the failure is logged).
    """
    if request is None:
        # NOTE(review): the body of this branch is missing from the excerpt;
        # returning None is an assumption - confirm the intended reply.
        return None
    try:
        # NOTE(review): CustomHandler.predict_fn elsewhere takes
        # (input_data, model); verify the arguments expected here.
        response = custom_handler.predict_fn(request)
        return [response] # Response must be a list otherwise Sagemaker will throw exception
    except Exception as e:
        logger.error('Prediction failed for request: {}. \n'
                     .format(request) + 'Error trace :: {} \n'.format(str(e)))
Currently I am working on my first django project. The DB is modeling the structure of an Abaqus input file. Here is the Code
from django.contrib.postgres.fields import ArrayField
from django.db import models as m


class node(m.Model):
    """One node of the modelled Abaqus input file."""

    inputfile = m.CharField(max_length=255)
    source_id = m.IntegerField()
    source_sim = m.CharField(max_length=255)
    # ArrayField is imported from the `fields` submodule explicitly:
    # `import django.contrib.postgres as pg` does not load `pg.fields`, which
    # is what raised "module 'django.contrib.postgres' has no attribute 'fields'".
    coordinates = ArrayField(m.FloatField(), size=3)
When I call manage.py makemigrations (Or just python) it gives me the error message:
AttributeError: module 'django.contrib.postgres' has no attribute 'fields'
When I import ArrayField in a testscript, it works:
from django.contrib.postgres.fields import ArrayField
from django.db import models as m
a = ArrayField(m.FloatField(), size=3)
I was able to migrate my classes into a TestDB without the ArrayField.
My Python version 3.7.1, my Django version is 2.1.3
What's my mistake?
edit: Style&formatting. Thanks to suggestions.
Edit: Solved, but I can't find how to flag that.
I am trying to upload files to S3 using Boto3, make the uploaded file public, and return its URL.
class UtilResource(BaseZMPResource):
    """Resource whose POST handler is meant to upload a file and reply with its URL."""

    class Meta(BaseZMPResource.Meta):
        queryset = Configuration.objects.none()
        resource_name = 'util_resource'
        # NOTE(review): only 'get' is allowed although the upload lives in
        # post_list - confirm whether 'post' should be listed too.
        allowed_methods = ['get']

    def post_list(self, request, **kwargs):
        """Handle an upload request and respond with the uploaded file's URL."""
        fileToUpload = request.FILES
        # write code to upload to Amazon S3
        # see: https://boto3.readthedocs.org/en/latest/reference/services/s3.html
        # NOTE(review): the Session(...) call was cut off after the access
        # key in the excerpt; any remaining credential arguments go here.
        self.session = Session(aws_access_key_id=settings.AWS_KEY_ID)
        client = self.session.client('s3')
        url = "some/test/url"
        return self.create_response(request, {
            'url': url,  # returns public url of uploaded file
        })
I searched the whole documentation but couldn't find anything that describes how to do this. Can someone explain, or point me to a resource where I can find the solution?
I'm in the same situation.
Not able to find anything in the Boto3 docs beyond generate_presigned_url which is not what I need in my case since I have public readable S3 Objects.
The best I came up with is:
bucket_location = boto3.client('s3').get_bucket_location(Bucket=s3_bucket_name)
# NOTE(review): the .format() arguments were cut off in the excerpt; region,
# bucket and key are the three values the template needs. `key_name` is an
# assumed variable name.
# NOTE(review): 'LocationConstraint' can be None for some buckets
# (reportedly us-east-1) - confirm before relying on this in that region.
object_url = "https://s3-{0}.amazonaws.com/{1}/{2}".format(
    bucket_location['LocationConstraint'],
    s3_bucket_name,
    key_name)
You might try posting on the boto3 github issues list for a better solution.
I had the same issue.
Assuming you know the bucket name where you want to store your data, you can then use the following:
import boto3
from boto3.s3.transfer import S3Transfer
credentials = {
    'aws_access_key_id': aws_access_key_id,
    'aws_secret_access_key': aws_secret_access_key,
}  # closing brace was missing in the excerpt

client = boto3.client('s3', 'us-west-2', **credentials)
transfer = S3Transfer(client)

# Upload with a public-read ACL.
transfer.upload_file('/tmp/myfile', bucket, key,
                     extra_args={'ACL': 'public-read'})

# Path-style URL: <endpoint>/<bucket>/<key>.
file_url = '%s/%s/%s' % (client.meta.endpoint_url, bucket, key)
The best solution I found is still to use the generate_presigned_url, just that the Client.Config.signature_version needs to be set to botocore.UNSIGNED.
The following returns the public link without the signing stuff.
import boto3
import botocore
from botocore.client import Config

# An UNSIGNED config makes generate_presigned_url return the link without
# the signing query parameters.
config = Config(signature_version=botocore.UNSIGNED)
# Keep the result instead of discarding it.
object_url = boto3.client('s3', config=config).generate_presigned_url(
    'get_object', ExpiresIn=0, Params={'Bucket': bucket, 'Key': key})
The relevant discussions on the boto3 repository are:
Somebody who wants to build up a direct URL for the public accessible object to avoid using generate_presigned_url for some reason.
Please build URL with urllib.parse.quote_plus considering whitespace and special character issue.
My object key: 2018-11-26 16:34:48.351890+09:00.jpg
please note whitespace and ':'
S3 public link in aws console: https://s3.my_region.amazonaws.com/my_bucket_name/2018-11-26+16%3A34%3A48.351890%2B09%3A00.jpg
Below code was OK for me
import boto3
from urllib.parse import quote_plus

# boto3.client must be called; the original bound the factory function itself.
s3_client = boto3.client('s3')
bucket_location = s3_client.get_bucket_location(Bucket='my_bucket_name')
# quote_plus escapes the whitespace, ':' and '+' in the object key.
url = "https://s3.{0}.amazonaws.com/{1}/{2}".format(
    bucket_location['LocationConstraint'],
    'my_bucket_name',
    quote_plus('2018-11-26 16:34:48.351890+09:00.jpg'))
Going through the existing answers and their comments, I did the following and works well for special cases of file names like having whitespaces, having special characters (ASCII), corner cases. E.g. file names of the form: "key=value.txt"
import boto3
import botocore
# Client config with request signing switched off (botocore.UNSIGNED).
config = botocore.client.Config(signature_version=botocore.UNSIGNED)
# Build the get_object URL for the given bucket/key using that unsigned client.
object_url = boto3.client('s3', config=config).generate_presigned_url('get_object', ExpiresIn=0, Params={'Bucket': s3_bucket_name, 'Key': key_name})
For Django, if you use Django storages with boto3 the code below does exactly what you want:
I used an f-string for the same
import boto3
# Alternative when a named credentials profile is needed:
#s3_client = boto3.session.Session(profile_name='sssss').client('s3')
# s3_client was only defined in the commented-out line above, so the f-string
# below raised NameError; create a default client.
s3_client = boto3.client('s3')
s3_bucket_name = 'xxxxx'
s3_website_URL= f"http://{s3_bucket_name}.s3-website.{s3_client.get_bucket_location(Bucket=s3_bucket_name)['LocationConstraint']}.amazonaws.com"
I'd like to be able to load a blob (image) into the Python Imaging Library or into a numpy array for analysis (such as mean, median, standard deviation) without using the serving URL.
Here is my image database
the t_image_url contains the serving url for the blob
from google.appengine.ext import db, blobstore
class ImageModel(db.Model):
    """Datastore entity pairing a stored blob with its serving URL."""

    # Reference to the blob itself.
    t_image = blobstore.BlobReferenceProperty(required=True)
    # Serving URL for the blob.
    t_imageUrl = db.StringProperty(required = True)
here is a segment of what I tried
import numpy as np
import Image
import ImageOps
class ImageAnalysisHandler(BaseHandler):
    """Opens a stored image for analysis and renders the analysis page."""

    def get(self, imageModel_id):
        """Look up the ImageModel by id, open its blob with PIL and render the page."""
        # Local import: this snippet's own import list does not include blobstore.
        from google.appengine.ext import blobstore

        if self.user:
            i = ImageModel.get_by_id(int(imageModel_id))
            # Image.open needs a file-like object; BlobReader provides one
            # over the blob, whereas the bare blob reference does not.
            OpenedImage = Image.open(blobstore.BlobReader(i.t_image))
            self.render('imageAnalysis.html', imageD = i)
This obviously didn't work since the Image Module(from the Python Imaging Library) doesn't know how to read blobs. I was wondering if anyone knew how to read in a blob into PIL or a numpy array accurately.
Take a look at the BlobReader class. It lets you read a file stored in the Blobstore through a file-like interface.
Ok guys I am having tons of problems getting my working dev server to a working production server :). I have a task that will go through and request urls and collect and update data. It takes 30 minutes to run.
I uploaded it to the production server and went to the URL for the corresponding .py script (appname.appspot.com/tasks/rrs). After 30 seconds I get a google.appengine.runtime.DeadlineExceededError. Is there any way to get around this? Is this a 30-second deadline per page request? The script works fine on the development server: I go to the URL and the associated .py script runs to completion.
import time
import random
import string
import cPickle
from StringIO import StringIO
# Prefer the standard-library json module; fall back to simplejson when it
# is unavailable. The `try:` line was missing from the excerpt.
try:
    import json
except ImportError:
    import simplejson as json
import urllib
import pprint
import datetime
import sys
sys.path.append("C:\Program Files (x86)\Google\google_appengine")
sys.path.append("C:\Program Files (x86)\Google\google_appengine\lib\yaml\lib")
sys.path.append("C:\Program Files (x86)\Google\google_appengine\lib\webob")
from google.appengine.api import users
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext import db
class SR(db.Model):
    """Datastore entity with three string fields: name, title and url."""

    name = db.StringProperty()
    title = db.StringProperty()
    url = db.StringProperty()
##request url and returns JSON_data
def overview(page):
    """Fetch *page* and return its body parsed as JSON.

    Returns None (after printing a message) when the body is not valid JSON.
    """
    # The `try:` line was missing from the excerpt.
    try:
        u = urllib.urlopen(page)
        # Renamed from `bytes`, which shadowed the builtin.
        raw = StringIO(u.read())
        ##print raw
        JSON_data = json.load(raw)
        return JSON_data
    except ValueError as e:
        # Same text as the original `print e," Couldn't ..."` statement,
        # written so it is valid on both Python 2 and 3.
        print("%s  Couldn't get .json for %s" % (e, page))
        return None
##specific code to parse particular JSON data and append new SR objects to the given url list
def parse_json(JSON_data,lists):
    """Build an SR object from *JSON_data*, append it to *lists* and return *lists*.

    NOTE(review): the field extraction is site-specific and was elided in the
    excerpt; the None values below are placeholders to be replaced.
    """
    sr = SR()
    sr.name = None  ##data gathered
    sr.title = None  ##data gathered
    sr.url = None  ##data gathered
    # The surrounding text says new SR objects are appended to the given
    # list; the excerpt had no append call.
    lists.append(sr)
    return lists
## I want to be able to request lets say 500 pages without timeing out
page = 'someurlpage.com'##starting url
url_list = []
for z in range(0,500):
    page = 'someurlpage.com/%s'%z
    JSON_data = overview(page)##get json data for a given url page
    url_list = parse_json(JSON_data,url_list)##parse the json data and append class objects to a given list
# Single batched write after the loop ("finally add object to gae database").
db.put(url_list)
Yes, App Engine imposes a 30-second deadline. One way around it might be to catch DeadlineExceededError with try/except and put the remaining work in a task queue.
But you can't make your requests run for a longer period.
You can also try Bulkupdate
class Todo(db.Model):
    """Work item holding one page URL that still has to be processed."""

    page = db.StringProperty()
class BulkPageParser(bulkupdate.BulkUpdater):
    """Bulk updater that fetches and parses the page of every Todo entity."""

    def get_query(self):
        """Return the query selecting all Todo entities."""
        return Todo.all()

    def handle_entity(self, entity):
        """Fetch the entity's page, parse it and store the resulting objects."""
        JSON_data = overview(entity.page)
        # Closing parenthesis of db.put(...) was missing in the excerpt.
        db.put(parse_json(JSON_data, []))
# Put this in your view code:
# Create one Todo entity per page to process.
for i in range(500):
    Todo(page='someurlpage.com/%s' % i).put()
# NOTE(review): the excerpt only constructs the job; a call that actually
# starts it appears to be missing - check the bulkupdate documentation.
job = BulkPageParser()
ok so if I am dynamically adding links as I am parsing the pages, I would add to the todo queue like so I believe.
def handle_entity(self, entity):
JSON_data = overview(entity.page)
data_gathered,new_links = parse_json(JSON_data, [])##like earlier returns the a list of sr objects, and now a list of new links/pages to go to
for link in new_links: