Database migrations are a popular pattern, particularly with Ruby on Rails. Since migrations specify how to mold old data to fit a new schema, they can be helpful when you have production data that must be converted quickly and reliably.
But migrating models in App Engine is hard: there is no easy way to process every entity sequentially, and there is no offline operation to migrate everything effectively in one big transaction.
What are your techniques for modifying a db.Model "schema" and migrating the data to fit the new schema?
Here is what I do.
I have a MigratingModel class, which all of my models inherit from. Here is migrating_model.py:
"""Models which know how to migrate themselves"""
import logging
from google.appengine.ext import db
from google.appengine.api import memcache
class MigrationError(Exception):
    """Signals that a model could not be set up for migration."""
class MigratingModel(db.Model):
    """A model which knows how to migrate itself.

    Subclasses must define a class-level ``migration_version`` integer
    attribute, plus one ``migrate_N()`` method for every version ``N`` from
    1 up to that value.
    """

    # Version of the schema this particular entity was last migrated to.
    current_migration_version = db.IntegerProperty(required=True, default=0)

    def __init__(self, *args, **kw):
        """Create the instance, stamping new entities with the latest version.

        Raises:
            MigrationError: if the subclass does not define
                ``migration_version``.
        """
        if not kw.get('_from_entity'):
            # Assume newly-created entities needn't migrate.
            try:
                kw.setdefault('current_migration_version',
                              self.__class__.migration_version)
            except AttributeError:
                msg = ('migration_version required for %s'
                       % self.__class__.__name__)
                logging.critical(msg)
                raise MigrationError(msg)
        super(MigratingModel, self).__init__(*args, **kw)

    @classmethod
    def from_entity(cls, *args, **kw):
        """Build the instance from a datastore entity and migrate it.

        Returns:
            The instance, after ``migrate()`` has run on it.
        """
        # From_entity() calls __init__() with _from_entity=True
        obj = super(MigratingModel, cls).from_entity(*args, **kw)
        return obj.migrate()

    def migrate(self):
        """Run every pending ``migrate_N()`` step, then store the entity.

        Nothing is written when the entity is already at the class's
        ``migration_version``.

        Returns:
            ``self``, so the call can be chained.
        """
        target_version = self.__class__.migration_version
        if self.current_migration_version < target_version:
            pending = range(self.current_migration_version + 1,
                            target_version + 1)
            for version in pending:
                # The version is recorded before the step runs, so that
                # migrate_N() itself observes current_migration_version == N.
                self.current_migration_version = version
                method_name = 'migrate_%d' % version
                logging.debug('%s migrating to %d: %s',
                              self.__class__.__name__, version, method_name)
                getattr(self, method_name)()
            db.put(self)
        return self
MigratingModel intercepts the conversion from the raw datastore entity to the full db.Model instance. If current_migration_version has fallen behind the class's latest migration_version, then it runs a series of migrate_N() methods which do the heavy lifting.
For example:
"""Migrating model example"""
# ...imports...
class User(MigratingModel):
    """Example model that has gone through three schema migrations."""

    migration_version = 3

    name = db.StringProperty()  # deprecated: use first_name and last_name
    first_name = db.StringProperty()
    last_name = db.StringProperty()
    age = db.IntegerProperty()
    invalid = db.BooleanProperty()  # to search for bad users

    def migrate_1(self):
        """Split the single name field into first_name and last_name."""
        # Unpacking requires the name to consist of exactly two words.
        first, last = self.name.split()
        self.first_name = first
        self.last_name = last

    def migrate_2(self):
        """Capitalize both name parts."""
        self.first_name = self.first_name.capitalize()
        self.last_name = self.last_name.capitalize()

    def migrate_3(self):
        """Flag accounts whose age lies outside the 0..85 range."""
        if not (0 <= self.age <= 85):
            self.invalid = True
On a busy site, the migrate() method should retry if db.put() fails, and possibly log a critical error if the migration didn't work.
I haven't gotten there yet, but at some point I would probably mix-in my migrations from a separate file.
Final thoughts
It is hard to test on App Engine. It's hard to get access to your production data in a test environment, and at this time it is difficult-to-impossible to make a coherent snapshot backup. Therefore, for major changes, consider making a new version that uses a completely different model name which imports from the old model and migrates as it needs. (For example, User2 instead of User). That way, if you need to fall back to the previous version, you have an effective backup of the data.
Related
Models.py
class tag_table(models.Model):
    """A tag that books can point at; the tag text is the primary key."""

    tag_id = models.IntegerField(unique=True)
    book_tag = models.CharField(max_length=50, primary_key=True)

    def __unicode__(self):
        """Display the tag by its text."""
        return self.book_tag
class Book(models.Model):
    """A book with its author and a single tag."""

    book_id = models.IntegerField(primary_key=True)
    book_author = models.CharField(max_length=50, default='')
    # NOTE(review): default='' makes rows saved without a tag reference the
    # tag whose key is the empty string. Presumably no such tag_table row
    # exists, which would explain DoesNotExist on book.book_tag -- confirm,
    # and consider null=True/blank=True instead of a default.
    book_tag = models.ForeignKey(tag_table, on_delete=models.CASCADE,
                                 default='')

    def __unicode__(self):
        """Return "<author> (<tag>)" as text.

        __unicode__ must return a string, not a tuple. ``book_tag_id`` is
        the raw foreign-key value; since tag_table uses the tag text as its
        primary key, this is the tag itself, and reading it needs no query,
        so displaying a book cannot raise DoesNotExist.
        """
        return u'%s (%s)' % (self.book_author, self.book_tag_id)
I am running this query in the interactive Django shell:
Book.objects.all()[0].book_tag
Traceback (most recent call last):
File "", line 1, in
File "C:\Python27\lib\site-packages\django-1.10.3-py2.7.egg\django\db\models\fields\related_descriptors.py", line 1
in get
rel_obj = qs.get()
File "C:\Python27\lib\site-packages\django-1.10.3-py2.7.egg\django\db\models\query.py", line 385, in get
self.model._meta.object_name
DoesNotExist: tag_table matching query does not exist.
How can I access the foreign key "book_tag" through the object list of the model "Book"?
Your database schema is incorrect. Because Django follows an MVC-style pattern, you don't need to relate tables in the database manually; that approach is outdated and also a little hard.
here is your hypothetical models i hope it can help
class Book(models.Model):
    """A book linked to its author and its tags through foreign keys."""

    author = models.ForeignKey(Author)
    tags = models.ForeignKey(Book_Tags)

    def __unicode__(self):  # use __str__() on Python 3
        """Display the book by its author's name."""
        return self.author.name
Author and Book_Tags are two other models that you should define beforehand.
There is no need for an id field; Django declares it automatically.
You can use the Django website while writing your code. It is very helpful.
For example, for writing your models, get help from here.
I am trying to implement an ndb model audit so that all changes to properties are stored within each model instance. Here is the code of the _pre_put_hook I chose to implement that.
def _pre_put_hook(self):
    """Log every public, non-callable attribute that is about to change.

    Compares this entity with the version currently stored under the same
    key and emits two debug log lines for each attribute whose value
    differs. New entities (no key or no id yet) and keys with no stored
    entity are skipped.
    """
    # save a history record for updates
    if self.key is None or self.key.id() is None:
        return

    # Skip the in-context cache: a cached read returns the entity that is
    # being saved right now, so old and new values would always be equal.
    old_object = self.key.get(use_cache=False)
    if old_object is None:
        # Nothing is stored under this key yet, so there is nothing to diff.
        return

    for attr in dir(self):
        if attr.startswith("_"):
            continue
        new_value = getattr(self, attr)
        if callable(new_value):
            continue
        old_value = getattr(old_object, attr)
        if new_value != old_value:
            logging.debug('UPDATE: {0}'.format(attr))
            logging.debug('OLD: {0} NEW: {1}'.format(old_value, new_value))
The problem is old_object is always populated with the same values of the self (object) being updated. How can I access the property values of the old object BEFORE the put() being actually made (_pre_put)?
EDIT:
I realized over time that I was doing a bunch of work that didn't need to be done (a lot of CPU/memory was used copying entire entities and passing them around when that may not have been needed). Here's the updated version, which stores a reference to the original protobuf and only deserializes it if you need it:
__original = None  # shadow copy of this object as first loaded; inflated lazily
_original_pb = None  # the original encoded Protobuf representation of this entity


@property
def _original(self):
    """Return the entity as it was originally loaded, or None.

    The stored protobuf is deserialized on first access only and the result
    is kept, so later reads reuse the same object. Without a stored protobuf
    there is nothing to inflate and the result stays None.
    """
    if not self.__original and self._original_pb:
        self.__original = self.__class__._from_pb(self._original_pb)
    return self.__original
@classmethod
def _from_pb(cls, pb, set_key=True, ent=None, key=None):
    """Build the entity from a protobuf and remember that protobuf.

    The protobuf is kept on the entity so the originally loaded state can be
    rebuilt later, to see whether anything changed between puts.

    Returns:
        The entity produced by the parent class's ``_from_pb``.
    """
    entity = super(ChangesetMixin, cls)._from_pb(pb, set_key=set_key, ent=ent, key=key)
    # Only record the protobuf the first time, and never for projection
    # results (presumably because those carry only part of the entity --
    # TODO confirm).
    # _from_pb also gets called when an object is unpickled (like when
    # passing through the deferred library).
    if entity._original_pb is None and not entity._projection:
        entity.__original = None
        entity._original_pb = pb
    return entity
Make a clone of the entity when you first read it:
Copy an entity in Google App Engine datastore in Python without knowing property names at 'compile' time
and put it on the entity itself so it can be referenced later when desired. That way you don't have to do a second datastore read just to make the comparison
We override two different Model methods to make this happen:
@classmethod
def _post_get_hook(cls, key, future):
    """Clone a freshly fetched entity so changes between puts can be tracked.

    NOTE: this only gets called after a ndb.Key.get(), NOT when an entity is
    loaded from a query; the _from_pb override covers that case.
    It also runs after EVERY key.get(), regardless of whether NDB had the
    entity cached already, which is why the clone is only made while
    _original is still unset.
    """
    entity = future.get_result()
    if entity is None:
        return
    if entity._original is None:
        entity._original = clone(entity)
@classmethod
def _from_pb(cls, pb, set_key=True, ent=None, key=None):
    """Build the entity from a protobuf and clone it for change tracking.

    This is one way to know when an object loads from a datastore QUERY:
    _post_get_hook only gets called on a direct Key.get(), and none of the
    documented hooks are called after query results.
    SEE: https://code.google.com/p/appengine-ndb-experiment/issues/detail?id=211

    Returns:
        The entity produced by the parent class's ``_from_pb``.
    """
    entity = super(BaseModel, cls)._from_pb(pb, set_key=set_key, ent=ent, key=key)
    # _from_pb also gets called when a new object is unpickled (like when
    # passing through the deferred library). An entity materialized without
    # a key gets no _original.
    if entity.key and entity._original is None:
        entity._original = clone(entity)
    return entity
Is it possible to force strong consistency of the GAE datastore?
I have this code:
#!/usr/bin/env python
import os
import sys
import wsgiref.handlers

import webapp2
from google.appengine.ext import db
from google.appengine.ext.webapp import template
from google.appengine.ext.webapp.util import run_wsgi_app
class guestbook(db.Model):
    """One guestbook entry."""

    # Text of the entry; required.
    message = db.StringProperty(required=True)
    # Timestamp filled in via auto_now_add.
    when = db.DateTimeProperty(auto_now_add=True)
    # Author name; optional.
    who = db.StringProperty()
class ShowGuestbookPage(webapp2.RequestHandler):
    """Renders main.html with all guestbook entries, newest first."""

    def get(self):
        """Query the entries and write the rendered page to the response."""
        # Read from the Datastore
        newest_first = db.GqlQuery('SELECT * FROM guestbook ORDER BY when DESC')
        page = template.render('main.html', {'shouts': newest_first})
        self.response.out.write(page)
class MakeGuestbookEntry(webapp2.RequestHandler):
    """Stores a submitted guestbook entry and returns to the front page."""

    def post(self):
        """Create an entry from the 'message' and 'who' form fields."""
        form = self.request
        entry = guestbook(message=form.get('message'), who=form.get('who'))
        # Write into the datastore
        entry.put()
        self.redirect('/')
# URL routing table for the guestbook application.
app = webapp2.WSGIApplication(
    [
        ('/', ShowGuestbookPage),
        ('/make_entry', MakeGuestbookEntry),
    ],
    debug=True,
)
def main():
    """Hand the module-level WSGI application to run_wsgi_app."""
    run_wsgi_app(app)


# Only start serving when executed as a script.
if __name__ == "__main__":
    main()
What is the easiest way to get strong consistency?
I do not understand how to translate the explanations [1] from Google into the source code.
[1] https://developers.google.com/appengine/docs/java/datastore/structuring_for_strong_consistency
Thanks for any help.
Yes, you can, by using entity groups. Read about their advantages and disadvantages in the docs.
you can also define your own entity ids and get them "by id" which is always guaranteed consistent.
Get, put and delete operations are always strongly consistent. Ancestor queries are also strongly consistent.
The rest of the queries are NOT strongly consistent and there is not special config parameter to make them strongly consistent.
To enforce strong consistency two conditions have to apply:
The db.Model Entity has to have a ancestor relationship
AND your query has to filter by ancestor
Then all children will be queried using strong consistency.
A good explanation can be found here
I am trying to create a small django project of a football application to show the stats & stuff.. to start with I created two classes in my models.py.. with a many to many relation.. but for some reason it's throwing a strange Database Error: no such table: football_league_team
please any help is appreciated, thanks in advance.
from django.db import models
# Create your models here.
class Team(models.Model):
    """A football team; name, code and home must each be unique."""

    team_name = models.CharField(max_length=30, unique=True)
    team_code = models.CharField(max_length=4, unique=True)
    team_home = models.CharField(max_length=30, unique=True)
    team_registry_date = models.DateTimeField('Date of Registry')

    def __unicode__(self):
        """Display the team by its name."""
        return self.team_name
class League(models.Model):
    """A league, linked to its teams through a many-to-many relation."""

    league_name = models.CharField(max_length=30)
    league_code = models.CharField(max_length=4)
    league_division = models.IntegerField()
    team = models.ManyToManyField(Team)

    def __unicode__(self):
        """Display the league by its name."""
        return self.league_name
You removed the field football_league_team in one of your models. Django doesn't know about it and is still expecting said field. Depending on your Django version, there are several ways to reset the corresponding model.
Django 1.4 and lower
> ./manage.py reset <appname>
I believe it works for earlier versions of Django, not sure though. Keep in mind that this option will reset each models of your application as opposed to the below method which allow single table drops.
Django 1.5 and higher
> ./manage.py sqlclear <appname>
will print out the commands to clear the database from the application's models.
> ./manage.py dbshell
will allow you to use the sqlclear commands in order to drop the tables you want to reset.
I am using Django for a website where I have a database with users, people, locations, items and so on. Now I find that I need some extra information that requires one-to-many relations, like aliases, for most of these tables.
Should I (1) create a common alias table for all of these by using the content type framework (it will probably end up with billions of rows), or should I (2) create an alias table for each of these? If the latter, how do I auto-create a one-to-many table like this by just adding a single line like this
"alias = Alias()"
in each model? I'm sure I saw an app doing something like that a while ago; I think it was a reversion app of some kind. Even if the second method is not suited, I would love to understand how to do it. I do not know what to search for to find an explanation of this.
I plan to add Haystack with Solr to this, so method 2 might add much extra work there. But I do not have much experience with it yet, so I might be wrong.
PS: I ended up with method one.
I managed to do what I wanted in method 2: easily generate one-to-many fields. I am not sure if this is the easiest way, or the best way. If someone has a better way of doing it, I would love to learn it. I am a long way from being a Django expert, so I might have meddled with some unnecessarily complex stuff to do what I wanted.
This example creates an easy way of adding a one-to-many alias relationship.
Alias Managers
class AliasManagerDescriptor(object):
    """Descriptor that hands out an AliasManager for the alias model."""

    def __init__(self, model, fkName):
        self.model = model
        self.fkName = fkName

    def __get__(self, instance, owner):
        """Return a manager bound to ``instance``.

        On class-level access ``instance`` is None, which is also the
        manager's own default, so the resulting manager is unbound.
        """
        return AliasManager(self.model, self.fkName, instance)
class AliasManager(models.Manager):
    """Manager for an alias model, optionally bound to one linked instance."""

    def __init__(self, model, fkName, instance=None):
        """Remember the alias model, the FK name and the bound instance.

        Args:
            model: the alias model this manager operates on.
            fkName: name of the FK linking the alias model to the linked model.
            instance: linked-model instance to restrict to, or None.
        """
        super(AliasManager, self).__init__()
        self.model = model
        self.instance = instance
        # Name of FK linking this model to linked model
        self.fkName = fkName

    def get_query_set(self):
        """Return the alias query set.

        Without a bound instance this is the unfiltered query set;
        otherwise only the aliases linked to that instance are returned.
        """
        queryset = super(AliasManager, self).get_query_set()
        if self.instance is None:
            return queryset
        if isinstance(self.instance._meta.pk, models.OneToOneField):
            # TODO: check whether this branch works; it has not been tested.
            lookup = {self.instance._meta.pk.name + "_id": self.instance.pk}
        else:
            lookup = {self.fkName: self.instance.pk}
        return queryset.filter(**lookup)

    def create(self, **kwargs):
        """Create an alias and return it.

        If the FK is not given, it is set to the bound instance of the
        linked model.

        Returns:
            Whatever the parent manager's ``create()`` returns.
        """
        if self.fkName not in kwargs:
            kwargs[self.fkName] = self.instance
        return super(AliasManager, self).create(**kwargs)
Alias Models
class Alias(object):
    """Class attribute that adds a one-to-many alias model to its owner."""

    def contribute_to_class(self, cls, name):
        """Build the alias model for ``cls`` and expose it as ``cls.<name>``."""
        self.manager_name = name
        alias_model = self.create_alias_model(cls)
        fk_name = cls._meta.object_name.lower()
        setattr(cls, name, AliasManagerDescriptor(alias_model, fk_name))

    def create_alias_model(self, model):
        """Create and return the ``<Model>_alias`` model class.

        The generated model has a ``name`` field and a foreign key to
        ``model`` named after the lower-cased model name.
        """
        owner_name = model._meta.object_name
        attrs = {
            "name": models.CharField(max_length=255),
            # A plain ForeignKey is used for the link; the author was unsure
            # whether a custom descriptor would be the better choice.
            owner_name.lower(): models.ForeignKey(model),
            '__unicode__': lambda self: u'%s' % self.name,
            '__module__': model.__module__,
        }
        attrs['Meta'] = type('Meta', (), self.get_meta_options(model))
        return type('%s_alias' % owner_name, (models.Model,), attrs)

    def get_meta_options(self, model):
        """Return the attributes for the generated Meta class (none so far)."""
        return {}
"""class AliasObjectDescriptor(object):
def __init__(self, model):
self.model = model
def __get__(self, instance, owner):
values = (getattr(instance, f.attname) for f in self.model._meta.fields)
return self.model(*values)"""
Person Model - Only need to add "alias = Alias()" to a model to add a one-to-many alias field.
class Person(models.Model):
    """Example model that gains an alias relation from one extra line."""

    name = models.CharField(max_length=30, blank=True, null=True)
    age = models.IntegerField(blank=True, null=True)
    # Adds the one-to-many alias relation.
    alias = Alias()
Now I can do something like this:
# The person is saved first, so it has a primary key before an alias is attached.
per = Person(name="Per",age=99)
per.save()
# The bound manager fills in the foreign key when it is not passed explicitly.
per.alias.create(name="Mr.P")
# Names of all aliases linked to this person.
per_alias = per.alias.all().values_list("name",flat=True)