bulk update/delete entities of different kind in db.run_in_transaction - google-app-engine

Here is pseudo-code for bulk updating/deleting entities of different kinds in a single transaction. Note that the Album and Song entities have AlbumGroup as their root entity (i.e. they share the same parent entity).
class Album:
pass
class Song:
album = db.ReferenceProperty(reference_class=Album,collection_name="songs")
def bulk_update_album_group(album):
    """Write the album and apply pending song changes.

    The album itself is always saved. Every song reached through the
    ``album.songs`` back-reference is queued for ``db.put`` when its
    ``is_updated`` flag is truthy and for ``db.delete`` when its
    ``is_deleted`` flag is truthy.
    """
    to_put = [album]
    to_delete = []
    # Iterating the back-reference runs a datastore query for the songs.
    for track in album.songs:
        if track.is_updated:
            to_put.append(track)
        if track.is_deleted:
            to_delete.append(track)
    db.put(to_put)
    db.delete(to_delete)
a = Album.all().filter("...").get()
# Run the bulk update/delete for the fetched album inside one transaction.
db.run_in_transaction(bulk_update_album_group, a)
But I hit the well-known "Only ancestor queries are allowed inside transactions" error when iterating a back-reference property such as "album.songs". I guess an ancestor() filter does not help, because those entities have been modified in memory.
So I modified the example like this: prepare all updated/deleted entities before starting the transaction.
def bulk_update_album2(album):
    """Collect the changed songs first, then write them transactionally.

    The album is always saved. Songs flagged ``is_updated`` are put and
    songs flagged ``is_deleted`` are deleted; both writes happen inside a
    single ``db.run_in_transaction`` call.
    """
    to_put = [album]
    to_delete = []
    # The back-reference is iterated here, before the transaction starts.
    for track in album.songs:
        if track.is_updated:
            to_put.append(track)
        if track.is_deleted:
            to_delete.append(track)

    def txn(updated, deleted):
        # Only key-based writes happen inside the transaction.
        db.put(updated)
        db.delete(deleted)

    db.run_in_transaction(txn, to_put, to_delete)
Now I found that iterating a back-reference property forces a reload of the existing entities. So re-iterating the back-reference property after modifying the entities should be avoided!!
All I want to verify is:
When I need to bulk update/delete many entities of different kinds,
is there a good coding pattern for this situation?
Is my last code a good one?
Here is the full code example:
from google.appengine.ext import webapp
from google.appengine.ext.webapp import util
import logging
from google.appengine.ext import db
class Album(db.Model):
    """Datastore model for an album identified by its name."""

    name = db.StringProperty()

    def __repr__(self):
        # Reading self.songs queries the datastore for the album's songs.
        songs = [track for track in self.songs]
        return "%s%s" % (self.name, songs)
class Song(db.Model):
    """Datastore model for a song that belongs to an Album."""

    # Back-reference: the referenced Album exposes its songs as `songs`.
    album = db.ReferenceProperty(reference_class=Album,
                                 collection_name='songs')
    name = db.StringProperty()
    playcount = db.IntegerProperty(default=0)

    def __repr__(self):
        return "%s(%d)" % (self.name, self.playcount)
def create_album(name):
    """Store a new album called ``name`` with five songs and return it.

    Each song is created with the album as both its ``parent`` and its
    ``album`` reference, and is named ``song#0`` .. ``song#4``.
    """
    album = Album(name=name)
    album.put()
    for index in range(5):
        Song(parent=album, album=album, name='song#%d' % index).put()
    return album
def play_all_songs(album):
    """Bump every song's play count, then try to save inside a transaction.

    This is the variant reported to fail with "BadRequestError: Only
    ancestor queries are allowed inside transactions", because the
    transactional function iterates ``album.songs`` itself.
    """
    logging.info(album)
    # play all songs
    for track in album.songs:
        track.playcount += 1
        logging.info(track)
    # play count also 0 here
    logging.info(album)

    def save_play_count(album):
        # Re-reads the songs through the back-reference inside the txn.
        db.put([track for track in album.songs])

    db.run_in_transaction(save_play_count, album)
def play_all_songs2(album):
    """Bump every song's play count and save them with one ``db.put``.

    The modified song instances are kept in a list and that list is
    written directly, without a transaction.
    """
    logging.info("loading : %s" % album)
    # play all songs
    played = []
    for track in album.songs:
        track.playcount += 1
        played.append(track)
    logging.info("updated: %s" % played)
    db.put(played)
    logging.info("after save: %s" % album)
def play_all_songs3(album):
    """Bump every song's play count and save the list in a transaction.

    The modified instances are collected before the transaction starts;
    the transactional function only calls ``db.put`` on that list.
    """
    logging.info("loading : %s" % album)
    # play all songs
    played = []
    for track in album.songs:
        track.playcount += 1
        played.append(track)
    # reload
    for _ in album.songs:
        pass
    logging.info("updated: %s" % played)

    def bulk_save_play_count(updated):
        db.put(updated)

    db.run_in_transaction(bulk_save_play_count, played)
    logging.info("after save: %s" % album)
class MainHandler(webapp.RequestHandler):
    """Request handler that exercises the play-count examples."""

    def get(self):
        self.response.out.write('Hello world!')
        # Reuse the album named 'test', creating it transactionally if absent.
        album = (Album.all().filter('name =', 'test').get()
                 or db.run_in_transaction(create_album, 'test'))
        # BadRequestError: Only ancestor queries are allowed inside transactions.
        #play_all_songs(album)
        # ok
        #play_all_songs2(album)
        play_all_songs3(album)
def main():
    """Build the WSGI application routing '/' to MainHandler and run it."""
    routes = [('/', MainHandler)]
    util.run_wsgi_app(webapp.WSGIApplication(routes, debug=True))
if __name__ == '__main__':
main()

Please note that the ReferenceProperty alone is not enough to put the entities in the same entity group. When you create a Song model you should pass a parent argument with the model's parent (e.g., the Album).
It looks like this:
album = Album.all().filter("...").get()
new_song = Song(name='Seven Nation Army', parent=album)
new_song.save()
See the documentation about ancestors.

Related

How to create an object attribute without writing to database -- peewee -- python

Maybe I have an understanding problem. I am trying to create 2 tables in one database, but additionally I need some temporary values in one class that I don't want to write to the database.
I am trying to switch to peewee and have read the documentation, but I could not find a solution on my own.
Without peewee I would write an __init__ method where I set my attributes. But where do I have to put them now?
from peewee import *
import datetime
db = SqliteDatabase('test.db', pragmas={'foreign_keys': 1})
class BaseModel(Model):
class Meta:
database = db
class Sensor(BaseModel):
id = IntegerField(primary_key=True)
sort = IntegerField()
name = TextField()
#def __init__(self):
#self.sometemporaryvariable = "blabla"
def meineparameter(self, hui):
self.hui = hui
print(self.hui)
class Sensor_measure(BaseModel):
id = ForeignKeyField(Sensor, backref="sensorvalues")
timestamp = DateTimeField(default=datetime.datetime.now)
value = FloatField()
class Meta:
primary_key = CompositeKey("id", "timestamp")
db.connect()
db.create_tables([Sensor_measure, Sensor])
sensor1 = Sensor.create(id=2, sort=20, name="Sensor2")
#sensor1.sometemporaryvariable = "not so important to write to the database"
sensor1.save()
Remember to call super() whenever overriding a method in a subclass:
class Sensor(BaseModel):
    """Sensor model that also carries a plain, non-field attribute."""

    id = IntegerField(primary_key=True)
    sort = IntegerField()
    name = TextField()

    def __init__(self, **kwargs):
        # Plain instance attribute; it is not declared as a field above.
        self.sometemporaryvariable = "blabla"
        # Let the base model process the field values.
        super().__init__(**kwargs)

django how to save a big model with for loop in views.py?

I have a very big model in models.py:
simplified version is:
class MyModel(models.Model):
item_1 = models.FloatField(null=True, blank=True)
...
item_20 = models.FloatField(null=True, blank=True)
in views.py:
def form_valid(self, form_class):
instance = form_class.save(commit=False)
for i in range(1, 20):
name = 'item_' + str(i)
instance.name = i
With this, the field names 'item_1' ... 'item_20' on the instance are not recognized. Instead, 'name' is added to the instance as a new attribute...
How can I iterate and save my model?
Any suggestion?
Thanks!!!
You should probably use setattr in order to loop through the fields and set the values in them. Try this:
def form_valid(self, form_class):
    """Fill the numbered ``item_N`` fields of the unsaved form instance.

    ``form_class.save(commit=False)`` yields the model instance without
    writing it; each field is then assigned by name with ``setattr``.
    """
    instance = form_class.save(commit=False)
    # range() excludes its stop value, so 21 is needed to reach item_20.
    for i in range(1, 21):
        name = 'item_' + str(i)
        setattr(instance, name, value)  # Where value is the data you wanted to save in the field `name`
Similarly, use getattr() to read the data when looping over the fields of the class instance.

Overriding validation for Django for base64 string for model.imagefield

I am using Angular and Bootstrap to serve my forms. If a user uploads an image, Angular serves it in the "data:" format, but Django is looking for a file type. I have fixed this issue by overriding both perform_authentication (to convert the image to a file) and perform_create (to inject my user_id). Is there a better way to override?
I'd rather not override my view. I'd rather override the way Django validates ImageFields. What I want to do is check whether the passed value is a base64 string and, if it is, convert it to a file type and then validate the ImageField. The code below works as is; I just don't feel it is optimal.
Here is my view:
# List/create API view for Credential objects, guarded by
# IsCredentialOwnerOrAdmin and serialized with CredentialSerializer.
class UserCredentialList(generics.ListCreateAPIView):
permission_classes = (IsCredentialOwnerOrAdmin,)
serializer_class = CredentialSerializer
"""
This view should return a list of all the purchases
for the currently authenticated user.
"""
# Restricts the listing to credentials whose member is the requesting user.
def get_queryset(self):
"""
This view should return a list of all models by
the maker passed in the URL
"""
user = self.request.user
return Credential.objects.filter(member=user)
# Saves the new credential with the requesting user's id as member_id.
def perform_create(self, serializer):
serializer.save(member_id=self.request.user.id)
# On POST, pops 'document_image' from request.data and, when it is a
# "data:...;base64,..." string, replaces it with a ContentFile built from the
# decoded bytes.
def perform_authentication(self, request):
if request.method == 'POST':
data = request.data.pop('document_image', None)
from django.core.files.base import ContentFile
import base64
import six
import uuid
# Check if this is a base64 string
if isinstance(data, six.string_types):
# Check if the base64 string is in the "data:" format
if 'data:' in data and ';base64,' in data:
# Break out the header from the base64 content
header, data = data.split(';base64,')
# Try to decode the file. Return validation error if it fails.
try:
decoded_file = base64.b64decode(data)
# NOTE(review): on Python 3, b64decode raises binascii.Error (a ValueError)
# for malformed input, which this clause would not catch - confirm the
# targeted Python version.
except TypeError:
# NOTE(review): fail() is not defined in this class; presumably carried over
# from a serializer field - verify it exists on the view.
self.fail('invalid_image')
# Generate file name:
file_name = str(uuid.uuid4())[:12] # 12 characters are more than enough.
# Get the file name extension:
import imghdr
# The decoded bytes are passed as the second argument, so the type is
# detected from the content rather than from a file on disk.
file_extension = imghdr.what(file_name, decoded_file)
file_extension = "jpg" if file_extension == "jpeg" else file_extension
complete_file_name = "%s.%s" % (file_name, file_extension,)
data = ContentFile(decoded_file, name=complete_file_name)
request.data['document_image'] = data
# Presumably mirrors the default implementation, where reading request.user
# is what triggers authentication - TODO confirm.
request.user
And here is my serializer:
class CredentialSerializer(serializers.ModelSerializer):
class Meta:
model = Credential
fields = (
'id',
'credential_type',
'credential_number',
'date_received',
'is_verified',
'date_verified',
'document_image',
)
And here is my model:
class Credential(models.Model):
"""Used to store various credentials for member validation."""
document_image = models.ImageField(
upload_to=get_upload_path(instance="instance",
filename="filename.ext",
path='images/credentials/'))
PASSENGER = 'P'
OWNER = 'O'
CAPTAIN = 'C'
CREDENTIAL_CHOICES = (
(PASSENGER, 'Passenger'),
(OWNER, 'Owner'),
(CAPTAIN, 'Captain'),
)
credential_type = models.CharField(max_length=1,
choices=CREDENTIAL_CHOICES,
default=PASSENGER)
credential_number = models.CharField(max_length=255)
date_received = models.DateTimeField(auto_now_add=True)
is_verified = models.BooleanField(default=False)
date_verified = models.DateTimeField(blank=True, null=True)
member = models.ForeignKey(settings.AUTH_USER_MODEL,
related_name='credentials')
I used the link below to help me; now I just want to figure out how to override the proper method:
Django REST Framework upload image: "The submitted data was not a file"
Well, I've made one change since posting: I have moved this function to my serializer, and I now override the is_valid method instead, and that works as well. At least it's not in my view anymore.

effective counting of objects

I have 2 models:
Category(models.Model):
name = models.CharField(max_length=30)
no_of_posts = models.IntegerField(default=0) # a denormalised field to store post count
Post(models.Model):
category = models.ForeignKey(Category)
title = models.CharField(max_length=100)
desc = models.TextField()
user = models.ForeignKey(User)
pub_date = models.DateTimeField(null=True, blank=True)
first_save = models.BooleanField()
Since I always want to show the number of posts along with each category, I count and store them every time a user creates or deletes a post, this way:
## inside Post model ##
def save(self, *args, **kwargs):
    """Save the post, then refresh its category's stored post count.

    ``pub_date`` is stamped with the current time when it is still unset
    and ``first_save`` is truthy. Extra arguments are forwarded to the
    base ``save`` so existing no-argument callers keep working.
    """
    # Both names are instance attributes; bare names would be unbound locals.
    if not self.pub_date and self.first_save:
        self.pub_date = datetime.datetime.now()
    # counting & saving category posts when a post is 1st published
    category = self.category
    super(Post, self).save(*args, **kwargs)
    # Recount after the post is stored so the new post is included.
    category.no_of_posts = Post.objects.filter(category=category).count()
    category.save()
def delete(self):
    """Delete the post, then refresh its category's stored post count."""
    # Keep the category before deleting the post it is read from.
    parent_category = self.category
    super(Post, self).delete()
    remaining = Post.objects.filter(category=parent_category).count()
    parent_category.no_of_posts = remaining
    parent_category.save()
........
My question is whether, instead of counting every object, can we not use something like:
category.no_of_posts += 1 // in save() # and
category.no_of_posts -= 1 // in delete()
Or is there a better solution!
Oh, I missed that! I updated the post model to include the relationship!
Yes, a much better solution:
from django.db.models import Count
class CategoryManager(models.Manager):
def get_query_set(self, *args, **kwargs):
qs = super(CategoryManager, self).get_query_set(*args, **kwargs)
return qs.annotate(no_of_posts=Count('post'))
class Category(models.Model):
...
objects = CategoryManager()
Since you didn't show the relationship between Post and Category, I guessed at the Count('post') part. You might have to fiddle with that.
Oh, and you'll want to get rid of the no_of_posts field from the model. It's not necessary with this. Or, you can just change the name of the annotation.
You'll still be able to get the post count with category.no_of_posts but you're making the database do the legwork for you.

overwrite existing entity via bulkloader.Loader

I was going to use CSV-based export/import for large data with App Engine. My idea was simple.
The first column of the CSV would be the key of the entity.
If it's not empty, the row refers to an existing entity and should overwrite the old one.
Otherwise, the row is a new entity and a new one should be created.
I could export the key of the entity by adding a key property.
class FrontExporter(bulkloader.Exporter):
    """Exporter for the 'Front' kind: the entity key, then its name."""

    def __init__(self):
        columns = [
            ('__key__', str, None),
            ('name', str, None),
        ]
        bulkloader.Exporter.__init__(self, 'Front', columns)
But when I tried to upload the CSV, it failed because bulkloader.Loader.generate_key() only supplies a "key_name", not the "key" itself. That means all exported entities in the CSV would need a unique 'key_name' if I want to modify and re-upload them.
class FrontLoader(bulkloader.Loader):
    """Loader for the 'Front' kind whose first CSV column holds the key."""

    def __init__(self):
        columns = [
            # The first column is consumed by generate_key(), not stored.
            ('_UNUSED', lambda x: None),
            ('name', lambda x: x.decode('utf-8')),
        ]
        bulkloader.Loader.__init__(self, 'Front', columns)

    def generate_key(self, i, values):
        # first column is key
        keystr = values[0]
        return keystr if len(keystr) != 0 else None
I also tried to load the key directly without using generate_key(), but both attempts failed.
class FrontLoader(bulkloader.Loader):
def __init__(self):
bulkloader.Loader.__init__(self, 'Front', [
('Key', db.Key), # not working. just create new one.
('__key__', db.Key), # same...
So, how can I overwrite an existing entity which has no 'key_name'? It would be horrible if I had to give a unique name to every entity.....
With the help of the first answer, I was able to handle this problem. :)
def create_entity(self, values, key_name=None, parent=None):
    """Build the entity for one CSV row, overwriting an existing one if keyed.

    Args:
      values: list or tuple of column strings, one per loader property.
      key_name: value produced by generate_key() for the row. None creates
        a new entity; anything else is passed to model_class.get() to
        fetch the entity that is overwritten.
      parent: optional parent handed to the model constructor when a new
        entity is created.

    Returns:
      The result of handle_entity(); a single entity is wrapped in a list
      and a falsy result is returned unchanged.

    Raises:
      TypeError: handle_entity() produced something that is not a db.Model.
    """
    Validate(values, (list, tuple))
    assert len(values) == len(self._Loader__properties), (
        'Expected %d columns, found %d.' %
        (len(self._Loader__properties), len(values)))
    model_class = GetImplementationClass(self.kind)

    # Constructor-only arguments; kept apart from the column values below.
    properties = {
        'key_name': key_name,
        'parent': parent,
    }
    column_names = []
    for (name, converter), val in zip(self._Loader__properties, values):
        if converter is bool and val.lower() in ('0', 'false', 'no'):
            val = False
        properties[name] = converter(val)
        column_names.append(name)

    if key_name is None:
        entity = model_class(**properties)
    else:
        # NOTE(review): get() presumably returns None for an unknown key, in
        # which case the setattr below raises AttributeError - confirm.
        entity = model_class.get(key_name)
        # Copy only the column values. Assigning 'key_name' or 'parent' as
        # attributes is not how a key is set, and an instance attribute
        # named 'parent' would shadow the model's parent() method.
        for name in column_names:
            setattr(entity, name, properties[name])

    entities = self.handle_entity(entity)
    if entities:
        if not isinstance(entities, (list, tuple)):
            entities = [entities]
        for entity in entities:
            if not isinstance(entity, db.Model):
                raise TypeError('Expected a db.Model, received %s (a %s).' %
                                (entity, entity.__class__))
    return entities
def generate_key(self, i, values):
    """Return the key held in the first CSV column, or None if it is blank.

    Args:
      i: row number supplied by the loader (unused).
      values: the row's column values; the first one is the key.

    Returns:
      The first column value unchanged, or None when the row is empty or
      the value is None, whitespace only, or one of the placeholders
      '-' and '.' (ignoring surrounding whitespace).
    """
    # first column is key
    key = values[0] if values else None
    if key is None:
        return None
    # Compare the trimmed text so '  ' or ' - ' also count as "no key".
    text = key.strip() if hasattr(key, 'strip') else key
    if text in ('', '-', '.'):
        return None
    return key
Your best option is probably to override create_entity. You'll need to copy most of the existing code there, but modify the constructor to supply a key argument instead of a key_name argument.

Resources