Query repeated property starts with - google-app-engine

Say I have a movie database where you can search by title.
I have a Movie model that looks like the following (simplified)
class Movie(ndb.Model):
name = ndb.StringProperty(required=True)
queryName = ndb.ComputedProperty(lambda self: [w.lower() for w in self.name.split()], repeated=True)
#staticmethod
def parent_key():
return ndb.Key(Movie, 'parent')
The queryName is just a lower case list of the words in Movie.name. The parent_key() is just for the query basically
If I was searching for the movie Forest Gump, I would want it to show up for the following search terms (and more, these are just examples)
'fo' - 'forest' starts with 'fo'
'gu' - 'gump' starts with 'gu'
'gu fo' - 'forest' starts with 'fo' and 'gump' starts with 'gu'
I can get the first two easily with a query similar to the following
movies = Movie\
.query(ancestor=Movie.parent_key())\
.filter(Movie.queryName >= x)\
.filter(Movie.queryName < x + u'\ufffd')\
.feth(10)
where x is 'fo' or 'gu'. Again, this is simply a query that works not my actual code. That comes later. If I expand a bit on the above query to look for two words I thought I could do something like the following however, it doesn't work.
movies = Movie\
.query(ancestor=Movie.parent_key())\
.filter(Movie.queryName >= 'fo')\
.filter(Movie.queryName < 'fo' + u'\ufffd')\
.filter(Movie.queryName >= 'gu')\
.filter(Movie.queryName < 'gu' + u'\ufffd')\
.feth(10)
Now , this doesn't work because it is looking in queryName to see if it has any item which starts with 'fo' and starts with 'gu'. Since that could never be true for a single item in the list, the query returns nothing.
The question is how do you query for Movies which have a queryName with an item that starts with 'fo' AND an item that starts with 'gu'?
Actual Code:
class MovieSearchHandler(BaseHandler):
def get(self):
q = self.request.get('q')
if q:
q = q.replace('&', '&').lower()
filters = self.create_filter(*q.split())
if filters:
movies = Movie\
.query(ancestor=Movie.parent_key())\
.filter(*filters)\
.fetch(10)
return self.write_json([{'id': m.movieId, 'name': m.name} for m in movies])
return self.write_json([])
def create_filter(self, *args):
filters = []
if args:
for prefix in args:
filters.append(Movie.queryName >= prefix)
filters.append(Movie.queryName < prefix + u'\ufffd')
return filters
Update:
My current solution is
class MovieSearchHandler(BaseHandler):
def get(self):
q = self.request.get('q')
if q:
q = q.replace('&', '&').lower().split()
movieFilter, reducable = self.create_filter(*q)
if movieFilter:
movies = Movie\
.query(ancestor=Movie.parent_key())\
.filter(movieFilter)\
.fetch(None if reducable else 10)
if reducable:
movies = self.reduce(movies, q)
return self.write_json([{'id': m.movieId, 'name': m.name} for m in movies])
return self.write_json([])
def create_filter(self, *args):
if args:
if len(args) == 1:
prefix = args[0]
return ndb.AND(Movie.queryName >= prefix, Movie.queryName < prefix + u'\ufffd'), False
ands = [ndb.AND(Movie.queryName >= prefix, Movie.queryName < prefix + u'\ufffd')
for prefix in args]
return ndb.OR(*ands), True
return None, False
def reduce(self, movies, terms):
reducedMovies = []
for m in movies:
if len(reducedMovies) >= 10:
return reducedMovies
if all(any(n.startswith(t) for n in m.queryName) for t in terms):
reducedMovies.append(m)
return reducedMovies
Still looking for something better though
Thanks

Related

Binary search sort, Time out

Climbing the Leaderboard. Terminated due to timeout :(
A HackerRank challenge of algorithm category.
My approach: To reduce the overhead when large arrays as input, I used map() and a Binar search function with it. No luck. I got 7/11 test cases passed. Can I ask for some help to improve my code. The code below is my solution so far. I need help to improve it.
Problem description
class Score_board:
def climbingLeaderboard(self, scores, alice):
self.boardScores = sorted(set(scores), reverse=True)
alice_rank = list(map(self.rankBoard,alice))
return alice_rank
def rankBoard(self, current):
score_bank = self.boardScores
midIndex = len(score_bank)//2
while midIndex > 0:
if current in score_bank:
return(self.boardScores.index(current)+1)
elif current < score_bank[midIndex]:
score_bank = score_bank[midIndex:]
midIndex = len(score_bank)//2
elif current > score_bank[midIndex]:
score_bank = score_bank[:midIndex]
midIndex = len(score_bank)//2
else:
boardIndex = self.boardScores.index(score_bank[0])
if current in score_bank:
return(self.boardScores.index(current)+1)
elif current > score_bank[0] and boardIndex == 0:
return(1)
elif current < score_bank[0] and boardIndex == (len(self.boardScores)-1):
return(boardIndex + 2)
elif current > score_bank[0]:
return(boardIndex)
elif current < score_bank[0]:
return(boardIndex + 2)
if __name__ == '__main__':
scores_count = int(input())
scores = list(map(int, input().rstrip().split()))
alice_count = int(input())
alice = list(map(int, input().rstrip().split()))
coord = Score_board()
result = coord.climbingLeaderboard(scores, alice)
print('\n'.join(map(str, result)))
print('\n')

OpenMDAO v0.13: performing an optimization when using multiple instances of a components initiated in a loop

I am setting up an optimization in OpenMDAO v0.13 using several components that are used many times. My assembly seems to be working just fine with the default driver, but when I run with an optimizer it does not solve. The optimizer simply runs with the inputs given and returns the answer using those inputs. I am not sure what the issue is, but I would appreciate any insights. I have included a simple code mimicking my structure that reproduces the error. I think the problem is in the connections, summer.fs does not update after initialization.
from openmdao.main.api import Assembly, Component
from openmdao.lib.datatypes.api import Float, Array, List
from openmdao.lib.drivers.api import DOEdriver, SLSQPdriver, COBYLAdriver, CaseIteratorDriver
from pyopt_driver.pyopt_driver import pyOptDriver
import numpy as np
class component1(Component):
x = Float(iotype='in')
y = Float(iotype='in')
term1 = Float(iotype='out')
a = Float(iotype='in', default_value=1)
def execute(self):
x = self.x
a = self.a
term1 = a*x**2
self.term1 = term1
print "In comp1", self.name, self.a, self.x, self.term1
def list_deriv_vars(self):
return ('x',), ('term1',)
def provideJ(self):
x = self.x
a = self.a
dterm1_dx = 2.*a*x
J = np.array([[dterm1_dx]])
print 'In comp1, J = %s' % J
return J
class component2(Component):
x = Float(iotype='in')
y = Float(iotype='in')
term1 = Float(iotype='in')
f = Float(iotype='out')
def execute(self):
y = self.y
x = self.x
term1 = self.term1
f = term1 + x + y**2
self.f = f
print "In comp2", self.name, self.x, self.y, self.term1, self.f
class summer(Component):
total = Float(iotype='out', desc='sum of all f values')
def __init__(self, size):
super(summer, self).__init__()
self.size = size
self.add('fs', Array(np.ones(size), iotype='in', desc='f values from all cases'))
def execute(self):
self.total = sum(self.fs)
print 'In summer, fs = %s and total = %s' % (self.fs, self.total)
class assembly(Assembly):
x = Float(iotype='in')
y = Float(iotype='in')
total = Float(iotype='out')
def __init__(self, size):
super(assembly, self).__init__()
self.size = size
self.add('a_vals', Array(np.zeros(size), iotype='in', dtype='float'))
self.add('fs', Array(np.zeros(size), iotype='out', dtype='float'))
print 'in init a_vals = %s' % self.a_vals
def configure(self):
# self.add('driver', SLSQPdriver())
self.add('driver', pyOptDriver())
self.driver.optimizer = 'SNOPT'
# self.driver.pyopt_diff = True
#create this first, so we can connect to it
self.add('summer', summer(size=len(self.a_vals)))
self.connect('summer.total', 'total')
print 'in configure a_vals = %s' % self.a_vals
# create instances of components
for i in range(0, self.size):
c1 = self.add('comp1_%d'%i, component1())
c1.missing_deriv_policy = 'assume_zero'
c2 = self.add('comp2_%d'%i, component2())
self.connect('a_vals[%d]' % i, 'comp1_%d.a' % i)
self.connect('x', ['comp1_%d.x'%i, 'comp2_%d.x'%i])
self.connect('y', ['comp1_%d.y'%i, 'comp2_%d.y'%i])
self.connect('comp1_%d.term1'%i, 'comp2_%d.term1'%i)
self.connect('comp2_%d.f'%i, 'summer.fs[%d]'%i)
self.driver.workflow.add(['comp1_%d'%i, 'comp2_%d'%i])
self.connect('summer.fs[:]', 'fs[:]')
self.driver.workflow.add(['summer'])
# set up main driver (optimizer)
self.driver.iprint = 1
self.driver.maxiter = 100
self.driver.accuracy = 1.0e-6
self.driver.add_parameter('x', low=-5., high=5.)
self.driver.add_parameter('y', low=-5., high=5.)
self.driver.add_objective('summer.total')
if __name__ == "__main__":
""" the result should be -1 at (x, y) = (-0.5, 0) """
import time
from openmdao.main.api import set_as_top
a_vals = np.array([1., 1., 1., 1.])
test = set_as_top(assembly(size=len(a_vals)))
test.a_vals = a_vals
print test.a_vals
test.x = 2.
test.y = 2.
tt = time.time()
test.run()
print "Elapsed time: ", time.time()-tt, "seconds"
print 'result = ', test.summer.total
print '(x, y) = (%s, %s)' % (test.x, test.y)
print test.fs
I played around with your model, and found that the following line caused problems:
#self.connect('summer.fs[:]', 'fs[:]')
When I commented it out, I got the optimization to move.
I am not sure what is happening there, but the graph transformations sometimes have some issues with component input nodes that are promoted as outputs on the assembly boundary. If you still want those values to be available on the assembly, you could try promoting the outputs from the comp2_n components instead.

How does it make query ndb.AND condition more smart

I try to make query for tag search.
tags: how many tags ex.3
q: array of tags ex.['foo','hoo','poo']
def queryByTags(cls, tags, q):
def one():
qry = models.Card.query(models.Card.tags_value == q[0])
return qry
def two():
qry = models.Card.query(ndb.AND(models.Card.tags_value == q[0],
models.Card.tags_value == q[1]))
return qry
def three():
qry = models.Card.query(ndb.AND(models.Card.tags_value == q[0],
models.Card.tags_value == q[1],
models.Card.tags_value == q[2]))
return qry
tags_len = {1: one,
2: two,
3: three,
}
return tags_len[tags]()
This method can use up to 3 tags. I can copy code myself and extend it until 7,8,9...
It is very sad way...
Is there any smart way?
In pseudo python-ndb (I didn't run my code but you'll get it) I would say that a way would be to do:
cards_count = Card.query().filter(tags_value==q[0])\
.filter(tags_value==q[1])\
.filter(tags_value==q[2]).count()
or if iterating dynamic array (unknown length)
cards_count = Card.query()
for value in q:
q = q.filter(tags_value==value)
cards_count = q.count()

How can I have model field names (or operators) in a variable?

I'm trying to refactor my little voting functions. I've got two of them which do almost exactly the same thing, only to different model fields. Voters can add stars to a song if they like it, and they can add flags to a song if they dislike it. The voters are then associated with the song via a many2many relationship, so it can be determined if they already voted on that song. If so, a star (or a flag) is removed from the song and the relationship between voter and song is removed also.
Here is the function that adds or removes stars:
# models.py
class UserProfile(models.Model):
user = models.ForeignKey(User, unique = True, related_name = 'profile')
liked_songs = models.ManyToManyField('Song', blank = True, null = True)
disliked_songs = models.ManyToManyField('Song', blank = True, null = True)
class Song(models.Model):
song_id = models.CharField(max_length = 36, primary_key = True)
last_accessed = models.DateTimeField()
stars = models.IntegerField(max_length = 5, default = 0)
flags = models.IntegerField(max_length = 5, default = 0)
# views.py
def vote(request, song_id, task): # 'task' should be either 'stars' or 'flags'
song = Song.objects.get(song_id = song_id)
voter = request.user.get_profile()
# Voter does not already like the song
if not voter.liked_songs.filter(song_id = song.song_id): # (*)
try:
# Increase vote, set 'last_accessed', add Song to UserProfile.liked_songs
Song.objects.filter(song_id = song_id).update(
stars = F('stars') + 1, # (*)
last_accessed = datetime.datetime.now()
)
voter.liked_songs.add(song) # (*)
except:
raise
else:
try:
# Decrease vote, set 'last_accessed', remove Song from UserProfile.liked_songs
Song.objects.filter(song_id = song_id).update(
stars = F('stars') - 1, # (*)
last_accessed = datetime.datetime.now()
)
voter.liked_songs.remove(song) # (*)
except:
raise
return "Done."
The function that adds or removes flags is exactly the same, except for the marked (*) lines:
if not voter.disliked_songs.filter(song_id = song.song_id)
flags = F('flags') + 1,
flags = F('flags') - 1,
voter.disliked_songs.add(song)
voter.disliked_songs.remove(song)
Now here's my first question: What do I have to do to use only one function for liking and disliking a song? I have already introduced the argument task but I can't seem to write task = F('task') + 1,, because there is no field name 'task' in the Song model.
Bonus question: The only difference between stars = F('stars') + 1, and stars = F('stars') - 1, is the operator (+ or -). Is there a way to have an operator in a variable, i.e. task = F('task') myoperator 1,?
Answer for Bonus question: Why dont you have a function that accepts the operator as argument and evaluates the expression based on argument?, like the following example:
>>> def f(op):
... return (eval("3"+op+"2"))
>>> f("+")
5
>>> f("-")
1

Is it better to change the db schema?

I'm building a web app with django. I use postgresql for the db. The app code is getting really messy(my begginer skills being a big factor) and slow, even when I run the app locally.
This is an excerpt of my models.py file:
REPEATS_CHOICES = (
(NEVER, 'Never'),
(DAILY, 'Daily'),
(WEEKLY, 'Weekly'),
(MONTHLY, 'Monthly'),
...some more...
)
class Transaction(models.Model):
name = models.CharField(max_length=30)
type = models.IntegerField(max_length=1, choices=TYPE_CHOICES) # 0 = 'Income' , 1 = 'Expense'
amount = models.DecimalField(max_digits=12, decimal_places=2)
date = models.DateField(default=date.today)
frequency = models.IntegerField(max_length=2, choices=REPEATS_CHOICES)
ends = models.DateField(blank=True, null=True)
active = models.BooleanField(default=True)
category = models.ForeignKey(Category, related_name='transactions', blank=True, null=True)
account = models.ForeignKey(Account, related_name='transactions')
The problem is with date, frequency and ends. With this info I can know all the dates in which transactions occurs and use it to fill a cashflow table. Doing things this way involves creating a lot of structures(dictionaries, lists and tuples) and iterating them a lot. Maybe there is a very simple way of solving this with the actual schema, but I couldn't realize how.
I think that the app would be easier to code if, at the creation of a transaction, I could save all the dates in the db. I don't know if it's possible or if it's a good idea.
I'm reading a book about google app engine and the datastore's multivalued properties. What do you think about this for solving my problem?.
Edit: I didn't know about the PickleField. I'm now reading about it, maybe I could use it to store all the transaction's datetime objects.
Edit2: This is an excerpt of my cashflow2 view(sorry for the horrible code):
def cashflow2(request, account_name="Initial"):
if account_name == "Initial":
uri = "/cashflow/new_account"
return HttpResponseRedirect(uri)
month_info = {}
cat_info = {}
m_y_list = [] # [(month,year),]
trans = []
min, max = [] , []
account = Account.objects.get(name=account_name, user=request.user)
categories = account.categories.all()
for year in range(2006,2017):
for month in range(1,13):
month_info[(month, year)] = [0, 0, 0]
for cat in categories:
cat_info[(cat, month, year)] = 0
previous_months = 1 # previous months from actual
next_months = 5
dates_list = month_year_list(previous_month, next_months) # Returns [(month,year)] from the requested range
m_y_list = [(date.month, date.year) for date in month_year_list(1,5)]
min, max = dates_list[0], dates_list[-1]
INCOME = 0
EXPENSE = 1
ONHAND = 2
transacs_in_dates = []
txs = account.transactions.order_by('date')
for tx in txs:
monthyear = ()
monthyear = (tx.date.month, tx.date.year)
if tx.frequency == 0:
if tx.type == 0:
month_info[monthyear][INCOME] += tx.amount
if tx.category:
cat_info[(tx.category, monthyear[0], monthyear[1])] += tx.amount
else:
month_info[monthyear][EXPENSE] += tx.amount
if tx.category:
cat_info[(tx.category, monthyear[0], monthyear[1])] += tx.amount
if monthyear in lista_m_a:
if tx not in transacs_in_dates:
transacs_in_dates.append(tx)
elif tx.frequency == 4: # frequency = 'Monthly'
months_dif = relativedelta.relativedelta(tx.ends, tx.date).months
if tx.ends.day < tx.date.day:
months_dif += 1
years_dif = relativedelta.relativedelta(tx.ends, tx.date).years
dif = months_dif + (years_dif*12)
dates_range = dif + 1
for i in range(dates_range):
dt = tx.date+relativedelta.relativedelta(months=+i)
if (dt.month, dt.year) in m_y_list:
if tx not in transacs_in_dates:
transacs_in_dates.append(tx)
if tx.type == 0:
month_info[(fch.month,fch.year)][INCOME] += tx.amount
if tx.category:
cat_info[(tx.category, fch.month, fch.year)] += tx.amount
else:
month_info[(fch.month,fch.year)][EXPENSE] += tx.amount
if tx.category:
cat_info[(tx.category, fch.month, fch.year)] += tx.amount
import operator
thelist = []
thelist = sorted((my + tuple(v) for my, v in month_info.iteritems()),
key = operator.itemgetter(1, 0))
thelistlist = []
for atuple in thelist:
thelistlist.append(list(atuple))
for i in range(len(thelistlist)):
if i != 0:
thelistlist[i][4] = thelistlist[i-1][2] - thelistlist[i-1][3] + thelistlist[i-1][4]
list = []
for el in thelistlist:
if (el[0],el[1]) in lista_m_a:
list.append(el)
transactions = account.transactions.all()
cats_in_dates_income = []
cats_in_dates_expense = []
for t in transacs_in_dates:
if t.category and t.type == 0:
if t.category not in cats_in_dates_income:
cats_in_dates_income.append(t.category)
elif t.category and t.type == 1:
if t.category not in cats_in_dates_expense:
cats_in_dates_expense.append(t.category)
cat_infos = []
for k, v in cat_info.items():
cat_infos.append((k[0], k[1], k[2], v))
Depends on how relevant App Engine is here. P.S. If you'd like to store pickled objects as well as JSON objects in the Google Datastore, check out these two code snippets:
http://kovshenin.com/archives/app-engine-json-objects-google-datastore/
http://kovshenin.com/archives/app-engine-python-objects-in-the-google-datastore/
Also note that the Google Datastore is a non-relational database, so you might have other trouble refactoring your code to switch to that.
Cheers and good luck!

Resources