How I can get difference of queries? - peewee

class PostTemplate(BaseModel):
content = TextField(unique=True)
class VkGroup(BaseModel):
group_id = IntegerField(unique=True)
class PostTemplateVkGroup(BaseModel):
"""
http://charlesleifer.com/blog/a-tour-of-tagging-schemas-many-to-many-bitmaps-and-more/
"""
group = ForeignKeyField(VkGroup)
post_template = ForeignKeyField(PostTemplate)
def get_posted_templates_for_group(group_id: int) -> Iterable:
"""Get posted templates.
Args:
group_id (int): id группы
"""
queries = (PostTemplate
.select()
.join(PostTemplateVkGroups)
.join(VkGroup)
.where(VkGroup.group_id == group_id))
return queries
all_post_templates = PostTemplate.select()
Many-to-many relationship.
For every record in PostTemplateVkGroup post template from this record is used in group from this record.
all_post_templates = not_posted_templates | posted_templates
How I can get not_posted_templates?

If your database supports the "EXCEPT" operation, you can:
all_post_templates = PostTemplate.alias().select()
post_templates = get_posted_templates_for_group(...)
difference = all_post_templates - post_templates
Sqlite example:
class Post(Base):
title = TextField()
class Tag(Base):
tag = TextField()
class PostTag(Base):
post = ForeignKeyField(Post)
tag = ForeignKeyField(Tag)
db.create_tables([Post, Tag, PostTag])
data = (
('pa', ('ta1', 'ta2')),
('pb', ('tb1', 'tb2')),
('pc', ()))
for title, tags in data:
post = Post.create(title=title)
for tag in tags:
tag = Tag.create(tag=tag)
PostTag.create(post=post, tag=tag)
# Create some tags that aren't associated with any post.
Tag.create(tag='tx1')
Tag.create(tag='tx2')
pa1_tags = (Tag
.select()
.join(PostTag)
.join(Post)
.where(Post.title == 'pa'))
all_tags = Tag.alias().select()
diff = all_tags - pa1_tags
for t in diff:
print(t.tag)
# Prints
# tb1
# tb2
# tx1
# tx2

Related

how to use initiate function for a cart in Django?

I'm learning Django and I'm trying to make a Cart, which the customer can get and item and add it in his/her order row and then the order will be submitted. so my teacher said use def initiate(customer), and I don't understand how to use it. Can someone please explain it to me? Thank you.
here is the code I'm working on it:
User = get_user_model()
class Customer(models.Model):
user = models.OneToOneField(User, on_delete=Product, related_name="User")
phone = models.CharField(max_length=20)
address = models.TextField()
balance = models.IntegerField(default=20000)
def deposit(self, amount):
self.balance += amount
self.save()
def spend(self, amount):
if amount > self.balance:
raise ValueError
self.balance -= amount
self.save()
class OrderRow(models.Model):
product = models.ManyToManyField(Product)
order = models.ForeignKey('Order', on_delete=models.CASCADE)
amount = models.IntegerField()
class Order(models.Model):
# Status values. DO NOT EDIT
STATUS_SHOPPING = 1
STATUS_SUBMITTED = 2
STATUS_CANCELED = 3
STATUS_SENT = 4
customer = models.ForeignKey('Customer', on_delete=models.SET_NULL)
order_time = models.DateTimeField(auto_now=True)
total_price = Sum(F('amount') * F('product__price'))
status = models.IntegerField(choices=status_choices)
#staticmethod
def initiate(customer):
Order.initiate(User)
def add_product(self, product, amount):
Order.status = 1
OrderRow.product = Product.objects.get(id=product.id)
print(product.id)
if OrderRow.objects.filter(product=product).exists():
preexisting_order = OrderRow.objects.get(product=product, order=self)
preexisting_order.amount += 1
preexisting_order.save()
else:
new_order = OrderRow.objects.create(
product=product,
cart=self,
amount=1,
)
new_order.save()
You are probably supposed to create a new Order associated with this customer. Something along the following lines:
#classmethod
def initiate(cls, customer):
return cls.objects.create(customer=customer, status=cls.STATUS_SHOPPING)
There are some other issues with your code. You cannot use SET_NULL if the fk is not nullable:
customer = models.ForeignKey('Customer', on_delete=models.SET_NULL, null=true)
There should not be multiple products per row:
class OrderRow(models.Model):
product = models.ForeignKey(Product) # not many2many!
# ...
Also, your add_product needs quite some fixing:
def add_product(self, product, amount):
self.status = self.STATUS_SHOPPING # the instance is self + use your descriptive variables
print(product.id)
# filter only rows in the current order!
if self.orderrow_set.filter(product=product).exists():
# fix naming: this is a row, not an order
preexisting_order_row = self.orderrow_set.get(product=product)
preexisting_order_row.amount += amount # why +1, you are adding amount
preexisting_order_row.save()
else:
new_order_row = OrderRow.objects.create(
product=product,
order=self,
amount=amount,
) # create saves already

Sentiment140 Preprocessing

I have been trying to do some preprocessing on the Sentiment140 database on Kaggle: https://www.kaggle.com/kazanova/sentiment140
The code I'm using is this:
import os
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import RegexpTokenizer
Base_location = ''
dataset_location = os.path.join(Base_location, 'Sentiment140.csv')
corpus = []
labels = []
# Parse tweets and sentiments
with open(dataset_location, 'r', encoding='latin-1') as df:
for i, line in enumerate(df):
parts = line.strip().split(',')
# Sentiment (0 = Negative, 1 = Positive)
labels.append(str(parts[0].strip()))
# Tweet
tweet = parts[5].strip()
if tweet.startswith('"'):
tweet = tweet[1:]
if tweet.endswith('"'):
tweet = tweet[::-1]
corpus.append(tweet.strip().lower())
print('Corpus size: {}'.format(len(corpus)))
# Tokenize and stem
tkr = RegexpTokenizer('[a-zA-Z0-9#]+')
stemmer = LancasterStemmer()
tokenized_corpus = []
for i, tweet in enumerate(corpus):
tokens = [stemmer.stem(t) for t in tkr.tokenize(tweet) if not t.startswith('#')]
tokenized_corpus.append(tokens)
print(tokenized_corpus)
However, I keep getting this error:
TypeError: '_io.TextIOWrapper' object is not subscriptable
Can anyone help me understand how to solve the issue?
Thanks in advance
TL;DR
To read .csv or structured datasets, use pandas https://pandas.pydata.org/ or any other dataframe libraries.
In Long:
Instead of doing:
Base_location = ''
dataset_location = os.path.join(Base_location, 'Sentiment140.csv')
corpus = []
labels = []
# Parse tweets and sentiments
with open(dataset_location, 'r', encoding='latin-1') as df:
for i, line in enumerate(df):
parts = line.strip().split(',')
# Sentiment (0 = Negative, 1 = Positive)
labels.append(str(parts[0].strip()))
# Tweet
tweet = parts[5].strip()
if tweet.startswith('"'):
tweet = tweet[1:]
if tweet.endswith('"'):
tweet = tweet[::-1]
corpus.append(tweet.strip().lower())
You could simply read the .csv file with pandas, e.g.
import pandas as pd
corpus = pd.read_csv('training.1600000.processed.noemoticon.csv', encoding='latin-1')
Then use the .apply() function to process the tweets:
"""
Columns
====
target: the polarity of the tweet (0 = negative, 2 = neutral, 4 = positive)
ids: The id of the tweet ( 2087)
date: the date of the tweet (Sat May 16 23:58:44 UTC 2009)
flag: The query (lyx). If there is no query, then this value is NO_QUERY.
user: the user that tweeted (robotickilldozr)
text: the text of the tweet (Lyx is cool)
"""
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import RegexpTokenizer
import pandas as pd
df = pd.read_csv('training.1600000.processed.noemoticon.csv',
header=None,
names=['target', 'ids', 'date', 'flag', 'user', 'text'],
encoding='latin-1')
tokenizer = RegexpTokenizer('[a-zA-Z0-9#]+')
stemmer = LancasterStemmer()
def process_tweet(tweet):
return [stemmer.stem(token) if not token.startswith('#') else token
for token in tokenizer.tokenize(tweet)]
# 1. Cast the column type to string
# 2. Lowercase it
# 3. Iterate throw each row and get the output from process_tweet()
# 4. # 3. Keep in a new column call `tokenized_text`
df['tokenized_text']= df['text'].str.lower().apply(process_tweet)

How does field renaming works on django migrations?

Django migration can detect if a field was renamed and ask you about it (instead of the old fashion delete/create)
Even if multiple fields are changed it seems to find the corresponding match. For example:
Before:
class DirectoryMirror(models.Model):
directory_origin = models.ForeignKey(TapeDirectory)
machine_target = models.ForeignKey(GenericMachine)
directory_target = models.CharField(max_length=255, blank=False)
After (changing field names):
class DirectoryMirror(models.Model):
source_directory = models.ForeignKey(TapeDirectory)
target_machine = models.ForeignKey(GenericMachine)
target_directory = models.CharField(max_length=255, blank=False)
Generating migration:
$ ./manage.py makemigrations
Did you rename directorymirror.directory_origin to directorymirror.source_directory (a ForeignKey)? [y/N] y
Did you rename directorymirror.directory_target to directorymirror.target_directory (a CharField)? [y/N] y
Did you rename directorymirror.machine_target to directorymirror.target_machine (a ForeignKey)? [y/N] y
How does it manage to detect the renaming and find the correct match?
Here it is the algorithm https://github.com/django/django/blob/bc77eb6d0858652e197c08c299efaeb06c51efee/django/db/migrations/autodetector.py#L757
Copying it here
def generate_renamed_fields(self):
"""
Works out renamed fields
"""
self.renamed_fields = {}
for app_label, model_name, field_name in sorted(self.new_field_keys - self.old_field_keys):
old_model_name = self.renamed_models.get((app_label, model_name), model_name)
old_model_state = self.from_state.models[app_label, old_model_name]
field = self.new_apps.get_model(app_label, model_name)._meta.get_field(field_name)
# Scan to see if this is actually a rename!
field_dec = self.deep_deconstruct(field)
for rem_app_label, rem_model_name, rem_field_name in sorted(self.old_field_keys - self.new_field_keys):
if rem_app_label == app_label and rem_model_name == model_name:
old_field_dec = self.deep_deconstruct(old_model_state.get_field_by_name(rem_field_name))
if field.remote_field and field.remote_field.model and 'to' in old_field_dec[2]:
old_rel_to = old_field_dec[2]['to']
if old_rel_to in self.renamed_models_rel:
old_field_dec[2]['to'] = self.renamed_models_rel[old_rel_to]
if old_field_dec == field_dec:
if self.questioner.ask_rename(model_name, rem_field_name, field_name, field):
self.add_operation(
app_label,
operations.RenameField(
model_name=model_name,
old_name=rem_field_name,
new_name=field_name,
)
)
self.old_field_keys.remove((rem_app_label, rem_model_name, rem_field_name))
self.old_field_keys.add((app_label, model_name, field_name))
self.renamed_fields[app_label, model_name, field_name] = rem_field_name
break

'Model is not immutable' TypeError

I am getting this traceback;
--- Trimmed parts ---
File "C:\Users\muhammed\Desktop\gifdatabase\gifdatabase.py", line 76, in maketransaction
gif.tags = list(set(gif.tags + tags))
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\ext\ndb\model.py", line 2893, in __hash__
raise TypeError('Model is not immutable')
TypeError: Model is not immutable
Here is related parts of my code;
class Gif(ndb.Model):
author = ndb.UserProperty()
#tags = ndb.StringProperty(repeated=True)
tags = ndb.KeyProperty(repeated=True)
#classmethod
def get_by_tag(cls,tag_name):
return cls.query(cls.tags == ndb.Key(Tag, tag_name)).fetch()
class Tag(ndb.Model):
gif_count = ndb.IntegerProperty()
class PostGif(webapp2.RequestHandler):
def post(self):
user = users.get_current_user()
if user is None:
self.redirect(users.create_login_url("/static/submit.html"))
return
link = self.request.get('gif_link')
tag_names = shlex.split(self.request.get('tags').lower())
#ndb.transactional(xg=True)
def maketransaction():
tags = [Tag.get_or_insert(tag_name) for tag_name in tag_names]
gif = Gif.get_or_insert(link)
if not gif.author: # first time submission
gif.author = user
gif.tags = list(set(gif.tags + tags))
gif.put()
for tag in tags:
tag.gif_count += 1
tag.put()
if validate_link(link) and tag_names:
maketransaction()
self.redirect('/static/submit_successful.html')
else:
self.redirect('/static/submit_fail.html')
What is the problem with gif.tags = list(set(gif.tags + tags)) line?
You are inserting tags instead of keys, you need to access
tags = [Tag.get_or_insert(tag_name).key .....]
but you can also make this a single network hop like this
futures = [Tag.get_or_insert_async(tag_name) for tag_name in tag_names]
futures.append(Gif.get_or_insert_async(link))
ndb.Future.wait_all(futures)
gif = futures.pop().get_result()
tags = [future.get_result() for future in futures]
but that's not really the question just a suggestion ^, for clearer answer with .key is
gif.tags = gif.tags + [tag.key for tag in tags]
# or
gif.tags.extend([tag.key for tag in tags])

Is it better to change the db schema?

I'm building a web app with django. I use postgresql for the db. The app code is getting really messy(my begginer skills being a big factor) and slow, even when I run the app locally.
This is an excerpt of my models.py file:
REPEATS_CHOICES = (
(NEVER, 'Never'),
(DAILY, 'Daily'),
(WEEKLY, 'Weekly'),
(MONTHLY, 'Monthly'),
...some more...
)
class Transaction(models.Model):
name = models.CharField(max_length=30)
type = models.IntegerField(max_length=1, choices=TYPE_CHOICES) # 0 = 'Income' , 1 = 'Expense'
amount = models.DecimalField(max_digits=12, decimal_places=2)
date = models.DateField(default=date.today)
frequency = models.IntegerField(max_length=2, choices=REPEATS_CHOICES)
ends = models.DateField(blank=True, null=True)
active = models.BooleanField(default=True)
category = models.ForeignKey(Category, related_name='transactions', blank=True, null=True)
account = models.ForeignKey(Account, related_name='transactions')
The problem is with date, frequency and ends. With this info I can know all the dates in which transactions occurs and use it to fill a cashflow table. Doing things this way involves creating a lot of structures(dictionaries, lists and tuples) and iterating them a lot. Maybe there is a very simple way of solving this with the actual schema, but I couldn't realize how.
I think that the app would be easier to code if, at the creation of a transaction, I could save all the dates in the db. I don't know if it's possible or if it's a good idea.
I'm reading a book about google app engine and the datastore's multivalued properties. What do you think about this for solving my problem?.
Edit: I didn't know about the PickleField. I'm now reading about it, maybe I could use it to store all the transaction's datetime objects.
Edit2: This is an excerpt of my cashflow2 view(sorry for the horrible code):
def cashflow2(request, account_name="Initial"):
if account_name == "Initial":
uri = "/cashflow/new_account"
return HttpResponseRedirect(uri)
month_info = {}
cat_info = {}
m_y_list = [] # [(month,year),]
trans = []
min, max = [] , []
account = Account.objects.get(name=account_name, user=request.user)
categories = account.categories.all()
for year in range(2006,2017):
for month in range(1,13):
month_info[(month, year)] = [0, 0, 0]
for cat in categories:
cat_info[(cat, month, year)] = 0
previous_months = 1 # previous months from actual
next_months = 5
dates_list = month_year_list(previous_month, next_months) # Returns [(month,year)] from the requested range
m_y_list = [(date.month, date.year) for date in month_year_list(1,5)]
min, max = dates_list[0], dates_list[-1]
INCOME = 0
EXPENSE = 1
ONHAND = 2
transacs_in_dates = []
txs = account.transactions.order_by('date')
for tx in txs:
monthyear = ()
monthyear = (tx.date.month, tx.date.year)
if tx.frequency == 0:
if tx.type == 0:
month_info[monthyear][INCOME] += tx.amount
if tx.category:
cat_info[(tx.category, monthyear[0], monthyear[1])] += tx.amount
else:
month_info[monthyear][EXPENSE] += tx.amount
if tx.category:
cat_info[(tx.category, monthyear[0], monthyear[1])] += tx.amount
if monthyear in lista_m_a:
if tx not in transacs_in_dates:
transacs_in_dates.append(tx)
elif tx.frequency == 4: # frequency = 'Monthly'
months_dif = relativedelta.relativedelta(tx.ends, tx.date).months
if tx.ends.day < tx.date.day:
months_dif += 1
years_dif = relativedelta.relativedelta(tx.ends, tx.date).years
dif = months_dif + (years_dif*12)
dates_range = dif + 1
for i in range(dates_range):
dt = tx.date+relativedelta.relativedelta(months=+i)
if (dt.month, dt.year) in m_y_list:
if tx not in transacs_in_dates:
transacs_in_dates.append(tx)
if tx.type == 0:
month_info[(fch.month,fch.year)][INCOME] += tx.amount
if tx.category:
cat_info[(tx.category, fch.month, fch.year)] += tx.amount
else:
month_info[(fch.month,fch.year)][EXPENSE] += tx.amount
if tx.category:
cat_info[(tx.category, fch.month, fch.year)] += tx.amount
import operator
thelist = []
thelist = sorted((my + tuple(v) for my, v in month_info.iteritems()),
key = operator.itemgetter(1, 0))
thelistlist = []
for atuple in thelist:
thelistlist.append(list(atuple))
for i in range(len(thelistlist)):
if i != 0:
thelistlist[i][4] = thelistlist[i-1][2] - thelistlist[i-1][3] + thelistlist[i-1][4]
list = []
for el in thelistlist:
if (el[0],el[1]) in lista_m_a:
list.append(el)
transactions = account.transactions.all()
cats_in_dates_income = []
cats_in_dates_expense = []
for t in transacs_in_dates:
if t.category and t.type == 0:
if t.category not in cats_in_dates_income:
cats_in_dates_income.append(t.category)
elif t.category and t.type == 1:
if t.category not in cats_in_dates_expense:
cats_in_dates_expense.append(t.category)
cat_infos = []
for k, v in cat_info.items():
cat_infos.append((k[0], k[1], k[2], v))
Depends on how relevant App Engine is here. P.S. If you'd like to store pickled objects as well as JSON objects in the Google Datastore, check out these two code snippets:
http://kovshenin.com/archives/app-engine-json-objects-google-datastore/
http://kovshenin.com/archives/app-engine-python-objects-in-the-google-datastore/
Also note that the Google Datastore is a non-relational database, so you might have other trouble refactoring your code to switch to that.
Cheers and good luck!

Resources