GQL SELECT Sorting - google-app-engine

Is there an easier way to select and sort by weight?
fetchCount = 1000
date1 = datetime.datetime.utcnow().date()
entries = GqlQuery("SELECT * FROM Entry WHERE category = :category and date >= :datetime ORDER BY date, weight DESC", category = category, datetime = date1).fetch(fetchCount)
if entries is not None:
# Sort entries ( lazy way for now ).
sort = True
while sort:
sort = False
for i in range(0, len(entries)-1):
if entries[i].weight < entries[i + 1].weight:
e = entries[i + 1]
entries[i + 1] = entries[i]
entries[i] = e
sort = True

solved by:
entries = GqlQuery("SELECT * FROM Entry WHERE category = :category and date > :datetime ORDER BY date, weight DESC", category = category, datetime = date1).fetch(fetchCount)
entries = sorted(entries, key=lambda x: x.weight, reverse=True)
since there is no other way atm....

It's a limitation of the datastore that if you use an inequality filter (e.g. date >= :datetime) that must also be your first ordering key. Also, you can only have inequalities on one property per query. So, in your case you have no choice but sorting them in memory. The sorted() call in the other answer is perfect.

Related

How can I check in xarray if a coordinate exists?

I want something similar to this: if fileObj.is_file() == True: But for a dataset.
I want to check whether a date exists before I select it.
y_begin = 2007
y_end = 2020
begin_date = '05-01'
end_date = '09-31'
ds_so_merge = None
for y in range(y_begin, y_end +1):
begin = str(y) + '-' + begin_date
end = str(y) + '-' + end_date
!!!here checking if the date exists and if not trying the following date!!!
ds_so = dataset.sel(time=slice(begin, end))
if ds_so_merge is None:
ds_so_merge = ds_so
else:
ds_so_merge = ds_so.merge(ds_so_merge)
You can check whether a coordinate contains a specific value with `value in coord`, just as you could with a NumPy array or a pandas index.
Another option since you're using slices would just be to pull all elements which match the slice criteria, then select the first matched element.
Something like the following should work:
first_time_matching_slice = dataset.sel(time=slice(begin, end)).isel(time=0)

peewee ORM limits query, how do I remove the limit?

I have this query:
SELECT * FROM "transaction"
WHERE type = {transaction_type} AND
from_member_id IN (SELECT member_id FROM dao_member WHERE org_id = "{org}") AND
to_member_id IN (SELECT member_id FROM dao_member WHERE org_id = "{org}")
Which I have written in peewee like this:
members = ORG_Member.select(ORG_Member.member).where(ORG_Member.org_id == org)
transactions = Transaction.select().where(
Transaction.type == transaction_type).where(
Transaction.from_member.in_(members)).where(
Transaction.to_member.in_(members)).sql()
The SQL shown is then;
SELECT * FROM "transaction" AS "t1"
WHERE ((("t1"."type" = ?)
AND ("t1"."from_member_id" IN
(SELECT "t2"."member_id" FROM "org_member" AS "t2" WHERE ("t2"."org_id" = ?) LIMIT ?)))
AND ("t1"."to_member_id" IN
(SELECT "t2"."member_id" FROM "org_member" AS "t2" WHERE ("t2"."org_id" = ?) LIMIT ?))),
[1, 'lala', 1, 'baba', 1])
Notice the limit! Where did this limit come from? I've tried setting limit explicitly to None, no luck.
I don't observe that. Probably you have omitted some important details.
from peewee import (
    ForeignKeyField,
    IntegerField,
    Model,
    SqliteDatabase,
    TextField,
)

# In-memory SQLite database; sufficient for rendering the generated SQL.
db = SqliteDatabase(':memory:')


class Member(Model):
    """Member row with an integer org_id (defaults to 0)."""
    org_id = IntegerField(default=0)

    class Meta:
        database = db


class Transaction(Model):
    """Row referencing two Member rows (from/to) plus a text type."""
    from_member = ForeignKeyField(Member, backref='from_tx')
    to_member = ForeignKeyField(Member, backref='to_tx')
    type = TextField()

    class Meta:
        database = db


# Subquery of members filtered by org_id; it is reused in both IN clauses.
members = Member.select().where(Member.org_id == 0)
txns = (Transaction.select()
        .where(Transaction.type == 'tx-type')
        .where(Transaction.from_member.in_(members))
        .where(Transaction.to_member.in_(members)))
# Print the generated SQL and its parameters so the subqueries can be inspected.
print(txns.sql())
This results in:
SELECT "t1"."id", ...
FROM "transaction" AS "t1"
WHERE "t1"."type" = ? AND
"t1"."from_member_id" IN (SELECT "t2"."id"
FROM "member" AS "t2"
WHERE ( "t2"."org_id" = ? )) AND
"t1"."to_member_id" IN (SELECT "t2"."id"
FROM "member" AS "t2"
WHERE ( "t2"."org_id" = ? ))

Linq query count

select count(tblVV.VNme) as total,
tblvV.VNme
from tblVV
inner join tblRV
on tblVV.MID=tblRV.ID
inner join tblRe
on tblRV.RID=tblRe.RID
where tblRe.StartDate>= '2016-07-01 00:00:00' and
tblRe.EndDate<= '2016-07-31 23:59:59' and
tblRe.Reg= 'uk' and
tblRV.RegNo='BR72' and
tblVV.VNme <>''
group by tblVV.VNme
For the above query I get:
total Vame
1 DDSB
11 MV
The above SQL query returns the correct data, so now I am trying to convert it to a LINQ query:
[WebMethod]
public static string GetVo(string RegNo)
{
string data = "[";
try
{
Ts1 DB = new Ts1();
var re = (from vehvoila in DB.tblVV
join regveh in DB.tblRV on vehvoila.MID equals regveh.ID
join reg in DB.tblReg on regveh.RID equals reg.RID
where regveh.RegNo == RegNo &&
vehvoila.Vame != ""
group vehvoila by vehvoila.Vame into g
select new
{
VNme = g.Key,
cnt = g.Select(t => t.Vame).Count()
}).ToList();
if (re.Any())
{
data += re.ToList().Select(x => "['" + x.Vame + "'," + x.cnt + "]")
.Aggregate((a, b) => a + "," + b);
}
data += "]";
}
The LINQ query returns data like this:
[['DDSB',1],['DPSB',1],['DSB',109],['MV',39],['PSB',1]]
Whereas I want data this
[['DDSB',1],['MV',11]]
Now the data which return SQL query is correct so how I correct linq query
Note: ignore the from-date, to-date and region parameters in the SQL query. My page has a dropdown, from-date and to-date pickers, and a button; when I select values (e.g. UK and a date range), the data is displayed in a table. When I then click on any row in that table, I want to get this data into the string that is closed by data += "]";
The LINQ query above is what actually runs when a row is clicked:
total Vame
1 DDSB
11 MV
You can write it all like this:
Ts1 db = new Ts1();
// Each element of result is one pre-formatted "['name',count]" pair.
var result = (from vehvoila in db.tblVV
              join regveh in db.tblRV on vehvoila.MID equals regveh.ID
              join reg in db.tblReg on regveh.RID equals reg.RID
              where reg.StartDate >= new DateTime(2016, 7, 1) &&
                    // Exclusive upper bound: everything before 1 August.
                    reg.EndDate < new DateTime(2016, 8, 1) &&
                    reg.Reg == "uk" &&
                    // Compare the columns, not the joined entities themselves.
                    regveh.RegNo == "BR72" &&
                    vehvoila.Vame != ""
              group vehvoila by vehvoila.Vame into g
              // Quote the name so the output matches [['DDSB',1],['MV',11]].
              select $"['{g.Key}',{g.Count()}]");
var data = $"[{string.Join(",", result)}]";
Because the result is only used to build the string, the select returns the formatted string for a single item, and string.Join is then used instead of .Aggregate, which I think is a bit cleaner.
The $"{}" syntax is the C# 6.0 string interpolation
In the EndDate condition I used < with the first day of the next month instead of <= with the last second of the month. At least in Oracle, when a table is partitioned by date, this is better for performance; it may be in SQL Server as well.
Without string interpolation:
Ts1 db = new Ts1();
var result = (from vehvoila in db.tblVV
              join regveh in db.tblRV on vehvoila.MID equals regveh.ID
              join reg in db.tblReg on regveh.RID equals reg.RID
              where reg.StartDate >= new DateTime(2016, 7, 1) &&
                    // Exclusive upper bound: everything before 1 August.
                    reg.EndDate < new DateTime(2016, 8, 1) &&
                    reg.Reg == "uk" &&
                    // Compare the columns, not the joined entities themselves.
                    regveh.RegNo == "BR72" &&
                    vehvoila.Vame != ""
              group vehvoila by vehvoila.Vame into g
              select new { Key = g.Key, Count = g.Count() })
             // Switch to in-memory LINQ before formatting the strings.
             .AsEnumerable()
             // Quote the name so the output matches [['DDSB',1],['MV',11]].
             .Select(g => string.Format("['{0}',{1}]", g.Key, g.Count));
var data = string.Format("[{0}]", string.Join(",", result));

slow SQLite read speed (100 records a second)

I have a large SQLite database (~134 GB) that has multiple tables each with 14 columns, about 330 million records, and 4 indexes. The only operation used on the database is "Select *" as I need all the columns(No inserts or updates). When I query the database, the response time is slow when the result set is big (takes 160 seconds for getting ~18,000 records).
I have improved the use of indexes multiple times and this is the fastest response time I got.
I am running the database as a back-end database for a web application on a server with 32 GB of RAM.
is there a way to use RAM (or anything else) to speed up the query process?
Here is the code that performs the query.
async.each(proteins,function(item, callback) {
`PI[item] = []; // Stores interaction proteins for all query proteins
PS[item] = []; // Stores scores for all interaction proteins
PIS[item] = []; // Stores interaction sites for all interaction proteins
var sites = {}; // a temporarily holder for interaction sites
var query_string = 'SELECT * FROM ' + organism + PIPE_output_table +
' WHERE ' + score_type + ' > ' + cutoff['range'] + ' AND (protein_A = "' + item + '" OR protein_B = "' + item '") ORDER BY PIPE_score DESC';
db.each(query_string, function (err, row) {
if (row.protein_A == item) {
PI[item].push(row.protein_B);
// add 1 to interaction sites to represent sites starting from 1 not from 0
sites['S1AS'] = row.site1_A_start + 1;
sites['S1AE'] = row.site1_A_end + 1;
sites['S1BS'] = row.site1_B_start + 1;
sites['S1BE'] = row.site1_B_end + 1;
sites['S2AS'] = row.site2_A_start + 1;
sites['S2AE'] = row.site2_A_end + 1;
sites['S2BS'] = row.site2_B_start + 1;
sites['S2BE'] = row.site2_B_end + 1;
sites['S3AS'] = row.site3_A_start + 1;
sites['S3AE'] = row.site3_A_end + 1;
sites['S3BS'] = row.site3_B_start + 1;
sites['S3BE'] = row.site3_B_end + 1;
PIS[item].push(sites);
sites = {};
}
}
The query you posted uses no variables.
It will always return the same thing: all the rows with a null score whose protein column is equal to its protein_a or protein_b column. You're then having to filter all those extra rows in Javascript, fetching a lot more rows than you need to.
Here's why...
If I'm understanding this query correctly, you have WHERE Score > [Score]. I've never encountered this syntax before, so I looked it up.
[keyword] A keyword enclosed in square brackets is an identifier. This is not standard SQL. This quoting mechanism is used by MS Access and SQL Server and is included in SQLite for compatibility.
An identifier is something like a column or table name, not a variable.
This means that this...
SELECT * FROM [TABLE]
WHERE Score > [Score] AND
(protein_A = [Protein] OR protein_B = [Protein])
ORDER BY [Score] DESC;
Is the same as this...
SELECT * FROM `TABLE`
WHERE Score > Score AND
(protein_A = Protein OR protein_B = Protein)
ORDER BY Score DESC;
You never pass any variables to the query. It will always return the same thing.
This can be seen here when you run it.
db.each(query_string, function (err, row) {
Since you're checking that each protein is equal to itself (or something very like itself), you're likely fetching every row. And it's why you have to filter all the rows again. And that is one of the reasons why your query is so slow.
if (row.protein_A == item) {
BUT! WHERE Score > [Score] will never be true, a thing cannot be greater than itself except for null! Trinary logic is weird. So only if Score is null can that be true.
So you're returning all the rows whose score is null and the protein column is equal to protein_a or protein_b. This is a lot more rows than you need, I guess you have a lot of rows with null scores.
Your query should incorporate variables (I'm assuming you're using node-sqlite3) and pass in their values when you execute the query.
var query = " \
SELECT * FROM `TABLE` \
WHERE Score > $score AND \
(protein_A = $protein OR protein_B = $protein) \
ORDER BY Score DESC; \
";
var stmt = db.prepare(query);
stmt.each({$score: score, $protein: protein}, function (err, row) {
PI[item].push(row.protein_B);
...
});

ndb query error with datetime field - Google App Engine

I'm having a problem and I can't find any information about it.
I define a field in my model like this.
class Dates(ndb.Model):
    """Model with a required date-and-time property (other fields elided)."""
    ...
    date = ndb.DateTimeProperty(required=True)  # I want to store date and time
    ...
Later I try a query (for now I want all the dates for a given day; I don't mind the time):
kl = Dates.query(ndb.AND(Dates.date.year == year,
Dates.date.month == month,
Dates.date.day == day),
ancestor = customer.key).fetch(keys_only = True)
dates = ndb.get_multi(kl)
But I get this error log:
AttributeError: 'DateTimeProperty' object has no attribute 'year'
I don't know why. I've tried Dates.date() == date, Dates.date == date (<-DateTime obj), ...
My DB is still empty, but I suppose this doesn't matter, because I'll never have dates for every possible day.
Anybody knows why? Should I go with GQL instead?
You can use "range" queries for this. See example below.
import datetime

# Parsing a date with no time part yields midnight, i.e. the start of the day.
day_start = datetime.datetime.strptime('02/19/2013', '%m/%d/%Y')
# Half-open range [day_start, day_start + 1 day) matches every time on that day.
kl = Dates.query(
    ndb.AND(Dates.date >= day_start,
            Dates.date < day_start + datetime.timedelta(days=1)))
This will fetch all entities whose datetime falls on 02/19/2013.
What you are trying to achieve is not really possible, because you can only query for the whole date and not for some parts of it.
In order to achieve what you are trying there I would suggest you to add few more properties to your model:
class Dates(ndb.Model):
    """Dates model with year, month and day also stored as integer properties."""
    ...
    date = ndb.DateTimeProperty(required=True)
    # Separate integer parts of `date`, so each can be matched with an
    # equality filter; they have to be filled in whenever `date` is saved.
    date_year = ndb.IntegerProperty()
    date_month = ndb.IntegerProperty()
    date_day = ndb.IntegerProperty()
    ...
You could update these values on every save or you could use Model Hooks to do it automagically and then your new query will become:
kl = Dates.query(ndb.AND(Dates.date_year == year,
Dates.date_month == month,
Dates.date_day == day),
ancestor=customer.key).fetch(keys_only=True)
dates = ndb.get_multi(kl)
Use a DateProperty. Then you can use a simple == query:
>>> import datetime
>>> from google.appengine.ext.ndb import *
>>> class D(Model):
... d = DateProperty()
...
>>> d = D(d=datetime.date.today())
>>> d.put()
Key('D', 9)
>>> d
D(key=Key('D', 9), d=datetime.date(2013, 2, 20))
>>> D.query(D.d == datetime.date.today()).fetch()
[D(key=Key('D', 9), d=datetime.date(2013, 2, 20))]
I expanded @Guido van Rossum's code snippet to include comparison operators (<, >, >=, <=) and timedelta for calculations, mostly for my own satisfaction.
import datetime
from datetime import timedelta

from google.appengine.ext.ndb import DateProperty, Model


class D(Model):
    """Minimal model with a single DateProperty."""
    d = DateProperty()


now = datetime.date.today()
date1 = now - timedelta(days=500)
date2 = now - timedelta(days=5)

# Store three entities dated today, 500 days ago and 5 days ago.
d1 = D(d=now)
d2 = D(d=date1)
d3 = D(d=date2)
d1.put()
d2.put()
d3.put()

# Rebind date2: from here on it is the 50-days-ago cutoff used by the queries.
date2 = now - timedelta(days=50)

result1 = D.query(D.d == now).fetch(4)
result2 = D.query(D.d > date2).fetch(2)
result3 = D.query(D.d < date2).fetch(2)
result4 = D.query(D.d >= date2, D.d <= now).fetch(2)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(result1)
print("+++++++")
print(result2)
print("+++++++")
print(result3)
print("+++++++")
print(result4)

Resources