How to use cursors for search in gae? - google-app-engine

When I RTFM, I can't understand how to specify paginated searches using the technique described in the manual. Here's my code:
def find_documents(query_string, limit, cursor):
    """Run one page of a full-text search against the ``_INDEX_NAME`` index.

    Matches are ordered by their ``date`` field, newest first.

    Args:
        query_string: The search query to execute.
        limit: Maximum number of results to return for this page.
        cursor: The ``search.Cursor`` that marks where this page starts.

    Returns:
        Whatever ``index.search`` returns, or ``None`` when the search raises
        ``search.Error`` (the failure is logged).
    """
    try:
        newest_first = search.SortExpression(
            expression='date',
            direction=search.SortExpression.DESCENDING,
            default_value=datetime.now().date(),
        )
        # Sort at most 1000 matching results by date in descending order.
        ordering = search.SortOptions(expressions=[newest_first], limit=1000)
        page_options = search.QueryOptions(
            limit=limit,  # the number of results to return
            cursor=cursor,
            sort_options=ordering,
        )
        request = search.Query(query_string=query_string, options=page_options)
        return search.Index(name=_INDEX_NAME).search(request)
    except search.Error:
        logging.exception('Search failed')
        return None
class MainAdvIndexedPage(SearchBaseHandler):
    """Handles search requests for comments."""

    def get(self):
        """Render the first page of results for the ``query`` GET parameter.

        A request without a ``query`` parameter searches for the empty
        string.  ``find_documents`` returns ``None`` when the search fails;
        in that case the page is rendered with zero results instead of
        raising ``AttributeError``.
        """
        regionname = 'Delhi'
        region = Region.all().filter('name = ', regionname).get()
        uri = urlparse(self.request.uri)
        query = ''
        if uri.query:
            # Other GET parameters may be present without ``query``.
            query = parse_qs(uri.query).get('query', [''])[0]
        results = find_documents(query, 50, search.Cursor())
        next_cursor = None
        number_returned = 0
        if results is not None:
            next_cursor = results.cursor
            number_returned = len(results.results)
        template_values = {
            'results': results,
            'next_cursor': next_cursor,
            'number_returned': number_returned,
            # NOTE(review): url and url_linktext are not assigned anywhere in
            # this snippet -- presumably defined at module level; confirm.
            'url': url,
            'user': users.get_current_user(),
            'url_linktext': url_linktext,
            'region': region,
            'city': '',
            'request': self.request,
            'form': SearchForm(),
            'query': query,
        }
        self.render_template('indexed.html', template_values)
The code above works and does a search but it doesn't page the result. I wonder about the following code in the manual:
# The cursor attached to a page of results marks where the next page starts.
next_cursor = results.cursor
# web_safe_string is the string form of the cursor.
next_cursor_urlsafe = next_cursor.web_safe_string
# save next_cursor_urlsafe (e.g. as a GET parameter of the "next" link)
...
# restore next_cursor_urlsafe and rebuild a Cursor object from the string
results = find_documents(query_string, 20,
search.Cursor(web_safe_string=next_cursor_urlsafe))
What is next_cursor used for? How do I save it, and what is the purpose of saving it? How do I get a cursor in the first place? Should the code look something like this instead, using memcache to save and restore the cursor?
class MainAdvIndexedPage(SearchBaseHandler):
"""Handles search requests for comments."""
def get(self):
"""Handles a get request with a query."""
regionname = 'Delhi'
region = Region.all().filter('name = ', regionname).get()
uri = urlparse(self.request.uri)
query = ''
if uri.query:
query = parse_qs(uri.query)
query = query['query'][0]
# restore next_cursor_urlsafe
# NOTE(review): the memcache key is a constant, so every request reads and
# writes the same entry.
next_cursor_urlsafe = memcache.get('results_cursor')
# NOTE(review): last_cursor is never assigned in this snippet; the restored
# value lives in next_cursor_urlsafe.
if last_cursor:
# NOTE(review): query_string is not defined here; the local holding the
# search text is named query.
results = find_documents(query_string, 50,
search.Cursor(web_safe_string=next_cursor_urlsafe))
# NOTE(review): there is no else before this call, so it replaces whatever
# the cursor-based call above returned.
results = find_documents(query, 50, search.Cursor())
next_cursor = results.cursor
next_cursor_urlsafe = next_cursor.web_safe_string
# save next_cursor_urlsafe
# NOTE(review): this stores results.cursor itself, not the
# next_cursor_urlsafe string computed on the previous line.
memcache.set('results_cursor', results.cursor)
template_values = {
'results': results,'next_cursor':next_cursor,
'number_returned': len(results.results),
'url': url, 'user' : users.get_current_user(),
'url_linktext': url_linktext, 'region' : region, 'city' : '', 'request' : self.request, 'form' : SearchForm(), 'query' : query
}
self.render_template('indexed.html', template_values)
Update
From what I see from the answer, I'm supposed to use an HTTP GET query string to save the cursor but I still don't know exactly how. Please tell me how.
Update 2
This is my new effort.
def get(self):
"""Handles a get request with a query."""
regionname = 'Delhi'
region = Region.all().filter('name = ', regionname).get()
# The "cursor" GET parameter arrives as a plain string.
cursor = self.request.get("cursor")
uri = urlparse(self.request.uri)
query = ''
if uri.query:
query = parse_qs(uri.query)
query = query['query'][0]
logging.info('search cursor: %s', search.Cursor())
if cursor:
# NOTE(review): the request string is handed over as-is; it is not wrapped
# in search.Cursor(web_safe_string=...) the way the manual snippet does.
results = find_documents(query, 50, cursor)
else:
results = find_documents(query, 50, search.Cursor())
next_cursor = None
# Only expose a cursor when the search succeeded and returned one.
if results and results.cursor:
next_cursor = results.cursor.web_safe_string
logging.info('next cursor: %s', str(next_cursor))
template_values = {
'results': results,'cursor':next_cursor,
# NOTE(review): unlike the cursor lookup above, this is not guarded against
# results being None.
'number_returned': len(results.results),
'user' : users.get_current_user(),
'region' : region, 'city' : '', 'request' : self.request, 'form' : SearchForm(), 'query' : query
}
I think that I've understood how it's supposed to work with the above, and it's outputting a cursor at the first hit so I can know how to get the cursor in the first place. This is clearly documented enough. But I get this error message: cursor must be a Cursor, got unicode

No, you should not use memcache for that, especially with a constant key like 'results_cursor' - that would mean that all users would get the same cursor, which would be bad.
You are already passing the cursor to the template context (although you should be converting to the web_safe_string as you do in the second example). In the template, you should ensure that the cursor string is included in the GET parameters of your "next" button: then, back in the view, you should extract it from there and pass it into the find_documents call.
Apart from the memcache issue, you're almost there with the second example, but you should obviously ensure that the second call to find_documents is inside an else block so it doesn't overwrite the cursor version.

Related

Django-3.1/DRF/React: Unable to save nested images (linked through GenericRelation)

I am building a Django+DRF/React app (simple blog app) and i am facing difficulties saving nested images
Model Structure
Model:
Post
Children:
details: ContentType Model ( DRF: save is successfull )
images: ContentType Model ( DRF : save is not successfull )
Process
Send images from <input type="file" multiple />
Process data through FormData
Catch request.data and process it
class PostFormView(generics.RetrieveUpdateDestroyAPIView):
    # Retrieve / update / delete endpoint for one Post, looked up by slug.
    queryset = Post._objects.is_active()
    serializer_class = PostModelSerializer
    permission_classes = (IsOwnerOr401,)
    parser_classes = (parsers.MultiPartParser, parsers.JSONParser,
                      parsers.FormParser, parsers.FileUploadParser)
    lookup_field = 'slug'
    lookup_url_kwarg = 'slug'

    def get_queryset(self):
        """Return the base queryset with the shared per-request annotations."""
        return super().get_queryset().annotate(**sharedAnnotations(request=self.request))

    def update(self, request, *args, **kwargs):
        """Update the post, turning each uploaded image into a nested payload.

        Every entry submitted under ``images`` that is not already a dict is
        wrapped as ``{'image', 'object_id', 'content_type'}`` pointing at the
        post being updated; dict entries are passed through unchanged.
        """
        data = request.data
        # Resolve the target once instead of twice per uploaded image.
        instance = self.get_object()
        images = []
        for item in data.getlist('images'):
            if isinstance(item, dict):
                # Already in nested form: keep the entry itself (the original
                # appended the ``images`` list to itself here).
                images.append(item)
                continue
            images.append({
                'image': item,
                'object_id': instance.pk,
                'content_type': instance.get_content_type().pk,
            })
        # NOTE(review): item assignment on a QueryDict stores the value as a
        # single list element, which matches the ``'images': [[...]]`` shape
        # in the printed output -- confirm this is what the serializer expects.
        data['images'] = images
        print(data)
        partial = kwargs.pop('partial', False)
        serializer = self.get_serializer(instance, data=data, partial=partial)
        serializer.is_valid(raise_exception=True)
        self.perform_update(serializer)
        if getattr(instance, '_prefetched_objects_cache', None):
            # Drop the prefetch cache so the response does not serve stale
            # related objects.
            instance._prefetched_objects_cache = {}
        return Response(serializer.data)
Save images (FAIL):
class MediaModelSerializer(ContentTypeModelSerializer):
    # The object_id and content_type fields come from the parent class, so
    # they (and their create/update handling) are not repeated here.

    class Meta:
        model = Media
        fields = '__all__'
class PostModelSerializer(WritableNestedModelSerializer):
    # Serializer for Post with writable nested ``images`` and ``details``.
    is_active = serializers.BooleanField(default=True)
    path = serializers.HyperlinkedIdentityField(
        view_name="api:post-detail",
        lookup_field='slug',
    )
    images = MediaModelSerializer(many=True)
    details = DetailModelSerializer(required=False, many=True)

    # annotated fields
    is_author = serializers.BooleanField(read_only=True, default=False)

    class Meta:
        model = Post
        fields = '__all__'
        read_only_fields = ['is_locked', 'slug', 'user', 'is_author']

    def create(self, validated_data):
        """Delegate creation to the parent serializer unchanged."""
        return super().create(validated_data)

    def update(self, instance, validated_data):
        """Delegate the update to the parent serializer unchanged."""
        return super().update(instance, validated_data)
The print(data) statement from PostFormView.update(self, request, *args, **kwargs) (after manipulation) returns this:
<QueryDict: {'id': ['8'], ..., 'images': [[{'image': <InMemoryUploadedFile: bmw_3.jpeg (image/jpeg)>, 'object_id': 8, 'content_type': 20}, {'image': <InMemoryUploadedFile: bmw_2.jpeg (image/jpeg)>, 'object_id': 8, 'content_type': 20}, {'image': <InMemoryUploadedFile: bmw_1.jpeg (image/jpeg)>, 'object_id': 8, 'content_type': 20}]]}>
Server returns 400_BAD_REQUEST because images were not passed to PostModelSerializer
{"images":["This field is required."]}
i've been facing this issue for 3 days and i can't wrap my head around the root cause.
Thank you for your help.
I have been looking all over the internet but could not find any answers, so I had to go this way.
I have removed the processing part from PostFormView.update(...) and accessed the images directly in the create & update methods of the ModelSerializer. I'll figure out later on how to handle removing these images
Here's the code:
class PostModelSerializer(WritableNestedModelSerializer):
    # Serializer for Post.  ``images`` is read-only here: uploaded files are
    # taken straight from the request and attached after the post is saved.
    is_active = serializers.BooleanField(default=True)
    path = serializers.HyperlinkedIdentityField(
        view_name="api:post-detail", lookup_field='slug')
    images = MediaModelSerializer(read_only=True, many=True)
    details = DetailModelSerializer(required=False, many=True)
    # annotated fields
    is_author = serializers.BooleanField(read_only=True, default=False)

    class Meta:
        model = Post
        fields = '__all__'
        read_only_fields = ['is_locked', 'slug', 'user', 'is_author']

    def create(self, validated_data):
        """Create the post, then attach any images uploaded with the request."""
        instance = super().create(validated_data)
        self._attach_uploaded_images(instance)
        return instance

    def update(self, instance, validated_data):
        """Update the post, then attach any images uploaded with the request."""
        instance = super().update(instance, validated_data)
        self._attach_uploaded_images(instance)
        return instance

    def _attach_uploaded_images(self, instance):
        """Create one related image per file sent under the ``images`` key.

        The images are attached to *instance* (the object just saved), not to
        ``self.instance``: during ``create()`` the serializer's ``instance``
        attribute has not been assigned yet, so the original code failed there
        and the failure was silently swallowed.

        This stays best-effort: a failure is logged and the remaining images
        are skipped, but the post itself is still returned.
        """
        import logging  # local import keeps this snippet self-contained

        request = self.context.get('request', None)
        if not request:
            return
        try:
            for image in request.data.getlist('images'):
                instance.images.create(image=image)
        except Exception:
            # NOTE(review): request.data presumably lacks getlist() for
            # non-form payloads; that case also ends up here -- confirm.
            logging.exception('Could not attach uploaded images to post %s',
                              getattr(instance, 'pk', None))
If anyone has faced this issue before and managed to resolve it, please post your answer below.
Thank you !

Matomo API "Actions.getPageUrls" returns only 100 rows on REST API call

I am trying to fetch data from matomo api "Actions.getPageUrls" by using below code:
import requests
import pandas as pd
# Endpoint and credentials are placeholders.
api_url = "baseapi"

# Request parameters for the Matomo reporting API call.
PARAMS = {
    'module': 'API',
    'method': 'Actions.getPageUrls',
    'period': 'range',
    'date': '2019-01-01,2020-01-01',
    'filter_limit': '-1',
    'idSite': '1',
    'format': 'JSON',
    'expanded': '1',
    'token_auth': "tocken",
}

# NOTE(review): verify=False turns off TLS certificate verification.
r = requests.post(url=api_url, params=PARAMS, verify=False)
print(r.url)

# Top-level rows of the report.
payload = r.json()
matomo_df = pd.DataFrame(payload)
matomo_df.head()
matomo_df['label']

# Rows nested under the first top-level entry.
matomo_df = pd.DataFrame(payload[0]['subtable'])
matomo_df
But, it returns only 100 rows.
I want to get more than 100 rows. Could you please help me.
By default it is set to return only 100 rows; however, when you set 'filter_limit' to -1, it is supposed to return all the rows. Can you set the 'filter_limit' param to 10000 and try it?

Wagtail Snippets permissions per group

I have a Wagtail site where every group can work on a different page tree, with different images and documents permissions.
That is a multisite setup where I am trying to keep sites really separate.
Is that possible to limit the snippets permissions on a per-group basis?
I would like my groups to see just a subset of the snippets.
I was facing something similar when I wanted to use Site settings.
The only solution I found was to create a custom model and using ModelAdmin.
Some ‘snippets’ to get you on the run:
class SiteSettings(models.Model):
    # Company details attached to a single Site through a one-to-one link.
    base_form_class = SiteSettingsForm

    COMPANY_FORM_CHOICES = (
        ('BED', 'Bedrijf'),
        ('ORG', 'Organisatie'),
        ('STI', 'Stichting'),
        ('VER', 'Vereniging'),
    )

    # The site these settings belong to; deleting the site deletes them too.
    site = models.OneToOneField(
        Site,
        unique=True,
        db_index=True,
        on_delete=models.CASCADE,
        verbose_name=_('site'),
        related_name='site_settings',
        help_text=_('The sites these setting belong to.'),
    )

    company_name = models.CharField(
        _('company name'),
        blank=True,
        max_length=50,
        help_text=_('De naam van het bedrijf of de organisatie.'),
    )

    # NOTE(review): the default 'COM' is not one of the keys listed in
    # COMPANY_FORM_CHOICES -- confirm which value was intended.
    company_form = models.CharField(
        _('company form'),
        max_length=3,
        blank=True,
        default='COM',
        choices=COMPANY_FORM_CHOICES,
    )
...
class MyPermissionHelper(PermissionHelper):
    """Limits editing to objects that belong to the user's own site."""

    def user_can_edit_obj(self, user, obj):
        """Return whether *user* may edit *obj*.

        Superusers get the parent helper's verdict unchanged.  Any other user
        must pass the parent helper's check AND be tied to the same site as
        *obj*.  The original overwrote the parent result with the site
        comparison alone, so a site match granted edit rights even when the
        regular permission check had refused them.
        """
        allowed = super().user_can_edit_obj(user, obj)
        if user.is_superuser:
            return allowed
        user_site = get_user_site(user)
        # bool() so callers always get True/False rather than a site or None.
        return bool(allowed and user_site and user_site == obj.site)
class SiteSettingsAdmin(ThumbnailMixin, ModelAdmin):
    # Admin registration for SiteSettings, shown under the settings menu.
    model = SiteSettings

    # Menu placement
    menu_label = _('Site settings')
    menu_icon = 'folder-open-inverse'
    add_to_settings_menu = True

    # Listing
    list_display = ['admin_thumb', 'company_name', 'get_categories']
    list_select_related = True
    list_display_add_buttons = 'site'

    # Thumbnail column
    thumb_image_field_name = 'logo'
    thumb_col_header_text = _('logo')

    # Custom permission checks and create view
    permission_helper_class = MyPermissionHelper
    create_view_class = CreateSiteSettingsView
...
class CreateSiteSettingsView(SiteSettingsViewMixin, CreateView):
    """Create view for site settings, limited to sites that have none yet."""

    # This has to be a (cached) property: dispatch() and get_form() read it as
    # an attribute.  The decorator had been turned into a comment, which left
    # a plain method whose bound-method object is always truthy.
    # NOTE(review): cached_property must be imported at module level
    # (presumably django.utils.functional.cached_property) -- confirm.
    @cached_property
    def sites_without_settings(self):
        """Return the sites without settings, flashing a notice when empty."""
        sites = get_sites_without_settings()
        if not sites:
            messages.info(
                self.request,
                _('No sites without settings found.')
            )
        return sites

    def dispatch(self, request, *args, **kwargs):
        """Send superusers back to the index when no site is left to set up."""
        if request.user.is_superuser and not self.sites_without_settings:
            return redirect(self.url_helper.get_action_url('index'))
        return super().dispatch(request, *args, **kwargs)

    def get_initial(self):
        """Prefill the company name (and, for superusers, the site)."""
        initial = super().get_initial().copy()
        current_site = self.request.site
        initial.update({'company_name': current_site.site_name})
        if self.request.user.is_superuser:
            initial.update({'site': current_site})
        return initial

    def get_form(self):
        """Restrict the superuser's site choices to sites without settings."""
        form = super().get_form()
        if self.request.user.is_superuser:
            fld = form.fields['site']
            fld.queryset = self.sites_without_settings.order_by(
                Lower('site_name')
            )
        return form

    def form_valid(self, form):
        """Save the settings, forcing the current site for non-superusers."""
        instance = form.save(commit=False)
        if not self.request.user.is_superuser:
            instance.site = self.request.site
        instance.save()
        messages.success(
            self.request, self.get_success_message(instance),
            buttons=self.get_success_message_buttons(instance)
        )
        return redirect(self.get_success_url())

CakePHP 3.x : transform updateAll() into save() loop, for a multiple edit page

I use the audit-stash plugin, which works fine with all my tables. But I have one particular function in which the user selects rows with checkboxes and then changes a specific field on all of them. The audits table contains a field called "primary_key", which does not seem to work in this case.
in my Controller, function, I put this:
$this->request->data;
$data = $this->request->data;

$isWriteRequest = $this->request->is(['patch', 'post', 'put']);
if ($isWriteRequest) {
    // Ids of the rows ticked in the form, and the room to move them to.
    $ids = $this->request->data('data.AssetsAssignations.id');
    $room_id = $this->request->data('room_id');

    // A single bulk UPDATE covering every selected row.
    $fields = ['room_id ' => $room_id];
    $conditions = ['id IN' => $ids];
    $this->AssetsAssignations->updateAll($fields, $conditions);
}
in my table, I used this:
$this->addBehavior('AuditStash.AuditLog');
I was told that there is no way around this for audit-stash, because updateAll bypasses model callbacks by directly sending a query to the database.
I was suggested to update records one by one if I need to keep the log.
How can I transform my updateAll() code into a Save() loop ?
This try did not work for me, using save() and saveMany() :
$this->request->data;
$data = $this->request->data;
if($this->request->is(['patch', 'post', 'put']))
{
// Ids of the rows ticked in the form, and the status to apply to them.
$ids = $this->request->data('data.AssetsAssignations.id');
$asset_status_id = $this->request->data('asset_status_id');
foreach($ids as $id) {
// NOTE(review): $id is never used inside the loop, and saveMany() receives
// a plain field => value array rather than entities loaded for each id.
$this->AssetsAssignations->saveMany(
['asset_status_id ' => $asset_status_id ]
);
}
}
thanks in advance.
Actually you don't have to call get($id) for every id. That gets the entity from the table and causes a lot of useless queries.
if ($this->request->is(['patch', 'post', 'put'])) {
    $ids = $this->request->data('data.AssetsAssignations.id');
    $asset_status_id = $this->request->data('asset_status_id');
    $assetsAssignationsTable = TableRegistry::get('AssetsAssignations');

    // Save row by row, building each entity by hand instead of loading it.
    foreach ($ids as $id) {
        $entity = $assetsAssignationsTable->newEntity(); // starts out empty
        $entity->id = $id;                               // point it at the existing row
        $entity->asset_status_id = $asset_status_id;
        $assetsAssignationsTable->save($entity);
    }
}
Thanks to Greg, this code worked for me:
use Cake\ORM\TableRegistry;
...
if ($this->request->is(['patch', 'post', 'put'])) {
    $ids = $this->request->data('data.AssetsAssignations.id');
    $asset_status_id = $this->request->data('asset_status_id');
    $assetsAssignationsTable = TableRegistry::get('AssetsAssignations');

    // Load, change and save each selected row individually.
    foreach ($ids as $id) {
        $entity = $assetsAssignationsTable->get($id); // the existing row with this id
        $entity->asset_status_id = $asset_status_id;
        $assetsAssignationsTable->save($entity);
    }
}

Why "InvalidRequest" on 2nd hit of page 2 of pagination?

I do pagination and a link that works to page 2 looks like this
/q?query=regionID%3D4700188&cursor=False:CqoECuYBCtgB_wDAQM8HgAD_AP8A_wD__wDAQCQA_wD_AP8A_wD_AP__AMBAHAD_AP8A_wD_AP8A___-9wllzNf_Z_-_jIGSkJGLnpCPjZCVmpyL_wB0baCgmYuMoKD_AF2ej4-akZiWkZr_AHN0bZaRm5qH_wBdnpuM_wBzdG2bkJyglpv_AF2emMaFmZLOiZ2RrZedzL2Gnc2Pk6bMrYa8iIysvpS5lLi0nsrOiLKy_wBzf56YxoWZks6JnZGtl53MvYadzY-TpsythryIjKy-lLmUuLSeys6IsrL_AP_-EDIh4o_3av1KJSkSDURvY3VtZW50SW5kZXganwEoQU5EIChJUyAiY3VzdG9tZXJfbmFtZSIgImFwcGVuZ2luZSIpIChJUyAiZ3JvdXBfbmFtZSIgInN-bW9udGFvcHJvamVjdCIpIChJUyAibmFtZXNwYWNlIiAiIikgKElTICJpbmRleF9uYW1lIiAiYWRzIikgKEVRIDQ3MDAxODguMDAwMDAwMDAwMDAgInNuYnJfcmVnaW9uSUQiKSk6GwoOKE4gc2RhdGVfZGF0ZSkQARkAAABzFPtzQjoaCg0oTiBzbmJyX2hvdXIpEAEZAAAAAAAAEEA6HAoPKE4gc25icl9taW51dGUpEAEZAAAAAAAAMkBKHAgBOhVzdDpidGlfZ2VuZXJpY19zY29yZXJAkE5SGQoMKE4gb3JkZXJfaWQpEAEZAAAAAAAA8P8
But every now and then, seemingly random, I get error message that doesn't seem to be a timeout but an error with the query. When I hit reload I got this error, is it a timeout error?
"GET /q?query=regionID%3D4700188&cursor=False:CqoECuYBCtgB_wDAQM8HgAD_AP8A_wD__wDAQCQA_wD_AP8A_wD_AP__AMBAHAD_AP8A_wD_AP8A___-9wllzNf_Z_-_jIGSkJGLnpCPjZCVmpyL_wB0baCgmYuMoKD_AF2ej4-akZiWkZr_AHN0bZaRm5qH_wBdnpuM_wBzdG2bkJyglpv_AF2emMaFmZLOiZ2RrZedzL2Gnc2Pk6bMrYa8iIysvpS5lLi0nsrOiLKy_wBzf56YxoWZks6JnZGtl53MvYadzY-TpsythryIjKy-lLmUuLSeys6IsrL_AP_-EDIh4o_3av1KJSkSDURvY3VtZW50SW5kZXganwEoQU5EIChJUyAiY3VzdG9tZXJfbmFtZSIgImFwcGVuZ2luZSIpIChJUyAiZ3JvdXBfbmFtZSIgInN-bW9udGFvcHJvamVjdCIpIChJUyAibmFtZXNwYWNlIiAiIikgKElTICJpbmRleF9uYW1lIiAiYWRzIikgKEVRIDQ3MDAxODguMDAwMDAwMDAwMDAgInNuYnJfcmVnaW9uSUQiKSk6GwoOKE4gc2RhdGVfZGF0ZSkQARkAAABzFPtzQjoaCg0oTiBzbmJyX2hvdXIpEAEZAAAAAAAAEEA6HAoPKE4gc25icl9taW51dGUpEAEZAAAAAAAAMkBKHAgBOhVzdDpidGlfZ2VuZXJpY19zY29yZXJAkE5SGQoMKE4gb3JkZXJfaWQpEAEZAAAAAAAA8P8 HTTP/1.1" 200 8611 "http://www.koolbusiness.com/q?query=regionID%3D4700188" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36" "www.koolbusiness.com" ms=133 cpu_ms=42 cpm_usd=0.060962 app_engine_release=1.8.1 instance=00c61b117cd1983617eb4b0968a84b71874563
D 2013-07-06 06:20:37.095
query regionID=4700188
E 2013-07-06 06:20:37.120
Search failed
Traceback (most recent call last):
File "/base/data/home/apps/s~montaoproject/2013e.368581150756737282/search_demo.py", line 87, in find_documents
return index.search(query)
File "/python27_runtime/python27_lib/versions/1/google/appengine/api/search/search.py", line 2732, in search
_CheckStatus(response.status())
File "/python27_runtime/python27_lib/versions/1/google/appengine/api/search/search.py", line 413, in _CheckStatus
raise _ERROR_MAP[status.code()](status.error_detail())
InvalidRequest: Failed to execute search request "regionID=4700188"
Code
def find_documents(query_string, limit, cursor):
    """Run one page of a search against the ``_INDEX_NAME`` index.

    Matches are ordered by ``date``, then ``hour``, then ``minute``, all
    descending.  A document that lacks one of those fields sorts as if it
    carried the current date / hour / minute.

    Args:
        query_string: The search query to execute.
        limit: Maximum number of results to return for this page.
        cursor: The ``search.Cursor`` that marks where this page starts.

    Returns:
        Whatever ``index.search`` returns, or ``None`` when the search raises
        ``search.Error`` (the failure is logged).
    """
    try:
        descending = search.SortExpression.DESCENDING
        sort_keys = [
            search.SortExpression(expression='date',
                                  direction=descending,
                                  default_value=datetime.now().date()),
            search.SortExpression(expression='hour',
                                  direction=descending,
                                  default_value=datetime.now().hour),
            search.SortExpression(expression='minute',
                                  direction=descending,
                                  default_value=datetime.now().minute),
        ]
        # Sort up to 10000 matching results, newest first.
        ordering = search.SortOptions(expressions=sort_keys, limit=10000)
        page_options = search.QueryOptions(
            limit=limit,  # the number of results to return
            cursor=cursor,
            sort_options=ordering,
            number_found_accuracy=10000,
        )
        request = search.Query(query_string=query_string, options=page_options)
        return search.Index(name=_INDEX_NAME).search(request)
    except search.Error:
        logging.exception('Search failed')
        return None
# (region id, display name) pairs for India; the first entry stands for the
# whole country.
regions_in = [
    ('3', u'Entire India'),
    ('4703187', u'Andaman & Nicobar Islands'),
    ('4694186', u'Andhra Pradesh'),
    ('4699188', u'Arunachal Pradesh'),
    ('4692186', u'Assam'),
    ('4702186', u'Bihar'),
    ('4698185', u'Chandigarh'),
    ('4676188', u'Chhattisgarh'),
    ('4691190', u'Dadra & Nagar Haveli'),
    ('4704183', u'Daman & Diu'),
    ('4699183', u'Delhi'),
    ('4702187', u'Goa'),
    ('4691189', u'Gujarat'),
    ('4700186', u'Haryana'),
    ('4703185', u'Himachal Pradesh'),
    ('4694187', u'Jammu & Kashmir'),
    ('4699189', u'Jharkhand'),
    ('4701185', u'Karnataka'),
    ('4695189', u'Kerala'),
    ('4700189', u'Lakshadweep'),
    ('4697186', u'Madhya Pradesh'),
    ('4694184', u'Maharashtra'),
    ('4700187', u'Manipur'),
    ('4703186', u'Meghalaya'),
    ('4698184', u'Mizoram'),
    ('4692187', u'Nagaland'),
    ('4696185', u'Orissa'),
    ('4676189', u'Pondicherry'),
    ('4693185', u'Punjab'),
    ('4701186', u'Rajasthan'),
    ('4701187', u'Sikkim'),
    ('4701188', u'Tamil Nadu'),
    ('4697187', u'Tripura'),
    ('4699190', u'Uttaranchal'),
    ('4692188', u'Uttar Pradesh'),
    ('4700188', u'West Bengal'),
]
class RegionSearch(SearchBaseHandler):
"""Handles regional search requests."""
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below cannot be read off the text; comments describe each
# statement on its own.
def get(self):
"""Handles a get request with a query."""
category = None
cityentity = None
next_cursor = None
# The country label is derived from the host name the request came in on.
country = ''
if self.request.host.find('hipheap') > -1: country = 'USA'
elif self.request.host.find('koolbusiness') > -1: country = 'India'
elif self.request.host.find('montao') > -1: country = 'Brasil'
number_returned = 0
regionname = None
cityname = None
regionentity = None
region = None
# The paging cursor arrives as a string in the "cursor" GET parameter.
cursor = self.request.get('cursor')
uri = urlparse(self.request.uri)
query = ''
# Region, city and category ids may come as GET parameters (default 0)...
regionID = regionid = self.request.get('regionid', 0)
cityID = cityid = self.request.get('cityid', 0)
categoryID = categoryid = self.request.get('category', 0)
if uri.query:
query = parse_qs(uri.query)
try:
query = query['query'][0]
except KeyError, err:
query = ''
# logging.info('KeyError')
# Try find region ID and/or cityID and categoryID a.s.a.p.
# ...or be embedded in the query text itself, e.g. "regionID=4700188".
if regionid or query.find('regionID') > -1:
regionID = re.sub("^regionID=(\d+).*", r'\1', query)
region = Region.get_by_id(long(regionID))
regionname = region.name
if regionid:
regionID = regionid
region = Region.get_by_id(long(regionID))
regionname = region.name
if cityid or query.find('cityID') > -1:
cityID = re.sub("^.*cityID=(\d+).*", r'\1', query)
if cityid: cityID = cityid
city = montaomodel.City.get_by_id(long(cityID))
cityID = city.key().id()
cityentity = city
cityname = city.name
# A city determines its region.
region = Region.get_by_id(long(city.region.key().id()))
regionID = region.key().id()
if categoryid or query.find('category') > -1:
categoryID = re.sub("^.*category=(\d+).*", r'\1', query)
if categoryid: categoryID = categoryid
logging.debug('query %s', query)
# Continue from the supplied cursor string when there is one; otherwise
# start from a fresh cursor.
if cursor: results = find_documents(query, 50, search.Cursor(cursor))
else: results = find_documents(query, 50, search.Cursor())
# find_documents returns None when the search fails, hence the guard.
if results and results.cursor: next_cursor = results.cursor.web_safe_string
# namedquery keeps the filter clauses; query is stripped of them below.
namedquery = query
query = query.replace(' and company_ad=0', ''
).replace(' and company_ad=1', ''
).replace(' and category:(6010 OR 6020 OR 6030 OR 6040 OR 6090)'
, ''
).replace(' and category:(6010 OR 6020 OR 6030 OR 6040 OR 6090)'
, ''
).replace(' and category:(1020 OR 1010 OR 1030 OR 1050 OR 1080 OR 1100 OR 1090)'
, ''
).replace(' and category:(2010 OR 2030 OR 2040 OR 2080 OR 2070)'
, ''
).replace(' and category:(3040 OR 3050 OR 3030 OR 3060)'
, ''
).replace(' and category:(4010 OR 4020 OR 4040 OR 4030 OR 4090 OR 4060 OR 4070)'
, '')
query = re.sub("regionID=\d+", '', query)
query = query.replace('category and', '')
query = query.replace('type=s', '')
query = query.replace('type=w', '')
query = query.replace('type=r', '')
query = query.replace('type=b', '')
query = query.replace('cityID and', '')
query = query.replace('and ', '')
query = query.replace(' and', '')
query = query.replace('regionID', '')
# '=' is percent-encoded in both strings handed to the template.
query = query.replace('=', '%3D')
namedquery = namedquery.replace('=', '%3D')
query = re.sub("cityID%3D\d+", '', query)
query = re.sub("category%3D\d+", '', query)
query = query.replace(' ', ' ')
# to do: make into a dictionary for O(1) access
# NOTE(review): region_id_to_name is not defined in this listing.
if int(regionid) > 0: regionname = region_id_to_name[regionid]
#if regionID and query.find('cityID') < 1:
# region = Region.get_by_id(long(regionID))
form = SearchForm()
# Same pairs as regions_in, minus its leading 'Entire India' entry.
form.w.choices = [
('4703187', u'Andaman & Nicobar Islands'),
('4694186', u'Andhra Pradesh'),
('4699188', u'Arunachal Pradesh'),
('4692186', u'Assam'),
('4702186', u'Bihar'),
('4698185', u'Chandigarh'),
('4676188', u'Chhattisgarh'),
('4691190', u'Dadra & Nagar Haveli'),
('4704183', u'Daman & Diu'),
('4699183', u'Delhi'),
('4702187', u'Goa'),
('4691189', u'Gujarat'),
('4700186', u'Haryana'),
('4703185', u'Himachal Pradesh'),
('4694187', u'Jammu & Kashmir'),
('4699189', u'Jharkhand'),
('4701185', u'Karnataka'),
('4695189', u'Kerala'),
('4700189', u'Lakshadweep'),
('4697186', u'Madhya Pradesh'),
('4694184', u'Maharashtra'),
('4700187', u'Manipur'),
('4703186', u'Meghalaya'),
('4698184', u'Mizoram'),
('4692187', u'Nagaland'),
('4696185', u'Orissa'),
('4676189', u'Pondicherry'),
('4693185', u'Punjab'),
('4701186', u'Rajasthan'),
('4701187', u'Sikkim'),
('4701188', u'Tamil Nadu'),
('4697187', u'Tripura'),
('4699190', u'Uttaranchal'),
('4692188', u'Uttar Pradesh'),
('4700188', u'West Bengal'),
]
if region or cityentity:
# to do:use memcache
# The area drop-down lists the cities of the selected region.
form.area.choices = [] # to do: use memcache for the list
for cityitem in City.all().filter('region =',
region.key()).order('-vieworder').order('name'
).fetch(99999):
form.area.choices.append([str(cityitem.key().id()),
cityitem.name])
if cityentity:
form.area.data = str(cityentity.key().id())
# The currently selected region is removed from the region drop-down.
if self.request.host.find('hipheap') > -1:
if region and (str(region.key().id()), region.name) \
in form.w_us.choices:
form.w_us.choices.remove((str(region.key().id()),
region.name))
else:
if region and (str(region.key().id()), region.name) \
in form.w.choices:
form.w.choices.remove((str(region.key().id()),
region.name))
if region: regionname = region.name
# results is None after a failed search; number_returned then stays 0.
if results: number_returned = len(results.results)
# NOTE(review): 'regionname' appears twice in this dict, with the same value.
template_values = {
'results': results,'regionname':regionname,
'cursor': next_cursor,
'country': country,
'user': self.current_user,
'number_returned': number_returned,
'loggedin': self.logged_in,
'VERSION': VERSION,
'region': region,
'regionname': regionname,
'jobs_count': get_jobs_count(self, regionID, cityID),
'estate_count': get_estate_count(self, regionID, cityID),
'electronics_count': get_electronics_count(self, regionID,
cityID),
'home_count': get_home_count(self, regionID, cityID),
'leisure_count': get_leisure_count(self, regionID, cityID),
'vehicles_count': get_vehicles_count(self, regionID,
cityID),
'cityentity': cityentity,
'request': self.request,
'categoryID': categoryID,
'form': form,
'query': query,
'namedquery': namedquery,
'cityname': cityname,
'category': category,
}
self.render_template('q.htm', template_values)
class India(SearchBaseHandler):
    """Handles country-wide search requests."""

    def get(self):
        """Render one page of country-wide results for the ``query`` parameter.

        The ``cursor`` GET parameter, when present, selects the page to show.
        ``find_documents`` returns ``None`` when the search fails; the page is
        then rendered with zero results instead of raising
        ``AttributeError``, matching what ``RegionSearch`` already does.
        """
        region = None
        cursor = self.request.get('cursor')

        # The country label is derived from the host the request came in on.
        country = 'India'
        host = self.request.host
        if host.find('hipheap') > -1:
            country = 'USA'
        elif host.find('koolbusiness') > -1:
            country = 'India'
        elif host.find('montao') > -1:
            country = 'Brasil'

        uri = urlparse(self.request.uri)
        query = ''
        if uri.query:
            # A missing ``query`` parameter means "search for everything".
            query = parse_qs(uri.query).get('query', [''])[0]

        if cursor:
            results = find_documents(query, 50, search.Cursor(cursor))
        else:
            results = find_documents(query, 50, search.Cursor())

        next_cursor = None
        if results and results.cursor:
            next_cursor = results.cursor.web_safe_string
        number_returned = len(results.results) if results is not None else 0

        # Strip the company filter from the text shown back to the user.
        query = query.replace(' and company_ad=0', ''
                              ).replace(' and company_ad=1', '')
        regionname = 'Entire India'
        regionID = 0
        cityID = 0

        # NOTE(review): the original listed 'form' and 'region' twice in this
        # dict.  The later 'form': SearchForm() won, discarding a form whose
        # w.choices had been set to region_id_to_name.  The effective values
        # are kept here -- confirm which form was intended.
        template_values = {
            'regions': region_id_to_name,
            'results': results,
            'cursor': next_cursor,
            'country': country,
            'number_returned': number_returned,
            'jobs_count': get_jobs_count_india(self, regionID, cityID),
            'estate_count': get_estate_count_india(self, regionID, cityID),
            'electronics_count': get_electronics_count_india(self, regionID,
                                                             cityID),
            'home_count': get_home_count_india(self, regionID, cityID),
            'leisure_count': get_leisure_count_india(self, regionID, cityID),
            'vehicles_count': get_vehicles_count_india(self, regionID,
                                                       cityID),
            'user': users.get_current_user(),
            'loggedin': self.logged_in,
            'region': region,
            'regionname': regionname,
            'city': '',
            'cityentity': None,
            'request': self.request,
            'form': SearchForm(),
            'query': query,
        }
        self.render_template('q.htm', template_values)
The log file where the first hit is OK and the reload fails is here.
Update
The bug is changing. Now it just didn't happen until the third hit of the 2nd page. It doesn't make sense at all not to encounter a pagination error until the third hit of the second page. This is absurd so tell me what is going on and how to investigate the trace? I changed the code to say more about the trace but python doesn't inform me what it is:
It's not only RegionSearch that's failing; searching for the empty string also fails on pages after page 2. Since it never happens on the first page, I suspect it has something to do with the cursor, but I've been totally unable to figure out what is wrong with the cursor. The exception doesn't tell me anything: it just says "this is not working" and nothing about why. I changed the handling, but it didn't generate any more output about the error:
return index.search(query)
except search.Error as e:
logging.exception('Search failed %s', e)
It does occur randomly, without seeming to be a timeout, and the exception does not say what the problem is: all it says is "search failed" and the query that fails, a query which works if I put it in the search field. So it doesn't make sense at all.
I can only guess that it's RegionSearch that's failing.
You'll need to handle exceptions on your queries.
See https://developers.google.com/appengine/docs/python/datastore/exceptions
The exception will tell you what the error is. Hard to guess.
EDIT:
try:
# do query
except search.PutError as e:
logging.exception('caught PutError %s', e)
except search.InternalError as e:
logging.exception('caught InternalError %s', e)
except search.DeleteError as e:
logging.exception('caught DeleteError %s', e)
except search.TransientError as e:
logging.exception('caught TransientError %s', e)
except search.InvalidRequest as e:
logging.exception('caught InvalidError %s', e)
except search.Error as e:
logging.exception('caught unknown error %s', e)
Try setting the SortExpression's default_value to None, that worked for me.
I was getting the same issue in my Test/QA instance however in my PROD instance worked fine, setting the default_value to None solved the problem in the Test/QA instance for good.

Resources