Selenium webscraping TimeoutException with stacktrace of pointers - selenium-webdriver

I am currently using selenium to web-scrape for articles on the RSC corpus. I keep running into an error due to the line:
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.capsule.capsule--article')))
I have tried both EC.presence_of_element_located and EC.visibility_of_element_located, and both give me the same TimeoutException error. I am not sure what else I can try to get rid of the TimeoutException, as the CSS selector is correct, and the URL properly loads the query on RSC.
Here is my stacktrace:
Traceback (most recent call last):
File "rsc.py", line 40, in <module>
main(url, query, page, location)
File "rsc.py", line 22, in main
dois = scraper.get_doi(query=query, page=page)
File "/home/ssarrouf/Documents/GitHub/WaterRemediationParser/batterydataextractor-main/batterydataextractor/scrape/rsc.py", line 62, in get_doi
_ = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.capsule.capsule--article')))
File "/home/ssarrouf/.pyenv/versions/3.8.16/lib/python3.8/site-packages/selenium/webdriver/support/wait.py", line 95, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
Stacktrace:
#0 0x55e10fae5d93 <unknown>
#1 0x55e10f8b42d7 <unknown>
#2 0x55e10f8f0caa <unknown>
#3 0x55e10f8f0db1 <unknown>
#4 0x55e10f92e8f4 <unknown>
#5 0x55e10f91461d <unknown>
#6 0x55e10f92c619 <unknown>
#7 0x55e10f914353 <unknown>
#8 0x55e10f8e3e40 <unknown>
#9 0x55e10f8e5038 <unknown>
#10 0x55e10fb398be <unknown>
#11 0x55e10fb3d8f0 <unknown>
#12 0x55e10fb1df90 <unknown>
#13 0x55e10fb3eb7d <unknown>
#14 0x55e10fb0f578 <unknown>
#15 0x55e10fb63348 <unknown>
#16 0x55e10fb634d6 <unknown>
#17 0x55e10fb7d341 <unknown>
#18 0x7f0fad5a9b43 <unknown>
My code in the "get_doi" method:
def get_doi(self, query, page):
    """
    Get a list of DOIs from the query text and the exact results page.

    :param query: the query text (e.g. battery materials); only used when
        ``self.url`` is None
    :param page: the number of the results page to load
    :return: a list with the link text of every DOI link on that page
    """
    # Remember whether the browser is created here, so that it can be closed
    # again instead of leaking one Chrome process per call.
    owns_driver = self.driver is None
    driver = webdriver.Chrome() if owns_driver else self.driver
    if self.url is None:
        url = "http://pubs.rsc.org/en/results?searchtext=" + query
    else:
        url = self.url
    try:
        driver.get(url)
        wait = WebDriverWait(driver, self.max_wait_time)
        # To make sure we don't overload the server
        sleep(1)
        next_button = wait.until(EC.visibility_of_all_elements_located(
            (By.CSS_SELECTOR, "a[class^=paging__btn]")))[1]
        # Point the second paging button at the requested page, then click it.
        page_string = ('document.querySelectorAll("a[class^=paging__btn]")[1]'
                       '.setAttribute("data-pageno", "' + str(page) + '")')
        driver.execute_script(page_string)
        next_button.click()
        sleep(1)
        # Block until at least one article capsule is present in the DOM.
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.capsule.capsule--article')))
        doi_lists = driver.find_elements(By.PARTIAL_LINK_TEXT, 'https://doi.org')
        return [doi.text for doi in doi_lists]
    finally:
        if owns_driver:
            driver.quit()
My code for downloading the articles:
def download_doi(self, doi, file_location):
    """
    Download the HTML version of the paper with the given DOI.

    :param doi: DOI of the paper (a bare DOI or a full https://doi.org/... link)
    :param file_location: directory in which the HTML file is saved
    :return: None
    :raises IndexError: if the DOI landing page contains no HTML article link
    """
    import os

    headers = {'User-Agent': 'Mozilla/5.0'}
    # Keep only the part after "org/" so full doi.org links are accepted too.
    doi = doi.split("org/")[-1]
    r = requests.get('http://doi.org/' + doi, headers=headers)
    # Dots are escaped so that they only match literal dots in the host name.
    result = re.findall(r'https://pubs\.rsc\.org/en/content/articlehtml/.*?"', r.text)
    if not result:
        # Some papers don't have html access; fail with an explicit message.
        raise IndexError("no HTML article link found for DOI " + doi)
    url = result[0][:-1]  # drop the closing quote captured by the pattern
    web_content = requests.get(url, headers=headers).content
    result = self.get_rsc_abstract(web_content)
    exact_date = result['date'].split("/")
    doi = result['doi'].replace("/", "_")
    if len(exact_date) == 3:
        name = "".join(exact_date) + '_' + doi
    else:
        # Fall back to the online publication date when 'date' is incomplete.
        name = result['online_date'].replace("/", "") + '_' + doi
    with open(os.path.join(file_location, name + '.html'), 'wb') as f:
        f.write(web_content)
And the main method I am calling for the web-scraping:
def main(url, query, page, file_location):
    """
    RSC web-scraper runner

    :param url: the scraping url (or default)
    :param query: query text (e.g. battery materials)
    :param page: the page number of the query pages
    :param file_location: saving location
    :return: None
    """
    scraper = RSCWebScraper(url=url)
    dois = scraper.get_doi(query=query, page=page)
    for doi in dois:
        try:
            scraper.download_doi(doi, file_location)
        except Exception:
            # Some papers don't have html access; skip them. Catching
            # Exception (not a bare except) keeps Ctrl-C able to stop the run.
            continue
if __name__ == "__main__":
    # Download papers within a certain date range.
    # Placeholders are filled in order: DateToYear, DateFromYear,
    # DateFromMonth, DateToMonth.
    url = (
        "https://pubs.rsc.org/en/results/all?Category=All&AllText=water%20remediation"
        "&IncludeReference=false&SelectJournal=false&DateRange=true&SelectDate=true"
        "&DateToYear={}&DateFromYear={}&DateFromMonth={}&DateToMonth={}"
        "&PriceCode=False&OpenAccess=false"
    ).format("2022", "2021", "06", "01")
    query = "water remediation"
    location = r"/home/ssarrouf/Documents/webscrape/to_date_papers/rsc/"
    # Scrape result pages 1 through 119.
    for page in range(1, 120):
        main(url, query, page, location)
Changing the method for
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.capsule.capsule--article')))
did not give me any results. I also changed the query / URL text to various months and years to make sure that the request was loading properly. None of this helped, and I got a TimeoutException on each attempt.

Related

Getting No such file or directory [[{{node ReadFile}}]] [[IteratorGetNext]] [Op:__inference_train_function_9137] error

This may have a simple answer, but I am currently building a neural network using Keras and ran into this problem with the following code:
EPOCHS = 50
# Callbacks all monitor the validation loss: lower the learning rate on a
# plateau, checkpoint the best weights, and stop early when nothing improves.
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.1, patience=10, verbose=1, mode='min', min_delta=0.0001),
    tf.keras.callbacks.ModelCheckpoint(
        'weights.tf', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True),
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', min_delta=0, patience=15, verbose=1, restore_best_weights=True),
]
history = model.fit(
    train_ds,
    validation_data=val_ds,
    verbose=1,
    callbacks=callbacks,
    epochs=EPOCHS,
)
# Reload the best checkpoint written by ModelCheckpoint before evaluating.
model.load_weights('weights.tf')
model.evaluate(val_ds)
Output:
Epoch 1/50
NotFoundError Traceback (most recent call last)
<ipython-input-15-265d39d703c7> in <module>
10 ]
11
---> 12 history = model.fit(
13 train_ds,
14 validation_data=val_ds,
1 frames
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53 ctx.ensure_initialized()
---> 54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
NotFoundError: Graph execution error:
train/60377.jpg; No such file or directory
[[{{node ReadFile}}]]
[[IteratorGetNext]] [Op:__inference_train_function_9137]
Here's my data:
FairFace Dataset from Kaggle
Here's how I preprocessed (through code I borrowed) the images from the FairFace dataset.
IMG_SIZE = 224
AUTOTUNE = tf.data.AUTOTUNE
BATCH_SIZE = 224
NUM_CLASSES = len(labels_map)

# Dataset creation: one-hot labels paired with the image file paths.
y_train = tf.keras.utils.to_categorical(train.race, num_classes=NUM_CLASSES, dtype='float32')
y_val = tf.keras.utils.to_categorical(val.race, num_classes=NUM_CLASSES, dtype='float32')
train_ds = tf.data.Dataset.from_tensor_slices((train.file, y_train)).shuffle(len(y_train))
val_ds = tf.data.Dataset.from_tensor_slices((val.file, y_val))
assert len(train_ds) == len(train.file) == len(train.race)
assert len(val_ds) == len(val.file) == len(val.race)


def map_fn(path, label):
    """Read the JPEG at *path*, resize it to IMG_SIZE x IMG_SIZE and return it with *label*."""
    image = tf.io.decode_jpeg(tf.io.read_file(path))
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label


# Read and resize the files. Each split is mapped from its own dataset, so
# train_ds keeps the training files instead of being rebuilt from val_ds.
train_ds = train_ds.map(map_fn, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(map_fn, num_parallel_calls=AUTOTUNE)
# Batch after resizing, so every image in a batch has the same shape.
train_ds = train_ds.batch(BATCH_SIZE)
val_ds = val_ds.batch(BATCH_SIZE)
# Performance enhancement - prefetch
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
I tried changing the jpg file name but to no avail.

using usocket seems to halt the loop (micropython)

I'm trying to code a simple program for a ESP32 board.
My main program is fairly simple and it has to run on a loop.
On the side, the device also needs to be able to respond to HTTP requests with a very simple response.
This is my attempt (a rework of https://randomnerdtutorials.com/micropython-esp32-esp8266-bme280-web-server/):
try:
import usocket as socket
except:
import socket
from micropython import const
import time
REFRESH_DELAY = const(60000) #millisecondi
def do_connect():
    """Activate the station interface and block until it has joined the Wi-Fi network."""
    import machine
    import network
    wlan = network.WLAN(network.STA_IF)
    wlan.active(True)
    if not wlan.isconnected():
        print('connecting to network...')
        # NOTE(review): HOST is not defined in this snippet; it is presumably
        # a module-level constant defined elsewhere.
        wlan.config(dhcp_hostname=HOST)
        wlan.connect('SSID', 'PSWD')
        while not wlan.isconnected():
            # Idle between checks instead of spinning in a tight loop.
            machine.idle()
    print('network config:', wlan.ifconfig())
import json
import esp
esp.osdebug(None)
import gc
gc.collect()
do_connect()
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((HOST, SENSOR_SCKT_PORT))
s.listen(5)
prevRun = 0
i = 0
while True:
    print("iteration #"+str(i))
    i += 1
    # run every 60 seconds
    curRun = int(round(time.time() * 1000))
    if curRun - prevRun >= REFRESH_DELAY:
        prevRun = curRun
        # MAIN PROGRAM
        # ......
        # whole bunch of code
        # ....
    # run continuously:
    # Reset per iteration so the handler below never closes an unbound or
    # stale connection when accept() itself fails.
    conn = None
    try:
        if gc.mem_free() < 102000:
            gc.collect()
        # NOTE: accept() blocks until a client connects, so the loop waits here.
        conn, addr = s.accept()
        conn.settimeout(3.0)
        print('Got a connection from %s' % str(addr))
        request = conn.recv(1024)
        conn.settimeout(None)
        request = str(request)
        #print('Content = %s' % request)
        measurements = 'some json stuff'
        conn.send('HTTP/1.1 200 OK\n')
        conn.send('Content-Type: text/html\n')
        conn.send('Connection: close\n\n')
        conn.send(measurements)
        conn.close()
    except OSError:
        if conn is not None:
            conn.close()
        print('Connection closed')
what happens is I only get the iteration #0, and then the while True loop halts.
If I ping this server with a HTTP request, I get a correct response, AND the loop advances to iteration #1 and #2 (no idea why it thinks I pinged it with 2 requests).
So it seems that socket.listen(5) is halting the while loop.
Is there any way to avoid this?
Any other solution?
I don't think that threading is an option here.
The problem is that s.accept() is a blocking call...it won't return until it receives a connection. This is why it pauses your loop.
The easiest solution is probably to check whether or not a connection is waiting before calling s.accept(); you can do this using either select.select or select.poll. I prefer the select.poll API, which would end up looking something like this:
import esp
import gc
import json
import machine
import network
import select
import socket
import time
from micropython import const
HOST = '0.0.0.0'
SENSOR_SCKT_PORT = const(1234)
REFRESH_DELAY = const(60000) # milliseconds
def wait_for_connection():
    """Block until the station interface reports a connection, then print its config."""
    print('waiting for connection...')
    wlan = network.WLAN(network.STA_IF)
    while not wlan.isconnected():
        # Idle between checks instead of spinning in a tight loop.
        machine.idle()
    print('...connected. network config:', wlan.ifconfig())
esp.osdebug(None)
gc.collect()
wait_for_connection()
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((HOST, SENSOR_SCKT_PORT))
s.listen(5)
# Watch the listening socket for readability so accept() is only called
# when a connection is already pending.
poll = select.poll()
poll.register(s, select.POLLIN)
prevRun = 0
i = 0
while True:
    print("iteration #"+str(i))
    i += 1
    # run every 60 seconds
    curRun = int(round(time.time() * 1000))
    if curRun - prevRun >= REFRESH_DELAY:
        prevRun = curRun
        # MAIN PROGRAM
        # ......
        # whole bunch of code
        # ....
    # run continuously:
    # Reset per iteration so the handler below never closes an unbound or
    # stale connection when poll() or accept() fails.
    conn = None
    try:
        if gc.mem_free() < 102000:
            gc.collect()
        # Wait at most 100 ms for a pending connection.
        events = poll.poll(100)
        if events:
            conn, addr = s.accept()
            conn.settimeout(3.0)
            print('Got a connection from %s' % str(addr))
            request = conn.recv(1024)
            conn.settimeout(None)
            request = str(request)
            # print('Content = %s' % request)
            measurements = 'some json stuff'
            conn.send('HTTP/1.1 200 OK\n')
            conn.send('Content-Type: text/html\n')
            conn.send('Connection: close\n\n')
            conn.send(measurements)
            conn.close()
    except OSError:
        if conn is not None:
            conn.close()
        print('Connection closed')
You'll note that I've taken a few liberties with your code to get it running on my device and to appease my sense of style; primarily, I've excised most of your do_connect method and put all the imports at the top of the file.
The only real changes are:
We create a select.poll() object:
poll = select.poll()
We ask it to monitor the s variable for POLLIN events:
poll.register(s, select.POLLIN)
We check if any connections are pending before attempting to handle a connection:
events = poll.poll(100)
if events:
conn, addr = s.accept()
conn.settimeout(3.0)
[...]
With these changes in place, running your code and making a request looks something like this:
iteration #0
iteration #1
iteration #2
iteration #3
iteration #4
iteration #5
iteration #6
Got a connection from ('192.168.1.169', 54392)
iteration #7
iteration #8
iteration #9
iteration #10
Note that as written here, your loop will iterate at least once every 100ms (and you can control that by changing the timeout on our call to poll.poll()).
Note: the above was tested on an esp8266 device (a Wemos D1 clone) running MicroPython v1.13-268-gf7aafc062.

Realtime output in CakePHP

I'd like to print the output of a program in php in "real time" (buffers are not important). The process takes a long time and having the (partial) data earlier would be very helpful.
Usually I'd use plain passthru() but this is done in CakePHP and it doesn't output anything until I do this:
$this->response->file($file, array('download' => true));
return $this->response;
If I just remove these lines and swap the exec() with a passthru() I get a MissingViewException
Error: [MissingViewException] View file "Songs/download.ctp" is missing.
And If I do this
$this->response=$out; #$out being the output of exec()
return $this->response;
I get this
2015-08-10 01:18:06 Error: Fatal Error (1): Call to a member function body() on string in [/storage/www/sonerezh/lib/Cake/Controller/Controller.php, line 960]
2015-08-10 01:18:06 Error: [InternalErrorException] Internal Server Error
Request URL: /songs/download/2307
Stack Trace:
#0 /storage/www/sonerezh/lib/Cake/Error/ErrorHandler.php(213): ErrorHandler::handleFatalError(1, 'Call to a membe...', '/storage/www/so...', 960)
#1 [internal function]: ErrorHandler::handleError(1, 'Call to a membe...', '/storage/www/so...', 960, Array)
#2 /storage/www/sonerezh/lib/Cake/Core/App.php(931): call_user_func('ErrorHandler::h...', 1, 'Call to a membe...', '/storage/www/so...', 960, Array)
#3 /storage/www/sonerezh/lib/Cake/Core/App.php(904): App::_checkFatalError()
#4 [internal function]: App::shutdown()
#5 {main}
What can I do?
You could try this (not tested):
$this->response->body(function () {
passthru ('./program') ;
}) ;
return $this->response ;
More information here.
Note: I assumed you were using CakePHP 3, since CakeResponse::file does not exist in CakePHP 2.

How do I enter pdb debugger when the datastore is started in setUp(unittest.TestCase)?

I'm confused. It would seem that Client entity should exist because I have 2 locations for it to be created:
I create the requested entity in the setUp() of the unittest.TestCase.
I also conditionally create the Client entity in main.py if it doesn't exist already.
I have tried to set a breakpoint where the Client entity is called, but I can't step into the debugger. While the code stops execution, I can't step into the debugger. I'm not even sure how to see the output.
I set the Consistency policy to 1, so the record should exist.
datastore_stub_util.PseudoRandomHRConsistencyPolicy(probability=1)
$ nosetests
INFO 2015-02-24 19:08:56,172 devappserver2.py:726] Skipping SDK update check.
INFO 2015-02-24 19:08:56,242 api_server.py:172] Starting API server at: http://localhost:62049
INFO 2015-02-24 19:08:56,247 dispatcher.py:186] Starting module "default" running at: http://localhost:8080
INFO 2015-02-24 19:08:56,249 admin_server.py:118] Starting admin server at: http://localhost:8000
ERROR 2015-02-24 19:09:00,307 webapp2.py:1552] 'NoneType' object has no attribute 'key'
Traceback (most recent call last):
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1535, in __call__
rv = self.handle_exception(request, response, e)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1529, in __call__
rv = self.router.dispatch(request, response)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1278, in default_dispatcher
return route.handler_adapter(request, response)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1102, in __call__
return handler.dispatch()
File "/Users/Bryan/work/GoogleAppEngine/dermalfillersecrets/main.py", line 18, in dispatch
webapp2.RequestHandler.dispatch(self)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 572, in dispatch
return self.handle_exception(e, self.app.debug)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 570, in dispatch
return method(*args, **kwargs)
File "/Users/Bryan/work/GoogleAppEngine/dermalfillersecrets/main.py", line 95, in get
self.session['client'] = client.key.urlsafe()
AttributeError: 'NoneType' object has no attribute 'key'
INFO 2015-02-24 19:09:00,314 module.py:737] default: "GET / HTTP/1.1" 500 2354
INFO 2015-02-24 19:09:00,377 module.py:737] default: "GET /favicon.ico HTTP/1.1" 200 8348
INFO 2015-02-24 19:09:00,381 module.py:737] default: "GET /favicon.ico HTTP/1.1" 304 -
EINFO 2015-02-24 19:09:08,482 shutdown.py:45] Shutting down.
INFO 2015-02-24 19:09:08,483 api_server.py:588] Applying all pending transactions and saving the datastore
======================================================================
ERROR: test_guest_can_submit_contact_info (dermalfillersecrets.functional_tests.NewVisitorTest)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Users/Bryan/work/GoogleAppEngine/dermalfillersecrets/functional_tests.py", line 88, in test_guest_can_submit_contact_info
self.browser.find_element_by_name('id_name').send_keys("Kallie Wheelock")
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 302, in find_element_by_name
return self.find_element(by=By.NAME, value=name)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 662, in find_element
{'using': by, 'value': value})['value']
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 173, in execute
self.error_handler.check_response(response)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 166, in check_response
raise exception_class(message, screen, stacktrace)
NoSuchElementException: Message: Unable to locate element: {"method":"name","selector":"id_name"}
Stacktrace:
at FirefoxDriver.prototype.findElementInternal_ (file:///var/folders/mw/0y88j8_54bjc93d_lg3120qw0000gp/T/tmpSjWZ6W/extensions/fxdriver#googlecode.com/components/driver-component.js:9641:26)
at fxdriver.Timer.prototype.setTimeout/<.notify (file:///var/folders/mw/0y88j8_54bjc93d_lg3120qw0000gp/T/tmpSjWZ6W/extensions/fxdriver#googlecode.com/components/driver-component.js:548:5)
Here is the code in functional_tests.py
import sys, os, subprocess, time, unittest, shlex
sys.path.append("/usr/local/google_appengine")
sys.path.append("/usr/local/google_appengine/lib/yaml/lib")
sys.path.append("/usr/local/google_appengine/lib/webapp2-2.5.2")
sys.path.append("/usr/local/google_appengine/lib/django-1.5")
sys.path.append("/usr/local/google_appengine/lib/cherrypy")
sys.path.append("/usr/local/google_appengine/lib/concurrent")
sys.path.append("/usr/local/google_appengine/lib/docker")
sys.path.append("/usr/local/google_appengine/lib/requests")
sys.path.append("/usr/local/google_appengine/lib/websocket")
sys.path.append("/usr/local/google_appengine/lib/fancy_urllib")
sys.path.append("/usr/local/google_appengine/lib/antlr3")
from selenium import webdriver
from google.appengine.api import memcache, apiproxy_stub, apiproxy_stub_map
from google.appengine.ext import db
from google.appengine.ext import testbed
import dev_appserver
from google.appengine.tools.devappserver2 import devappserver2
class NewVisitorTest(unittest.TestCase):
    """Functional test: a guest submits contact info through the locally started dev server."""

    def setUp(self):
        """Start dev_appserver, activate the testbed stubs, seed a Client and open Firefox."""
        # Imported here because the module's import block does not provide them.
        import logging
        from google.appengine.datastore import datastore_stub_util

        # Start the dev server
        cmd = "/usr/local/bin/dev_appserver.py /Users/Bryan/work/GoogleAppEngine/dermalfillersecrets/app.yaml --port 8080 --storage_path /tmp/datastore --clear_datastore --skip_sdk_update_check"
        self.dev_appserver = subprocess.Popen(shlex.split(cmd),
                                              stdout=subprocess.PIPE)
        time.sleep(2)  # Important, let dev_appserver start up
        self.testbed = testbed.Testbed()
        self.testbed.setup_env(app_id="dev~myapp")
        self.testbed.activate()
        self.testbed.init_user_stub()
        # Create a consistency policy with a probability of 1,
        # the datastore should be available.
        self.policy = datastore_stub_util.PseudoRandomHRConsistencyPolicy(probability=1)
        # Initialize the datastore stub with this policy.
        self.testbed.init_datastore_v3_stub(datastore_file="/tmp/datastore/datastore.db", use_sqlite=True, consistency_policy=self.policy)
        self.testbed.init_memcache_stub()
        self.datastore_stub = apiproxy_stub_map.apiproxy.GetStub('datastore_v3')
        # setup client to make sure
        from main import Client
        if not Client.query(Client.name == "Bryan Wheelock").get():
            logging.info("create Admin")
            Client(
                email="bryan#mail.com",
                name="Bryan Wheelock",
                street1="555 Main St",
                street2="unit 1",
                city="Atlanta",
                zipcode=99999,
                phone="(888)555-1212",
            ).put()
        # this sleep is to allow eventual consistency to propagate
        time.sleep(2)
        self.browser = webdriver.Firefox()
        self.browser.implicitly_wait(3)

    def tearDown(self):
        """Close the browser, deactivate the testbed and stop the dev server."""
        self.browser.quit()
        self.testbed.deactivate()
        self.dev_appserver.terminate()

    def test_guest_can_submit_contact_info(self):
        """Fill in the contact form and check that one more Customer exists afterwards."""
        from google.appengine.ext import ndb
        from main import Client, Customer
        client = Client.query(Client.name == "Bryan Wheelock").get()
        orig_customer_count = Customer.query(ancestor=client.key).count()
        self.browser.get('http://localhost:8080')
        time.sleep(5)
        self.browser.find_element_by_name('id_name').send_keys("Kallie Wheelock")
        self.browser.find_element_by_name('id_street').send_keys("123 main st")
        self.browser.find_element_by_name('id_phone').send_keys('(404)555-1212')
        self.browser.find_element_by_name('id_zip').send_keys("30306")
        self.browser.find_element_by_name('submit').submit()
        # the time delay is to allow eventual consistency to happen.
        time.sleep(4)
        self.assertTrue(Customer.query(Customer.name == "Kallie Wheelock").get())
        # this should return 1 more record
        final_customer_count = Customer.query(ancestor=client.key).count()
        self.assertNotEqual(orig_customer_count, final_customer_count)
        # Delete the Customer record. An ndb Query has no delete() method, so
        # fetch the matching keys and delete those.
        ndb.delete_multi(
            Customer.query(Customer.name == "Kallie Wheelock").fetch(keys_only=True))
Here's the code in main.py:
import os
import urllib
import logging
from google.appengine.api import users
from google.appengine.ext import ndb
import jinja2
import webapp2
from webapp2_extras import sessions
class BaseHandler(webapp2.RequestHandler):
    """Request handler base class that loads and saves webapp2 sessions around each request."""

    def dispatch(self):
        """Dispatch the request, then persist all sessions on the response."""
        # Get a session store for this request.
        self.session_store = sessions.get_store(request=self.request)
        try:
            # dispatch the request
            webapp2.RequestHandler.dispatch(self)
        finally:
            # save all sessions
            self.session_store.save_sessions(self.response)

    # Must be a (cached) property: handlers index it as self.session[...].
    @webapp2.cached_property
    def session(self):
        # Returns a session using the default cookie key.
        return self.session_store.get_session()
# Templates are loaded from the directory containing this module, with
# autoescaping enabled.
JINJA_ENVIRONMENT = jinja2.Environment(
    loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
    extensions=['jinja2.ext.autoescape'],
    autoescape=True)
# Leadbook name used when a request does not supply one.
DEFAULT_LEADBOOK_NAME = 'whatsmyname'
def leadbook_key(leadbook_name=DEFAULT_LEADBOOK_NAME):
    """Constructs a Datastore key for a LeadBook entity with leadbook_name."""
    return ndb.Key('LeadBook', leadbook_name)
class Client(ndb.Model):
    """Datastore model for a client; Customer entities are stored with a Client key as parent."""
    email = ndb.StringProperty()
    name = ndb.StringProperty(indexed=True)
    street1 = ndb.StringProperty()
    street2 = ndb.StringProperty()
    city = ndb.StringProperty()
    zipcode = ndb.IntegerProperty()
    phone = ndb.StringProperty()
    # Set once, when the entity is first written.
    signup = ndb.DateTimeProperty(auto_now_add=True)
# this just creates a Client to use
# NOTE: this query and write run at module import time.
if not Client.query(Client.name == "Bryan Wheelock").get():
    client = Client(
        email="bryan#mail.com",
        name="Bryan Wheelock",
        street1="555 Main St",
        street2="unit 1",
        city="Atlanta",
        zipcode=99999,
        phone="(888)555-1212",
    ).put()
class Customer(ndb.Model):
    """Datastore model for a customer lead; created with a Client key as its parent."""
    # I commented out client property because using Ancestor Query( limited to 1 write per second)
    #client = ndb.KeyProperty(kind=Client)
    #email = ndb.StringProperty(indexed=True)
    name = ndb.StringProperty(indexed=True)
    street1 = ndb.StringProperty()
    street2 = ndb.StringProperty()
    city = ndb.StringProperty()
    zipcode = ndb.IntegerProperty()
    phone = ndb.StringProperty()
    # Set once, when the entity is first written.
    signup = ndb.DateTimeProperty(auto_now_add=True)
class MainPage(BaseHandler):
    """Landing page: looks up the seeded Client, stores its key in the session and renders index.html."""

    def get(self):
        leadbook_name = self.request.get('leadbook_name',
                                         DEFAULT_LEADBOOK_NAME)
        # This record should exist because I create in setUP and in main.py
        client = Client.query(Client.name == "Bryan Wheelock").get()
        ###########################################################
        ########################
        # I can't step into the debugger because I don't know how to access debugger shell.
        import pdb; pdb.set_trace()
        ########################
        ###########################################################
        # NOTE(review): .get() returns None when no matching Client exists;
        # the next line then raises AttributeError.
        self.session['client'] = client.key.urlsafe()
        template_values = {
            'client': client,
            'leadbook_name': urllib.quote_plus(leadbook_name),
        }
        template = JINJA_ENVIRONMENT.get_template('index.html')
        self.response.write(template.render(template_values))
class LeadBook(BaseHandler):
    """Form handler: stores a Customer under the session's Client and redirects to the main page."""

    def post(self):
        leadbook_name = self.request.get('leadbook_name',
                                         DEFAULT_LEADBOOK_NAME)
        # The Client key was stored in the session by MainPage.get.
        client = ndb.Key(urlsafe=self.session['client']).get()
        customer = Customer(parent=client.key)
        customer.name = self.request.get('id_name')
        customer.street1 = self.request.get('id_street')
        customer.phone = self.request.get('id_phone')
        customer.zipcode = int(self.request.get('id_zip'))
        # show original number of customer to show the code works
        starting_customer_count = Customer.query(ancestor=client.key).count()
        customer.put()
        # This should return the record
        assert(Customer.query(Customer.name == "Kallie Wheelock").get())
        final_customer_count = Customer.query(ancestor=client.key).count()
        #import pdb; pdb.set_trace()
        query_params = {'leadbook_name': leadbook_name}
        self.redirect('/?' + urllib.urlencode(query_params))
# Session configuration for webapp2_extras.sessions.
config = {}
config['webapp2_extras.sessions'] = {
    # NOTE(review): hard-coded placeholder secret; replace before deploying.
    'secret_key': 'my-super-secret-key',
}
# WSGI application: '/' renders the form, '/sign' stores a submitted lead.
application = webapp2.WSGIApplication([
    ('/', MainPage),
    ('/sign', LeadBook),
], config=config,
    debug=True)
Considering you use nosetests, try running it with the pdb option.
nosetests -sv --pdb
The --pdb option will drop the test runner into pdb when it encounters an error.
More info here:
http://nose.readthedocs.org/en/latest/plugins/debug.html

Why is the entity I create in setUp() not accessible via nosetests?

I still can't seem to get Nosetest to run properly.
The dev_appserver runs fine when started from the command line, but when I attempt to start it from the functional_tests.py it fails.
I create a Client entity in setUp(), but it's not accessible from the test.
How do I step into pdb debugger with dev_appserver starting up in the test?
I have tried to put pdb() breakpoints in the code. While the code stops execution, I can't step into the debugger. I'm not even sure how to see the output.
$ nosetests
INFO 2015-02-24 19:08:56,172 devappserver2.py:726] Skipping SDK update check.
INFO 2015-02-24 19:08:56,242 api_server.py:172] Starting API server at: http://localhost:62049
INFO 2015-02-24 19:08:56,247 dispatcher.py:186] Starting module "default" running at: http://localhost:8080
INFO 2015-02-24 19:08:56,249 admin_server.py:118] Starting admin server at: http://localhost:8000
ERROR 2015-02-24 19:09:00,307 webapp2.py:1552] 'NoneType' object has no attribute 'key'
Traceback (most recent call last):
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1535, in __call__
rv = self.handle_exception(request, response, e)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1529, in __call__
rv = self.router.dispatch(request, response)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1278, in default_dispatcher
return route.handler_adapter(request, response)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 1102, in __call__
return handler.dispatch()
File "/Users/Bryan/work/GoogleAppEngine/dermalfillersecrets/main.py", line 18, in dispatch
webapp2.RequestHandler.dispatch(self)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 572, in dispatch
return self.handle_exception(e, self.app.debug)
File "/Users/Bryan/Desktop/GoogleAppEngineLauncher.app/Contents/Resources/GoogleAppEngine-default.bundle/Contents/Resources/google_appengine/lib/webapp2-2.5.2/webapp2.py", line 570, in dispatch
return method(*args, **kwargs)
File "/Users/Bryan/work/GoogleAppEngine/dermalfillersecrets/main.py", line 95, in get
self.session['client'] = client.key.urlsafe()
AttributeError: 'NoneType' object has no attribute 'key'
INFO 2015-02-24 19:09:00,314 module.py:737] default: "GET / HTTP/1.1" 500 2354
INFO 2015-02-24 19:09:00,377 module.py:737] default: "GET /favicon.ico HTTP/1.1" 200 8348
INFO 2015-02-24 19:09:00,381 module.py:737] default: "GET /favicon.ico HTTP/1.1" 304 -
EINFO 2015-02-24 19:09:08,482 shutdown.py:45] Shutting down.
INFO 2015-02-24 19:09:08,483 api_server.py:588] Applying all pending transactions and saving the datastore
======================================================================
ERROR: test_guest_can_submit_contact_info (dermalfillersecrets.functional_tests.NewVisitorTest)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/Users/Bryan/work/GoogleAppEngine/dermalfillersecrets/functional_tests.py", line 88, in test_guest_can_submit_contact_info
self.browser.find_element_by_name('id_name').send_keys("Kallie Wheelock")
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 302, in find_element_by_name
return self.find_element(by=By.NAME, value=name)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 662, in find_element
{'using': by, 'value': value})['value']
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 173, in execute
self.error_handler.check_response(response)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 166, in check_response
raise exception_class(message, screen, stacktrace)
NoSuchElementException: Message: Unable to locate element: {"method":"name","selector":"id_name"}
Stacktrace:
at FirefoxDriver.prototype.findElementInternal_ (file:///var/folders/mw/0y88j8_54bjc93d_lg3120qw0000gp/T/tmpSjWZ6W/extensions/fxdriver#googlecode.com/components/driver-component.js:9641:26)
at fxdriver.Timer.prototype.setTimeout/<.notify (file:///var/folders/mw/0y88j8_54bjc93d_lg3120qw0000gp/T/tmpSjWZ6W/extensions/fxdriver#googlecode.com/components/driver-component.js:548:5)
Here is the code in functional_tests.py
import sys, os, subprocess, time, unittest, shlex
sys.path.append("/usr/local/google_appengine")
sys.path.append("/usr/local/google_appengine/lib/yaml/lib")
sys.path.append("/usr/local/google_appengine/lib/webapp2-2.5.2")
sys.path.append("/usr/local/google_appengine/lib/django-1.5")
sys.path.append("/usr/local/google_appengine/lib/cherrypy")
sys.path.append("/usr/local/google_appengine/lib/concurrent")
sys.path.append("/usr/local/google_appengine/lib/docker")
sys.path.append("/usr/local/google_appengine/lib/requests")
sys.path.append("/usr/local/google_appengine/lib/websocket")
sys.path.append("/usr/local/google_appengine/lib/fancy_urllib")
sys.path.append("/usr/local/google_appengine/lib/antlr3")
from selenium import webdriver
from google.appengine.api import memcache, apiproxy_stub, apiproxy_stub_map
from google.appengine.ext import db
from google.appengine.ext import testbed
import dev_appserver
from google.appengine.tools.devappserver2 import devappserver2
class NewVisitorTest(unittest.TestCase):
    """Functional test: a guest submits the contact form through Firefox.

    Every test starts a local dev_appserver subprocess, activates an App
    Engine testbed in this process, makes sure the "Bryan Wheelock" Client
    exists, and then opens a Firefox session pointed at the dev server.
    """

    def setUp(self):
        """Start the dev server, set up the testbed stubs and open Firefox."""
        # Both names are used below but were never imported by this module,
        # which made setUp fail with a NameError.
        import logging
        from google.appengine.datastore import datastore_stub_util

        # Start the dev server
        cmd = (
            "/usr/local/bin/dev_appserver.py "
            "/Users/Bryan/work/GoogleAppEngine/dermalfillersecrets/app.yaml "
            "--port 8080 --storage_path /tmp/datastore "
            "--clear_datastore --skip_sdk_update_check"
        )
        self.dev_appserver = subprocess.Popen(shlex.split(cmd),
                                              stdout=subprocess.PIPE)
        time.sleep(2)  # Important, let dev_appserver start up

        self.testbed = testbed.Testbed()
        self.testbed.setup_env(app_id="dev~myapp")
        self.testbed.activate()
        #self.testbed.setup_env(app_id='dermalfillersecrets')
        self.testbed.init_user_stub()
        # Create a consistency policy with a probability of 1,
        # the datastore should be available.
        self.policy = datastore_stub_util.PseudoRandomHRConsistencyPolicy(
            probability=1)
        # Initialize the datastore stub with this policy.
        self.testbed.init_datastore_v3_stub(
            datastore_file="/tmp/datastore/datastore.db",
            use_sqlite=True,
            consistency_policy=self.policy)
        self.testbed.init_memcache_stub()
        self.datastore_stub = apiproxy_stub_map.apiproxy.GetStub('datastore_v3')

        # setup client to make sure
        from main import Client
        if not Client.query(Client.name == "Bryan Wheelock").get():
            logging.info("create Admin")
            Client(
                email="bryan#mail.com",
                name="Bryan Wheelock",
                street1="555 Main St",
                street2="unit 1",
                city="Atlanta",
                zipcode=99999,
                phone="(888)555-1212",
            ).put()
        # this sleep is to allow eventual consistency to propogate
        time.sleep(2)

        self.browser = webdriver.Firefox()
        self.browser.implicitly_wait(3)

    def tearDown(self):
        """Close the browser, deactivate the testbed and stop the dev server."""
        self.browser.quit()
        self.testbed.deactivate()
        self.dev_appserver.terminate()

    def test_guest_can_submit_contact_info(self):
        # Fill in the contact form as a guest and check that one more
        # Customer exists under the owning Client afterwards.
        from google.appengine.ext import ndb
        from main import Client, Customer

        client = Client.query(Client.name == "Bryan Wheelock").get()
        orig_customer_count = Customer.query(ancestor=client.key).count()

        self.browser.get('http://localhost:8080')
        time.sleep(5)
        form_values = [
            ('id_name', "Kallie Wheelock"),
            ('id_street', "123 main st"),
            ('id_phone', '(404)555-1212'),
            ('id_zip', "30306"),
        ]
        for field_name, text in form_values:
            self.browser.find_element_by_name(field_name).send_keys(text)
        self.browser.find_element_by_name('submit').submit()
        # the time delay is to allow eventual consistency to happen.
        time.sleep(4)

        kallie_query = Customer.query(Customer.name == "Kallie Wheelock")
        # A unittest assertion is used instead of a bare assert so the check
        # is not stripped under -O and failures are reported by the runner.
        self.assertTrue(kallie_query.get())
        # this should return 1 more record
        final_customer_count = Customer.query(ancestor=client.key).count()
        self.assertNotEqual(orig_customer_count, final_customer_count)

        # Delete the Customer record. ndb queries have no delete() method;
        # fetch the matching keys and delete those instead.
        ndb.delete_multi(kallie_query.fetch(keys_only=True))
Here's the code in main.py:
import os
import urllib
import logging
from google.appengine.api import users
from google.appengine.ext import ndb
import jinja2
import webapp2
from webapp2_extras import sessions
class BaseHandler(webapp2.RequestHandler):
    """Request handler base class that adds webapp2 session support."""

    def dispatch(self):
        """Dispatch the request, then save all sessions onto the response."""
        # Get a session store for this request.
        self.session_store = sessions.get_store(request=self.request)
        try:
            # dispatch the request
            webapp2.RequestHandler.dispatch(self)
        finally:
            # save all sessions, even when the handler raised
            self.session_store.save_sessions(self.response)

    # The decorator had been garbled into a comment ("#webapp2..."). Without
    # it `session` is a plain method, and the handlers' subscript access
    # `self.session['client']` fails.
    @webapp2.cached_property
    def session(self):
        """Return the session stored under the default cookie key."""
        return self.session_store.get_session()
# Jinja2 environment that loads templates from this module's directory,
# with autoescaping turned on.
JINJA_ENVIRONMENT = jinja2.Environment(
    autoescape=True,
    extensions=['jinja2.ext.autoescape'],
    loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
)

# Lead book name used when a request does not supply 'leadbook_name'.
DEFAULT_LEADBOOK_NAME = 'whatsmyname'
def leadbook_key(leadbook_name=DEFAULT_LEADBOOK_NAME):
    """Build the Datastore key of the LeadBook entity named leadbook_name.

    :param leadbook_name: name of the lead book; defaults to
        DEFAULT_LEADBOOK_NAME.
    :return: an ndb.Key of kind 'LeadBook'.
    """
    key = ndb.Key('LeadBook', leadbook_name)
    return key
class Client(ndb.Model):
    """Datastore model for a client: contact details plus signup time."""

    # Contact details.
    email = ndb.StringProperty()
    name = ndb.StringProperty(indexed=True)  # queried by name elsewhere in this file
    phone_placeholder = None  # replaced below; keeps declaration order explicit
    street1 = ndb.StringProperty()
    street2 = ndb.StringProperty()
    city = ndb.StringProperty()
    zipcode = ndb.IntegerProperty()
    phone = ndb.StringProperty()
    del phone_placeholder

    # Set automatically when the entity is first created.
    signup = ndb.DateTimeProperty(auto_now_add=True)
# Seed step: when no Client named "Bryan Wheelock" is found, create one so
# the rest of the module has a Client to use. `client` is bound to the value
# returned by put().
if not Client.query(Client.name == "Bryan Wheelock").get():
    client = Client(
        name="Bryan Wheelock",
        email="bryan#mail.com",
        phone="(888)555-1212",
        street1="555 Main St",
        street2="unit 1",
        city="Atlanta",
        zipcode=99999,
    ).put()
class Customer(ndb.Model):
    """Datastore model for a customer lead, stored as a child of a Client."""

    # I commented out client property because using Ancestor Query
    # (limited to 1 write per second)
    #client = ndb.KeyProperty(kind=Client)
    #email = ndb.StringProperty(indexed=True)

    # Contact details.
    name = ndb.StringProperty(indexed=True)  # queried by name elsewhere in this file
    street1 = ndb.StringProperty()
    street2 = ndb.StringProperty()
    city = ndb.StringProperty()
    zipcode = ndb.IntegerProperty()
    phone = ndb.StringProperty()

    # Set automatically when the entity is first created.
    signup = ndb.DateTimeProperty(auto_now_add=True)
class MainPage(BaseHandler):
    """Renders index.html and stores the Client's key in the session."""

    def get(self):
        """Look up the Client, store its urlsafe key, and write the page."""
        requested_name = self.request.get('leadbook_name',
                                          DEFAULT_LEADBOOK_NAME)

        # This should be the Client record that shows the info of the owner
        # of the local clinic.
        # The open question is how to get the site to show the correct Client.
        owner = Client.query(Client.name == "Bryan Wheelock").get()

        # Store the key in urlsafe form so later requests can rebuild it.
        self.session['client'] = owner.key.urlsafe()

        context = dict(
            client=owner,
            leadbook_name=urllib.quote_plus(requested_name),
        )
        page = JINJA_ENVIRONMENT.get_template('index.html')
        self.response.write(page.render(context))
class LeadBook(BaseHandler):
    """Handles the contact form POST and stores the guest as a Customer."""

    def post(self):
        """Create a Customer under the session's Client, then redirect to '/'.

        Reads 'id_name', 'id_street', 'id_phone' and 'id_zip' from the
        request. Responds with 400 when 'id_zip' is not an integer.
        """
        leadbook_name = self.request.get('leadbook_name',
                                         DEFAULT_LEADBOOK_NAME)

        # Form input is untrusted: answer with 400 instead of letting a
        # non-numeric zip raise an unhandled ValueError.
        try:
            zipcode = int(self.request.get('id_zip'))
        except ValueError:
            self.abort(400)

        client = ndb.Key(urlsafe=self.session['client']).get()
        customer = Customer(
            parent=client.key,
            name=self.request.get('id_name'),
            street1=self.request.get('id_street'),
            phone=self.request.get('id_phone'),
            zipcode=zipcode,
        )
        customer.put()
        # The debug assertion that queried for "Kallie Wheelock" by name was
        # removed: it failed for every other customer name, and a
        # non-ancestor query right after put() is only eventually consistent.
        # The two count() queries feeding unused locals were dropped too.

        query_params = {'leadbook_name': leadbook_name}
        self.redirect('/?' + urllib.urlencode(query_params))
# Application configuration; the sessions section carries the secret key.
config = {
    'webapp2_extras.sessions': {
        'secret_key': 'my-super-secret-key',
    },
}

# WSGI entry point: '/' is handled by MainPage and '/sign' by LeadBook.
application = webapp2.WSGIApplication(
    [
        ('/', MainPage),
        ('/sign', LeadBook),
    ],
    config=config,
    debug=True,
)
The problem is hinted at by your comment:
# this sleep is to allow eventual consistency to propogate
That's really not how it works. Eventual consistency has nothing to do with time, and the way it is emulated in the local datastore has even less to do with it: in tests, the datastore testbed implements a policy under which the initial read almost always fails. The documentation explains how you can tweak the policy in your test. One shortcut - which, again, only works in tests and not in production - is to do an explicit .get() after saving, which will always make the entity visible.

Resources