I am iterating over several DB records and writing data from their respective BLOB fields into files:
def build(self, records):
"""
Builds openimmo.anhang
"""
result = None
anh_records = [r for r in records if type(r) == anhaenge]
if not anh_records:
return result
anhang = []
print('RECORDS: ' + str(len(anh_records)))
for anh_record in anh_records:
if anh_record.daten:
__, path = mkstemp()
with open(path, 'wb') as target:
target.write(anh_record.daten)
anh = openimmo.anhang()
anh.anhangtitel = anh_record.anhangtitel
anh.format = 'image/jpeg' #MIMEUtil.getmime(path)
anh.daten = openimmo.daten()
anh.daten.pfad = path
anh.location = id2location.get(anh_record.location)
anh.gruppe = id2gruppe.get(anh_record.gruppe)
anhang.append(anh)
try:
result.validateBinding()
except:
self.log.err('Could not build "anhang": ' + str(result))
if anhang:
result = openimmo.anhaenge()
result.anhang = anhang
return result
This, however produces the following error:
RECORDS: 5
Message: "[Errno 24] Too many open files: '/tmp/tmpo54qfq'
daemon panic:
Caught unexpected exception in _main() on 2014-08-20 11:53:37.918353
Message: "[Errno 24] Too many open files: '/tmp/tmpo54qfq'" of type "<class 'IOError'>"
Traceback (most recent call last):
File "/usr/local/lib/python3.2/dist-packages/homie_core-1.0-py3.2.egg/homie/serv/daemon.py", line 345, in __run
File "/usr/local/lib/python3.2/dist-packages/homie_core-1.0-py3.2.egg/homie/serv/service.py", line 72, in _main
File "/usr/local/lib/python3.2/dist-packages/homie_core-1.0-py3.2.egg/homie/api/itf.py", line 127, in export
File "/usr/local/lib/python3.2/dist-packages/homie_openimmodb-0.2_indev-py3.2.egg/openimmodb/itf.py", line 51, in _retrieve
File "/usr/local/lib/python3.2/dist-packages/homie_openimmodb-0.2_indev-py3.2.egg/openimmodb/conv.py", line 27, in decode
File "/usr/local/lib/python3.2/dist-packages/homie_openimmodb-0.2_indev-py3.2.egg/openimmodb/factories/openimmo/immobilie.py", line 60, in build
File "/usr/local/lib/python3.2/dist-packages/homie_openimmodb-0.2_indev-py3.2.egg/openimmodb/factories/openimmo/anhaenge.py", line 30, in build
IOError: [Errno 24] Too many open files: '/tmp/tmpo54qfq'
According to lsof the whole process has over 5k open files:
# lsof| grep python3| wc -l
5375
I checked it several times: I am using with open(file) as desc everywhere in the code, when I open a file.
Shouldn't the files be closed automatically at the end of each with block, or am I missing something?
tempfile.mkstemp() opens a file for you:
fd, path = mkstemp()
with open(fd, 'wb') as target:
# os.close(fd) is called automatically
You don't need open(path) that opens another file (with the same name).
You could use tempfile.NamedTemporaryFile(delete=False) instead of tempfile.mkstemp().
Related
Using Tensorflow 2.3, I'm trying to create a tf.data.Dataset without labels.
I have my .png files in a folder './Folder/'. For creating the minimal working sample, I think the only relevant line is the one where I am calling tf.keras.preprocessing.image_dataset_from_directory. The class definition is here.
dataset = tf.keras.preprocessing.image_dataset_from_directory('./Folder/',label_mode=None,batch_size=100)
When the Python interpreter reaches the line above, it returns this error message:
Traceback (most recent call last):
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py", line 465, in _apply_op_helper
values = ops.convert_to_tensor(
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 1473, in convert_to_tensor
raise ValueError(
ValueError: Tensor conversion requested dtype string for Tensor with dtype float32: <tf.Tensor 'args_0:0' shape=() dtype=float32>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "04-vaeAnomalyScores.py", line 135, in <module>
historicKLD, encoder, decoder, vae = artVAE_Instance.run_autoencoder() # Train
File "/media/roi/9b168630-3b62-4215-bb7d-fed9ba179dc7/images/largePatches/artvae.py", line 386, in run_autoencoder
trainingDataSet = self.loadImages(self.trainingDir)
File "/media/roi/9b168630-3b62-4215-bb7d-fed9ba179dc7/images/largePatches/artvae.py", line 231, in loadImages
dataset = tf.keras.preprocessing.image_dataset_from_directory(dir[:-1]+'Downscaled/',label_mode=None,batch_size=self.BATCH_SIZE)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/keras/preprocessing/image_dataset.py", line 192, in image_dataset_from_directory
dataset = paths_and_labels_to_dataset(
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/keras/preprocessing/image_dataset.py", line 219, in paths_and_labels_to_dataset
img_ds = path_ds.map(
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1695, in map
return MapDataset(self, map_func, preserve_cardinality=True)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 4041, in __init__
self._map_func = StructuredFunctionWrapper(
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3371, in __init__
self._function = wrapper_fn.get_concrete_function()
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2938, in get_concrete_function
graph_function = self._get_concrete_function_garbage_collected(
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2906, in _get_concrete_function_garbage_collected
graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3213, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3065, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 986, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3364, in wrapper_fn
ret = _wrapper_helper(*args)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 3299, in _wrapper_helper
ret = autograph.tf_convert(func, ag_ctx)(*nested_args)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 255, in wrapper
return converted_call(f, args, kwargs, options=options)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 532, in converted_call
return _call_unconverted(f, args, kwargs, options)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py", line 339, in _call_unconverted
return f(*args, **kwargs)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/keras/preprocessing/image_dataset.py", line 220, in <lambda>
lambda x: path_to_image(x, image_size, num_channels, interpolation))
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/keras/preprocessing/image_dataset.py", line 228, in path_to_image
img = io_ops.read_file(path)
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/ops/gen_io_ops.py", line 574, in read_file
_, _, _op, _outputs = _op_def_library._apply_op_helper(
File "/home/roi/.local/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py", line 492, in _apply_op_helper
raise TypeError("%s expected type of %s." %
TypeError: Input 'filename' of 'ReadFile' Op has type float32 that does not match expected type of string.
Thank you so much for your help.
One way to fix this I found is to put all your images in another sub-directory inside the directory whose path you are feeding to the image_dataset_from_directory.
Taking your example, you would create a new folder, let's call it new_folder, inside of ./Folder/ where you would put all your images, such that now the path to all your images is ./Folder/new_folder/. Then you can call the image_dataset_from_directory method with the exact same arguments as you have done in your question:
tf.keras.preprocessing.image_dataset_from_directory(
'./Folder/',
label_mode=None,
batch_size=100
)
I found this to work for me so hopefully someone else will also find it helpful!
Others have reported a similar error, but the solutions given do not solve my problem.
For example there is a good answer here. The answer in the link mentions how ndb changes from a first use to a later use and suggests there is a problem because a first run produces a None in the Datastore. I cannot reproduce or see that happening in the Datastore for my sdk, but that may be because I am running it here from the interactive console.
I am pretty sure I got an initial good run with the GAE interactive console, but every run since then has failed with the error in my Title to this question.
I have left the print statements in the following code because they show good results and assure me that the error is occuring in the put() at the very end.
from google.appengine.ext import ndb
class Account(ndb.Model):
week = ndb.IntegerProperty(repeated=True)
weeksNS = ndb.IntegerProperty(repeated=True)
weeksEW = ndb.IntegerProperty(repeated=True)
terry=Account(week=[],weeksNS=[],weeksEW=[])
terry_key=terry.put()
terry = terry_key.get()
print terry
for t in list(range(4)): #just dummy input, but like real input
terry.week.append(t)
print terry.week
region = 1 #same error message for region = 0
if region :
terry.weeksEW.append(terry.week)
else:
terry.weeksNS.append(terry.week)
print 'EW'+str(terry.weeksEW)
print 'NS'+str(terry.weeksNS)
terry.week = []
print 'week'+str(terry.week)
terry.put()
The idea of my code is to first build up the terry.week list values incrementally and then later store the whole list to the appropriate region, either NS or EW. So I'm looking for a workaround for this scheme.
The error message is likely of no value but I am reproducing it here.
Traceback (most recent call last):
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/tools/devappserver2/python/runtime/request_handler.py", line 237, in handle_interactive_request
exec(compiled_code, self._command_globals)
File "<string>", line 55, in <module>
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 3458, in _put
return self._put_async(**ctx_options).get_result()
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/tasklets.py", line 383, in get_result
self.check_success()
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/tasklets.py", line 427, in _help_tasklet_along
value = gen.throw(exc.__class__, exc, tb)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/context.py", line 824, in put
key = yield self._put_batcher.add(entity, options)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/tasklets.py", line 430, in _help_tasklet_along
value = gen.send(val)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/context.py", line 358, in _put_tasklet
keys = yield self._conn.async_put(options, datastore_entities)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/datastore/datastore_rpc.py", line 1858, in async_put
pbs = [entity_to_pb(entity) for entity in entities]
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 697, in entity_to_pb
pb = ent._to_pb()
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 3167, in _to_pb
prop._serialize(self, pb, projection=self._projection)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 1422, in _serialize
values = self._get_base_value_unwrapped_as_list(entity)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 1192, in _get_base_value_unwrapped_as_list
wrapped = self._get_base_value(entity)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 1180, in _get_base_value
return self._apply_to_values(entity, self._opt_call_to_base_type)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 1352, in _apply_to_values
value[:] = map(function, value)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 1234, in _opt_call_to_base_type
value = _BaseValue(self._call_to_base_type(value))
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 1255, in _call_to_base_type
return call(value)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 1331, in call
newvalue = method(self, value)
File "/Users/brian/google-cloud-sdk/platform/google_appengine/google/appengine/ext/ndb/model.py", line 1781, in _validate
(value,))
BadValueError: Expected integer, got [0, 1, 2, 3]
I believe the error comes from these lines:
terry.weeksEW.append(terry.week)
terry.weeksNS.append(terry.week)
You are not appending another integer; You are appending a list, when an integer is expected.
>>> aaa = [1,2,3]
>>> bbb = [4,5,6]
>>> aaa.append(bbb)
>>> aaa
[1, 2, 3, [4, 5, 6]]
>>>
This fails the ndb.IntegerProperty test.
Try:
terry.weeksEW += terry.week
terry.weeksNS += terry.week
EDIT: To save a list of lists, do not use the IntegerProperty(), but instead the JsonProperty(). Better still, the ndb datastore is deprecated, so... I recommend Firestore, which uses JSON objects by default. At least use Cloud Datastore, or Cloud NDB.
I have three arrays of data. I want to loop them over and save the values in a temporary file if some condition is meet. If the condition is not meet I would like to open the temorary file and find the index of the maximum value then save to another file. When I try the code below I get this error. This is my fist time using tempfile.NamedTemporaryFile() so I very well may not be using it correctly. Thanks
Traceback (most recent call last):
File "<ipython-input-19-7c44ca7dcbd6>", line 1, in <module>
runfile('C:/Users/Khary/Documents/Astrophysics/Bolshoi/Halo Formation History Project/Codes/Find V_max.py', wdir='C:/Users/Khary/Documents/Astrophysics/Bolshoi/Halo Formation History Project/Codes')
File "C:\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 685, in runfile
execfile(filename, namespace)
File "C:\Anaconda3\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 85, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "C:/Users/Khary/Documents/Astrophysics/Bolshoi/Halo Formation History Project/Codes/Find V_max.py", line 222, in <module>
formation_def()
File "C:/Users/Khary/Documents/Astrophysics/Bolshoi/Halo Formation History Project/Codes/Find V_max.py", line 129, in formation_def
FT = np.loadtxt(TF,skiprows=0)
File "C:\Anaconda3\lib\site-packages\numpy\lib\npyio.py", line 770, in loadtxt
first_line = next(fh)
UnsupportedOperation: not readable
My code
import numpy as np
import temp
#large arrays of data
Id = np.array([some size])
MASS = np.array([some size])
V = np.array([some size])
def filesave(MAS,V): #Functioin to write and save values to file
Mc = str(MAS)
Vel = str(V)
w.write(Mc)
w.write('\t')
w.write(Vel)
w.write('\n')
return()
def formation_def():
count = 1
l =len(ID)
for i in range(l):
if ID[i] == count:
for j in range(i,l):
TF = tempfile.NamedTemporaryFile(mode='a')
if ID[j] <= ID[i]:
T = str(ID[j])
M = str(MASS[j])
Vel = str(V[j])
TF.write(T)
TF.write('\t')
TF.write(M)
TF.write('\t')
TF.write(Vel)
TF.write('\n')
elif ID[j]>ID[i]: # if ID[j]>TID[i] then we are in the next halo in the list
FT = np.loadtxt(TF,skiprows=0)
MASS2 = FT[:,0]
V2 = FT[:,2]
vel_max = np.argmax(V2)
filesave(MASS2[vel_max],V2[vel_max])
TF.close()
count+=1 # and must indcrement the counter and break out of loop
break
elif:
count = ID[i]+1
return()
When you created your temporary file, you assigned it to append data to the file. That is a write operation.
TF = tempfile.NamedTemporaryFile(mode='a')
Where it seems to be failing, you are trying to read data from the file and put it in FT.
FT = np.loadtxt(TF,skiprows=0)
Change TF to mode='r' and you should have better luck.
Im trying to add the file extension back to a file path and filename after deleting the last 10 characters of the file's name.
Code so far:
rootDir = "C\\Users\\Documents\\New"
for root, dirs, filenames in os.walk(rootDir):
for fileys in filenames:
fullpath = os.path.join(root, fileys)
filesplit = os.path.splitext(fullpath)
fileext = filesplit
os.rename(fullpath, fullpath[:-5] + fileext) #line that has error
What my error is:
Traceback (most recent call last):
File "C:/Users/Student/Downloads/App.py", line 28, in
os.rename(fullpath, fullpath[:-10] + (s + fileext for s in fullpath))
TypeError: cannot concatenate 'str' and 'generator' objects
I see that the issue is that I am trying to concatenate a string and generator, but when i tried:
os.rename (fullpath, fullpath[:10] + fileext)
The error i receive is that I cannot concatenate a tuple and a string.
So how do I add on the file extension to the fullpath after i remove the last 10 characters of the filename while renaming the file.
Thanks
Suppose I create a key from user input websafe url
key = ndb.Key(urlsafe=some_user_input)
How can I check if the some_user_input is valid?
My current experiment shows that statement above will throw ProtocolBufferDecodeError (Unable to merge from string.) exception if the some_user_input is invalid, but could not find anything about this from the API. Could someone kindly confirm this, and point me some better way for user input validity checking instead of catching the exception?
Thanks a lot!
If you try to construct a Key with an invalid urlsafe parameter
key = ndb.Key(urlsafe='bogus123')
you will get an error like
Traceback (most recent call last):
File "/opt/google/google_appengine/google/appengine/runtime/wsgi.py", line 240, in Handle
handler = _config_handle.add_wsgi_middleware(self._LoadHandler())
File "/opt/google/google_appengine/google/appengine/runtime/wsgi.py", line 299, in _LoadHandler
handler, path, err = LoadObject(self._handler)
File "/opt/google/google_appengine/google/appengine/runtime/wsgi.py", line 85, in LoadObject
obj = __import__(path[0])
File "/home/tim/git/project/main.py", line 10, in <module>
from src.tim import handlers as handlers_
File "/home/tim/git/project/src/tim/handlers.py", line 42, in <module>
class ResetHandler(BaseHandler):
File "/home/tim/git/project/src/tim/handlers.py", line 47, in ResetHandler
key = ndb.Key(urlsafe='bogus123')
File "/opt/google/google_appengine/google/appengine/ext/ndb/key.py", line 212, in __new__
self.__reference = _ConstructReference(cls, **kwargs)
File "/opt/google/google_appengine/google/appengine/ext/ndb/utils.py", line 142, in positional_wrapper
return wrapped(*args, **kwds)
File "/opt/google/google_appengine/google/appengine/ext/ndb/key.py", line 642, in _ConstructReference
reference = _ReferenceFromSerialized(serialized)
File "/opt/google/google_appengine/google/appengine/ext/ndb/key.py", line 773, in _ReferenceFromSerialized
return entity_pb.Reference(serialized)
File "/opt/google/google_appengine/google/appengine/datastore/entity_pb.py", line 1710, in __init__
if contents is not None: self.MergeFromString(contents)
File "/opt/google/google_appengine/google/net/proto/ProtocolBuffer.py", line 152, in MergeFromString
self.MergePartialFromString(s)
File "/opt/google/google_appengine/google/net/proto/ProtocolBuffer.py", line 168, in MergePartialFromString
self.TryMerge(d)
File "/opt/google/google_appengine/google/appengine/datastore/entity_pb.py", line 1839, in TryMerge
d.skipData(tt)
File "/opt/google/google_appengine/google/net/proto/ProtocolBuffer.py", line 677, in skipData
raise ProtocolBufferDecodeError, "corrupted"
ProtocolBufferDecodeError: corrupted
Interesting here are is
File "/opt/google/google_appengine/google/appengine/ext/ndb/key.py", line 773, in _ReferenceFromSerialized
return entity_pb.Reference(serialized)
which is the last code executed in the key.py module:
def _ReferenceFromSerialized(serialized):
"""Construct a Reference from a serialized Reference."""
if not isinstance(serialized, basestring):
raise TypeError('serialized must be a string; received %r' % serialized)
elif isinstance(serialized, unicode):
serialized = serialized.encode('utf8')
return entity_pb.Reference(serialized)
serialized here being the decoded urlsafe string, you can read more about it in the link to the source code.
another interesting one is the last one:
File "/opt/google/google_appengine/google/appengine/datastore/entity_pb.py", line 1839, in TryMerge
in the entity_pb.py module which looks like this
def TryMerge(self, d):
while d.avail() > 0:
tt = d.getVarInt32()
if tt == 106:
self.set_app(d.getPrefixedString())
continue
if tt == 114:
length = d.getVarInt32()
tmp = ProtocolBuffer.Decoder(d.buffer(), d.pos(), d.pos() + length)
d.skip(length)
self.mutable_path().TryMerge(tmp)
continue
if tt == 162:
self.set_name_space(d.getPrefixedString())
continue
if (tt == 0): raise ProtocolBuffer.ProtocolBufferDecodeError
d.skipData(tt)
which is where the actual attempt to 'merge the input to into a Key' is made.
You can see in the source code that during the process of constructing a Key from an urlsafe parameter not a whole lot can go wrong. First it checks if the input is a string and if it's not, a TypeError is raised, if it is but it's not 'valid', indeed a ProtocolBufferDecodeError is raised.
My current experiment shows that statement above will throw ProtocolBufferDecodeError (Unable to merge from string.) exception if the some_user_input is invalid, but could not find anything about this from the API. Could someone kindly confirm this
Sort of confirmed - we now know that also TypeError can be raised.
and point me some better way for user input validity checking instead of catching the exception?
This is an excellent way to check validity! Why do the checks yourself if the they are already done by appengine? A code snippet could look like this (not working code, just an example)
def get(self):
# first, fetch the user_input from somewhere
try:
key = ndb.Key(urlsafe=user_input)
except TypeError:
return 'Sorry, only string is allowed as urlsafe input'
except ProtocolBufferDecodeError:
return 'Sorry, the urlsafe string seems to be invalid'