Problem with saving pickle object into arrays from images in python - arrays

I have the following class for loading and converting my images into train and test arrays for a deep learning model in Tensorflow 2.
The images are in three folders, named 'Car', 'Cat' and 'Man', which are within the Train and Test main folders. Each image is of 300 x 400 pixels.
import os
import pickle
import cv2
import numpy as np
os.getcwd()
out: 'C:\\Users\\me\\Jupiter_Notebooks\\Dataset_Thermal\\SeekThermal'
path_train = "../SeekThermal/Train"
path_test = "../SeekThermal/Test"
class MasterImage(object):
def __init__(self,PATH='', IMAGE_SIZE = 50):
self.PATH = PATH
self.IMAGE_SIZE = IMAGE_SIZE
self.image_data = []
self.x_data = []
self.y_data = []
self.CATEGORIES = []
# This will get List of categories
self.list_categories = []
def get_categories(self):
for path in os.listdir(self.PATH):
if '.DS_Store' in path:
pass
else:
self.list_categories.append(path)
print("Found Categories ",self.list_categories,'\n')
return self.list_categories
def process_image(self):
try:
"""
Return Numpy array of image
:return: X_Data, Y_Data
"""
self.CATEGORIES = self.get_categories()
for categories in self.CATEGORIES: # Iterate over categories
train_folder_path = os.path.join(self.PATH, categories) # Folder Path
class_index = self.CATEGORIES.index(categories) # this will get index for classification
for img in os.listdir(train_folder_path): # This will iterate in the Folder
new_path = os.path.join(train_folder_path, img) # image Path
try: # if any image is corrupted
image_data_temp = cv2.imread(new_path) # Read Image as numbers
image_temp_resize = cv2.resize(image_data_temp,(self.IMAGE_SIZE,self.IMAGE_SIZE))
self.image_data.append([image_temp_resize,class_index])
random.shuffle(self.image_data)
except:
pass
data = np.asanyarray(self.image_data) # or, data = np.asanyarray(self.image_data,dtype=object)
# Iterate over the Data
for x in data:
self.x_data.append(x[0]) # Get the X_Data
self.y_data.append(x[1]) # get the label
X_Data = np.asarray(self.x_data) / (255.0) # Normalize Data
Y_Data = np.asarray(self.y_data)
# reshape x_Data
X_Data = X_Data.reshape(-1, self.IMAGE_SIZE, self.IMAGE_SIZE, 3)
return X_Data, Y_Data
except:
print("Failed to run Function Process Image ")
def pickle_image(self):
"""
:return: None Creates a Pickle Object of DataSet
"""
# Call the Function and Get the Data
X_Data,Y_Data = self.process_image()
# Write the Entire Data into a Pickle File
pickle_out = open('X_Data','wb')
pickle.dump(X_Data, pickle_out)
pickle_out.close()
# Write the Y Label Data
pickle_out = open('Y_Data', 'wb')
pickle.dump(Y_Data, pickle_out)
pickle_out.close()
print("Pickled Image Successfully ")
return X_Data,Y_Data
def load_dataset(self):
try:
# Read the Data from Pickle Object
X_Temp = open('..\SeekThermal\X_Data','rb')
X_Data = pickle.load(X_Temp)
Y_Temp = open('..\SeekThermal\Y_Data','rb')
Y_Data = pickle.load(Y_Temp)
print('Reading Dataset from Pickle Object')
return X_Data,Y_Data
except:
print('Could not Found Pickle File ')
print('Loading File and Dataset ..........')
X_Data,Y_Data = self.pickle_image()
return X_Data,Y_Data
I dont understand what the problem is with the pickle file, because just last week I able to create these arrays successfully with the same code??
Is there an easier way to load images in Tensorflow rather than through the custom defined class?
a = MasterImage(PATH = path_train,IMAGE_SIZE = 224)
a.process_image()
out:
it produces an array with a warning.
VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
data = np.asanyarray(self.image_data)
a.pickle_image()
out:
TypeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_1692\507657192.py in <cell line: 1>()
----> 1 a.pickle_image()
~\AppData\Local\Temp\ipykernel_1692\1410849712.py in pickle_image(self)
71 """
72 # Call the Function and Get the Data
---> 73 X_Data,Y_Data = self.process_image()
74
75 # Write the Entire Data into a Pickle File
TypeError: cannot unpack non-iterable NoneType object
a.load_dataset()
out:
Could not Found Pickle File
Loading File and Dataset ..........
Found Categories ['Car', 'Cat', 'Man', 'Car', 'Cat', 'Man']
Pickled Image Successfully
I'm running Python 3.8.8 via anaconda on Windows 10. Thank you for any advice.

Related

Python, face_recognition convert string to array

I want to convert a variable to a string and then to an array that I can use to compare, but i dont know how to do that.
my code:
import face_recognition
import numpy as np
a = face_recognition.load_image_file('C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191115_10_32_24_Pro.jpg') # my picture 1
b = face_recognition.load_image_file('C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191115_09_48_56_Pro.jpg') # my picture 2
c = face_recognition.load_image_file(
'C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191115_09_48_52_Pro.jpg') # my picture 3
d = face_recognition.load_image_file('C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\ziv sion.jpg') # my picture 4
e = face_recognition.load_image_file(
'C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191120_17_46_40_Pro.jpg') # my picture 5
f = face_recognition.load_image_file(
'C:\\Users\zivsi\OneDrive\תמונות\סרט צילום\WIN_20191117_16_19_11_Pro.jpg') # my picture 6
a = face_recognition.face_encodings(a)[0]
b = face_recognition.face_encodings(b)[0]
c = face_recognition.face_encodings(c)[0]
d = face_recognition.face_encodings(d)[0]
e = face_recognition.face_encodings(e)[0]
f = face_recognition.face_encodings(f)[0]
Here I tried to convert the variable to a string
str_variable = str(a)
array_variable = np.array(str_variable)
my_face = a, b, c, d, e, f, array_variable
while True:
new = input('path: ')
print('Recognizing...')
unknown = face_recognition.load_image_file(new)
unknown_encodings = face_recognition.face_encodings(unknown)[0]
The program cannot use the variable:
results = face_recognition.compare_faces(array_variable, unknown_encodings, tolerance=0.4)
print(results)
recognize_times = int(results.count(True))
if (3 <= recognize_times):
print('hello boss!')
my_face = *my_face, unknown_encodings
please help me
The error shown:
Traceback (most recent call last):
File "C:/Users/zivsi/PycharmProjects/AI/pytt.py", line 37, in <module>
results = face_recognition.compare_faces(my_face, unknown_encodings, tolerance=0.4)
File "C:\Users\zivsi\AppData\Local\Programs\Python\Python36\lib\site-
packages\face_recognition\api.py", line 222, in compare_faces
return list(face_distance(known_face_encodings, face_encoding_to_check) <= tolerance)
File "C:\Users\zivsi\AppData\Local\Programs\Python\Python36\lib\site-packages\face_recognition\api.py", line 72, in face_distance
return np.linalg.norm(face_encodings - face_to_compare, axis=1)
ValueError: operands could not be broadcast together with shapes (7,) (128,)
First of all, the array_variable should actually be a list of the known encodings and not a numpy array.
Also you do not need str.
Now, in your case, if the input images i.e., a,b,c,d,f,e do NOT have the same dimensions, the error will persist. You can not compare images that have different sizes using this function. The reason is that the comparison is based on the distance and distance is defined on vectors of the same length.
Here is a working simple example using the photos from https://github.com/ageitgey/face_recognition/tree/master/examples:
import face_recognition
import numpy as np
from PIL import Image, ImageDraw
from IPython.display import display
# Load a sample picture and learn how to recognize it.
obama_image = face_recognition.load_image_file("obama.jpg")
obama_face_encoding = face_recognition.face_encodings(obama_image)[0]
# Load a second sample picture and learn how to recognize it.
biden_image = face_recognition.load_image_file("biden.jpg")
biden_face_encoding = face_recognition.face_encodings(biden_image)[0]
array_variable = [obama_face_encoding,biden_face_encoding] # list of known encodings
# compare the list with the biden_face_encoding
results = face_recognition.compare_faces(array_variable, biden_face_encoding, tolerance=0.4)
print(results)
[False, True] # True means match, False mismatch
# False: coming from obama_face_encoding VS biden_face_encoding
# True: coming from biden_face_encoding VS biden_face_encoding
To run it go here: https://beta.deepnote.com/project/09705740-31c0-4d9a-8890-269ff1c3dfaf#
Documentation: https://face-recognition.readthedocs.io/en/latest/face_recognition.html
EDIT
To save the known encodings you can use numpy.save
np.save('encodings',biden_face_encoding) # save
load_again = np.load('encodings.npy') # load again

Estimator doesn't work even if input is right

I'm adapting a script.py to achieve transfer learning. I find many script to retrain a model by TFRecord files, but none of them worked for me bacause of something about TF2.0 and contrib, so I'm trying to convert a script to adapt to TF2 and to my model.
This is my script at the moment:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
keras = tf.keras
EPOCHS = 1
# Data preprocessing
import pathlib
#data_dir = tf.keras.utils.get_file(origin="/home/pi/venv/raccoon_dataset/", fname="raccoons_dataset")
#data_dir = pathlib.Path(data_dir)
data_dir = "/home/pi/.keras/datasets/ssd_mobilenet_v1_coco_2018_01_28/saved_model/saved_model.pb"
######################
# Read the TFRecords #
######################
def imgs_input_fn(filenames, perform_shuffle=False, repeat_count=1, batch_size=1):
def _parse_function(serialized):
features = \
{
'image': tf.io.FixedLenFeature([], tf.string),
'label': tf.io.FixedLenFeature([], tf.int64)
}
# Parse the serialized data so we get a dict with our data.
parsed_example = tf.io.parse_single_example(serialized=serialized,
features=features)
print("\nParsed example:\n", parsed_example, "\nEnd of parsed example:\n")
# Get the image as raw bytes.
image_shape = tf.stack([300, 300, 3])
image_raw = parsed_example['image']
label = tf.cast(parsed_example['label'], tf.float32)
# Decode the raw bytes so it becomes a tensor with type.
image = tf.io.decode_raw(image_raw, tf.uint8)
image = tf.cast(image, tf.float32)
image = tf.reshape(image, image_shape)
#image = tf.subtract(image, 116.779) # Zero-center by mean pixel
#image = tf.reverse(image, axis=[2]) # 'RGB'->'BGR'
d = dict(zip(["image"], [image])), [label]
return d
dataset = tf.data.TFRecordDataset(filenames=filenames)
# Parse the serialized data in the TFRecords files.
# This returns TensorFlow tensors for the image and labels.
#print("\nDataset before parsing:\n",dataset,"\n")
dataset = dataset.map(_parse_function)
#print("\nDataset after parsing:\n",dataset,"\n")
if perform_shuffle:
# Randomizes input using a window of 256 elements (read into memory)
dataset = dataset.shuffle(buffer_size=256)
dataset = dataset.repeat(repeat_count) # Repeats dataset this # times
dataset = dataset.batch(batch_size) # Batch size to use
print("\nDataset batched:\n", dataset, "\nEnd dataset\n")
iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
print("\nIterator shape:\n", tf.compat.v1.data.get_output_shapes(iterator),"\nEnd\n")
#print("\nIterator:\n",iterator.get_next(),"\nEnd Iterator\n")
batch_features, batch_labels = iterator.get_next()
return batch_features, batch_labels
raw_train = tf.compat.v1.estimator.TrainSpec(input_fn=imgs_input_fn(
"/home/pi/venv/raccoon_dataset/data/train.record",
perform_shuffle=True,
repeat_count=5,
batch_size=20),
max_steps=1)
and this is the resulting screen:
Parsed example:
{'image': <tf.Tensor 'ParseSingleExample/ParseSingleExample:0' shape=() dtype=string>, 'label': <tf.Tensor 'ParseSingleExample/ParseSingleExample:1' shape=() dtype=int64>}
End of parsed example:
Dataset batched:
<BatchDataset shapes: ({image: (None, 300, 300, 3)}, (None, 1)), types: ({image: tf.float32}, tf.float32)>
End dataset
Iterator shape:
({'image': TensorShape([None, 300, 300, 3])}, TensorShape([None, 1]))
End
2019-11-20 14:01:14.493817: W tensorflow/core/framework/op_kernel.cc:1622] OP_REQUIRES failed at example_parsing_ops.cc:240 : Invalid argument: Feature: image (data type: string) is required but could not be found.
2019-11-20 14:01:14.495019: W tensorflow/core/framework/op_kernel.cc:1622] OP_REQUIRES failed at iterator_ops.cc:929 : Invalid argument: {{function_node __inference_Dataset_map__parse_function_27}} Feature: image (data type: string) is required but could not be found.
[[{{node ParseSingleExample/ParseSingleExample}}]]
Traceback (most recent call last):
File "transfer_learning.py", line 127, in <module>
batch_size=20),
File "transfer_learning.py", line 107, in imgs_input_fn
batch_features, batch_labels = iterator.get_next()
File "/home/pi/venv/lib/python3.7/site-packages/tensorflow_core/python/data/ops/iterator_ops.py", line 737, in get_next
return self._next_internal()
File "/home/pi/venv/lib/python3.7/site-packages/tensorflow_core/python/data/ops/iterator_ops.py", line 651, in _next_internal
output_shapes=self._flat_output_shapes)
File "/home/pi/venv/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_dataset_ops.py", line 2673, in iterator_get_next_sync
_six.raise_from(_core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError: {{function_node __inference_Dataset_map__parse_function_27}} Feature: image (data type: string) is required but could not be found.
[[{{node ParseSingleExample/ParseSingleExample}}]] [Op:IteratorGetNextSync]
I don't know what I'm doing wrong.

Pandas, how to reset? - Shape of passed values is (1,1), indices imply (3,1)

I'm currently writing some code and am using pandas to export all of the data into csv files. My program runs multiple iterations until it has gone through all of the necessary files. Pandas is re-writing one file each iteration but when it moves onto the next file I need it to reset all of the data (I think).
Structure is roughly:
While loop>a few variables are named>program runs>dataframe=(pandas.DataFrame(averagepercentagelist,index=namelist,columns=header))
This part works with no problem for one file. When moving onto the next file, all of the arrays I use are reset and this I think is why pandas gives the error Shape of passed values is (1,1), indices imply (3,1).
Please let me know if I need to explain it better.
EDIT:
While True:
try:
averagepercentagelist=[]
namelist=[]
columns=[]
for row in database:
averagepercentagelist=["12","23"]
namelist=["Name0","Name1"]
columns=["Average percentage"]
dataframe=(pandas.DataFrame(averagepercentagelist,index=namelist,columns=header))
except Exception as e:
print e
break
SNIPPET:
dataframe= (pandas.DataFrame(averagepercentagelist,index=namelist,columns=header))
currentcalculatedatafrane = 'averages' + currentcalculate
dataframeexportpath = os.path.join(ROOT_PATH,'Averages',currentcalculatedatafrane)
dataframe.to_csv(dataframeexportpath)
FULL PROGRAM SO FAR:
import csv
import os
import re
import pandas
import tkinter as tk
from tkinter import messagebox
from os.path import isfile, join
from os import listdir
import time
ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
indexforcalcu=0
line_count=0
testlist=[]
namelist=[]
header=['Average Percentage']
def clearvariables():
indexforcalcu=0
testlist=[]
def findaverageofstudent(findaveragenumber,numoftests):
total=0
findaveragenumber = findaveragenumber/numoftests
findaveragenumber = round(findaveragenumber, 1)
return findaveragenumber
def removecharacters(nameforfunc):
nameforfunc=str(nameforfunc)
elem=re.sub("[{'}]", "",nameforfunc)
return elem
def getallclasses():
onlyfiles = [f for f in listdir(ROOT_PATH) if isfile(join(ROOT_PATH, f))]
onlyfiles.remove("averagecalculatorv2.py")
return onlyfiles
def findaveragefunc():
indexforcalcu=-1
while True:
try:
totaltests=0
line_count=0
averagepercentagelist=[]
indexforcalcu=indexforcalcu+1
allclasses=getallclasses()
currentcalculate=allclasses[indexforcalcu]
classpath = os.path.join(ROOT_PATH, currentcalculate)
with open(classpath) as csv_file:
classscoredb = csv.reader(csv_file, delimiter=',')
for i, row in enumerate(classscoredb):
if line_count == 0:
while True:
try:
totaltests=totaltests+1
rowreader= {row[totaltests]}
except:
totaltests=totaltests-1
line_count = line_count + 1
break
else:
calculating_column_location=1
total=0
while True:
try:
total = total + int(row[calculating_column_location])
calculating_column_location = calculating_column_location + 1
except:
break
i=str(i)
name=row[0]
cleanname=removecharacters(nameforfunc=name)
namelist.append(cleanname)
findaveragenumbercal=findaverageofstudent(findaveragenumber=total,numoftests=totaltests)
averagepercentagelist.append(findaveragenumbercal)
line_count = line_count + 1
dataframe= (pandas.DataFrame(averagepercentagelist,index=namelist,columns=header))
currentcalculatedatafrane = 'averages' + i + currentcalculate
dataframeexportpath = os.path.join(ROOT_PATH,'Averages',currentcalculatedatafrane)
dataframe.to_csv(dataframeexportpath)
i=int(i)
except Exception as e:
print("ERROR!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n",e)
break
def makenewclass():
global newclassname
getclassname=str(newclassname.get())
if getclassname == "":
messagebox.showerror("Error","The class name you have entered is invalid.")
else:
classname = getclassname + ".csv"
with open(classname, mode='w') as employee_file:
classwriter = csv.writer(employee_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
classwriter.writerow(["Name","Test 1"])
root=tk.Tk()
root.title("Test result average finder")
findaveragebutton=tk.Button(root,text="Find Averages",command=findaveragefunc())
findaveragebutton.grid(row=2,column=2,padx=(10, 10),pady=(0,10))
classnamelabel=tk.Label(root, text="Class name:")
classnamelabel.grid(row=1, column=0,padx=(10,0),pady=(10,10))
newclassname = tk.Entry(root)
newclassname.grid(row=1,column=1,padx=(10, 10))
newclassbutton=tk.Button(root,text="Create new class",command=makenewclass)
newclassbutton.grid(row=1,column=2,padx=(0, 10),pady=(10,10))
root.mainloop()
Thanks in advance,
Sean
Use:
import glob, os
import pandas as pd
ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
#extract all csv files to list
files = glob.glob(f'{ROOT_PATH}/*.csv')
print (files)
#create new folder if necessary
new = os.path.join(ROOT_PATH,'Averages')
if not os.path.exists(new):
os.makedirs(new)
#loop each file
for f in files:
#create DataFrame and convert first column to index
df = pd.read_csv(f, index_col=[0])
#count average in each row, rond and create one colum DataFrame
avg = df.mean(axis=1).round(1).to_frame('Average Percentage')
#remove index name if nncessary
avg.index.name = None
print (avg)
#create new path
head, tail = os.path.split(f)
path = os.path.join(head, 'Averages', tail)
print (path)
#write DataFrame to csv
avg.to_csv(path)

Sentiment140 Preprocessing

I have been trying to do some preprocessing on the Sentiment140 database on Kaggle: https://www.kaggle.com/kazanova/sentiment140
The code I'm using is this:
import os
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import RegexpTokenizer
Base_location = ''
dataset_location = os.path.join(Base_location, 'Sentiment140.csv')
corpus = []
labels = []
# Parse tweets and sentiments
with open(dataset_location, 'r', encoding='latin-1') as df:
for i, line in enumerate(df):
parts = line.strip().split(',')
# Sentiment (0 = Negative, 1 = Positive)
labels.append(str(parts[0].strip()))
# Tweet
tweet = parts[5].strip()
if tweet.startswith('"'):
tweet = tweet[1:]
if tweet.endswith('"'):
tweet = tweet[::-1]
corpus.append(tweet.strip().lower())
print('Corpus size: {}'.format(len(corpus)))
# Tokenize and stem
tkr = RegexpTokenizer('[a-zA-Z0-9#]+')
stemmer = LancasterStemmer()
tokenized_corpus = []
for i, tweet in enumerate(corpus):
tokens = [stemmer.stem(t) for t in tkr.tokenize(tweet) if not t.startswith('#')]
tokenized_corpus.append(tokens)
print(tokenized_corpus)
However, I keep getting this error:
TypeError: '_io.TextIOWrapper' object is not subscriptable
Can anyone help me understand how to solve the issue?
Thanks in advance
TL;DR
To read .csv or structured datasets, use pandas https://pandas.pydata.org/ or any other dataframe libraries.
In Long:
Instead of doing:
Base_location = ''
dataset_location = os.path.join(Base_location, 'Sentiment140.csv')
corpus = []
labels = []
# Parse tweets and sentiments
with open(dataset_location, 'r', encoding='latin-1') as df:
for i, line in enumerate(df):
parts = line.strip().split(',')
# Sentiment (0 = Negative, 1 = Positive)
labels.append(str(parts[0].strip()))
# Tweet
tweet = parts[5].strip()
if tweet.startswith('"'):
tweet = tweet[1:]
if tweet.endswith('"'):
tweet = tweet[::-1]
corpus.append(tweet.strip().lower())
You could simply read the .csv file with pandas, e.g.
import pandas as pd
corpus = pd.read_csv('training.1600000.processed.noemoticon.csv', encoding='latin-1')
Then use the .apply() function to process the tweets:
"""
Columns
====
target: the polarity of the tweet (0 = negative, 2 = neutral, 4 = positive)
ids: The id of the tweet ( 2087)
date: the date of the tweet (Sat May 16 23:58:44 UTC 2009)
flag: The query (lyx). If there is no query, then this value is NO_QUERY.
user: the user that tweeted (robotickilldozr)
text: the text of the tweet (Lyx is cool)
"""
from nltk.stem.lancaster import LancasterStemmer
from nltk.tokenize import RegexpTokenizer
import pandas as pd
df = pd.read_csv('training.1600000.processed.noemoticon.csv',
header=None,
names=['target', 'ids', 'date', 'flag', 'user', 'text'],
encoding='latin-1')
tokenizer = RegexpTokenizer('[a-zA-Z0-9#]+')
stemmer = LancasterStemmer()
def process_tweet(tweet):
return [stemmer.stem(token) if not token.startswith('#') else token
for token in tokenizer.tokenize(tweet)]
# 1. Cast the column type to string
# 2. Lowercase it
# 3. Iterate throw each row and get the output from process_tweet()
# 4. # 3. Keep in a new column call `tokenized_text`
df['tokenized_text']= df['text'].str.lower().apply(process_tweet)

How to create own dataset for FCN with caffe?

How to convert image to lmdb for fcn with caffe? You know, It's easy create own dataset for image classification with caffe, but how to create own dataset for semantic segment for fcn?
Use this code. Make the necessary path changes. Please read the code carefully before using it.
import caffe
import lmdb
from PIL import Image
import numpy as np
import glob
from random import shuffle
# Initialize the Image set:
NumberTrain = 1111 # Number of Training Images
NumberTest = 1112 # Number of Testing Images
Rheight = 380 # Required Height
Rwidth = 500 # Required Width
LabelHeight = 380 # Downscaled height of the label
LabelWidth = 500 # Downscaled width of the label
# Read the files in the Data Folder
inputs_data_train = sorted(glob.glob("/home/<user>/caffe-with_crop/examples/fcn-32s/TrainData/*.jpg"))
inputs_data_valid = sorted(glob.glob("/home/<user>/caffe-with_crop/examples/fcn-32s/ValData/*.jpg"))
inputs_label = sorted(glob.glob("/home/<user>/caffe-with_crop/examples/fcn-32s/VOC2011/SegmentationClass/*.png"))
shuffle(inputs_data_train) # Shuffle the DataSet
shuffle(inputs_data_valid) # Shuffle the DataSet
inputs_Train = inputs_data_train[:NumberTrain] # Extract the training data from the complete set
inputs_Test = inputs_data_valid[:NumberTest] # Extract the testing data from the complete set
# Creating LMDB for Training Data
print("Creating Training Data LMDB File ..... ")
in_db = lmdb.open('/home/<user>/caffe-with_crop/examples/fcn-32s/TrainVOC_Data_lmdb',map_size=int(1e14))
with in_db.begin(write=True) as in_txn:
for in_idx, in_ in enumerate(inputs_Train):
print in_idx
im = np.array(Image.open(in_)) # or load whatever ndarray you need
Dtype = im.dtype
im = im[:,:,::-1]
im = Image.fromarray(im)
im = im.resize([Rheight, Rwidth], Image.ANTIALIAS)
im = np.array(im,Dtype)
im = im.transpose((2,0,1))
im_dat = caffe.io.array_to_datum(im)
in_txn.put('{:0>10d}'.format(in_idx),im_dat.SerializeToString())
in_db.close()
# Creating LMDB for Training Labels
print("Creating Training Label LMDB File ..... ")
in_db = lmdb.open('/home/<user>/caffe-with_crop/examples/fcn-32s/TrainVOC_Label_lmdb',map_size=int(1e14))
with in_db.begin(write=True) as in_txn:
for in_idx, in_ in enumerate(inputs_Train):
print in_idx
in_label = in_[:-25]+'VOC2011/SegmentationClass/'+in_[-15:-3]+'png' # Change the numbers as per requirement
L = np.array(Image.open(in_)) # or load whatever ndarray you need
Dtype = L.dtype
L = L[:,:,::-1]
Limg = Image.fromarray(L)
Limg = Limg.resize([LabelHeight, LabelWidth],Image.NEAREST) # To resize the Label file to the required size
L = np.array(Limg,Dtype)
L = L.reshape(L.shape[0],L.shape[1],1)
L = L.transpose((2,0,1))
L_dat = caffe.io.array_to_datum(L)
in_txn.put('{:0>10d}'.format(in_idx),L_dat.SerializeToString())
in_db.close()
# Creating LMDB for Testing Data
print("Creating Testing Data LMDB File ..... ")
in_db = lmdb.open('/home/<user>/caffe-with_crop/examples/fcn-32s/TestVOC_Data_lmdb',map_size=int(1e14))
with in_db.begin(write=True) as in_txn:
for in_idx, in_ in enumerate(inputs_Test):
print in_idx
im = np.array(Image.open(in_)) # or load whatever ndarray you need
Dtype = im.dtype
im = im[:,:,::-1]
im = Image.fromarray(im)
im = im.resize([Rheight, Rwidth], Image.ANTIALIAS)
im = np.array(im,Dtype)
im = im.transpose((2,0,1))
im_dat = caffe.io.array_to_datum(im)
in_txn.put('{:0>10d}'.format(in_idx),im_dat.SerializeToString())
in_db.close()
# Creating LMDB for Testing Labels
print("Creating Testing Label LMDB File ..... ")
in_db = lmdb.open('/home/<user>/caffe-with_crop/examples/fcn-32s/TestVOC_Label_lmdb',map_size=int(1e14))
with in_db.begin(write=True) as in_txn:
for in_idx, in_ in enumerate(inputs_Test):
print in_idx
in_label = in_[:-25]+'VOC2011/SegmentationClass/'+in_[-15:-3]+'png' # Change the numbers as per requirement
L = np.array(Image.open(in_)) # or load whatever ndarray you need
Dtype = L.dtype
L = L[:,:,::-1]
Limg = Image.fromarray(L)
Limg = Limg.resize([LabelHeight, LabelWidth],Image.NEAREST) # To resize the Label file to the required size
L = np.array(Limg,Dtype)
L = L.reshape(L.shape[0],L.shape[1],1)
L = L.transpose((2,0,1))
L_dat = caffe.io.array_to_datum(L)
in_txn.put('{:0>10d}'.format(in_idx),L_dat.SerializeToString())
in_db.close()

Resources