I'm building my first simple DQN network, but I'm really struggling with the output shape of my network.
I have an input with 139 features, so I set input_shape=(None,139), and a batch size of 64. The last layer has 4 outputs, as my environment has 4 possible actions (0, 1, 2, 3).
But I get this error:
ValueError: Error when checking target: expected dense_4 to have shape
(None, 1) but got array with shape (1, 4)
It's driving me crazy. What have I done wrong?
def create_model(self):
    model = Sequential()
    model.add(Dense(128, input_shape=(None,139), activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(4, activation='softmax'))

    #Model compile settings:
    opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

    # Compile model
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=opt,
        metrics=['accuracy']
    )
    print(model.summary())
    return model
Model summary:
Model: "sequential_7"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_23 (Dense) (None, None, 128) 17920
_________________________________________________________________
dropout_19 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_24 (Dense) (None, None, 128) 16512
_________________________________________________________________
dropout_20 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_25 (Dense) (None, None, 128) 16512
_________________________________________________________________
dropout_21 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_26 (Dense) (None, None, 4) 516
=================================================================
Total params: 51,460
Trainable params: 51,460
Non-trainable params: 0
_________________________________________________________________
None
Model: "sequential_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_27 (Dense) (None, None, 128) 17920
_________________________________________________________________
dropout_22 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_28 (Dense) (None, None, 128) 16512
_________________________________________________________________
dropout_23 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_29 (Dense) (None, None, 128) 16512
_________________________________________________________________
dropout_24 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_30 (Dense) (None, None, 4) 516
=================================================================
Total params: 51,460
Trainable params: 51,460
Non-trainable params: 0
_________________________________________________________________
None
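A note on what the summaries show: every layer has output shape (None, None, 128), i.e. an extra unspecified dimension. That comes from input_shape=(None,139), which Keras reads as "a variable-length sequence of 139-feature vectors", so the last layer outputs (None, None, 4), while sparse_categorical_crossentropy expects a single integer class label per sample, which is presumably why Keras complains that it expected shape (None, 1) but got an array of shape (1, 4) when a 4-element Q-vector is passed as the target. Below is a minimal sketch of a shape-consistent DQN head; this is my assumption about the intent, not the original code: the input is a flat 139-feature vector and the Q-values are regressed with a linear output and mse.

def create_model(self):
    model = Sequential()
    # each state is a flat vector of 139 features, so no extra None dimension
    model.add(Dense(128, input_shape=(139,), activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    # Q-values are unbounded, so a linear output fits better than softmax
    model.add(Dense(4, activation='linear'))
    # mse against the full 4-element Q-vector matches targets of shape (batch, 4)
    model.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
    )
    return model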
Post updated with the code below: the DQN class with the model, train method, etc.
class DQNAgent:
    def __init__(self):
        #main model # gets trained every step
        self.model = self.create_model()

        #Target model this is what we .predict against every step
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())

        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
        #self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}")
        self.target_update_counter = 0

    def create_model(self):
        model = Sequential()
        model.add(Dense(128, input_shape=(None,139), activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(4, activation='softmax'))

        #Model compile settings:
        opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

        # Compile model
        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer=opt,
            metrics=['accuracy']
        )
        print(model.summary())
        return model

    def update_replay_memory(self, transition):
        self.replay_memory.append(transition)

    def train(self, terminal_state):
        global export
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        current_states = np.array([transition[0] for transition in minibatch])
        current_qs_list = self.model.predict(current_states)

        new_states = np.array([transition[3] for transition in minibatch])
        future_qs_list = self.target_model.predict(new_states)

        X = []
        y = []

        # Now we need to enumerate our batches
        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
            # If not a terminal state, get new q from future states, otherwise set it to 0
            # almost like with Q Learning, but we use just part of equation here
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            # Update Q value for given state
            qs = current_qs_list[index]
            print(qs, qs.shape)
            qs[0,action] = new_q

            # And append to our training data
            X.append(current_state)
            y.append(qs)

        # Fit on all samples as one batch, log only on terminal state
        self.model.fit(np.array(X), np.array(y), batch_size=MINIBATCH_SIZE, verbose=0,
                       shuffle=False, callbacks=[tensorboard] if terminal_state else None)

        # Update target network counter every episode
        if done:
            self.target_update_counter += 1

        # If counter reaches set value, update target network with weights of main network
        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

    # Queries main network for Q values given current observation space (environment state)
    def get_qs(self, state):
        return self.model.predict(scaler.transform(np.array(state).reshape(-1, *state.shape)))[0]
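For reference, with a model that takes shape-(139,) inputs (as in the sketch above), current_qs_list[index] inside train() would be a 1-D vector of 4 Q-values, so the per-transition target update would index the action directly. This is a hypothetical sketch of just that inner step, reusing the same DISCOUNT and minibatch variables, not the original code:

# with 1-D Q-vectors there is no leading batch axis to index
if not done:
    new_q = reward + DISCOUNT * np.max(future_qs_list[index])
else:
    new_q = reward

qs = current_qs_list[index].copy()
qs[action] = new_q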
agent = DQNAgent()
-
for i in range(EPOCHS):
print("EPOCH #", i, " starting, of ", EPOCHS, "epochs")
if i == EPOCHS - 1: # the last epoch, use test data set
current_state, xdata = preprocess(test_filename)
else:
current_state, xdata = preprocess(dataframe)
win_loss = 0
step = 1
# Iterate over episodes
for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit="episodes"):
# Update tensorboard step every episode
# agent.tensorboard.step = episode
# Restarting episode - reset episode reward and step number
episode_reward = 0
# Reset flag and start iterating until episode ends
done = False
while not done:
done = are_we_done(current_state)
# This part stays mostly the same, the change is to query a model for Q values
if np.random.random() > epsilon:
# Get action from Q table
action = np.argmax(agent.get_qs(current_state))
# print("Q-value action")
action = action_check(current_state, action, orders)
else:
# Get random action
# print("Random action")
action = np.random.randint(0, 4)
action = action_check(current_state, action, orders)
(
new_state,
terminal_state,
win_loss,
close,
total_win_loss,
step,
orders,
) = to_market(current_state, action, step, win_loss)
reward = get_reward(win_loss, prev_win_loss)
episode_reward += reward
# Every step we update replay memory and train main network
scaled_current = (scaler.transform(current_state)).reshape(
1, current_state.shape[1]
)
scaled_new_state = (scaler.transform(new_state)).reshape(
1, new_state.shape[1]
)
agent.update_replay_memory(
(scaled_current, action, reward, scaled_new_state, done)
)
agent.train(done)
# step += 1
current_state = new_state
prev_win_loss = win_loss
if (
current_state.flatten()[3] == 23 and current_state.flatten()[4] >= 57
): # Close for trades between 23.57 and 00.15 due to swaps and crazy market
market_close = True
while market_close:
if (
current_state.flatten()[3] == 0
and current_state.flatten()[4] >= 15
):
market_close = False
else:
sleep(10)
market_close = False
# Append episode reward to a list and log stats (every given number of episodes)
ep_rewards.append(episode_reward)
with writer.as_default():
tf.summary.scalar("Reward", episode_reward, step=episode)
average_reward = sum(ep_rewards) / len(ep_rewards)
min_reward = min(ep_rewards)
max_reward = max(ep_rewards)
agent.tensorboard.update_stats(
reward_avg=average_reward,
reward_min=min_reward,
reward_max=max_reward,
epsilon=epsilon,
)
# Save model, but only when min reward is greater or equal a set value
if total_win_loss >= MIN_WIN_LOSS:
agent.model.save(f"models/{MODEL_NAME}__{SYMBOL}__{int(time.time())}.model")
# Decay epsilon
if epsilon > MIN_EPSILON:
epsilon *= EPSILON_DECAY
epsilon = max(MIN_EPSILON, epsilon)
print("EPOCH #", i, " done, of ", EPOCHS, "epochs")
Related
I'm using MONAI in Spyder (Anaconda) to build a U-Net. I want to add/modify layers starting from this baseline:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = nets.UNet(
    spatial_dims = 2,
    in_channels = 3,
    out_channels = 1,
    channels = (4, 8, 16, 32, 64),
    strides = (2, 2, 2, 2),
    num_res_units = 3,
    norm = layers.Norm.BATCH,
    kernel_size=3,
).to(device)

loss_function = losses.DiceLoss()
torch.backends.cudnn.benchmark = True
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4, weight_decay = 0)
post_pred = Compose([EnsureType(), Activations(sigmoid = True), AsDiscrete(threshold=0.5)])
post_label = Compose([EnsureType()])
inferer = SimpleInferer()
utils.set_determinism(seed=46)
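To see which sub-modules the baseline actually instantiates (and therefore what there is to replace), it can help to simply print the module tree. A small check using only standard PyTorch calls:

# Inspect the baseline before modifying it: every named sub-module is listed,
# so you can see where the convolution / residual units sit.
print(model)
for name, module in model.named_modules():
    print(name, "->", module.__class__.__name__)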
My final aim is to create a MultiResUNet that has different layers such as:
class Conv2d_batchnorm(torch.nn.Module):
    '''
    2D Convolutional layers

    Arguments:
        num_in_filters {int} -- number of input filters
        num_out_filters {int} -- number of output filters
        kernel_size {tuple} -- size of the convolving kernel
        stride {tuple} -- stride of the convolution (default: {(1, 1)})
        activation {str} -- activation function (default: {'relu'})
    '''
    def __init__(self, num_in_filters, num_out_filters, kernel_size, stride = (1,1), activation = 'relu'):
        super().__init__()
        self.activation = activation
        self.conv1 = torch.nn.Conv2d(in_channels=num_in_filters, out_channels=num_out_filters, kernel_size=kernel_size, stride=stride, padding = 'same')
        self.batchnorm = torch.nn.BatchNorm2d(num_out_filters)

    def forward(self, x):
        x = self.conv1(x)
        x = self.batchnorm(x)

        if self.activation == 'relu':
            return torch.nn.functional.relu(x)
        else:
            return x
This is just an example of a different Conv2d layer that I would use instead of the native one in the baseline.
I hope some of you can help me figure out how to proceed.
Thanks, Fede
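For illustration only, a hypothetical MultiRes-style block assembled from the Conv2d_batchnorm layer above might look like the sketch below: three chained 3x3 convolutions whose outputs are concatenated, plus a 1x1 shortcut added to the result. The filter split and the exact normalisation placement are assumptions, not the reference MultiResUNet implementation.

class MultiResBlock(torch.nn.Module):
    # Hypothetical sketch built on Conv2d_batchnorm; filter proportions are assumed.
    def __init__(self, num_in_filters, num_out_filters):
        super().__init__()
        f1 = num_out_filters // 4
        f2 = num_out_filters // 4
        f3 = num_out_filters - f1 - f2
        self.conv_a = Conv2d_batchnorm(num_in_filters, f1, (3, 3))
        self.conv_b = Conv2d_batchnorm(f1, f2, (3, 3))
        self.conv_c = Conv2d_batchnorm(f2, f3, (3, 3))
        # 1x1 shortcut with no ReLU (any activation string other than 'relu' falls through)
        self.shortcut = Conv2d_batchnorm(num_in_filters, num_out_filters, (1, 1), activation='None')
        self.batchnorm = torch.nn.BatchNorm2d(num_out_filters)

    def forward(self, x):
        a = self.conv_a(x)
        b = self.conv_b(a)
        c = self.conv_c(b)
        out = torch.cat([a, b, c], dim=1)             # f1 + f2 + f3 = num_out_filters channels
        out = self.batchnorm(out + self.shortcut(x))  # residual 1x1 path
        return torch.nn.functional.relu(out)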
I modified Distiller (https://github.com/IntelLabs/distiller) to emulate an in-memory computing circuit; in particular, I added quantization of the convolution-layer outputs during QAT. However, accuracy drops by more than 60 percentage points (90% → 30%), even with 32-bit quantization as a sanity check. I should also say that it was perfectly fine when I just added a calculated quantization noise. Below is the code.
import torch
import torch.nn as nn
import math
__all__ = ['preact_resnet20_cifar', 'preact_resnet32_cifar', 'preact_resnet44_cifar', 'preact_resnet56_cifar',
'preact_resnet110_cifar', 'preact_resnet20_cifar_conv_ds', 'preact_resnet32_cifar_conv_ds',
'preact_resnet44_cifar_conv_ds', 'preact_resnet56_cifar_conv_ds', 'preact_resnet110_cifar_conv_ds']
NUM_CLASSES = 10
device = torch.device("cuda")
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
def quantize(tensor, n_bits=32, dim=144, clip_ratio=1):
max_value = dim*clip_ratio
min_value = -dim*clip_ratio
delta = max_value - min_value
y = tensor.clone()
y = torch.clamp(y, min = min_value, max = max_value)
lsb = delta / (2**n_bits)
y = (y // lsb)*lsb
return y
class PreactBasicBlock(nn.Module):
expansion = 1
def __init__(self, block_gates, inplanes, planes, stride=1, downsample=None, preact_downsample=True, resolution = 32):
super(PreactBasicBlock, self).__init__()
self.block_gates = block_gates
self.pre_bn = nn.BatchNorm2d(inplanes)
self.pre_relu = nn.ReLU(inplace=False) # To enable layer removal inplace must be False
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn = nn.BatchNorm2d(planes)
self.relu = nn.ReLU(inplace=False)
self.conv2 = conv3x3(planes, planes)
self.downsample = downsample
self.stride = stride
self.preact_downsample = preact_downsample
self.resolution = resolution
def forward(self, x):
need_preact = self.block_gates[0] or self.block_gates[1] or self.downsample and self.preact_downsample # add pre-activation for block_gates 0 and 1 and when is downsampled
if need_preact: # x > bn > relu > out
preact = self.pre_bn(x)
preact = self.pre_relu(preact)
out = preact
else: # x > out
preact = out = x
if self.block_gates[0]: # (preact) > conv > bn > relu
out = self.conv1(out)
dim = self.conv1.in_channels * self.conv1.kernel_size[0] * self.conv1.kernel_size[1]
out = quantize(out, n_bits=self.resolution, dim=dim, clip_ratio=1)
out = self.bn(out)
out = self.relu(out)
if self.block_gates[1]: # (preact)> conv
out = self.conv2(out)
dim = self.conv2.in_channels * self.conv2.kernel_size[0] * self.conv2.kernel_size[1]
out = quantize(out, n_bits=self.resolution, dim=dim, clip_ratio=1)
if self.downsample is not None:
if self.preact_downsample:
residual = self.downsample(preact)
else:
residual = self.downsample(x)
else:
residual = x
out += residual
return out
class PreactResNetCifar(nn.Module):
def __init__(self, block, layers, num_classes=NUM_CLASSES, conv_downsample=False):
self.nlayers = 0
# Each layer manages its own gates
self.layer_gates = []
for layer in range(3):
# For each of the 3 layers, create block gates: each block has two layers
self.layer_gates.append([]) # [True, True] * layers[layer])
for blk in range(layers[layer]):
self.layer_gates[layer].append([True, True])
self.inplanes = 16 # 64
super(PreactResNetCifar, self).__init__()
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
self.layer1 = self._make_layer(self.layer_gates[0], block, 16, layers[0],
conv_downsample=conv_downsample)
self.layer2 = self._make_layer(self.layer_gates[1], block, 32, layers[1], stride=2,
conv_downsample=conv_downsample)
self.layer3 = self._make_layer(self.layer_gates[2], block, 64, layers[2], stride=2,
conv_downsample=conv_downsample)
self.final_bn = nn.BatchNorm2d(64 * block.expansion)
self.final_relu = nn.ReLU(inplace=True)
self.avgpool = nn.AvgPool2d(8, stride=1)
self.fc = nn.Linear(64 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, layer_gates, block, planes, blocks, stride=1, conv_downsample=False):
downsample = None
outplanes = planes * block.expansion
if stride != 1 or self.inplanes != outplanes:
if conv_downsample:
downsample = nn.Conv2d(self.inplanes, outplanes,
kernel_size=1, stride=stride, bias=False)
else:
# Identity downsample uses strided average pooling + padding instead of convolution
pad_amount = int(self.inplanes / 2)
downsample = nn.Sequential(
nn.AvgPool2d(2),
nn.ConstantPad3d((0, 0, 0, 0, pad_amount, pad_amount), 0)
)
layers = []
layers.append(block(layer_gates[0], self.inplanes, planes, stride, downsample, conv_downsample))
self.inplanes = outplanes
for i in range(1, blocks):
layers.append(block(layer_gates[i], self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.final_bn(x)
x = self.final_relu(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
return x
def preact_resnet20_cifar(**kwargs):
model = PreactResNetCifar(PreactBasicBlock, [3, 3, 3], **kwargs)
return model
def preact_resnet32_cifar(**kwargs):
model = PreactResNetCifar(PreactBasicBlock, [5, 5, 5], **kwargs)
return model
def preact_resnet44_cifar(**kwargs):
model = PreactResNetCifar(PreactBasicBlock, [7, 7, 7], **kwargs)
return model
def preact_resnet56_cifar(**kwargs):
model = PreactResNetCifar(PreactBasicBlock, [9, 9, 9], **kwargs)
return model
def preact_resnet110_cifar(**kwargs):
model = PreactResNetCifar(PreactBasicBlock, [18, 18, 18], **kwargs)
return model
def preact_resnet182_cifar(**kwargs):
model = PreactResNetCifar(PreactBasicBlock, [30, 30, 30], **kwargs)
return model
def preact_resnet20_cifar_conv_ds(**kwargs):
return preact_resnet20_cifar(conv_downsample=True)
def preact_resnet32_cifar_conv_ds(**kwargs):
return preact_resnet32_cifar(conv_downsample=True)
def preact_resnet44_cifar_conv_ds(**kwargs):
return preact_resnet44_cifar(conv_downsample=True)
def preact_resnet56_cifar_conv_ds(**kwargs):
return preact_resnet56_cifar(conv_downsample=True)
def preact_resnet110_cifar_conv_ds(**kwargs):
return preact_resnet110_cifar(conv_downsample=True)
def preact_resnet182_cifar_conv_ds(**kwargs):
return preact_resnet182_cifar(conv_downsample=True)
I use distiller.example.classifier_compression.compress_classifier.py from Distiller, run from the terminal with a schedule file. For reference, the command is "python compress_classifier.py -a preact_resnet20_cifar --lr 0.1 -p 50 -b 128 -j 1 --epochs 200 --compress=../quantization/quant_aware_train/preact_resnet_cifar_dorefa.yaml --wd=0.0002 --vs=0 --gpus 0".
I tried adding quantization noise instead, and accuracy was fine. But in my opinion that is not a faithful emulation of quantization, because the data distribution is not uniform.
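One thing that stands out in quantize() above: the clamp-and-floor in the forward pass has zero gradient almost everywhere, so when it is applied after every convolution during QAT it can effectively block gradient flow through those activations. Whether that alone explains the 60-point drop is hard to say, but a common workaround is a straight-through estimator, sketched here as a hypothetical variant of the same function (not something Distiller provides under this name):

def quantize_ste(tensor, n_bits=32, dim=144, clip_ratio=1):
    """Straight-through variant: quantized values in the forward pass,
    identity gradient in the backward pass."""
    max_value = dim * clip_ratio
    min_value = -dim * clip_ratio
    lsb = (max_value - min_value) / (2 ** n_bits)
    y = torch.clamp(tensor, min=min_value, max=max_value)
    y_q = torch.floor(y / lsb) * lsb
    # forward: y_q ; backward: gradient behaves as if only the clamp were applied
    return y + (y_q - y).detach()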
I was trying to calculate the initial embeddings of the whole data frame, which is the first step in implementing my GNN (which is heterogeneous in nature). I used a Twitter message dataset for this task and loaded it in the following way:
load_path = '/Users/hemangjiwnani/Desktop/Projects/Paper1/KPGNN/datasets/Twitter/'
save_path = '/Users/hemangjiwnani/Desktop/Projects/Paper1/KPGNN/datasets/Twitter/'
# load dataset
p_part1 = load_path + '68841_tweets_multiclasses_filtered_0722_part1.npy'
p_part2 = load_path + '68841_tweets_multiclasses_filtered_0722_part2.npy'
#"./datasets/Twitter/68841_tweets_multiclasses_filtered_0722_part1.npy"
df_np_part1 = np.load(p_part1, allow_pickle=True)
df_np_part2 = np.load(p_part2, allow_pickle=True)
Then I created a data frame from it with the following code:
df_np = np.concatenate((df_np_part1, df_np_part2), axis = 0)  # axis=0 stacks the two parts row-wise
print("Loaded data.")
df = pd.DataFrame(data=df_np, columns=["event_id", "tweet_id", "text", "user_id", "created_at", "user_loc",\
"place_type", "place_full_name", "place_country_code", "hashtags", "user_mentions", "image_urls", "entities",
"words", "filtered_words", "sampled_words"])
print("Data converted to dataframe.")
print(df.shape)
print(df.head(5))
This produced the following output:
Loaded data.
Data converted to dataframe.
(68841, 16)
event_id ... sampled_words
0 0 ... []
1 0 ... []
2 0 ... []
3 0 ... []
4 0 ... []
[5 rows x 16 columns]
The function below raises an error at the return statement:
def documents_to_features(df):
    nlp = spacy.load("en_core_web_sm")
    # nlp = en_core_web_lg.load()
    features = df.filtered_words.apply(lambda x: nlp(' '.join(x)).vector).values
    return np.stack(features, axis=0)  # <-- this line raises the error
ERROR
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-12-2e8bc5e83009> in <module>()
----> 1 d_features = documents_to_features(df)
2 print("Document features generated.")
3 t_features = df_to_t_features(df)
4 print("Time features generated.")
5 combined_features = np.concatenate((d_features, t_features), axis=1)
1 frames
<ipython-input-9-b772a7744232> in documents_to_features(df)
3 #nlp = en_core_web_lg.load()
4 features = df.filtered_words.apply(lambda x: nlp(' '.join(x)).vector).values
----> 5 return np.stack(features, axis=0)
<__array_function__ internals> in stack(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/numpy/core/shape_base.py in stack(arrays, axis, out)
425 shapes = {arr.shape for arr in arrays}
426 if len(shapes) != 1:
--> 427 raise ValueError('all input arrays must have the same shape')
428
429 result_ndim = arrays[0].ndim + 1
ValueError: all input arrays must have the same shape
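np.stack only succeeds if every row produces a vector of the same shape, so the error suggests that some filtered_words entries yield a differently shaped .vector (for example empty or malformed rows). A hypothetical diagnostic helper (not part of the original code) to find the offending rows:

import spacy

nlp = spacy.load("en_core_web_sm")

def check_vector_shapes(df):
    # Group row indices by the shape of the vector they produce,
    # so rows with an unexpected shape can be inspected or dropped.
    shapes = {}
    for idx, words in df.filtered_words.items():
        vec = nlp(' '.join(words)).vector
        shapes.setdefault(vec.shape, []).append(idx)
    return shapes

# Usage: print({shape: len(rows) for shape, rows in check_vector_shapes(df).items()})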
I have a dataset composed of welds and masks (white for the weld, black for the background), and I need to use Mask R-CNN, so I have to convert them to COCO dataset annotations. Does anybody have any suggestions on how to do this?
I tried this one: https://github.com/chrise96/image-to-coco-json-converter
but I'm getting this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-3-0ddc235b1528> in <module>
94
95 # Create images and annotations sections
---> 96 coco_format["images"], coco_format["annotations"], annotation_cnt = images_annotations_info(mask_path)
97
98 with open("output/{}.json".format(keyword),"w") as outfile:
<ipython-input-3-0ddc235b1528> in images_annotations_info(maskpath)
57 sub_masks = create_sub_masks(mask_image_open, w, h)
58 for color, sub_mask in sub_masks.items():
---> 59 category_id = category_colors[color]
60
61 # "annotations" info
KeyError: '(1, 1, 1)'
Here is the code; I've just added the weld category:
import glob
from src.create_annotations import *
# Label ids of the dataset
category_ids = {
"outlier": 0,
"window": 1,
"wall": 2,
"balcony": 3,
"door": 4,
"roof": 5,
"sky": 6,
"shop": 7,
"chimney": 8,
"weld": 9,
}
# Define which colors match which categories in the images
category_colors = {
"(0, 0, 0)": 0, # Outlier
"(255, 0, 0)": 1, # Window
"(255, 255, 0)": 2, # Wall
"(128, 0, 255)": 3, # Balcony
"(255, 128, 0)": 4, # Door
"(0, 0, 255)": 5, # Roof
"(128, 255, 255)": 6, # Sky
"(0, 255, 0)": 7, # Shop
"(128, 128, 128)": 8, # Chimney
"(255, 255, 255)": 9 # Weld
}
# Define the ids that are a multipolygon. In our case: weld, wall, roof and sky
multipolygon_ids = [9, 2, 5, 6]
# Get "images" and "annotations" info
def images_annotations_info(maskpath):
# This id will be automatically increased as we go
annotation_id = 0
image_id = 0
annotations = []
images = []
for mask_image in glob.glob(maskpath + "*.png"):
# The mask image is *.png but the original image is *.jpg.
# We make a reference to the original file in the COCO JSON file
original_file_name = os.path.basename(mask_image).split(".")[0] + ".jpg"
# Open the image and (to be sure) we convert it to RGB
mask_image_open = Image.open(mask_image).convert("RGB")
w, h = mask_image_open.size
# "images" info
image = create_image_annotation(original_file_name, w, h, image_id)
images.append(image)
sub_masks = create_sub_masks(mask_image_open, w, h)
for color, sub_mask in sub_masks.items():
category_id = category_colors[color]
# "annotations" info
polygons, segmentations = create_sub_mask_annotation(sub_mask)
# Check if we have classes that are a multipolygon
if category_id in multipolygon_ids:
# Combine the polygons to calculate the bounding box and area
multi_poly = MultiPolygon(polygons)
annotation = create_annotation_format(multi_poly, segmentations, image_id, category_id, annotation_id)
annotations.append(annotation)
annotation_id += 1
else:
for i in range(len(polygons)):
# Cleaner to recalculate this variable
segmentation = [np.array(polygons[i].exterior.coords).ravel().tolist()]
annotation = create_annotation_format(polygons[i], segmentation, image_id, category_id, annotation_id)
annotations.append(annotation)
annotation_id += 1
image_id += 1
return images, annotations, annotation_id
if __name__ == "__main__":
# Get the standard COCO JSON format
coco_format = get_coco_json_format()
for keyword in ["train", "val"]:
mask_path = "dataset/{}_mask/".format(keyword)
# Create category section
coco_format["categories"] = create_category_annotation(category_ids)
# Create images and annotations sections
coco_format["images"], coco_format["annotations"], annotation_cnt = images_annotations_info(mask_path)
with open("output/{}.json".format(keyword),"w") as outfile:
json.dump(coco_format, outfile)
print("Created %d annotations for images in folder: %s" % (annotation_cnt, mask_path))
Check that (255, 255, 255) is really the value of the object in the mask.
Also check the bit depth of the masks; it must be the same for all masks.
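The KeyError '(1, 1, 1)' means the converter found a pixel color that is not a key of category_colors, i.e. the masks contain near-black pixels (1, 1, 1) rather than only the exact listed values. A small helper (hypothetical, not part of the converter) to list every color actually present in a mask:

from PIL import Image

def list_mask_colors(path):
    # Print every RGB value that occurs in the mask and how often,
    # so category_colors can be checked against the real pixel values.
    img = Image.open(path).convert("RGB")
    for count, color in img.getcolors(maxcolors=256 * 256 * 256):
        print(color, "appears", count, "times")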
I downloaded the SynthText in the Wild dataset from the official site.
Then I read the official readme.txt, but I couldn't find how many characters the dataset has.
I googled it but couldn't find it either...
As you can see in the example image below, some symbols such as '.', ',', ':' and '-' exist. So this dataset has alphabet letters (26) + numbers (10) + some symbols (?).
Does anyone know?
I implemented my own code to count the symbols.
def get_characters(basedir, imagedirname='SynthText', skip_missing=False):
class Symbols:
def __init__(self):
self.symbols = set()
def update(self, data):
self.symbols = self.symbols.union(data)
def __len__(self):
return len(self.symbols)
def __str__(self):
return ''.join(self.symbols)
symbols = Symbols()
def csvgenerator(annodir, imagedir, cbb, wBB, imname, txts, symbols, **kwargs):
image_num = kwargs.get('image_num')
i = kwargs.get('i')
imgpath = os.path.join(imagedir, imname)
img = cv2.imread(imgpath)
h, w, _ = img.shape
if not os.path.exists(imgpath):
if not skip_missing:
raise FileNotFoundError('{} was not found'.format(imgpath))
else:
logging.warning('Missing image: {}'.format(imgpath))
raise _Skip()
# convert txts to list of str
# I don't know why txts is
# ['Lines:\nI lost\nKevin ', 'will ', 'line\nand ',
# 'and\nthe ', '(and ', 'the\nout ',
# 'you ', "don't\n pkg "]
# there is strange blank and the length of txts is different from the one of wBB
txts = ' '.join(txts.tolist()).split()
text_num = len(txts)
if wBB.ndim == 2:
# convert shape=(2, 4,) to (2, 4, 1)
wBB = np.expand_dims(wBB, 2)
assert text_num == wBB.shape[2], 'The length of text and wordBB must be same, but got {} and {}'.format(
text_num, wBB.shape[2])
# replace non-alphanumeric characters with *
alltexts_asterisk = ''.join([re.sub(r'[^A-Za-z0-9]', '*', text) for text in txts])
assert len(alltexts_asterisk) == cbb.shape[
2], 'The length of characters and cbb must be same, but got {} and {}'.format(
len(alltexts_asterisk), cbb.shape[2])
for b in range(text_num):
text = txts[b]
symboltext = re.sub(r'[A-Za-z0-9]+', '', text)
symbols.update(symboltext)
sys.stdout.write('\r{}, and number is {}...{:0.1f}% ({}/{})'.format(symbols, len(symbols), 100 * (float(i + 1) / image_num), i + 1, image_num))
sys.stdout.flush()
_gtmatRecognizer(csvgenerator, basedir, imagedirname, customLog=True, symbols=symbols)
print()
print('symbols are {}, and number is {}'.format(symbols, len(symbols)))
def _gtmatRecognizer(generator, basedir, imagedirname='SynthText', customLog=False, **kwargs):
"""
convert gt.mat to https://github.com/MhLiao/TextBoxes_plusplus/blob/master/data/example.xml
<annotation>
<folder>train_images</folder>
<filename>img_10.jpg</filename>
<size>
<width>1280</width>
<height>720</height>
<depth>3</depth>
</size>
<object>
<difficult>1</difficult>
<content>###</content>
<name>text</name>
<bndbox>
<x1>1011</x1>
<y1>157</y1>
<x2>1079</x2>
<y2>160</y2>
<x3>1076</x3>
<y3>173</y3>
<x4>1011</x4>
<y4>170</y4>
<xmin>1011</xmin>
<ymin>157</ymin>
<xmax>1079</xmax>
<ymax>173</ymax>
</bndbox>
</object>
.
.
.
</annotation>
:param basedir: str, directory path under \'SynthText\'(, \'licence.txt\')
:param imagedirname: (Optional) str, image directory name including \'gt.mat\
:return:
"""
logging.basicConfig(level=logging.INFO)
imagedir = os.path.join(basedir, imagedirname)
gtpath = os.path.join(imagedir, 'gt.mat')
annodir = os.path.join(basedir, 'Annotations')
if not os.path.exists(gtpath):
raise FileNotFoundError('{} was not found'.format(gtpath))
if not os.path.exists(annodir):
# create Annotations directory
os.mkdir(annodir)
"""
ref: http://www.robots.ox.ac.uk/~vgg/data/scenetext/readme.txt
gts = dict;
__header__: bytes
__version__: str
__globals__: list
charBB: object ndarray, shape = (1, image num).
Character level bounding box. shape = (2=(x,y), 4=(top left,...: clockwise), BBox word num)
wordBB: object ndarray, shape = (1, image num).
Word level bounding box. shape = (2=(x,y), 4=(top left,...: clockwise), BBox char num)
imnames: object ndarray, shape = (1, image num, 1).
txt: object ndarray, shape = (i, image num).
Text. shape = (word num)
"""
logging.info('Loading {} now.\nIt may take a while.'.format(gtpath))
gts = sio.loadmat(gtpath)
logging.info('Loaded\n'.format(gtpath))
charBB = gts['charBB'][0]
wordBB = gts['wordBB'][0]
imnames = gts['imnames'][0]
texts = gts['txt'][0]
image_num = imnames.size
for i, (cbb, wBB, imname, txts) in enumerate(zip(charBB, wordBB, imnames, texts)):
imname = imname[0]
try:
generator(annodir, imagedir, cbb, wBB, imname, txts, i=i, image_num=image_num, **kwargs)
except _Skip:
pass
if not customLog:
sys.stdout.write('\rGenerating... {:0.1f}% ({}/{})'.format(100 * (float(i + 1) / image_num), i + 1, image_num))
sys.stdout.flush()
print()
logging.info('Finished!!!')
Finally, I got the number of symbols.
It appears to be the set of printable ASCII characters excluding space, letters, and digits.
INFO:root:Loading ~/data/text/SynthText/SynthText/gt.mat now.
It may take a while.
INFO:root:Loaded
}&|%_(],$^{+?##/-`).<=;~['>:\!"*, and number is 32...100.0% (858750/858750)
INFO:root:Finished!!!
symbols are }&|%_(],$^{+?##/-`).<=;~['>:\!"*, and number is 32
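For reference, if the character set really is all printable ASCII except whitespace, the total would be 26 lowercase + 26 uppercase + 10 digits + the 32 symbols counted above = 94 characters. A quick sanity check of that assumption:

import string

# Printable ASCII minus all whitespace (space, tab, newline, ...)
charset = set(string.printable) - set(string.whitespace)
print(len(charset))   # 94 = 26 + 26 + 10 + 32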