How can I implement my trained neural net model on the 3D array? - arrays

I have a neural net model that was trained using 2D array of samples and features (1125, 8) (here 1125 is the number of samples and 8 is number of features). Now, I wanna use the model to predict on the feature layers 3D array (called 'finalyrs' in my code below) (8, 496, 495) (here 8 is the number of features (same features that were used at the training process) and (496, 495) are number of rows and columns in the imagery.) I could easily implement the model on the dataset with similar nd array of training dataset. However, the case here is different. Could someone help me with the code that could implement the model and create an imagery classified into the desired number of classes?
#####Neural network
model = Sequential()
model.add(Dense(16, input_dim=np.size(X_train, 1), activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(5, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
metrics=
['accuracy'])
#Training the model
history = model.fit(X_train, y_train, epochs=100, batch_size=30)
#Prediction
finalyrs=np.array([R,G,B,h,s,EXG,GLI,WT])

def get_model():
Input_1 = Input(shape=(256, 512, 512, 1))
MaxPooling3D_27 = MaxPooling3D(pool_size= (1,3,3))(Input_1)
Convolution3D_1 = Convolution3D(kernel_dim1= 4,nb_filter= 10,activation= 'relu' ,kernel_dim3= 4,kernel_dim2= 4)(MaxPooling3D_27)
Convolution3D_7 = Convolution3D(kernel_dim1= 4,nb_filter= 10,activation= 'relu' ,kernel_dim3= 4,kernel_dim2= 4)(Convolution3D_1)
BatchNormalization_28 = BatchNormalization()(Convolution3D_7)
MaxPooling3D_12 = MaxPooling3D(pool_size= (2,2,2))(BatchNormalization_28)
SpatialDropout3D_1 = SpatialDropout3D(p= 0.5)(MaxPooling3D_12)
Convolution3D_9 = Convolution3D(kernel_dim1= 2,nb_filter= 20,activation= 'relu' ,kernel_dim3= 2,kernel_dim2= 2)(SpatialDropout3D_1)
Convolution3D_11 = Convolution3D(kernel_dim1= 2,nb_filter= 20,activation= 'relu' ,kernel_dim3= 2,kernel_dim2= 2)(Convolution3D_9)
BatchNormalization_9 = BatchNormalization()(Convolution3D_11)
MaxPooling3D_14 = MaxPooling3D(pool_size= (2,2,2))(BatchNormalization_9)
SpatialDropout3D_4 = SpatialDropout3D(p= 0.5)(MaxPooling3D_14)
Convolution3D_12 = Convolution3D(kernel_dim1= 2,nb_filter= 40,activation= 'relu' ,kernel_dim3= 2,kernel_dim2= 2)(SpatialDropout3D_4)
Convolution3D_13 = Convolution3D(kernel_dim1= 2,nb_filter= 40,activation= 'relu' ,kernel_dim3= 2,kernel_dim2= 2)(Convolution3D_12)
MaxPooling3D_23 = MaxPooling3D(pool_size= (2,2,2))(Convolution3D_13)
BatchNormalization_23 = BatchNormalization()(MaxPooling3D_23)
SpatialDropout3D_5 = SpatialDropout3D(p= 0.5)(BatchNormalization_23)
GlobalMaxPooling3D_1 = GlobalMaxPooling3D()(SpatialDropout3D_5)
Dense_1 = Dense(activation= 'relu' ,output_dim= 10)(GlobalMaxPooling3D_1)
Dropout_14 = Dropout(p= 0.3)(Dense_1)
Dense_6 = Dense(activation= 'relu' ,output_dim= 10)(Dropout_14)
Dense_2 = Dense(activation= 'softmax' ,output_dim= 2)(Dense_6)
return Model([Input_1],[Dense_2])

Related

How to create my own layers on MONAI U-Net?

I'm using MONAI on Spyder Anaconda to build a U-Net network. I want to add/modify layers starting from this baseline.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = nets.UNet(
spatial_dims = 2,
in_channels = 3,
out_channels = 1,
channels = (4, 8, 16, 32, 64),
strides = (2, 2, 2, 2),
num_res_units = 3,
norm = layers.Norm.BATCH,
kernel_size=3,).to(device)
loss_function = losses.DiceLoss()
torch.backends.cudnn.benchmark = True
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4, weight_decay = 0)
post_pred = Compose([EnsureType(), Activations(sigmoid = True), AsDiscrete(threshold=0.5)])
post_label = Compose([EnsureType()])
inferer = SimpleInferer()
utils.set_determinism(seed=46)
My final aim is to create a MultiResUNet that has different layers such as:
class Conv2d_batchnorm(torch.nn.Module):
'''
2D Convolutional layers
Arguments:
num_in_filters {int} -- number of input filters
num_out_filters {int} -- number of output filters
kernel_size {tuple} -- size of the convolving kernel
stride {tuple} -- stride of the convolution (default: {(1, 1)})
activation {str} -- activation function (default: {'relu'})
'''
def __init__(self, num_in_filters, num_out_filters, kernel_size, stride = (1,1), activation = 'relu'):
super().__init__()
self.activation = activation
self.conv1 = torch.nn.Conv2d(in_channels=num_in_filters, out_channels=num_out_filters, kernel_size=kernel_size, stride=stride, padding = 'same')
self.batchnorm = torch.nn.BatchNorm2d(num_out_filters)
def forward(self,x):
x = self.conv1(x)
x = self.batchnorm(x)
if self.activation == 'relu':
return torch.nn.functional.relu(x)
else:
return x
This is just an example of a different Conv2d layer that I would use instead of the native one of the baseline.
Hope some of you can figure out how to proceed.
Thanks, Fede

IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed

I am trying to identify Global Feature Relationships with SHAP values. The SHAP library returns three matrices and I am trying to select the SHAP matrix however, I am getting this error: "IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed".
The code I have is below:
df_score = spark.sql("select * from sandbox.yt_trng_churn_device")
#XGBoost Model
import pickle
from xgboost import XGBClassifier
from mlflow.tracking import MlflowClient
client = MlflowClient()
local_dir = "/dbfs/FileStore/"
local_path = client.download_artifacts
model_path = '/dbfs/FileStore/'
model = XGBClassifier()
model = pickle.load(open(model_path, 'rb'))
HorizonDate = datetime.datetime(2022, 9, 5)
df = df_score
score_data = df.toPandas()
results = model.predict_proba(score_data)
results_l = model.predict(score_data)
score_data["p"]=pd.Series( (v[1] for v in results) )
score_data["l"]=pd.Series( (v for v in results_l) )
spark.createDataFrame(score_data).createOrReplaceTempView("yt_vw_tmp_dev__scores")
spark.sql("create or replace table sandbox.yt_vw_tmp_dev__scores as select * from yt_vw_tmp_dev__scores")
#SHAP Analysis on XGBoost
from shap import KernelExplainer, summary_plot
sql = """
select d_a.*
from
hive_metastore.sandbox.yt_trng_device d_a
right join
(select decile, msisdn, MSISDN_L2L
from(
select ntile(10) over (order by p desc) as decile, msisdn, MSISDN_L2L
from sandbox.yt_vw_tmp_dev__scores
) inc
order by decile) d_b
on d_a.MSISDN_L2L = d_b.MSISDN_L2L and d_a.msisdn = d_b.msisdn
"""
df = spark.sql(sql).drop('msisdn', 'imei', 'imsi', 'event_date', 'MSISDN_L2L', 'account_id')
score_df = df.toPandas()
mode = score_df.mode().iloc[0]
sample = score_df.sample(n=min(100, score_df.shape[0]), random_state=508502835).fillna(mode)
predict = lambda x: model.predict(pd.DataFrame(x, columns=score_df.columns))
explainer = KernelExplainer(predict, sample, link="identity")
shap_values = explainer.shap_values(sample, l1_reg=False)
# The return of the explainer has three matrices, we will get the shap values one
shap_values = shap_values[ :, :, 0]
I am fairly new to coding but it would be great if someone could give some direction on this

IndexError: The shape of the mask [183, 10] at index 1 does not match the shape of the indexed tensor [183, 1703] at index 1

I'm trying to load the Cornell dataset from PyTorch Geometric to train my Graph Neural Network. I want to apply a mask but I achieve this error (also on Chameleon, Wisconsin, Texas datasets). My Dataset class works perfectly with all the datasets of Planetoid that are mono dimensional tensors, probable bidimensional tensors give problem.
I insert my code that can be ruined on Colab without problems.
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.12.0+cu113.html
import torch_geometric
from torch_geometric.datasets import Planetoid, WebKB
from torch_geometric.utils import to_dense_adj, to_undirected, remove_self_loops
class Dataset(object):
def __init__(self, name):
super(Dataset, self).__init__()
self.name = name
if (name == 'Cora'):
dataset = Planetoid(root='/tmp/Cora', name='Cora', split="full")
if(name == 'Citeseer'):
dataset = Planetoid(root='/tmp/Cora', name='Citeseer', split="full")
if(name == 'PubMed'):
dataset = Planetoid(root='/tmp/Cora', name='Pubmed', split="full")
if(name == 'Cornell'):
dataset = WebKB(root='/tmp/WebKB', name='Cornell')
self.data = dataset[0]
print(self.data)
self.train_mask = self.data.train_mask
self.valid_mask = self.data.val_mask
self.test_mask = self.data.test_mask
def train_val_test_split(self):
train_x = self.data.x[self.data.train_mask]
train_y = self.data.y[self.data.train_mask]
valid_x = self.data.x[self.data.val_mask]
valid_y = self.data.y[self.data.val_mask]
test_x = self.data.x[self.data.test_mask]
test_y = self.data.y[self.data.test_mask]
return train_x, train_y, valid_x, valid_y, test_x, test_y
def get_fullx(self):
return self.data.x
def get_edge_index(self):
return self.data.edge_index
def get_adjacency_matrix(self):
# We will ignore this for the first part
adj = to_dense_adj(self.data.edge_index)[0]
return adj
The error that I achieve is in the title and is obtained in this snippet:
cornell_dataset = Dataset(name = 'Cornell')
train_x, train_y, valid_x, valid_y, test_x, test_y = cornell_dataset.train_val_test_split()
# check and confirm our data shapes match our expectations
print(f"Train shape x: {train_x.shape}, y: {train_y.shape}")
print(f"Val shape x: {valid_x.shape}, y: {valid_y.shape}")
print(f"Test shape x: {test_x.shape}, y: {test_y.shape}")

Fusion of vgg19 model and dense layer model

AIM: To perform multiclassification of materials using images and roughness values.
DATASET - I have a dataset which has 11 material classes, each class has 25 images. So the total number of train images are 2011=220, and 511=55 belong to validation set. Each image has a corresponding 6 roughness parameters. These parameters are stored in csv file as input = RaX, RaY, RqX, RqY, RzX, RzY and output = 11 material classes. So for every class, 25 values of RaX, RaY, RqX, RqY, RzX, RzY are recorded. I have trained the images on vgg19 and roughness values as dense layers.
PROBLEM STATEMENT - The training accuracy is 7% and loss is 0.7. The vgg model and dense model when trained separately gave good scores. But fusion or concatenation of both models reduced the scores significantly.
Could you please help me out in this regard. Any suggestions and information are welcomed. Thanks in advance.
Below is the description of the code:
data_path = "Excel_data/Roughness_11_classes.csv"
image_path = "WOOD_PLASTIC/IMAGES_11_classes"
classes=11
rv=[]#### return values object list from rough_data values
label=[]
image_list = []
a=[]
batch_size = 16
vgg19 = applications.VGG19(include_top=False, weights='imagenet')
datagen = ImageDataGenerator(rescale=1. / 255,validation_split=0.2)
img_width, img_height = 128, 512
def rough_values(path): ## process rougness values
global trainRX
data = pd.read_csv(data_path)
data_array=data.to_numpy()
x=data_array[:,0:6].astype(np.float32)
y1=data_array[:,6].reshape(-1,1)
encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(y1)
trainRX,testRX,trainRY,testRY=train_test_split(x,y,test_size=0.2,shuffle=False,random_state=0)
rv=[data_array,trainRX,testRX,trainRY,testRY]
return rv
def images(path): ### processes images
##### TRAIN #########################################################
generator_train = datagen.flow_from_directory(path, target_size=(img_width,
img_height),batch_size=batch_size, class_mode='categorical', shuffle=False,
subset='training')
nb_train_samples = len(generator_train.filenames)
num_classes = len(generator_train.class_indices)
predict_size_train = int(math.ceil(nb_train_samples / batch_size))
bottleneck_features_train = vgg19.predict_generator(generator_train,
predict_size_train)
print (bottleneck_features_train.shape)
np.save("bottleneck_features_vgg19_multi_input.npy", bottleneck_features_train)
#####saves train data as .npy file
#VALIDATION##################################################################################
generator_val = datagen.flow_from_directory(path,target_size=(img_width, img_height),batch_size=batch_size, class_mode='categorical', shuffle=False, subset='validation')
print('generator_val is',generator_val)
nb_validation_samples = len(generator_val.filenames)
num_classes = len(generator_val.class_indices)
predict_size_validation = int(math.ceil(nb_validation_samples / batch_size))
bottleneck_features_validation = vgg19.predict_generator(generator_val, predict_size_validation)
np.save("bottleneck_features_validation_vgg19_multi_input.npy", bottleneck_features_validation)
# training data load #############################################################
generator_top_train = datagen.flow_from_directory(path,target_size=(img_width, img_height),batch_size=batch_size, class_mode='categorical',shuffle=False,subset='training')
nb_train_samples = len(generator_top_train.filenames)
print('nb_train_samples are',nb_train_samples)
num_classes = len(generator_top_train.class_indices)
train_data = np.load("bottleneck_features_vgg19_multi_input.npy")
print (train_data.shape)
train_labels = generator_top_train.classes
train_labels = to_categorical(train_labels, num_classes=num_classes)
# validation data load #############################################################
generator_top_val = datagen.flow_from_directory(path,target_size=(img_width, img_height),batch_size=batch_size, class_mode='categorical',shuffle=False,subset='validation')
nb_validation_samples = len(generator_top_val.filenames)
print('nb_validation_samples are',nb_validation_samples)
num_classes = len(generator_top_val.class_indices)
validation_data = np.load("bottleneck_features_validation_vgg19_multi_input.npy")
validation_labels = generator_top_val.classes
validation_labels = to_categorical(validation_labels, num_classes=num_classes)
z=[train_data, validation_data, validation_labels, train_labels]
return z
def create_dense(feed): ## cnn model for roughness parameters, define MLP network
model = Sequential() ### input shape to dense is 1D
model.add(Dense(64, input_shape=feed, activation='relu', name='fc1'))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu', name='fc2'))
model.add(BatchNormalization())
return model
def create_vgg(input_shape, n_classes, optimizer='rmsprop', fine_tune=0):
### cnn model for images
######### Adding own model on top of vgg ##################
model = Sequential()
model.add(Flatten(input_shape=input_shape))
model.add(Dropout(0.5))
model.add(Dense(100))
model.add(layers.LeakyReLU(alpha=0.3))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(50))
model.add(layers.LeakyReLU(alpha=0.3))
model.add(BatchNormalization())
model.add(Dropout(0.5))
### Group the convolutional base and new fully-connected layers into a Model object#
return model
rv=rough_values(data_path)
input_dense=[]
DA=rv[0]
TrRX=rv[1]
TeRX=rv[2]
TrRY=rv[3]
TeRY=rv[4]
z=images(image_path) ## stores train_data, validation_data, validation_labels,
train_labels
train_data=z[0]
vali=z[1]
val_label=z[2]
tr_label=z[3]
input_dense=TrRX[0].shape
class_names=
['ABS','PA','PC','PP','WOOD1','WOOD2','WOOD3','WOOD4','WOOD5','WOOD6','WOOD7']
### 11 material classes
mlp = create_dense(input_dense)
cnn = create_vgg(train_data.shape[1:],classes)
combinedInput = concatenate([mlp.output, cnn.output]) ## concatenated models
top= Dense(64, activation="relu")(combinedInput)
top_model= Dense(11, activation="softmax")(top) ### output as 11 classes
model = Model(inputs=[mlp.input, cnn.input], outputs=top_model)
## training #################################################################
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
optimizer=optimizers.Adam(lr=1e-4),metrics=['acc'])
history= model.fit(x=[TrRX, train_data], y=TrRY, validation_data=([TeRX, vali], TeRY),
epochs=100, batch_size=32,validation_steps=(55//batch_size))
(eval_loss, eval_accuracy) = model.evaluate([TeRX, vali], TeRY, batch_size=batch_size,
verbose=1)
print("test accuracy: {:.2f}%".format(eval_accuracy * 100))
print("test Loss: {}".format(eval_loss))
model.save('fusion_model.h5')
#result=model.predict([TeRX,vali])

Tensorflow Probability Logistic Regression Example

I feel I must be missing something obvious, in struggling to get a positive control for logistic regression going in tensorflow probability.
I've modified the example for logistic regression here, and created a positive control features and labels data. I struggle to achieve accuracy over 60%, however this is an easy problem for a 'vanilla' Keras model (accuracy 100%). What am I missing? I tried different layers, activations, etc.. With this method of setting up the model, is posterior updating actually being performed? Do I need to specify an interceptor object? Many thanks..
### Added positive control
nSamples = 80
features1 = np.float32(np.hstack((np.reshape(np.ones(40), (40, 1)),
np.reshape(np.random.randn(nSamples), (40, 2)))))
features2 = np.float32(np.hstack((np.reshape(np.zeros(40), (40, 1)),
np.reshape(np.random.randn(nSamples), (40, 2)))))
features = np.vstack((features1, features2))
labels = np.concatenate((np.zeros(40), np.ones(40)))
featuresInt, labelsInt = build_input_pipeline(features, labels, 10)
###
#w_true, b_true, features, labels = toy_logistic_data(FLAGS.num_examples, 2)
#featuresInt, labelsInt = build_input_pipeline(features, labels, FLAGS.batch_size)
with tf.name_scope("logistic_regression", values=[featuresInt]):
layer = tfp.layers.DenseFlipout(
units=1,
activation=None,
kernel_posterior_fn=tfp.layers.default_mean_field_normal_fn(),
bias_posterior_fn=tfp.layers.default_mean_field_normal_fn())
logits = layer(featuresInt)
labels_distribution = tfd.Bernoulli(logits=logits)
neg_log_likelihood = -tf.reduce_mean(labels_distribution.log_prob(labelsInt))
kl = sum(layer.losses)
elbo_loss = neg_log_likelihood + kl
predictions = tf.cast(logits > 0, dtype=tf.int32)
accuracy, accuracy_update_op = tf.metrics.accuracy(
labels=labelsInt, predictions=predictions)
with tf.name_scope("train"):
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
train_op = optimizer.minimize(elbo_loss)
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
with tf.Session() as sess:
sess.run(init_op)
# Fit the model to data.
for step in range(FLAGS.max_steps):
_ = sess.run([train_op, accuracy_update_op])
if step % 100 == 0:
loss_value, accuracy_value = sess.run([elbo_loss, accuracy])
print("Step: {:>3d} Loss: {:.3f} Accuracy: {:.3f}".format(
step, loss_value, accuracy_value))
### Check with basic Keras
kerasModel = tf.keras.models.Sequential([
tf.keras.layers.Dense(1)])
optimizer = tf.train.AdamOptimizer(5e-2)
kerasModel.compile(optimizer = optimizer, loss = 'binary_crossentropy',
metrics = ['accuracy'])
kerasModel.fit(features, labels, epochs = 50) #100% accuracy
Compared to the github example, you forgot to divide by the number of examples when defining the KL divergence:
kl = sum(layer.losses) / FLAGS.num_examples
When I change this to your code, I quickly get to an accuracy of 99.9% on your toy data.
Additionaly, the output layer of your Keras model actually expects a sigmoid activation for this problem (binary classification):
kerasModel = tf.keras.models.Sequential([
tf.keras.layers.Dense(1, activation='sigmoid')])
It's a toy problem, but you will notice that the model gets to 100% accuracy faster with a sigmoid activation.

Resources