Commit 811997cf authored by Picon Ruiz, Artzai's avatar Picon Ruiz, Artzai

initial commit

parent 954f9b72
/.idea/
*.pyc
\ No newline at end of file
import tensorflow as tf
def _get_triplet_mask(labels):
"""Return a 3D mask where mask[a, p, n] is True iff the triplet (a, p, n) is valid.
A triplet (i, j, k) is valid if:
- i, j, k are distinct
- labels[i] == labels[j] and labels[i] != labels[k]
Args:
labels: tf.int32 `Tensor` with shape [batch_size]
"""
# Check that i, j and k are distinct
indices_equal = tf.cast(tf.eye(tf.shape(labels)[0]), tf.bool)
indices_not_equal = tf.logical_not(indices_equal)
i_not_equal_j = tf.expand_dims(indices_not_equal, 2)
i_not_equal_k = tf.expand_dims(indices_not_equal, 1)
j_not_equal_k = tf.expand_dims(indices_not_equal, 0)
distinct_indices = tf.logical_and(tf.logical_and(i_not_equal_j, i_not_equal_k), j_not_equal_k)
# Check if labels[i] == labels[j] and labels[i] != labels[k]
label_equal = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
i_equal_j = tf.expand_dims(label_equal, 2)
i_equal_k = tf.expand_dims(label_equal, 1)
valid_labels = tf.logical_and(i_equal_j, tf.logical_not(i_equal_k))
# Combine the two masks
mask = tf.logical_and(distinct_indices, valid_labels)
return mask
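# Hedged worked example (added, not part of the original file): with
# labels = [0, 0, 1] the only valid triplets are (anchor=0, positive=1, negative=2)
# and (anchor=1, positive=0, negative=2), so the returned mask is True exactly at
# [0, 1, 2] and [1, 0, 2] and False everywhere else.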
def constellation(k, BATCH_SIZE):
def c_loss(labels, embeddings):
"""Build the constellation loss over a batch of embeddings.
Args:
labels: labels of the batch, of size (batch_size,)
embeddings: tensor of shape (batch_size, embed_dim)
Returns:
ctl_loss: scalar tensor containing the constellation loss
"""
labels = labels[:, 0]
labels_list = []
embeddings_list = []
for i in range(k):
labels_list.append(labels[BATCH_SIZE * i:BATCH_SIZE * (i + 1)])
embeddings_list.append(embeddings[BATCH_SIZE * i:BATCH_SIZE * (i + 1)])
loss_list = []
for i in range(len(embeddings_list)):
# Pairwise dot-product matrix between all embeddings in this group
pairwise_dist = tf.matmul(embeddings_list[i], tf.transpose(embeddings_list[i]))
# shape (batch_size, batch_size, 1)
anchor_positive_dist = tf.expand_dims(pairwise_dist, 2)
assert anchor_positive_dist.shape[2] == 1, "{}".format(anchor_positive_dist.shape)
# shape (batch_size, 1, batch_size)
anchor_negative_dist = tf.expand_dims(pairwise_dist, 1)
assert anchor_negative_dist.shape[1] == 1, "{}".format(anchor_negative_dist.shape)
ctl_loss = anchor_negative_dist - anchor_positive_dist
# Put to zero the invalid triplets (where label(a) != label(p) or label(n) == label(a) or a == p)
mask = _get_triplet_mask(labels_list[i])
mask = tf.to_float(mask)
ctl_loss = tf.multiply(mask, ctl_loss)
loss_list.append(ctl_loss)
ctl_loss = 1. + tf.exp(loss_list[0])
for i in range(1, len(embeddings_list)):
ctl_loss += tf.exp(loss_list[i])
ctl_loss = tf.log(ctl_loss)
# Sum into the final constellation loss and divide by 1000 because the raw value is very large
ctl_loss = tf.reduce_sum(ctl_loss) / 1000.
return ctl_loss
return c_loss
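# Added note (not in the original file): for each (anchor, positive, negative) slot
# the code above accumulates
#     log(1 + sum_{i=1..k} exp(m_i * (f_a . f_n - f_a . f_p)))
# where m_i is the valid-triplet mask of the i-th BATCH_SIZE group and
# f_a . f_p, f_a . f_n are dot products of the embeddings; the result is summed
# over the batch and scaled by 1/1000.
# Minimal usage sketch (assumed; the argument values are illustrative):
#     model.compile(loss=constellation(k=4, BATCH_SIZE=32),
#                   optimizer=keras.optimizers.Adam(1e-3))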
# Alfonso Medela & Artzai Picon, "Constellation Loss: Improving the efficiency of deep metric learning loss functions for optimal embedding.", submitted to NeurIPS 2019.
import os
import keras
import numpy as np
import tensorflow as tf
import pandas as pd
from keras import backend as K
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import davies_bouldin_score, silhouette_score, balanced_accuracy_score
from constellation_loss import constellation
from utils import load_data, load_images
from network import inception, generator
if __name__ == '__main__':
# PARAMETERS
IMG_SIZE = 150
EMB_VECTOR = 128
BATCH_SIZE = 32
DATASET_PATH = '/mnt/RAID5/users/alfonsomedela/projects/piccolo/nature/NATURE'
# Values of k to evaluate (number of BATCH_SIZE groups per constellation batch)
k_number = [i for i in range(2, 8)]
# LOAD THE DATA
x, y = load_data(DATASET_PATH)
random_seeds = [666, 100, 200, 300, 400, 500, 600, 700, 800, 900]
for k in k_number:
accuracy = []
silhouette = []
davis = []
bac_list = []
fold = 0
for seed in random_seeds:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed)
x_train_input = load_images(x_train)
x_test_input = load_images(x_test)
print('Train data:', x_train.shape, y_train.shape)
print('Test data:', x_test.shape, y_test.shape)
print('')
print('Training...')
# TRAIN
gpu_device = "/gpu:0" # 0,1,2,3
if keras.backend.backend() == 'tensorflow':
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_device.rsplit(':', 1)[-1]
session_config = K.tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
session_config.gpu_options.allow_growth = True
session = K.tf.Session(config=session_config)
with K.tf.device(gpu_device):
# DEFINE THE MODEL
model = inception(EMB_VECTOR, IMG_SIZE)
model.compile(loss=constellation(k, BATCH_SIZE), optimizer=keras.optimizers.Adam(1e-3))
model.load_weights('final_exp/' + str(k) + '/weights/constellation_fold_' + str(fold) + '.h5')
embeddings_train = model.predict([x_train_input])
embeddings_test = model.predict([x_test_input])
# Fit a k-nearest-neighbours classifier on the extracted feature vectors
KNN = KNeighborsClassifier()
KNN.fit(embeddings_train, y_train)
score = KNN.score(embeddings_test, y_test)
accuracy.append(score * 100.)
y_pred = KNN.predict(embeddings_test)
BAC = balanced_accuracy_score(y_test, y_pred) * 100.
bac_list.append(BAC)
# Homogeneity test (Davies-Bouldin and silhouette scores)
d_score = davies_bouldin_score(embeddings_test, y_test)
s_score = silhouette_score(embeddings_test, y_test)
silhouette.append(s_score)
davis.append(d_score)
session.close()
fold += 1
accuracy = np.asarray(accuracy)
davis = np.asarray(davis)
silhouette = np.asarray(silhouette)
bac_list = np.asarray(bac_list)
# Aggregate results over the cross-validation folds
mean_accuracy, std_accuracy = np.mean(accuracy), np.std(accuracy)
mean_s, std_s = np.mean(silhouette), np.std(silhouette)
mean_d, std_d = np.mean(davis), np.std(davis)
mean_bac, std_bac = np.mean(bac_list), np.std(bac_list)
mean_accuracy, std_accuracy = np.reshape(mean_accuracy, (1)), np.reshape(std_accuracy, (1))
mean_s, std_s = np.reshape(mean_s, (1)), np.reshape(std_s, (1))
mean_d, std_d = np.reshape(mean_d, (1)), np.reshape(std_d, (1))
mean_bac, std_bac = np.reshape(mean_bac, (1)), np.reshape(std_bac, (1))
# Save the data
accuracy_row = np.concatenate((accuracy,mean_accuracy,std_accuracy),axis=0)
accuracy_row = np.reshape(accuracy_row, (len(accuracy_row),1))
accuracy_row = np.around(accuracy_row,decimals=2)
silhouette_row = np.concatenate((silhouette, mean_s, std_s), axis=0)
silhouette_row = np.reshape(silhouette_row, (len(silhouette_row),1))
silhouette_row = np.around(silhouette_row, decimals=4)
davis_row = np.concatenate((davis, mean_d, std_d), axis=0)
davis_row = np.reshape(davis_row, (len(davis_row), 1))
davis_row = np.around(davis_row, decimals=4)
bac_row = np.concatenate((bac_list, mean_bac, std_bac), axis=0)
bac_row = np.reshape(bac_row, (len(bac_row), 1))
bac_row = np.around(bac_row, decimals=2)
csv_array = np.concatenate((accuracy_row, bac_row, davis_row, silhouette_row), axis=-1)
df = pd.DataFrame(csv_array,columns=['accuracy','BAC','Davis','Silhouette'])
df.to_csv('final_exp/results/final_exp_k_' + str(k) + '.csv')
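# Added note: the resulting CSV has one row per cross-validation fold plus a mean
# row and a std row, with the columns accuracy, BAC, Davis and Silhouette defined above.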
# Alfonso Medela & Artzai Picon, "Constellation Loss: Improving the efficiency of deep metric learning loss functions for optimal embedding.", submitted to NeurIPS 2019.
import os
import keras
import tensorflow as tf
from keras import backend as K
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from constellation_loss import constellation
from utils import load_data
from network import inception, generator
class PlotLosses(keras.callbacks.Callback):
def on_train_begin(self, logs={}):
self.i = 0
self.x = []
self.losses = []
self.val_losses = []
self.fig = plt.figure()
self.logs = []
def on_epoch_end(self, epoch, logs={}):
self.logs.append(logs)
self.x.append(self.i)
self.losses.append(logs.get('loss'))
self.val_losses.append(logs.get('val_loss'))
self.i += 1
plt.plot(self.x, self.losses, label="loss")
plt.plot(self.x, self.val_losses, label="val_loss")
plt.savefig('final_exp/' + str(k) + '/plots/constellation_fold_' + str(fold) + '.png')
if __name__ == '__main__':
loss_plot = PlotLosses()
DATASET_PATH = '/mnt/RAID5/users/alfonsomedela/projects/piccolo/nature/NATURE'
# PARAMETERS
BATCH_SIZE = 32
IMG_SIZE = 150
EMB_VECTOR = 128
k_number = [i for i in range(2,8)]
EPOCHS = 10
# LOAD THE DATA
x, y = load_data(DATASET_PATH)
random_seeds = [666, 100, 200, 300, 400, 500, 600, 700, 800, 900]
for k in k_number:
print('K number: ' + str(k))
fold = 0
for seed in random_seeds:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed)
steps_train = int(len(x_train) * 1. / BATCH_SIZE) + 1
steps_test = int(len(x_test) * 1. / BATCH_SIZE) + 1
print('Fold: ' + str(fold))
# TRAIN
gpu_device = "/gpu:0" # 0,1,2,3
if keras.backend.backend() == 'tensorflow':
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_device.rsplit(':', 1)[-1]
session_config = K.tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
session_config.gpu_options.allow_growth = True
session = K.tf.Session(config=session_config)
with K.tf.device(gpu_device):
# DEFINE THE MODEL
model = inception(EMB_VECTOR, IMG_SIZE)
for layer in model.layers:
if isinstance(layer, keras.layers.BatchNormalization):
layer.momentum = 0.9
for layer in model.layers[:-50]:
if not isinstance(layer, keras.layers.BatchNormalization):
layer.trainable = False
# TRAIN THE MODEL
model.compile(loss=constellation(k, BATCH_SIZE), optimizer=keras.optimizers.Adam(1e-3))
model.fit_generator(generator(x_train, y_train, k, BATCH_SIZE), steps_per_epoch=steps_train,
validation_data=generator(x_test, y_test, k, BATCH_SIZE), validation_steps=steps_test,
epochs=EPOCHS, callbacks=[loss_plot])
model.save_weights('final_exp/' + str(k) +'/weights/constellation_fold_' + str(fold) + '.h5')
# close the session and move on to the next fold
session.close()
fold += 1
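# Added note: generator() yields k * BATCH_SIZE images per step, and
# constellation(k, BATCH_SIZE) splits every such batch back into k consecutive
# chunks of BATCH_SIZE inside the loss.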
# Alfonso Medela & Artzai Picon, "Constellation Loss: Improving the efficiency of deep metric learning loss functions for optimal embedding.", submitted to NeurIPS 2019.
import os
import keras
import numpy as np
import tensorflow as tf
import pandas as pd
from keras import backend as K
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import davies_bouldin_score, silhouette_score, balanced_accuracy_score
from sklearn.manifold import TSNE
from constellation_loss import constellation
from utils import load_data, load_images
from network import inception, generator
if __name__ == '__main__':
DATASET_PATH = '/mnt/RAID5/users/alfonsomedela/projects/piccolo/nature/NATURE'
# PARAMETERS
IMG_SIZE = 150
EMB_VECTOR = 128
BATCH_SIZE = 32
k = 6
# LOAD THE DATA
x, y = load_data(DATASET_PATH)
fold = 5 # Choose fold
random_seeds = [666, 100, 200, 300, 400, 500, 600, 700, 800, 900]
SEED = random_seeds[fold]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=SEED)
x_test_input = load_images(x_test)
# TRAIN
gpu_device = "/gpu:2" # 0,1,2,3
if keras.backend.backend() == 'tensorflow':
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_device.rsplit(':', 1)[-1]
session_config = K.tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
session_config.gpu_options.allow_growth = True
session = K.tf.Session(config=session_config)
with K.tf.device(gpu_device):
# DEFINE THE MODEL
model = inception(EMB_VECTOR, IMG_SIZE)
model.compile(loss=constellation(k, BATCH_SIZE), optimizer=keras.optimizers.Adam(1e-3))
model.load_weights('final_exp/' + str(k) + '/weights/constellation_fold_' + str(fold) + '.h5')
embeddings_test = model.predict([x_test_input])
x_tsne = TSNE(n_components=2).fit_transform(embeddings_test)
color = ['b', 'g', 'r', 'c', 'm', 'y', 'k', (1., 170. / 255., 58. / 255.)]
labels = ['Empty', 'Lympho', 'Mucosa', 'Stroma', 'Tumor', 'Complex', 'Debris', 'Adipose']
ind = [0 for i in range(len(np.unique(y_test)))]
for i in range(len(embeddings_test)):
if ind[y_test[i]] == 0:
plt.scatter(x_tsne[i, 0], x_tsne[i, 1], color=color[y_test[i]], label=labels[y_test[i]])
ind[y_test[i]] = 1
else:
plt.scatter(x_tsne[i, 0], x_tsne[i, 1], color=color[y_test[i]])
plt.ylabel(r'$z_2$')
plt.xlabel(r'$z_1$')
# plt.legend()
plt.savefig('tsne.png')
session.close()
import keras
from keras.layers import Lambda
import keras.backend as K
from sklearn.utils import shuffle
import skimage.io as io
import numpy as np
from utils import data_augmentation
def inception(EMB_VECTOR,IMG_SIZE, use_imagenet=True):
# load pre-trained model graph, don't add final layer
model = keras.applications.InceptionV3(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3),
weights='imagenet' if use_imagenet else None)
# add global pooling just like in InceptionV3
new_output = keras.layers.GlobalAveragePooling2D()(model.output)
# add a new dense layer producing the embedding vector
new_output = keras.layers.Dense(EMB_VECTOR,activation='sigmoid')(new_output)
new_output = Lambda(lambda x: K.l2_normalize(x, axis=-1))(new_output)
model = keras.engine.training.Model(model.inputs, new_output)
return model
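# Added note: because the embedding is L2-normalised by the Lambda layer, the dot
# products used inside the constellation loss are cosine similarities and therefore
# bounded in [-1, 1].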
def generator(x, y, k, BATCH_SIZE):
n_img = 0
while True:
x_in = []
y_in= []
for n in range(BATCH_SIZE*k):
if n_img >= len(x):
x, y = shuffle(x, y)
n_img = 0
image = io.imread(x[n_img])
image_augmented = data_augmentation(image)
x_in.append(image_augmented)
y_in.append(y[n_img])
n_img += 1
x_in, y_in = np.asarray(x_in), np.asarray(y_in)
yield x_in, y_in
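# Minimal usage sketch (added; the argument values are illustrative):
#     gen = generator(x_train, y_train, k=4, BATCH_SIZE=32)
#     x_batch, y_batch = next(gen)   # x_batch contains 4 * 32 = 128 images
# The data is reshuffled each time the end of the list is reached.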
import keras
import matplotlib.pyplot as plt
import glob
import numpy as np
from sklearn.utils import shuffle
import skimage
import random
import skimage.io as io
def load_images(x):
x_input = []
for i in range(len(x)):
image = io.imread(x[i])
image_augmented = data_augmentation(image)
x_input.append(image_augmented)
return np.asarray(x_input)
def load_data(dataset_path):
'''
You can obtain the dataset here: https://www.nature.com/articles/srep27988
'''
# Collect the image paths of the Nature dataset, one class per sub-folder
folders = glob.glob(dataset_path + '/*')
N_CAT_TOT = len(folders)
x = []
y = []
n_cat = 0
for folder in folders:
image_paths = glob.glob(folder + '/*.tif')
for img in image_paths:
x.append(img)
y.append(n_cat)
n_cat += 1
x, y = np.asarray(x), np.asarray(y)
x, y = shuffle(x, y, random_state=666)
return x, y
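# Assumed directory layout (illustrative, derived from the glob patterns above):
# DATASET_PATH contains one sub-folder per tissue class, each holding .tif images, e.g.
#     NATURE/
#         TUMOR/*.tif
#         STROMA/*.tif
#         ...
# Each sub-folder is mapped to one integer label in y.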
def data_augmentation(img):
a = random.randint(0,1)
b = random.randint(0,3)
image_a = [img, img[::-1,:,:]]