How to specify the correlation coefficient as the loss function in keras
The following code is an implementation of the Pearson correlation coefficient in TensorFlow 2.x:
import tensorflow as tf

def correlation(x, y):
    mx = tf.math.reduce_mean(x)
    my = tf.math.reduce_mean(y)
    xm, ym = x - mx, y - my
    r_num = tf.math.reduce_mean(tf.multiply(xm, ym))
    r_den = tf.math.reduce_std(xm) * tf.math.reduce_std(ym)
    return r_num / r_den
It returns the same result as numpy's corrcoef function.
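A quick way to sanity-check that claim (a minimal sketch, assuming TF 2.x eager execution; the sample arrays are made up for illustration):

import numpy as np
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0])
y = tf.constant([2.0, 1.0, 4.0, 3.0, 6.0])

# correlation() as defined above vs. NumPy's reference implementation;
# the two values should agree up to float precision
print(correlation(x, y).numpy())
print(np.corrcoef(x.numpy(), y.numpy())[0, 1])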
@Trifon's answer is correct if you have all your data available at the same time. The code below implements Pearson correlation as a Keras metric, which lets you accumulate the metric over batched inputs, as is typically done during DNN training/evaluation:
class PearsonCorrelation(tf.keras.metrics.Metric):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.cov = tf.metrics.Sum()
        self.sq_yt = tf.metrics.Sum()
        self.sq_yp = tf.metrics.Sum()
        self.mean_yp = tf.metrics.Mean()
        self.mean_yt = tf.metrics.Mean()
        self.count = tf.metrics.Sum()

    def update_state(self, y_true, y_pred, sample_weight=None):
        ''' Note y_pred are one-hot predictions, not probs/scores '''
        self.cov(y_true * y_pred)
        self.sq_yp(y_pred**2)
        self.sq_yt(y_true**2)
        self.mean_yp(y_pred)
        self.mean_yt(y_true)
        # total number of elements seen so far (tf.size, not the sum of the
        # shape vector, which would be wrong for multi-dimensional inputs)
        self.count(tf.cast(tf.size(y_true), tf.float32))

    def result(self):
        count = self.count.result()
        mean_yp = self.mean_yp.result()
        mean_yt = self.mean_yt.result()
        numerator = self.cov.result() - count * mean_yp * mean_yt
        denominator = tf.sqrt(self.sq_yp.result() - count * mean_yp**2) * \
                      tf.sqrt(self.sq_yt.result() - count * mean_yt**2)
        return numerator / denominator

    def reset_states(self):
        self.cov.reset_states()
        self.sq_yt.reset_states()
        self.sq_yp.reset_states()
        self.mean_yp.reset_states()
        self.mean_yt.reset_states()
        self.count.reset_states()
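To use the metric during training, pass an instance to model.compile. A minimal sketch (the model architecture and optimizer here are illustrative placeholders, not part of the original answer):

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1),
])
model.compile(optimizer='adam',
              loss='mse',
              metrics=[PearsonCorrelation(name='pearson_r')])
# Keras calls update_state() on every batch and result() when reporting,
# so the correlation is accumulated across batches rather than averaged per batch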
According to the Keras documentation, you should pass the squared correlation coefficient as a function instead of the string 'mean_squared_error'. The function needs to receive 2 tensors (y_true, y_pred). You can look at the Keras source code for inspiration.
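For example, the required signature looks like this (a sketch only; the body is a hand-written MSE placeholder to show the contract, not the correlation loss itself):

from keras import backend as K

def my_custom_loss(y_true, y_pred):
    # any custom loss receives two tensors and returns a tensor
    return K.mean(K.square(y_pred - y_true), axis=-1)

model.compile(loss=my_custom_loss, optimizer='adam')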
There is also a function tf.contrib.metrics.streaming_pearson_correlation implemented in TensorFlow (1.x only; tf.contrib was removed in TF 2.0). Just be careful with the order of the parameters; it should be something like this:
Update 1: initialize local variables according to this issue
import tensorflow as tf

def correlation_coefficient(y_true, y_pred):
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true, name='pearson_r')
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'pearson_r' in i.name.split('/')]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
        return 1 - pearson_r**2
...
model.compile(loss=correlation_coefficient, optimizer='adam')
Update 2: even though you cannot use the scipy function directly, you can look at its implementation and port it to your code using the Keras backend.
Update 3: The TensorFlow function as it is may not be differentiable; your loss function needs to be something like this (please check the math):
import tensorflow as tf
from keras import backend as K

def correlation_coefficient_loss(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x - mx, y - my
    r_num = K.sum(tf.multiply(xm, ym))
    r_den = K.sqrt(tf.multiply(K.sum(K.square(xm)), K.sum(K.square(ym))))
    r = r_num / r_den

    # clip r to [-1, 1] to guard against floating-point overshoot
    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return 1 - K.square(r)
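Wiring it in is then the usual one-liner (assuming a compiled Keras model as elsewhere in this thread):

model.compile(loss=correlation_coefficient_loss, optimizer='adam')

Note that because the loss is 1 - r², perfect anti-correlation (r = -1) also yields zero loss, as the third test case below shows; use 1 - r instead if only positive correlation should be rewarded.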
Update 4: The two functions give different results, but correlation_coefficient_loss gives the same results as scipy.stats.pearsonr:
Here is the code to test it:
import tensorflow as tf
from keras import backend as K
import numpy as np
import scipy.stats

inputa = np.array([[3, 1, 2, 3, 4, 5],
                   [1, 2, 3, 4, 5, 6],
                   [1, 2, 3, 4, 5, 6]])
inputb = np.array([[3, 1, 2, 3, 4, 5],
                   [3, 1, 2, 3, 4, 5],
                   [6, 5, 4, 3, 2, 1]])

with tf.Session() as sess:
    a = tf.placeholder(tf.float32, shape=[None])
    b = tf.placeholder(tf.float32, shape=[None])
    f1 = correlation_coefficient(a, b)
    f2 = correlation_coefficient_loss(a, b)
    sess.run(tf.global_variables_initializer())

    for i in range(inputa.shape[0]):
        f1_result, f2_result = sess.run([f1, f2], feed_dict={a: inputa[i], b: inputb[i]})
        scipy_result = 1 - scipy.stats.pearsonr(inputa[i], inputb[i])[0]**2
        print("a: " + str(inputa[i]) + " b: " + str(inputb[i]))
        print("correlation_coefficient: " + str(f1_result))
        print("correlation_coefficient_loss: " + str(f2_result))
        print("scipy.stats.pearsonr:" + str(scipy_result))
Results:
a: [3 1 2 3 4 5] b: [3 1 2 3 4 5]
correlation_coefficient: -2.38419e-07
correlation_coefficient_loss: 0.0
scipy.stats.pearsonr:0.0
a: [1 2 3 4 5 6] b: [3 1 2 3 4 5]
correlation_coefficient: 0.292036
correlation_coefficient_loss: 0.428571
scipy.stats.pearsonr:0.428571428571
a: [1 2 3 4 5 6] b: [6 5 4 3 2 1]
correlation_coefficient: 0.994918
correlation_coefficient_loss: 0.0
scipy.stats.pearsonr:0.0