Overlapping probability of two normal distribution with scipy
You can use the answer suggested by @duhalme to get the intersect and then use this point to define the range of integral limits,
Where the code for this looks like,
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
norm.cdf(1.96)
def solve(m1,m2,std1,std2):
a = 1/(2*std1**2) - 1/(2*std2**2)
b = m2/(std2**2) - m1/(std1**2)
c = m1**2 /(2*std1**2) - m2**2 / (2*std2**2) - np.log(std2/std1)
return np.roots([a,b,c])
m1 = 2.5
std1 = 1.0
m2 = 5.0
std2 = 1.0
#Get point of intersect
result = solve(m1,m2,std1,std2)
#Get point on surface
x = np.linspace(-5,9,10000)
plot1=plt.plot(x,norm.pdf(x,m1,std1))
plot2=plt.plot(x,norm.pdf(x,m2,std2))
plot3=plt.plot(result,norm.pdf(result,m1,std1),'o')
#Plots integrated area
r = result[0]
olap = plt.fill_between(x[x>r], 0, norm.pdf(x[x>r],m1,std1),alpha=0.3)
olap = plt.fill_between(x[x<r], 0, norm.pdf(x[x<r],m2,std2),alpha=0.3)
# integrate
area = norm.cdf(r,m2,std2) + (1.-norm.cdf(r,m1,std1))
print("Area under curves ", area)
plt.show()
The cdf is used to obtain the integral of the Gaussian here, although symbolic version of the Gaussian could be defined and scipy.quad
employed (or something else). Alternatively, you could use a Monte Carlo method like this link (i.e. generate random numbers and reject any outside the range you want).
Ed's answer is great. However, I noticed that it does not work when there are two or infinite (completely overlapping) points of contact. Here is version of the code that handles these two cases as well.
If you also want to continue seeing the plots of the distributions, you can use Ed's code.
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
def solve(m1,m2,std1,std2):
a = 1./(2.*std1**2) - 1./(2.*std2**2)
b = m2/(std2**2) - m1/(std1**2)
c = m1**2 /(2*std1**2) - m2**2 / (2*std2**2) - np.log(std2/std1)
return np.roots([a,b,c])
m1 = 2.5
std1 = 1.0
m2 = 5.0
std2 = 1.0
result = solve(m1,m2,std1,std2)
# 'lower' and 'upper' represent the lower and upper bounds of the space within which we are computing the overlap
if(len(result)==0): # Completely non-overlapping
overlap = 0.0
elif(len(result)==1): # One point of contact
r = result[0]
if(m1>m2):
tm,ts=m2,std2
m2,std2=m1,std1
m1,std1=tm,ts
if(r<lower): # point of contact is less than the lower boundary. order: r-l-u
overlap = (norm.cdf(upper,m1,std1)-norm.cdf(lower,m1,std1))
elif(r<upper): # point of contact is more than the upper boundary. order: l-u-r
overlap = (norm.cdf(r,m2,std2)-norm.cdf(lower,m2,std2))+(norm.cdf(upper,m1,std1)-norm.cdf(r,m1,std1))
else: # point of contact is within the upper and lower boundaries. order: l-r-u
overlap = (norm.cdf(upper,m2,std2)-norm.cdf(lower,m2,std2))
elif(len(result)==2): # Two points of contact
r1 = result[0]
r2 = result[1]
if(r1>r2):
temp=r2
r2=r1
r1=temp
if(std1>std2):
tm,ts=m2,std2
m2,std2=m1,std1
m1,std1=tm,ts
if(r1<lower):
if(r2<lower): # order: r1-r2-l-u
overlap = (norm.cdf(upper,m1,std1)-norm.cdf(lower,m1,std1))
elif(r2<upper): # order: r1-l-r2-u
overlap = (norm.cdf(r2,m2,std2)-norm.cdf(lower,m2,std2))+(norm.cdf(upper,m1,std1)-norm.cdf(r2,m1,std1))
else: # order: r1-l-u-r2
overlap = (norm.cdf(upper,m2,std2)-norm.cdf(lower,m2,std2))
elif(r1<upper):
if(r2<upper): # order: l-r1-r2-u
print norm.cdf(r1,m1,std1), "-", norm.cdf(lower,m1,std1), "+", norm.cdf(r2,m2,std2), "-", norm.cdf(r1,m2,std2), "+", norm.cdf(upper,m1,std1), "-", norm.cdf(r2,m1,std1)
overlap = (norm.cdf(r1,m1,std1)-norm.cdf(lower,m1,std1))+(norm.cdf(r2,m2,std2)-norm.cdf(r1,m2,std2))+(norm.cdf(upper,m1,std1)-norm.cdf(r2,m1,std1))
else: # order: l-r1-u-r2
overlap = (norm.cdf(r1,m1,std1)-norm.cdf(lower,m1,std1))+(norm.cdf(upper,m2,std2)-norm.cdf(r1,m2,std2))
else: # l-u-r1-r2
overlap = (norm.cdf(upper,m1,std1)-norm.cdf(lower,m1,std1))
Starting Python 3.8
, the standard library provides the NormalDist
object as part of the statistics
module.
NormalDist
can be used to compute the overlapping coefficient (OVL) between two normal distributions via the NormalDist.overlap(other)
method which returns a value between 0.0 and 1.0 giving the overlapping area for two probability density functions:
from statistics import NormalDist
NormalDist(mu=2.5, sigma=1).overlap(NormalDist(mu=5.0, sigma=1))
# 0.2112995473337106