python: distplot with multiple distributions
The important thing is to sort the dataframe by values where target
is 0
, 1
, or 2
.
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
import seaborn as sns
iris = load_iris()
iris = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
columns=iris['feature_names'] + ['target'])
# Sort the dataframe by target
target_0 = iris.loc[iris['target'] == 0]
target_1 = iris.loc[iris['target'] == 1]
target_2 = iris.loc[iris['target'] == 2]
sns.distplot(target_0[['sepal length (cm)']], hist=False, rug=True)
sns.distplot(target_1[['sepal length (cm)']], hist=False, rug=True)
sns.distplot(target_2[['sepal length (cm)']], hist=False, rug=True)
sns.plt.show()
The output looks like:
If you don't know how many values target
may have, find the unique values in the target
column, then slice the dataframe and add to the plot appropriately.
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
import seaborn as sns
iris = load_iris()
iris = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
columns=iris['feature_names'] + ['target'])
unique_vals = iris['target'].unique() # [0, 1, 2]
# Sort the dataframe by target
# Use a list comprehension to create list of sliced dataframes
targets = [iris.loc[iris['target'] == val] for val in unique_vals]
# Iterate through list and plot the sliced dataframe
for target in targets:
sns.distplot(target[['sepal length (cm)']], hist=False, rug=True)
sns.plt.show()
A more common approach for this type of problems is to recast your data into long format using melt, and then let map do the rest.
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
import seaborn as sns
iris = load_iris()
iris = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
columns=iris['feature_names'] + ['target'])
# recast into long format
df = iris.melt(['target'], var_name='cols', value_name='vals')
df.head()
target cols vals
0 0.0 sepal length (cm) 5.1
1 0.0 sepal length (cm) 4.9
2 0.0 sepal length (cm) 4.7
3 0.0 sepal length (cm) 4.6
4 0.0 sepal length (cm) 5.0
You can now plot simply by creating a FacetGrid and using map:
g = sns.FacetGrid(df, col='cols', hue="target", palette="Set1")
g = (g.map(sns.distplot, "vals", hist=False, rug=True))
I have found a simpler solution using FacetGrid on https://github.com/mwaskom/seaborn/issues/861 by citynorman:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
iris = load_iris()
iris = pd.DataFrame(data= np.c_[iris['data'], iris['target']],columns= iris['feature_names'] + ['target'])
g = sns.FacetGrid(iris, hue="target")
g = g.map(sns.distplot, "sepal length (cm)", hist=False, rug=True)