stacked bar plot using matplotlib
I found this such a pain that I wrote a function to do it. I'm sharing it in the hope that others find it useful:
import numpy as np
import matplotlib.pyplot as plt
def plot_stacked_bar(data, series_labels, category_labels=None,
                     show_values=False, value_format="{}", y_label=None,
                     colors=None, grid=True, reverse=False):
    """Plot a stacked bar chart with the data and labels provided.

    Bars are drawn through ``matplotlib.pyplot`` (``plt.bar``), one
    ``plt.bar`` call per row of ``data``, each stacked on top of the
    running total of the rows drawn before it.

    Keyword arguments:
    data            -- 2-dimensional numpy array or nested list
                       containing data for each series in rows
    series_labels   -- list of series labels (these appear in
                       the legend); one per row of ``data``
    category_labels -- list of category labels (these appear
                       on the x-axis); ``None`` or empty leaves the
                       default tick labels in place
    show_values     -- If True then numeric value labels will
                       be shown on each bar
    value_format    -- Format string for numeric value labels
                       (default is "{}")
    y_label         -- Label for y-axis (str)
    colors          -- List of color labels, one per series
    grid            -- If True display grid
    reverse         -- If True reverse the order in which the
                       categories (columns of ``data``) are laid out
                       along the x-axis, together with their labels
    """
    ny = len(data[0])
    ind = list(range(ny))
    bar_groups = []
    cum_size = np.zeros(ny)
    data = np.array(data)

    # Materialise the labels once so that any iterable (tuple, numpy array,
    # generator) can be reversed, truth-tested and handed to xticks safely.
    if category_labels is not None:
        category_labels = list(category_labels)

    if reverse:
        data = np.flip(data, axis=1)
        # Only reverse labels that were actually supplied; calling
        # reversed(None) used to raise TypeError here.
        if category_labels is not None:
            category_labels = category_labels[::-1]

    for i, row_data in enumerate(data):
        color = colors[i] if colors is not None else None
        bar_groups.append(plt.bar(ind, row_data, bottom=cum_size,
                                  label=series_labels[i], color=color))
        # Running total: the next series starts where this one ended.
        cum_size += row_data

    if category_labels:
        plt.xticks(ind, category_labels)

    if y_label:
        plt.ylabel(y_label)

    plt.legend()

    if grid:
        plt.grid()

    if show_values:
        for group in bar_groups:
            for bar in group:
                w, h = bar.get_width(), bar.get_height()
                # Centre the value text inside its bar segment.
                plt.text(bar.get_x() + w/2, bar.get_y() + h/2,
                         value_format.format(h), ha="center",
                         va="center")
Example:
# Demo: two series stacked over four categories, saved to disk and shown.
category_labels = ['Cat A', 'Cat B', 'Cat C', 'Cat D']
series_labels = ['Series 1', 'Series 2']
data = [
    [0.2, 0.3, 0.35, 0.3],  # Series 1, one value per category
    [0.8, 0.7, 0.6, 0.5],   # Series 2, one value per category
]

# Keyword options gathered in one place to keep the call itself short.
plot_options = dict(
    category_labels=category_labels,
    show_values=True,
    value_format="{:.1f}",
    colors=['tab:orange', 'tab:green'],
    y_label="Quantity (units)",
)

plt.figure(figsize=(6, 4))
plot_stacked_bar(data, series_labels, **plot_options)

# Save before show(): the file is written while the figure is still current.
plt.savefig('bar.png')
plt.show()
You need the bottom of each dataset to be the sum of all the datasets that came before. You may also need to convert the datasets to numpy arrays to add them together.
# Each series sits on the element-wise sum of every series drawn before it.
# The lists are converted to numpy arrays at the point of use so that "+"
# adds element-wise instead of concatenating.
base_for_p3 = np.array(dataset[1]) + np.array(dataset[2])
base_for_p4 = base_for_p3 + np.array(dataset[3])

p1 = plt.bar(ind, dataset[1], width, color='r')
p2 = plt.bar(ind, dataset[2], width, color='b', bottom=dataset[1])
p3 = plt.bar(ind, dataset[3], width, color='g', bottom=base_for_p3)
p4 = plt.bar(ind, dataset[4], width, color='c', bottom=base_for_p4)
Alternatively, you could convert them to numpy arrays before you start plotting.
# Convert every series to a numpy array up front so the bottoms can be
# written as plain element-wise sums.
dataset1, dataset2, dataset3, dataset4 = (
    np.array(dataset[k]) for k in (1, 2, 3, 4)
)

# Offsets for the third and fourth layers of the stack.
base_for_p3 = dataset1 + dataset2
base_for_p4 = base_for_p3 + dataset3

p1 = plt.bar(ind, dataset1, width, color='r')
p2 = plt.bar(ind, dataset2, width, color='b', bottom=dataset1)
p3 = plt.bar(ind, dataset3, width, color='g', bottom=base_for_p3)
p4 = plt.bar(ind, dataset4, width, color='c', bottom=base_for_p4)
Or, finally, if you want to avoid converting to numpy arrays, you could use a list comprehension:
# Pure-Python variant: zip the earlier series together and sum each column
# with a list comprehension to get the bottom of the next layer.
base_for_p3 = [sum(column) for column in zip(dataset[1], dataset[2])]
base_for_p4 = [
    sum(column) for column in zip(dataset[1], dataset[2], dataset[3])
]

p1 = plt.bar(ind, dataset[1], width, color='r')
p2 = plt.bar(ind, dataset[2], width, color='b', bottom=dataset[1])
p3 = plt.bar(ind, dataset[3], width, color='g', bottom=base_for_p3)
p4 = plt.bar(ind, dataset[4], width, color='c', bottom=base_for_p4)