Plotting a large number of points using matplotlib and running out of memory
Just to extend on the accepted answer, it appears that "workaround" saving function no longer works as the signature of write_png
has changed. My workaround is as follows:
import numpy as np
from PIL import Image
def png_write(fig, filename):
width, height = map(int, fig.get_size_inches() * fig.get_dpi())
image = np.frombuffer(fig.canvas.tostring_argb(), dtype='uint8')
image = image.reshape(width, height, 4)
image = np.roll(image, -1, 2)
Image.fromarray(image, 'RGBA').save(filename)
Something like this (sorry for the long code, most of it is copied from the stardard axes.Axes.draw
):
from operator import itemgetter
class generator_scatter_axes(matplotlib.axes.Axes):
def __init__(self, *args, **kwargs):
matplotlib.axes.Axes.__init__(self, *args, **kwargs)
self._big_data = None
def draw(self, renderer=None, inframe=None):
# copied from original draw (so you can still add normal artists ect)
if renderer is None:
renderer = self._cachedRenderer
if renderer is None:
raise RuntimeError('No renderer defined')
if not self.get_visible():
return
renderer.open_group('axes')
locator = self.get_axes_locator()
if locator:
pos = locator(self, renderer)
self.apply_aspect(pos)
else:
self.apply_aspect()
artists = []
artists.extend(self.collections)
artists.extend(self.patches)
artists.extend(self.lines)
artists.extend(self.texts)
artists.extend(self.artists)
if self.axison and not inframe:
if self._axisbelow:
self.xaxis.set_zorder(0.5)
self.yaxis.set_zorder(0.5)
else:
self.xaxis.set_zorder(2.5)
self.yaxis.set_zorder(2.5)
artists.extend([self.xaxis, self.yaxis])
if not inframe:
artists.append(self.title)
artists.append(self._left_title)
artists.append(self._right_title)
artists.extend(self.tables)
if self.legend_ is not None:
artists.append(self.legend_)
# the frame draws the edges around the axes patch -- we
# decouple these so the patch can be in the background and the
# frame in the foreground.
if self.axison and self._frameon:
artists.extend(self.spines.itervalues())
if self.figure.canvas.is_saving():
dsu = [(a.zorder, a) for a in artists]
else:
dsu = [(a.zorder, a) for a in artists
if not a.get_animated()]
# add images to dsu if the backend support compositing.
# otherwise, does the manaul compositing without adding images to dsu.
if len(self.images) <= 1 or renderer.option_image_nocomposite():
dsu.extend([(im.zorder, im) for im in self.images])
_do_composite = False
else:
_do_composite = True
dsu.sort(key=itemgetter(0))
# rasterize artists with negative zorder
# if the minimum zorder is negative, start rasterization
rasterization_zorder = self._rasterization_zorder
if (rasterization_zorder is not None and
len(dsu) > 0 and dsu[0][0] < rasterization_zorder):
renderer.start_rasterizing()
dsu_rasterized = [l for l in dsu if l[0] < rasterization_zorder]
dsu = [l for l in dsu if l[0] >= rasterization_zorder]
else:
dsu_rasterized = []
# the patch draws the background rectangle -- the frame below
# will draw the edges
if self.axison and self._frameon:
self.patch.draw(renderer)
if _do_composite:
# make a composite image blending alpha
# list of (mimage.Image, ox, oy)
zorder_images = [(im.zorder, im) for im in self.images
if im.get_visible()]
zorder_images.sort(key=lambda x: x[0])
mag = renderer.get_image_magnification()
ims = [(im.make_image(mag), 0, 0, im.get_alpha()) for z, im in zorder_images]
l, b, r, t = self.bbox.extents
width = mag * ((round(r) + 0.5) - (round(l) - 0.5))
height = mag * ((round(t) + 0.5) - (round(b) - 0.5))
im = mimage.from_images(height,
width,
ims)
im.is_grayscale = False
l, b, w, h = self.bbox.bounds
# composite images need special args so they will not
# respect z-order for now
gc = renderer.new_gc()
gc.set_clip_rectangle(self.bbox)
gc.set_clip_path(mtransforms.TransformedPath(
self.patch.get_path(),
self.patch.get_transform()))
renderer.draw_image(gc, round(l), round(b), im)
gc.restore()
if dsu_rasterized:
for zorder, a in dsu_rasterized:
a.draw(renderer)
renderer.stop_rasterizing()
for zorder, a in dsu:
a.draw(renderer)
############################
# new bits
############################
if self._big_data is not None:
for x, y, z in self._big_data:
# add the (single point) to the axes
a = self.scatter(x, y, color='r',
alpha=1, s=10, marker='s', linewidth=0)
# add the point, in Agg this will render + composite
a.draw(renderer)
# remove the artist from the axes, shouldn't let the render know
a.remove()
# delete the artist for good measure
del a
#######################
# end new bits
#######################
# again, from original to clean up
renderer.close_group('axes')
self._cachedRenderer = renderer
use it like such:
In [42]: fig = figure()
In [43]: ax = generator_scatter_axes(fig, [.1, .1, .8, .8])
In [44]: fig.add_axes(ax)
Out[44]: <__main__.generator_scatter_axes at 0x56fe090>
In [45]: ax._big_data = rand(500, 3)
In [46]: draw()
I changed your scatter function to have shapes that are visible in small numbers. This will be very slow as you are setting up a scatter
object every time. I would either take sensible chunks of your data and plot those, or replace the call to scatter
to the underlying artist objects, or use Joe's suggestion and just update a single artist.
@tcaswell's suggestion to override the Axes.draw
method is definitely the most flexible way to approach this.
However, you can use/abuse blitting to do this without subclassing Axes
. Just use draw_artist
each time without restoring the canvas.
There's one additional trick: We need to have a special save
method, as all of the others draw the canvas before saving, which will wipe out everything we've drawn on it previously.
Also, as tcaswell notes, calling draw_artist
for every item is rather slow, so for a large number of points, you'll want to chunk your input data. Chunking will give a significant speedup, but this method is always going to be slower than drawing a single PathCollection
.
At any rate, either one of these answers should alleviate your memory problems. Here's a simplistic example.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import _png
from itertools import izip
def main():
# We'll be saving the figure's background, so let's make it transparent.
fig, ax = plt.subplots(facecolor='none')
# You'll have to know the extent of the input beforehand with this method.
ax.axis([0, 10, 0, 10])
# We need to draw the canvas before we start adding points.
fig.canvas.draw()
# This won't actually ever be drawn. We just need an artist to update.
col = ax.scatter([5], [5], color=[0.1, 0.1, 0.1], alpha=0.3)
for xy, color in datastream(int(1e6), chunksize=int(1e4)):
col.set_offsets(xy)
col.set_color(color)
ax.draw_artist(col)
save(fig, 'test.png')
def datastream(n, chunksize=1):
"""Returns a generator over "n" random xy positions and rgb colors."""
for _ in xrange(n//chunksize):
xy = 10 * np.random.random((chunksize, 2))
color = np.random.random((chunksize, 3))
yield xy, color
def save(fig, filename):
"""We have to work around `fig.canvas.print_png`, etc calling `draw`."""
renderer = fig.canvas.renderer
with open(filename, 'w') as outfile:
_png.write_png(renderer._renderer.buffer_rgba(),
renderer.width, renderer.height,
outfile, fig.dpi)
main()
Also, you might notice that the top and left spines are getting drawn over. You could work around this by re-drawing those two spines (ax.draw_artist(ax.spines['top'])
, etc) before saving.