Creating an in memory rasterio Dataset from numpy array
You need to recreate a DatasetReader
manually and you can use a MemoryFile
to avoid writing to disk.
You can re-use the metadata from the input raster in the DatasetReader
, but you'll need to modify the height
and width
properties and the transform
. From documentation:
After these resolution changing operations, the dataset’s resolution and the resolution components of its affine transform property no longer apply to the new arrays.
In the example below note that:
- I use a
contextmanager
so theDatasetReader
andMemoryFile
objects get cleaned up automatically. This is why I useyield
notreturn
in the function - I had to change the order of indexes in
raster.read
as arrays are(band, row, col)
order not(row, col, band)
like you used in your snippet.
# Example licensed under cc by-sa 3.0 with attribution required
from contextlib import contextmanager
import rasterio
from rasterio import Affine, MemoryFile
from rasterio.enums import Resampling
# use context manager so DatasetReader and MemoryFile get cleaned up automatically
@contextmanager
def resample_raster(raster, scale=2):
t = raster.transform
# rescale the metadata
transform = Affine(t.a / scale, t.b, t.c, t.d, t.e / scale, t.f)
height = raster.height * scale
width = raster.width * scale
profile = src.profile
profile.update(transform=transform, driver='GTiff', height=height, width=width)
data = raster.read( # Note changed order of indexes, arrays are band, row, col order not row, col, band
out_shape=(raster.count, height, width),
resampling=Resampling.bilinear,
)
with MemoryFile() as memfile:
with memfile.open(**profile) as dataset: # Open as DatasetWriter
dataset.write(data)
del data
with memfile.open() as dataset: # Reopen as DatasetReader
yield dataset # Note yield not return
with rasterio.open('path/to/raster') as src:
with resample_raster(src) as resampled:
print('Orig dims: {}, New dims: {}'.format(src.shape, resampled.shape))
print(repr(resampled))
Orig dims: (4103, 4682), New dims: (8206, 9364)
<open DatasetReader name='/vsimem/95befda0-2061-4294-982b-20e46f127066.' mode='r'>
I wrote a wrapper for rasterio.mask.mask
that accepts numpy arrays as inputs.
def mask_raster_with_geometry(raster, transform, shapes, **kwargs):
"""Wrapper for rasterio.mask.mask to allow for in-memory processing.
Docs: https://rasterio.readthedocs.io/en/latest/api/rasterio.mask.html
Args:
raster (numpy.ndarray): raster to be masked with dim: [H, W]
transform (affine.Affine): the transform of the raster
shapes, **kwargs: passed to rasterio.mask.mask
Returns:
masked: numpy.ndarray or numpy.ma.MaskedArray with dim: [H, W]
"""
with rasterio.io.MemoryFile() as memfile:
with memfile.open(
driver='GTiff',
height=raster.shape[0],
width=raster.shape[1],
count=1,
dtype=raster.dtype,
transform=transform,
) as dataset:
dataset.write(raster, 1)
with memfile.open() as dataset:
output, _ = rasterio.mask.mask(dataset, shapes, **kwargs)
return output.squeeze(0)
This is done via first writing to memfile
and then reading from it before the function call to rasterio.mask.mask()
. The MemoryFile
will be discarded after exiting the with
statement.
Unit test:
@pytest.mark.parametrize(
'raster,transform,shapes,expected',
[
pytest.param(
# the raster is positioned between x in (5, 8) and y in (-14, -10)
np.ones((4, 3), dtype=np.float32),
rasterio.transform.Affine(1, 0, 5, 0, -1, -10),
# mask out two pixels
# box params: xmin, ymin, xmax, ymax
[shapely.geometry.box(6.1, -12.9, 6.9, -11.1)],
np.array([
[0, 0, 0],
[0, 1, 0],
[0, 1, 0],
[0, 0, 0],
], dtype=np.float32),
),
],
)
def test_mask_raster_with_geometry(raster, transform, shapes, expected):
np.testing.assert_array_equal(
mask_raster_with_geometry(raster, transform, shapes),
expected)