Encode numpy array using uncompressed RLE for COCO dataset
As an improvement of @waspinator 's answer. This is 35% faster.
def binary_mask_to_rle(binary_mask):
rle = {'counts': [], 'size': list(binary_mask.shape)}
counts = rle.get('counts')
last_elem = 0
running_length = 0
for i, elem in enumerate(binary_mask.ravel(order='F')):
if elem == last_elem:
pass
else:
counts.append(running_length)
running_length = 0
last_elem = elem
running_length += 1
counts.append(running_length)
return rle
Information on the format is available here: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/mask.py
RLE is a simple yet efficient format for storing binary masks. RLE first divides a vector (or vectorized image) into a series of piecewise constant regions and then for each piece simply stores the length of that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] (note that the odd counts are always the numbers of zeros).
import numpy as np
from itertools import groupby
def binary_mask_to_rle(binary_mask):
rle = {'counts': [], 'size': list(binary_mask.shape)}
counts = rle.get('counts')
for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order='F'))):
if i == 0 and value == 1:
counts.append(0)
counts.append(len(list(elements)))
return rle
test_list_1 = np.array([0, 0, 1, 1, 1, 0, 1])
test_list_2 = np.array([1, 1, 1, 1, 1, 1, 0])
print(binary_mask_to_rle(test_list_1))
print(binary_mask_to_rle(test_list_2))
output:
{'counts': [2, 3, 1, 1], 'size': [7]}
{'counts': [0, 6, 1], 'size': [7]}
You can use mask.frPyObjects(rle, size_x, size_y)
to encode the RLE, and then do all the usual mask
operations.
import json
import numpy as np
from pycocotools import mask
from skimage import measure
ground_truth_binary_mask = np.array([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[ 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[ 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[ 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.uint8)
fortran_ground_truth_binary_mask = np.asfortranarray(ground_truth_binary_mask)
encode the mask to RLE:
rle = binary_mask_to_rle(fortran_ground_truth_binary_mask)
print(rle)
output:
{'counts': [6, 1, 40, 4, 5, 4, 5, 4, 21], 'size': [9, 10]}
compress the RLE, and then decode:
compressed_rle = mask.frPyObjects(rle, rle.get('size')[0], rle.get('size')[1])
mask.decode(compressed_rle)
output:
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
In order to decode the binary masks encoded in the COCO annotations, you need to first use the COCO's API to get the RLE and then use opencv to get the contours like this:
# Import libraries
import numpy as np
import cv2
import json
import mask
# Read the annotations
file_path = "coco/annotations/stuff_annotations_trainval2017/stuff_train2017.json"
with open(file_path, 'r') as f:
data = json.load(f)
updated_data = []
# For each annotation
for annotation in data['annotations']:
# Initialize variables
obj = {}
segmentation = []
segmentation_polygons = []
# Decode the binary mask
mask_list = mask.decode(annotation['segmentation'])
mask_list = np.ascontiguousarray(mask_list, dtype=np.uint8)
mask_new, contours, hierarchy = cv2.findContours((mask_list).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Get the contours
for contour in contours:
contour = contour.flatten().tolist()
segmentation.append(contour)
if len(contour) > 4:
segmentation.append(contour)
if len(segmentation) == 0:
continue
# Get the polygons as (x, y) coordinates
for i, segment in enumerate(segmentation):
poligon = []
poligons = []
for j in range(len(segment)):
poligon.append(segment[j])
if (j+1)%2 == 0:
poligons.append(poligon)
poligon = []
segmentation_polygons.append(poligons)
# Save the segmentation and polygons for the current annotation
obj['segmentation'] = segmentation
obj['segmentation_polygons'] = segmentation_polygons
updated_data.append(obj)
Note: Only the COCO stuff 2017
annotations are using binary masks, the COCO person 2017
annotations aren't, so you don't need to decode the latter and find their contours.
Inspired by this solution.