Merging multiple adjacent rectangles into one polygon

If any one else comes by this, I've also implemented this algorithm based off of the answer from mmgp in JavaScript. If anyone else is trying to do this in JS, you're probably better off using mine than having to figure out the little issues with porting this yourself from the other examples.

I have it open sourced as part of a larger project here - https://github.com/Crabcyborg/crabcyborg/blob/e282aff3a033fba76bba1a7c924f04e6df7b3dc4/shapeup/svg-helper.js#L199

Since it's JavaScript, I can demo it live. It's used in the "polygon per color" example on https://crabcyb.org/experiment/shapeup-svg/

const pointsToPolygons = (points, size) => {
    let edges_v = {}, edges_h = {};
    const setEdges = (edges, cmp, e) => {
        points.sort(cmp);
        let edge_index = 0;
        const length = points.length;
        while(edge_index < length) {
            const curr = points[edge_index][e];
            do {
                edges[points[edge_index]] = points[edge_index+1];
                edges[points[edge_index+1]] = points[edge_index];
                edge_index += 2
            } while(edge_index < length && points[edge_index][e] == curr);
        }
    };
    setEdges(edges_v, xThenY, 0);
    setEdges(edges_h, yThenX, 1);
    
    let polygon = [], keys;
    while((keys = Object.keys(edges_h)).length) {
        const [ key ] = keys;
        delete edges_h[key];
        
        const first_vertex = new V2(key);
        let previous = [first_vertex.toArray(), 0];
        let vertices = [first_vertex];

        while(1) {
            const [edge_index, edge] = previous;
            const edges = [edges_v, edges_h][edge];
            const next_vertex = new V2(edges[edge_index]);
            const next = [next_vertex.toArray(), 1-edge];
            delete edges[edge_index];

            if(first_vertex.compare(next_vertex)) {
                break;
            }

            vertices.push(next_vertex);
            previous = next;
        }

        let scaled_vertices = [];
        for(let vertex of vertices) {
            scaled_vertices.push(vertex.scale(size).toArray());

            const edge_index = vertex.toArray();
            delete edges_v[edge_index];
            delete edges_h[edge_index];
        }

        polygon.push(scaled_vertices);
    }

    return polygon;
};

function V2(x,y) {
    if(Array.isArray(x)) {
        y = x[1];
        x = x[0];
    } else {
        switch(typeof x) {
            case 'object': 
                y = x.y;
                x = x.x;
            break;

            case 'string':
                const split = x.split(',');
                x = parseInt(split[0]);
                y = parseInt(split[1]);
            break;
        }
    }

    this.x = x;
    this.y = y;
}

V2.prototype = {
    scale: function(scale) {
        return new V2(this.x * scale, this.y * scale);
    },
    compare: function(v) {
        return this.x == v.x && this.y == v.y;
    },
    toArray: function() {
        return [this.x, this.y];
    }
};

const xThenY = (a,b) => a[0]<b[0] || (a[0]==b[0] && a[1]<b[1]) ? -1 : 1;
const yThenX = (a,b) => a[1]<b[1] || (a[1]==b[1] && a[0]<b[0]) ? -1 : 1;

Here is my solution:

RectUnion.php

<?php 

class RectUnion {
    private $x, $y;
    private $sides;
    private $points;

    function __construct() {
        $this->x = array();
        $this->y = array();
        $this->sides = array();
        $this->points = array();
    }

    function addRect($r) {
        extract($r);
        $this->x[] = $x1;
        $this->x[] = $x2;
        $this->y[] = $y1;
        $this->y[] = $y2;
        if ($x1 > $x2) { $tmp = $x1; $x1 = $x2; $x2 = $tmp; }
        if ($y1 > $y2) { $tmp = $y1; $y1 = $y2; $y2 = $tmp; }
        $this->sides[] = array($x1, $y1, $x2, $y1);
        $this->sides[] = array($x2, $y1, $x2, $y2);
        $this->sides[] = array($x1, $y2, $x2, $y2);
        $this->sides[] = array($x1, $y1, $x1, $y2);
    }

    function splitSides() {
       $result = array();
       $this->x = array_unique($this->x);
       $this->y = array_unique($this->y);
       sort($this->x);
       sort($this->y);
       foreach ($this->sides as $i => $s) {
           if ($s[0] - $s[2]) {     // Horizontal
               foreach ($this->x as $xx) {
                   if (($xx > $s[0]) && ($xx < $s[2])) {
                       $result[] = array($s[0], $s[1], $xx, $s[3]);
                       $s[0] = $xx;
                   }
               }
           } else {                 // Vertical
               foreach ($this->y as $yy) {
                   if (($yy > $s[1]) && ($yy < $s[3])) {
                       $result[] = array($s[0], $s[1], $s[2], $yy);
                       $s[1] = $yy;
                   }
               }
           }
           $result[] = $s;
       }
       return($result);
    }

    function removeDuplicates($sides) {
        $x = array();
        foreach ($sides as $i => $s) {
            @$x[$s[0].','.$s[1].','.$s[2].','.$s[3]]++;
        }
        foreach ($x as $s => $n) {
            if ($n > 1) {
              unset($x[$s]);
            } else {
              $this->points[] = explode(",", $s);
            }
        }
        return($x);
    }

    function drawPoints($points, $outfile = null) {
        $xs = $this->x[count($this->x) - 1] + $this->x[0];
        $ys = $this->y[count($this->y) - 1] + $this->y[0];
        $img = imagecreate($xs, $ys);
        if ($img !== FALSE) {
            $wht = imagecolorallocate($img, 255, 255, 255);
            $blk = imagecolorallocate($img, 0, 0, 0);
            $red = imagecolorallocate($img, 255, 0, 0);
            imagerectangle($img, 0, 0, $xs - 1, $ys - 1, $red);
            $oldp = $points[0];
            for ($i = 1; $i < count($points); $i++) {
                $p = $points[$i];
                imageline($img, $oldp['x'], $oldp['y'], $p['x'], $p['y'], $blk);
                $oldp = $p;
            }
            imageline($img, $oldp['x'], $oldp['y'], $points[0]['x'], $points[0]['y'], $blk);
            if ($outfile == null) header("content-type: image/png");
            imagepng($img, $outfile);
            imagedestroy($img);
        }
    }

    function drawSides($sides, $outfile = null) {
        $xs = $this->x[count($this->x) - 1] + $this->x[0];
        $ys = $this->y[count($this->y) - 1] + $this->y[0];
        $img = imagecreate($xs, $ys);
        if ($img !== FALSE) {
            $wht = imagecolorallocate($img, 255, 255, 255);
            $blk = imagecolorallocate($img, 0, 0, 0);
            $red = imagecolorallocate($img, 255, 0, 0);
            imagerectangle($img, 0, 0, $xs - 1, $ys - 1, $red);
            foreach ($sides as $s => $n) {
                if (is_array($n)) {
                    $r = $n;
                } else {
                    $r = explode(",", $s);
                }
                imageline($img, $r['x1'], $r['y1'], $r['x2'], $r['y2'], $blk);
            }
            if ($outfile == null) header("content-type: image/png");
            imagepng($img, $outfile);
            imagedestroy($img);
        }
    }

    function getSides($sides = FALSE) {
        if ($sides === FALSE) {
            foreach ($this->sides as $r) {
                $result[] = array('x1' => $r[0], 'y1' => $r[1], 'x2' => $r[2], 'y2' => $r[3]);
            }
        } else {
            $result = array();
            foreach ($sides as $s => $n) {
                $r = explode(",", $s);
                $result[] = array('x1' => $r[0], 'y1' => $r[1], 'x2' => $r[2], 'y2' => $r[3]);
            }
        }
        return($result);
    }

    private function _nextPoint(&$points, $lastpt) {
        @extract($lastpt);
        foreach ($points as $i => $p) {
            if (($p[0] == $x) && ($p[1] == $y)) {
                unset($points[$i]);
                return(array('x' => $p[2], 'y' => $p[3]));
            } else if (($p[2] == $x) && ($p[3] == $y)) {
                unset($points[$i]);
                return(array('x' => $p[0], 'y' => $p[1]));
            }
        }
        return false;
    }

    function getPoints($points = FALSE) {
        if ($points === FALSE) $points = $this->points;
        $result = array(
            array('x' => $points[0][0], 'y' => $points[0][1])
        );
        $lastpt = array('x' => $points[0][2], 'y' => $points[0][3]);
        unset($points[0]);
        do {
            $result[] = $lastpt;
        } while ($lastpt = $this->_nextPoint($points, $lastpt));
        return($result);
    }
}

?>

merge.php

<?php

require_once("RectUnion.php");

function generateRect($prev, $step) {
    $rect = array(
        'x1' => $prev['x2'],
        'x2' => $prev['x2'] + rand($step, $step * 10),
        'y1' => rand($prev['y1'] + 2, $prev['y2'] - 2),
        'y2' => rand($step * 2, $step * 10)
    );
    return($rect);
}

$x0 = 50;       // Pixels
$y0 = 50;       // Pixels
$step = 20;     // Pixels
$nrect = 10;    // Number of rectangles
$rects = array(
    array("x1" => 50, "y1" => 50, "x2" => 100, "y2" => 100)
);
for ($i = 1; $i < $nrect - 1; $i++) {
    $rects[$i] = generateRect($rects[$i - 1], $step);
}

$start_tm = microtime(true);

$ru = new RectUnion();
foreach ($rects as $r) {
    $ru->addRect($r);
}
$union = $ru->removeDuplicates($ru->splitSides());

$stop_tm = microtime(true);

$ru->drawSides($ru->getSides(), "before.png");

if (FALSE) {    // Lines
    $sides = $ru->getSides($union);
    $ru->drawSides($sides, "after.png");
} else {        // Points
    $points = $ru->getPoints();
    $ru->drawPoints($points, "after.png");
}

?>
<!DOCTYPE html>
<html>
    <body>
        <fieldset>
            <legend>Before Union</legend>
            <img src='before.png'>
        </fieldset>
        <fieldset>
            <legend>After Union</legend>
            <img src='after.png'>
        </fieldset>
        <h4>Elapsed Time: <?= round($stop_tm - $start_tm, 4) ?> seconds</h4>
        <?php if (isset($sides)): ?>
        <h4>Sides:</h4>
        <pre><?= print_r($sides, true) ?></pre>
        <?php elseif (isset($points)): ?>
        <h4>Points:</h4>
        <pre><?= print_r($points, true) ?></pre>
        <?php endif ?>
    </body>
</html>

How does it work?

The script identifies and removes all "overlapping" segments. For example:
example

First, the sides of each rectangle are split at the intersections with the sides of the adiacent rectangle.
For example, consider the B2-B3 side of the B rectangle: the "splitSides" method splits it into the B2-D1, D1-D4 and D4-B3 segments.
Then the "removeDuplicates" method removes all the overlapping (duplicate) segments.
For example, the D1-D4 segment is a duplicate, since it appears either in the B rectangle and in the D rectangle.
Finally the "getSides" method returns the list of the remaining segments, while the "getPoints" method returns the list of the polygon points.
The "draw" methods are only for the graphical representation of the result, and require the GD extension to work:
result

About performance

Here are some execution times:

  • 10 rectangles: 0,003 seconds
  • 100 rectangles: 0,220 seconds
  • 1000 rectangles: 4,407 seconds
  • 2000 rectangles: 13,448 seconds

By profiling the execution with XDebug, I've got the following results:

cachegrind


O'Rourke has studied a problem that is related to this one (along many others that relate to Computational Geometry) and as a consequence, produced a very beautiful method to efficiently solve it. His method is described in the paper Uniqueness of orthogonal connect-the-dots and is also clearly illustrated at http://www.cs.mcgill.ca/~cs644/Godfried/2005/Fall/sdroui9/p0_introduction.html. Note that it says that the polygon shouldn't share vertices in order to apply this method, but this happens very often in the problem we are discussing here. Thus, all we need to do is to eliminate vertices that are shared. Note that this still produces a polygon, and it is the polygon that is wanted as result. Also observe that the list of rectangles must not contain duplicates (I will assume that is the case, otherwise preprocess it to make the list unique).

I've used Python to code it, and if there is any doubt about its meaning, feel free to ask. Here is an overview of the implementation. We start with a list of rectangles described according to OP's notation. Then we obtain the four vertices of each rectangle, discarding shared vertices along the way. This is efficiently achieved using a set. Now we simply apply the algorithm mentioned. Note that I use two hash tables, edges_h (for horizontal edges) and edges_v (for vertical edges), to store the polygon edges. These hash tables effectively work as an undirected graph. Thus, after all the edges are obtained it is easy and fast to obtain the ordered vertices of the polygon. Pick any (key, value) from the hash table edges_h, for example. Now, the next ordered vertex is the one given by edges_v[value] = next_value, and the next one by edges_h[next_value] and so on. Repeat this process till we hit the first chosen vertex, and it is done.

A quick view into the mentioned algorithm is:

  1. Sort points by lowest x, lowest y
  2. Go through each column and create edges between the vertices 2i and 2i + 1 in that column
  3. Sort points by lowest y, lowest x
  4. Go through each row and create edges between the vertices 2i and 2i + 1 in that row.
# These rectangles resemble the OP's illustration.
rect = ([[0,  10], [10, 0]],
        [[10, 13], [19, 0]],
        [[19, 10], [23, 0]])

points = set()
for (x1, y1), (x2, y2) in rect:
    for pt in ((x1, y1), (x2, y1), (x2, y2), (x1, y2)):
        if pt in points: # Shared vertice, remove it.
            points.remove(pt)
        else:
            points.add(pt)
points = list(points)

def y_then_x(a, b):
    if a[1] < b[1] or (a[1] == b[1] and a[0] < b[0]):
        return -1
    elif a == b:
        return 0
    else:
        return 1

sort_x = sorted(points)
sort_y = sorted(points, cmp=y_then_x)

edges_h = {}
edges_v = {}

i = 0
while i < len(points):
    curr_y = sort_y[i][1]
    while i < len(points) and sort_y[i][1] == curr_y: //6chars comments, remove it
        edges_h[sort_y[i]] = sort_y[i + 1]
        edges_h[sort_y[i + 1]] = sort_y[i]
        i += 2
i = 0
while i < len(points):
    curr_x = sort_x[i][0]
    while i < len(points) and sort_x[i][0] == curr_x:
        edges_v[sort_x[i]] = sort_x[i + 1]
        edges_v[sort_x[i + 1]] = sort_x[i]
        i += 2

# Get all the polygons.
p = []
while edges_h:
    # We can start with any point.
    polygon = [(edges_h.popitem()[0], 0)]
    while True:
        curr, e = polygon[-1]
        if e == 0:
            next_vertex = edges_v.pop(curr)
            polygon.append((next_vertex, 1))
        else:
            next_vertex = edges_h.pop(curr)
            polygon.append((next_vertex, 0))
        if polygon[-1] == polygon[0]:
            # Closed polygon
            polygon.pop()
            break
    # Remove implementation-markers from the polygon.
    poly = [point for point, _ in polygon]
    for vertex in poly:
        if vertex in edges_h: edges_h.pop(vertex)
        if vertex in edges_v: edges_v.pop(vertex)

    p.append(poly)


for poly in p:
    print poly

the result is a list of ordered vertices for the polygon:

[(0, 0), (0, 10), (10, 10), (10, 13), (19, 13), (19, 10), (23, 10), (23, 0)]

We can also experiment with a more complicated layout:

rect = ([[1, 2], [3, 1]], [[1, 4], [2, 2]], [[1, 6], [2, 4]], [[2, 6], [3, 5]],
        [[3, 8], [4, 4]], [[2, 8], [3, 7]], [[3, 10], [5, 8]], [[3, 4], [9, 3]],
        [[4, 5], [7, 4]], [[6, 8], [7, 5]], [[6, 9], [8, 8]], [[8, 9], [10, 6]],
        [[9, 6], [10, 3]])

which is represented as the following set of rectangles:

enter image description here

and the method produces the following lists:

[(6, 9), (6, 5), (4, 5), (4, 8), (5, 8), (5, 10), (3, 10), (3, 8),
 (2, 8), (2, 7), (3, 7), (3, 6), (1, 6), (1, 1), (3, 1), (3, 2),
 (2, 2), (2, 5), (3, 5), (3, 3), (10, 3), (10, 9)]

[(9, 4), (9, 6), (8, 6), (8, 8), (7, 8), (7, 4)]

which, if drawn, represents the polygons in blue and red, respectively, as in:

enter image description here

As simple benchmarks go:

  • 1000 rectangles: ~ 0.04 seconds
  • 10000 rectangles: ~ 0.62 seconds
  • 100000 rectangles: ~ 8.68 seconds

These timings are simply the average of 10 runs on a busy outdated machine. Rectangles were randomly generated.

EDIT:

Implementation in PHP if needed.