Sort an array by the "Levenshtein Distance" with best performance in Javascript

Updated: http://jsperf.com/levenshtein-distance/5

The new Revision annihilates all other benchmarks. I was specifically chasing Chromium/Firefox performance as I don't have an IE8/9/10 test environment, but the optimisations made should apply in general to most browsers.

Levenshtein Distance

The matrix to perform Levenshtein Distance can be reused again and again. This was an obvious target for optimisation (but be careful, this now imposes a limit on string length (unless you were to resize the matrix dynamically)).

The only option for optimisation not pursued in jsPerf Revision 5 is memoisation. Depending on your use of Levenshtein Distance, this could help drastically but was omitted due to its implementation specific nature.

// Cache the matrix. Note this implementation is limited to
// strings of 64 char or less. This could be altered to update
// dynamically, or a larger value could be used.
var matrix = [];
for (var i = 0; i < 64; i++) {
    matrix[i] = [i];
    matrix[i].length = 64;
}
for (var i = 0; i < 64; i++) {
    matrix[0][i] = i;
}

// Functional implementation of Levenshtein Distance.
String.levenshteinDistance = function(__this, that, limit) {
    var thisLength = __this.length, thatLength = that.length;

    if (Math.abs(thisLength - thatLength) > (limit || 32)) return limit || 32;
    if (thisLength === 0) return thatLength;
    if (thatLength === 0) return thisLength;

    // Calculate matrix.
    var this_i, that_j, cost, min, t;
    for (i = 1; i <= thisLength; ++i) {
        this_i = __this[i-1];

        for (j = 1; j <= thatLength; ++j) {
            // Check the jagged ld total so far
            if (i === j && matrix[i][j] > 4) return thisLength;

            that_j = that[j-1];
            cost = (this_i === that_j) ? 0 : 1;  // Chars already match, no ++op to count.
            // Calculate the minimum (much faster than Math.min(...)).
            min    = matrix[i - 1][j    ] + 1;                      // Deletion.
            if ((t = matrix[i    ][j - 1] + 1   ) < min) min = t;   // Insertion.
            if ((t = matrix[i - 1][j - 1] + cost) < min) min = t;   // Substitution.

            matrix[i][j] = min; // Update matrix.
        }
    }

    return matrix[thisLength][thatLength];
};

Damerau-Levenshtein Distance

jsperf.com/damerau-levenshtein-distance

Damerau-Levenshtein Distance is a small modification to Levenshtein Distance to include transpositions. There is very little to optimise.

// Damerau transposition.
if (i > 1 && j > 1 && this_i === that[j-2] && this[i-2] === that_j
&& (t = matrix[i-2][j-2]+cost) < matrix[i][j]) matrix[i][j] = t;

Sorting Algorithm

The second part of this answer is to choose an appropriate sort function. I will upload optimised sort functions to http://jsperf.com/sort soon.


I came to this solution:

var levenshtein = (function() {
        var row2 = [];
        return function(s1, s2) {
            if (s1 === s2) {
                return 0;
            } else {
                var s1_len = s1.length, s2_len = s2.length;
                if (s1_len && s2_len) {
                    var i1 = 0, i2 = 0, a, b, c, c2, row = row2;
                    while (i1 < s1_len)
                        row[i1] = ++i1;
                    while (i2 < s2_len) {
                        c2 = s2.charCodeAt(i2);
                        a = i2;
                        ++i2;
                        b = i2;
                        for (i1 = 0; i1 < s1_len; ++i1) {
                            c = a + (s1.charCodeAt(i1) === c2 ? 0 : 1);
                            a = row[i1];
                            b = b < a ? (b < c ? b + 1 : c) : (a < c ? a + 1 : c);
                            row[i1] = b;
                        }
                    }
                    return b;
                } else {
                    return s1_len + s2_len;
                }
            }
        };
})();

See also http://jsperf.com/levenshtein-distance/12

Most speed was gained by eliminating some array usages.


I wrote an inline spell checker a few years ago and implemented a Levenshtein algorithm - since it was inline and for IE8 I did quite a lot of performance optimisation.

var levDist = function(s, t) {
    var d = []; //2d matrix

    // Step 1
    var n = s.length;
    var m = t.length;

    if (n == 0) return m;
    if (m == 0) return n;

    //Create an array of arrays in javascript (a descending loop is quicker)
    for (var i = n; i >= 0; i--) d[i] = [];

    // Step 2
    for (var i = n; i >= 0; i--) d[i][0] = i;
    for (var j = m; j >= 0; j--) d[0][j] = j;

    // Step 3
    for (var i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);

        // Step 4
        for (var j = 1; j <= m; j++) {

            //Check the jagged ld total so far
            if (i == j && d[i][j] > 4) return n;

            var t_j = t.charAt(j - 1);
            var cost = (s_i == t_j) ? 0 : 1; // Step 5

            //Calculate the minimum
            var mi = d[i - 1][j] + 1;
            var b = d[i][j - 1] + 1;
            var c = d[i - 1][j - 1] + cost;

            if (b < mi) mi = b;
            if (c < mi) mi = c;

            d[i][j] = mi; // Step 6

            //Damerau transposition
            if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
                d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
            }
        }
    }

    // Step 7
    return d[n][m];
}