Swipe Type Converter

JavaScript, ES6, Three Pass Optimizer, 112 187 235 240 241 243 and 231 234 passes

A three pass filter which first figures out critical keys in the keystroke sequence and then passes the sequence through the three filters:

  1. A loosely formed RegEx out of the critical keys and helping keys. This gives correct result for majority of the keys (around 150)
  2. A strict RegEx made out of just critical keys. This gives correct result for an extra 85 sequences
  3. A third filter to figure out ambiguity amongst close answers. This works for 40% ambiguous cases.

Code

keyboard = {
  x: {},
  y: ['  q      w      e      r      t      y      u      i      o      p',
      '    a      s      d      f      g      h      j      k      l',
      '        z      x      c      v      b      n      m'],
};
for (var i in keyboard.y)
  for (var y of keyboard.y[i])
    keyboard.x[y] = +i*7;
p = C => (x=keyboard.x[C],{x, y: keyboard.y[x/7].indexOf(C)})
angle = (L, C, R) => (
  p0 = p(L), p1 = p(C), p2 = p(R),
  a = Math.pow(p1.x-p0.x,2) + Math.pow(p1.y-p0.y,2),
  b = Math.pow(p1.x-p2.x,2) + Math.pow(p1.y-p2.y,2),
  c = Math.pow(p2.x-p0.x,2) + Math.pow(p2.y-p0.y,2),
  Math.acos((a+b-c) / Math.sqrt(4*a*b))/Math.PI*180
)
corner = (L, C, R, N, W) => {
  if (skip) {
    skip = false;
    return [];
  } 
  ngl = angle(L, C, R);
  if (ngl < 80) return [C + "{1,3}"]
  if (ngl < 115 && p(L).x != p(R).x && p(L).x != p(C) && p(R).x != p(C).x && Math.abs(p(L).y - p(R).y) < 5) return [C + "{0,3}"]
  if (ngl < 138) {
    if (N && Math.abs(ngl - angle(C, R, N)) < 6) {
      skip = true;
      return [L + "{0,3}", "([" + C + "]{0,3}|[" + R + "]{0,3})?", N + "{0,3}"]
    }
    return [C + "{0,3}"]
  }
  return ["([" + L + "]{0,3}|[" + C + "]{0,3}|[" + R + "]{0,3})?"]
}
f = S => {
  for (W = [S[0] + "{1,2}"],i = 1; i < S.length - 1; i++)
    W.push(...corner(S[i - 1], S[i], S[i + 1], S[i + 2], W))
  return [
    new RegExp("^" + W.join("") + S[S.length - 1] + "{1,3}$"),
    new RegExp("^" + W.filter(C=>!~C.indexOf("[")).join("") + S[S.length - 1] + "{1,3}$")
  ]
}
thirdPass = (F, C) => {
  if (!F[0]) return null
  F = F.filter((s,i)=>!F[i - 1] || F[i - 1] != s)
  FF = F.map(T=>[...T].filter((c,i)=>!T[i - 1] || T[i - 1] != c).join(""))
  if (FF.length == 1) return F[0];
  if (FF.length < 6 && FF[0][2] && FF[1][2] && FF[0][0] == FF[1][0] && FF[0][1] == FF[1][1])
    if (Math.abs(F[0].length - F[1].length) < 1)
      for (i=0;i<Math.min(F[0].length, FF[1].length);i++) {
        if (C.indexOf(FF[0][i]) < C.indexOf(FF[1][i])) return F[0]
        else if (C.indexOf(FF[0][i]) > C.indexOf(FF[1][i])) return F[1]
      }
  return F[0]
}
var skip = false;
SwiftKey = C => (
  C = [...C].filter((c,i)=>!C[i - 1] || C[i - 1] != c).join(""),
  skip = false, matched = [], secondPass = [], L = C.length, reg = f(C),
  words.forEach(W=>W.match(reg[0])&&matched.push(W)),
  words.forEach(W=>W.match(reg[1])&&secondPass.push(W)),
  matched = matched.sort((a,b)=>Math.abs(L-a.length)>Math.abs(L-b.length)),
  secondPass = secondPass.sort((a,b)=>Math.abs(L-a.length)>Math.abs(L-b.length)),
  first = matched[0], second = secondPass[0], third = thirdPass(secondPass.length? secondPass: matched, C),
  second && second.length >= first.length - 1? first != third ? third: second: third.length >= first.length ? third: first
)

// For use by js shell of latest firefox
print(SwiftKey(readline()));

The code assumes a variable called words is present which is an array of all the words from this page

See the code in action here

See the test cases in action here

Both the above link work only on a latest Firefox (33 and above) (due to ES6).


Ruby, Regex Solver - 30 140 176 180 182 187 and 179 183 passes

I'll figure out the score later. Here is very naive solution that doesn't take the keyboard layout into account:

words = File.readlines('wordlist').map(&:chomp)

swipe = ARGV.shift
puts words.select {|word| word[0] == swipe[0] &&
                          word[-1] == swipe[-1]}
          .select {|word|
              chars = [word[0]]
              (1..word.size-1).each {|i| chars << word[i] if word[i] != word[i-1]}
              swipe[Regexp.new('^'+chars.join('.*')+'$')]
          }.sort_by {|word| word.size}[-1]

It takes input from ARGV and prints the result. I'm just filtering the word list by first and last letter, and them I'm trying all of the remaining words against the input (eliminating duplicate letters and using a regex like ^g.*u.*e.*s$ for "guess") and return the first of those if there are multiple solutions.

Run it like

ruby regex-solver.rb cvhjioiugfde

Anyone else, feel free to reuse this step as a first filter - I believe it won't throw out any correct words, so this preliminary check can greatly reduce the search space for better algorithms.

Edit: Following the OPs suggestion, I'm now selecting the longest of the candidates, which seems to be a decent heuristic.

Also thanks to es1024 for reminding me about duplicate letters.


C++, Discrete Fréchet Distance - 201 220 222 232 and 232 passes

To me, the problem very much called for the Fréchet Distance which unfortunately is very hard to compute.

Just for fun, I've tried to approach the problem by implementing a discrete approximation described by Thomas Eiter and Heikki Mannila in Computing Discrete Fréchet Distance (1994).

At first, I'm using the same approach as the others for filtering all words in the list which are subsequences of the input (also accounting for multiple sequential occurances of the same character). Then, I'm filling the polygon curve from letter to letter of every word with intermediate points and match it against the input curve. Finally, I divide every distance by the length of the word and take the minimum score.

So far, the method doesn't work as well as I had hoped (It recognizes the code example as "chide"), but this could just be the result of bugs I haven't found yet. Else, another idea would be to use other variations of the fréchet distance ("average" instead of "maximum dog leash length").

Edit: Now, I'm using an approximation to the "average dog leash length". This means that I'm finding an ordered mapping between both paths which minimizes the sum of all distances and later divide it by the number of distances.

If the same character appears twice or more often in the dictionary word, I only put one node in the path.

#include<iostream>
#include<fstream>
#include<vector>
#include<map>
#include<algorithm>
#include<utility>
#include<cmath>

using namespace std;

const double RESOLUTION = 3.2;

double dist(const pair<double, double>& a, const pair<double, double>& b) {
    return sqrt((a.first - b.first) * (a.first - b.first) + (a.second - b.second) * (a.second - b.second));
}

double helper(const vector<pair<double, double> >& a,
        const vector<pair<double, double> >& b,
        vector<vector<double> >& dp,
        int i,
        int j) {
    if (dp[i][j] > -1)
        return dp[i][j];
    else if (i == 0 && j == 0)
        dp[i][j] = dist(a[0], b[0]);
    else if (i > 0 && j == 0)
        dp[i][j] = helper(a, b, dp, i - 1, 0) +
                   dist(a[i], b[0]);
    else if (i == 0 && j > 0)
        dp[i][j] = helper(a, b, dp, 0, j - 1) +
                   dist(a[0], b[j]);
    else if (i > 0 && j > 0)
        dp[i][j] = min(min(helper(a, b, dp, i - 1, j),
                           helper(a, b, dp, i - 1, j - 1)),
                       helper(a, b, dp, i, j - 1)) +
                   dist(a[i], b[j]);
    return dp[i][j];
}

double discretefrechet(const vector<pair<double, double> >& a, const vector<pair<double, double> >& b) {
    vector<vector<double> > dp = vector<vector<double> >(a.size(), vector<double>(b.size(), -1.));
    return helper(a, b, dp, a.size() - 1, b.size() - 1);
}

bool issubsequence(string& a, string& b) {
    // Accounts for repetitions of the same character: hallo subsequence of halo
    int i = 0, j = 0;
    while (i != a.size() && j != b.size()) {
        while (a[i] == b[j])
            ++i;
        ++j;
    }
    return (i == a.size());
}

int main() {
    string swipedword;
    cin >> swipedword;

    ifstream fin;
    fin.open("wordlist");
    map<string, double> candidatedistance = map<string, double>();
    string readword;
    while (fin >> readword) {
        if (issubsequence(readword, swipedword) && readword[0] == swipedword[0] && readword[readword.size() - 1] == swipedword[swipedword.size() - 1]) {
            candidatedistance[readword] = -1.;
        }
    }
    fin.close();

    ifstream fin2;
    fin2.open("keypos.csv");
    map<char, pair<double, double> > keypositions = map<char, pair<double, double> >();
    char rc, comma;
    double rx, ry;
    while (fin2 >> rc >> comma >> rx >> comma >> ry) {
        keypositions[rc] = pair<double, double>(rx, ry);
    }
    fin2.close();

    vector<pair<double, double> > swipedpath = vector<pair<double, double> >();
    for (int i = 0; i != swipedword.size(); ++i) {
        swipedpath.push_back(keypositions[swipedword[i]]);
    }

    for (map<string, double>::iterator thispair = candidatedistance.begin(); thispair != candidatedistance.end(); ++thispair) {
        string thisword = thispair->first;
        vector<pair<double, double> > thispath = vector<pair<double, double> >();
        for (int i = 0; i != thisword.size() - 1; ++i) {
            pair<double, double> linestart = keypositions[thisword[i]];
            pair<double, double> lineend = keypositions[thisword[i + 1]];
            double linelength = dist(linestart, lineend);
            if (thispath.empty() || linestart.first != thispath[thispath.size() - 1].first || linestart.second != thispath[thispath.size() - 1].second)
                thispath.push_back(linestart);
            int segmentnumber = linelength / RESOLUTION;
            for (int j = 1; j < segmentnumber; ++j) {
                thispath.push_back(pair<double, double>(linestart.first + (lineend.first - linestart.first) * ((double)j) / ((double)segmentnumber),
                                    linestart.second + (lineend.second - lineend.second) * ((double)j) / ((double)segmentnumber)));
            }
        }
        pair<double, double> lastpoint = keypositions[thisword[thisword.size() - 1]];
        if (thispath.empty() || lastpoint.first != thispath[thispath.size() - 1].first || lastpoint.second != thispath[thispath.size() - 1].second)
        thispath.push_back(lastpoint);

        thispair->second = discretefrechet(thispath, swipedpath) / (double)(thispath.size());
    }

    double bestscore = 1e9;
    string bestword = "";
    for (map<string, double>::iterator thispair = candidatedistance.begin(); thispair != candidatedistance.end(); ++thispair) {
        double score = thispair->second;
        if (score < bestscore) {
            bestscore = score;
            bestword = thispair->first;
        }
        // cout << thispair->first << ": " << score << endl;
    }
    cout << bestword << endl;

    return 0;
}

I've compiled the code with clang, but g++ ./thiscode.cpp -o ./thiscode should also work fine.