Finding approximate correlations
Rust, n ≈ 30 or 31 or 32
On my laptop (two cores, i5-6200U), this gets through n = 1, …, 31 in 53 seconds, using about 2.5 GiB of memory, or through n = 1, …, 32 in 105 seconds, using about 5 GiB of memory. Compile with cargo build --release
and run target/release/correlations
.
src/main.rs
extern crate rayon;
type S = u32;
const S_BITS: u32 = 32;
fn cat(mut a: Vec<S>, mut b: Vec<S>) -> Vec<S> {
if a.capacity() >= b.capacity() {
a.append(&mut b);
a
} else {
b.append(&mut a);
b
}
}
fn search(n: u32, i: u32, ss: Vec<S>) -> u32 {
if ss.is_empty() {
0
} else if 2 * i + 1 > n {
search_end(n, i, ss)
} else if 2 * i + 1 == n {
search2(n, i, ss.into_iter().flat_map(|s| vec![s, s | 1 << i]))
} else {
search2(n,
i,
ss.into_iter()
.flat_map(|s| {
vec![s,
s | 1 << i,
s | 1 << n - i - 1,
s | 1 << i | 1 << n - i - 1]
}))
}
}
fn search2<SS: Iterator<Item = S>>(n: u32, i: u32, ss: SS) -> u32 {
let (shift, mask) = (n - i - 1, !(!(0 as S) << i + 1));
let close = |s: S| {
let x = (s ^ s >> shift) & mask;
x & x.wrapping_sub(1) == 0
};
let (ssy, ssn) = ss.partition(|&s| close(s));
let (cy, cn) = rayon::join(|| search(n, i + 1, ssy), || search(n, i + 1, ssn));
cy + cn
}
fn search_end(n: u32, i: u32, ss: Vec<S>) -> u32 {
if i >= n - 1 { 1 } else { search_end2(n, i, ss) }
}
fn search_end2(n: u32, i: u32, mut ss: Vec<S>) -> u32 {
let (shift, mask) = (n - i - 1, !(!(0 as S) << i + 1));
let close = |s: S| {
let x = (s ^ s >> shift) & mask;
x & x.wrapping_sub(1) == 0
};
match ss.iter().position(|&s| close(s)) {
Some(0) => {
match ss.iter().position(|&s| !close(s)) {
Some(p) => {
let (ssy, ssn) = ss.drain(p..).partition(|&s| close(s));
let (cy, cn) = rayon::join(|| search_end(n, i + 1, cat(ss, ssy)),
|| search_end(n, i + 1, ssn));
cy + cn
}
None => search_end(n, i + 1, ss),
}
}
Some(p) => {
let (ssy, ssn) = ss.drain(p..).partition(|&s| close(s));
let (cy, cn) = rayon::join(|| search_end(n, i + 1, ssy),
|| search_end(n, i + 1, cat(ss, ssn)));
cy + cn
}
None => search_end(n, i + 1, ss),
}
}
fn main() {
for n in 1..S_BITS + 1 {
println!("{}: {}", n, search(n, 1, vec![0, 1]));
}
}
Cargo.toml
[package]
name = "correlations"
version = "0.1.0"
authors = ["Anders Kaseorg <[email protected]>"]
[dependencies]
rayon = "0.7.0"
Try it online!
I also have a slightly slower variant using very much less memory.
Rust + CryptoMiniSat, n ≈ 41
src/main.rs
extern crate cryptominisat;
extern crate itertools;
use std::iter::once;
use cryptominisat::{Lbool, Lit, Solver};
use itertools::Itertools;
fn make_solver(n: usize) -> (Solver, Vec<Lit>) {
let mut solver = Solver::new();
let s: Vec<Lit> = (1..n).map(|_| solver.new_var()).collect();
let d: Vec<Vec<Lit>> = (1..n - 1)
.map(|k| {
(0..n - k)
.map(|i| (if i == 0 { s[k - 1] } else { solver.new_var() }))
.collect()
})
.collect();
let a: Vec<Lit> = (1..n - 1).map(|_| solver.new_var()).collect();
for k in 1..n - 1 {
for i in 1..n - k {
solver.add_xor_literal_clause(&[s[i - 1], s[k + i - 1], d[k - 1][i]], true);
}
for t in (0..n - k).combinations(2) {
solver.add_clause(&t.iter()
.map(|&i| d[k - 1][i])
.chain(once(!a[k - 1]))
.collect::<Vec<_>>()
[..]);
}
for t in (0..n - k).combinations(n - k - 1) {
solver.add_clause(&t.iter()
.map(|&i| !d[k - 1][i])
.chain(once(a[k - 1]))
.collect::<Vec<_>>()
[..]);
}
}
(solver, a)
}
fn search(n: usize,
solver: &mut Solver,
a: &Vec<Lit>,
assumptions: &mut Vec<Lit>,
k: usize)
-> usize {
match solver.solve_with_assumptions(assumptions) {
Lbool::True => search_sat(n, solver, a, assumptions, k),
Lbool::False => 0,
Lbool::Undef => panic!(),
}
}
fn search_sat(n: usize,
solver: &mut Solver,
a: &Vec<Lit>,
assumptions: &mut Vec<Lit>,
k: usize)
-> usize {
if k >= n - 1 {
1
} else {
let s = solver.is_true(a[k - 1]);
assumptions.push(if s { a[k - 1] } else { !a[k - 1] });
let c = search_sat(n, solver, a, assumptions, k + 1);
assumptions.pop();
assumptions.push(if s { !a[k - 1] } else { a[k - 1] });
let c1 = search(n, solver, a, assumptions, k + 1);
assumptions.pop();
c + c1
}
}
fn f(n: usize) -> usize {
let (mut solver, proj) = make_solver(n);
search(n, &mut solver, &proj, &mut vec![], 1)
}
fn main() {
for n in 1.. {
println!("{}: {}", n, f(n));
}
}
Cargo.toml
[package]
name = "correlations-cms"
version = "0.1.0"
authors = ["Anders Kaseorg <[email protected]>"]
[dependencies]
cryptominisat = "5.0.1"
itertools = "0.6.0"
How it works
This does a recursive search through the tree of all partial assignments to prefixes of the Y/N array, using a SAT solver to check at each step whether the current partial assignment is consistent and prune the search if not. CryptoMiniSat is the right SAT solver for this job due to its special optimizations for XOR clauses.
The three families of constraints are
Si ⊕ Sk + i ⊕ Dki, for 1 ≤ k ≤ n − 2, 0 ≤ i ≤ n − k;
Dki1 ∨ Dki2 ∨ ¬Ak, for 1 ≤ k ≤ n − 2, 0 ≤ i1 < i2 ≤ n − k;
¬Dki1 ∨ ⋯ ∨ ¬Dkin − k − 1 ∨ Ak, for 1 ≤ k ≤ n − 2, 0 ≤ i1 < ⋯ < in − k − 1 ≤ n − k;
except that, as an optimization, S0 is forced to false, so that Dk0 is simply equal to Sk.
C++ using the BuDDy library
A different approach: have a binary formula (as binary decision diagram) that takes the bits of S
as input and is true iff that gives some fixed values of Y
or N
at certain selected positions. If that formula is not constant false, select a free position and recurse, trying both Y
and N
. If there is no free position, we have found a possible output value. If the formula is constant false, backtrack.
This works relatively reasonable because there are so few possible values so that we can often backtrack early. I tried a similar idea with a SAT solver, but that was less successful.
#include<vector>
#include<iostream>
#include<bdd.h>
// does vars[0..i-1] differ from vars[n-i..n-1] in at least two positions?
bdd cond(int i, int n, const std::vector<bdd>& vars){
bdd x1 { bddfalse };
bdd xs { bddfalse };
for(int k=0; k<i; ++k){
bdd d { vars[k] ^ vars[n-i+k] };
xs |= d & x1;
x1 |= d;
}
return xs;
}
void expand(int i, int n, int &c, const std::vector<bdd>& conds, bdd x){
if (x==bddfalse)
return;
if (i==n-2){
++c;
return;
}
expand(i+1,n,c,conds, x & conds[2*i]);
x &= conds[2*i+1];
expand(i+1,n,c,conds, x);
}
int count(int n){
if (n==1) // handle trivial case
return 1;
bdd_setvarnum(n-1);
std::vector<bdd> vars {};
vars.push_back(bddtrue); // assume first bit is 1
for(int i=0; i<n-1; ++i)
if (i%2==0) // vars in mixed order
vars.push_back(bdd_ithvar(i/2));
else
vars.push_back(bdd_ithvar(n-2-i/2));
std::vector<bdd> conds {};
for(int i=n-1; i>1; --i){ // handle long blocks first
bdd cnd { cond(i,n,vars) };
conds.push_back( cnd );
conds.push_back( !cnd );
}
int c=0;
expand(0,n,c,conds,bddtrue);
return c;
}
int main(void){
bdd_init(20000000,1000000);
bdd_gbc_hook(nullptr); // comment out to see GC messages
for(int n=1; ; ++n){
std::cout << n << " " << count(n) << "\n" ;
}
}
To compile with debian 8 (jessie), install libbdd-dev
and do g++ -std=c++11 -O3 -o hb hb.cpp -lbdd
. It might be useful to increase the first argument to bdd_init
even more.