Count arrays of periods
C#, n=128 in about 2:40
using System;
using System.Collections.Generic;
using System.Linq;
namespace Sandbox
{
class PPCG137436
{
public static void Main(string[] args)
{
if (args.Length == 0) args = new string[] { "1", "2", "4", "8", "16", "32", "64", "128" };
foreach (string arg in args)
{
Console.WriteLine(Count(new int[(int)(0.5 + Math.Log(int.Parse(arg)) / Math.Log(2))], 0));
}
}
static int Count(int[] periods, int idx)
{
if (idx == periods.Length)
{
//Console.WriteLine(string.Join(", ", periods));
return 1;
}
int count = 0;
int p = idx == 0 ? 1 : periods[idx - 1];
for (int q = p; q <= 1 << (idx + 1); q++)
{
periods[idx] = q;
if (q == p || q > 1 << idx || p + q - Gcd(p, q) > 1 << idx && UnificationPasses(periods, idx, q)) count += Count(periods, idx + 1);
}
return count;
}
private static int Gcd(int a, int b)
{
while (a > 0) { int tmp = a; a = b % a; b = tmp; }
return b;
}
private static bool UnificationPasses(int[] periods, int idx, int q)
{
UnionSet union = new UnionSet(1 << idx);
for (int i = 0; i <= idx; i++)
{
for (int j = 0; j + periods[i] < Math.Min(2 << i, 1 << idx); j++) union.Unify(j, j + periods[i]);
}
IDictionary<int, long> rev = new Dictionary<int, long>();
for (int k = 0; k < 1 << idx; k++) rev[union.Find(k)] = 0L;
for (int k = 0; k < 1 << idx; k++) rev[union.Find(k)] |= 1L << k;
long zeroes = rev[union.Find(0)]; // wlog the value at position 0 is 0
ISet<int> onesIndex = new HashSet<int>();
// This can be seen as the special case of the next loop where j == -1.
for (int i = 0; i < idx; i++)
{
if (periods[i] == 2 << i) onesIndex.Add((2 << i) - 1);
}
for (int j = 0; j < idx - 1 && periods[j] == 2 << j; j++)
{
for (int i = j + 1; i < idx; i++)
{
if (periods[i] == 2 << i)
{
for (int k = (1 << j) + 1; k <= 2 << j; k++) onesIndex.Add((2 << i) - k);
}
}
}
for (int i = 1; i < idx; i++)
{
if (periods[i] == 1) continue;
int d = (2 << i) - periods[i];
long dmask = (1L << d) - 1;
if (((zeroes >> 1) & (zeroes >> periods[i]) & dmask) == dmask) onesIndex.Add(periods[i] - 1);
}
long ones = 0L;
foreach (var key in onesIndex) ones |= rev[union.Find(key)];
if ((zeroes & ones) != 0) return false; // Definite contradiction!
rev.Remove(union.Find(0));
foreach (var key in onesIndex) rev.Remove(key);
long[] masks = System.Linq.Enumerable.ToArray(rev.Values);
int numFilteredMasks = 0;
long set = 0;
long M = 0;
for (int i = 1; i <= idx; i++)
{
if (periods[i - 1] == 1) continue;
// Sort the relevant masks to the start
if (i == idx) numFilteredMasks = masks.Length; // Minor optimisation: skip the filter because we know we need all the masks
long filter = (1L << (1 << i)) - 1;
for (int j = numFilteredMasks; j < masks.Length; j++)
{
if ((masks[j] & filter) != 0)
{
var tmp = masks[j];
masks[j] = masks[numFilteredMasks];
masks[numFilteredMasks++] = tmp;
}
}
// Search for a successful assignment, using the information from the previous search to skip a few initial values in this one.
set |= (1L << numFilteredMasks) - 1 - M;
M = (1L << numFilteredMasks) - 1;
while (true)
{
if (TestAssignment(periods, i, ones, masks, set)) break;
if (set == 0) return false; // No suitable assignment found
// Gosper's hack with variant to reduce the number of bits on overflow
long c = set & -set;
long r = set + c;
set = (((r ^ set) >> 2) / c) | (r & M);
}
}
return true;
}
private static bool TestAssignment(int[] periods, int idx, long ones, long[] masks, long assignment)
{
for (int j = 0; j < masks.Length; j++, assignment >>= 1) ones |= masks[j] & -(assignment & 1);
for (int i = idx - 1; i > 0; i--) // i == 0 is already handled in the unification process.
{
if (Period(ones, 2 << i, periods[i - 1]) < periods[i]) return false;
}
return true;
}
private static int Period(long arr, int n, int min)
{
for (int p = min; p <= n; p++)
{
// If the bottom n bits have period p then the bottom (n-p) bits equal the bottom (n-p) bits of the integer shifted right p
long mask = (1L << (n - p)) - 1L;
if ((arr & mask) == ((arr >> p) & mask)) return p;
}
throw new Exception("Unreachable");
}
class UnionSet
{
private int[] _Lookup;
public UnionSet(int size)
{
_Lookup = new int[size];
for (int k = 0; k < size; k++) _Lookup[k] = k;
}
public int Find(int key)
{
var l = _Lookup[key];
if (l != key) _Lookup[key] = l = Find(l);
return l;
}
public void Unify(int key1, int key2)
{
int root1 = Find(key1);
int root2 = Find(key2);
if (root1 < root2) _Lookup[root2] = root1;
else _Lookup[root1] = root2;
}
}
}
}
Extending to n=256 would require switching to BigInteger
for the masks, which probably kills the performance too much for n=128 to pass without new ideas, let alone n=256.
Under Linux, compile with mono-csc
and execute with mono
.
Basic explanation
I'm not going to do a line-by-line dissection, just an overview of the concepts.
As a rule of thumb, I'm happy to iterate through on the order of 250 elements in a brute-force combinatoric program. To get to n=128 it is therefore necessary to use an approach which doesn't analyse every bitstring. So rather than working forwards from bit strings to period sequences, I work backwards: given a period sequence, is there a bitstring which realises it? For n=2x there's an easy upper bound of 2x(x+1)/2 period sequences (vs 22x bitstrings).
Some of the arguments use the string periodicity lemma:
Let
p
andq
be two periods of a string of lengthn
. Ifp + q ≤ n + gcd(p, q)
thengcd(p, q)
is also a period of the string.
Wlog I will assume that all of the bitstrings under consideration start with 0
.
Given a period sequence [p1 p2 ... pk]
where pi
is the period of the prefix of length 2i (p0 = 1
always), there are some easy observations about possible values of pk+1
:
pk+1 ≥ pk
since a period of a stringS
is also a period of any prefix ofS
.pk+1 = pk
is always a possible extension: just repeat the same primitive string for twice as many characters.2k < pk+1 ≤ 2k+1
is always a possible extension. It suffices to show this forpk+1 = 2k+1
because an aperiodic string of lengthL
can be extended to an aperiodic string of lengthL+1
by appending any letter which isn't its first letter.Take a string
Sx
of length 2k whose period ispk
and consider the stringSxyS
of length 2k+1. ClearlySxyS
has a period 2k+1. Suppose its periodq
is smaller.Then
2k+1 + q ≤ 2k+1+1 ≤ 2k+1 + gcd(2k+1, q)
so by the periodicity lemmagcd(2k+1, q)
is also a period ofSxyS
, and since the greatest divisor is less than or equal to its arguments andq
is the smallest period, we requireq
to be a proper factor of 2k+1. Since its quotient cannot be 2, we haveq ≤ (2k+1)/3
.Now, since
q ≤ 2k
is a period ofSxyS
it must be a period ofSx
. But the period ofSx
ispk
. We have two cases:gcd(pk, q) = pk
, or equivalentlypk
divides exactly intoq
.pk + q > 2k + gcd(pk, q)
so that the periodicity lemma doesn't force a smaller period.
Consider first the second case.
pk > 2k + gcd(pk, q) - q ≥ 2k+1 - q ≥ 2k+1 - (2k+1)/3 ≥ 2q
, contradicting the definition ofpk
as the period ofSx
. Therefore we are forced into the conclusion thatpk
is a factor ofq
.But since
q
is a period ofSx
andpk
is the period ofSx
, the prefix of lengthq
is justq/pk
copies of the prefix of lengthpk
, so we see thatpk
is also a period ofSxyS
.Therefore the period of
SxyS
is eitherpk
or 2k+1. But we have two options fory
! At most one choice ofy
will give periodpk
, so at least one will give period 2k+1. QED.The periodicity lemma allows us to reject some of the remaining possible extensions.
Any extension which hasn't passed a quick-accept or a quick-reject test needs to be tested constructively.
The construction of a bitstring given a period sequence is essentially a satisifiability problem, but it has a lot of structure. There are 2k - pk
simple equality constraints implied by each prefix period, so I use a union set data structure to combine bits into independent clusters. This was enough to tackle n=64, but for n=128 it was necessary to go further. I employ two useful lines of argument:
- If the prefix of length
M
is01M-1
and the prefix of lengthL > M
has periodL
then the prefix of lengthL
must end in1M
. This is most powerful precisely in the cases which would otherwise have most independent clusters, which is convenient. - If the prefix of length
M
is0M
and the prefix of lengthL > M
has periodL - d
withd < M
and ends in0d
then it must in fact end in10d
. This is most powerful at the opposite extreme, when the period sequence starts with a lot of ones.
If we don't get an immediate contradiction by forcing the cluster with the first bit (assumed to be zero) to be one then we brute force (with some micro-optimisations) over the possible values for the unforced clusters. Note that the order is in descending number of ones because if the i
th bit is a one then the period cannot be i
and we want to avoid periods which are shorter than the ones which are already enforced by the clustering. Going down increases the chances of finding a valid assignment early.
Rust, 32, 10s 11s 29s on my laptop
Call it with the bitsize as the command line argument.
Clever techniques: represent bitstrings directly as numbers, use bittwiddling to check for cycles. Only search the first half of the bitstrings, those starting with 0, because the array of periods of a bitstring and its inverse (0s swapped for 1s) are identical. If every possibility for the final position has already occured, I don't search it.
More clever stuff:
To deduplicate each block (strings where the first half of the bits are the same) I use a bitvector, which is much faster than a hashset, since the final cycle lengths don't need hashing.
Also, I skip the first steps of the cycle checking, because I know that the final cycle can't be shorter than the second-to-last cycle.
After lots of profiling, I can now tell that almost all of the time is being used productively, so algorithmic improvements will be required to improve from here, I think. I also switched to 32 bit integers to save a little more time.
//extern crate cpuprofiler;
//use cpuprofiler::PROFILER;
extern crate bit_vec;
use bit_vec::BitVec;
use std::collections::HashSet;
fn cycle_len(num: u32, mask: u32, skip_steps: usize) -> usize {
let mut left = num >> skip_steps;
let mut mask = mask >> skip_steps;
let mut steps = skip_steps;
loop {
left >>= 1;
if left == (num & mask) {
return steps;
}
mask >>= 1;
steps += 1;
}
}
fn all_cycles(size_log: usize) -> HashSet<Vec<usize>> {
let mut set = HashSet::new();
if size_log == 0 {
set.insert(vec![]);
return set;
} else if size_log == 1 {
set.insert(vec![0]);
set.insert(vec![1]);
return set;
}
let size: usize = 1 << size_log;
let half_size: usize = 1 << size_log - 1;
let shift_and_mask: Vec<(usize, u32)> = (1..size_log)
.map(|subsize_log| {
let subsize = 1 << subsize_log;
(size - subsize, (1 << (subsize - 1)) - 1)
})
.collect();
let size_mask = (1 << (size - 1)) - 1;
for block in 0..(1 << (half_size - 1)) as u32 {
let start: u32 = block << half_size;
if block % 1024 == 0 {
eprintln!(
"{} ({:.2}%): {}",
start,
start as f64 / (1u64 << size - 1) as f64 * 100f64,
set.len()
);
}
let leader = {
let mut cycles = Vec::new();
for &(shift, mask) in &shift_and_mask {
let subnum = start >> shift;
cycles.push(cycle_len(subnum, mask, 0));
}
cycles
};
let &end = leader.last().unwrap();
if (end..size).all(|count| {
let mut new = leader.clone();
new.push(count);
set.contains(&new)
})
{
continue;
}
let mut subset = BitVec::from_elem(size, false);
for num in start..start + (1 << half_size) {
subset.set(cycle_len(num, size_mask, end), true);
}
for (unique_cycle_len, _) in subset.into_iter().enumerate().filter(|x| x.1) {
let mut new_l = leader.clone();
new_l.push(unique_cycle_len);
set.insert(new_l);
}
}
set
}
fn main() {
let size: f32 = std::env::args().nth(1).unwrap().parse().unwrap();
let size_log = size.log2() as usize;
//PROFILER.lock().unwrap().start("./my-prof.profile").unwrap();
let cycles = all_cycles(size_log);
//PROFILER.lock().unwrap().stop().unwrap();
println!(
"Number of distinct arrays of periods of bitstrings of length {} is {}",
1 << size_log,
cycles.len()
);
}
Put bit-vec = "0.4.4"
in your Cargo.toml
If you'd like to run this, clone this: github.com/isaacg1/cycle then Cargo build --release
to build, then Cargo run --release 32
to run.
Rust, 16
use std::collections::HashSet;
use std::io;
fn main() {
print!("Enter a pow of two:");
let mut input_text = String::new();
io::stdin()
.read_line(&mut input_text)
.expect("failed to read from stdin");
let n_as_string = input_text.trim();
match n_as_string.parse::<usize>() {
Ok(n) => {
let log2 = (n as f64).log(2_f64) as usize;
if n != 1 << log2 {
panic!("{} is not a power of two", n);
}
let count = compute_count_array(log2, n);
println!("n = {} -> count = {}", n, count);
}
Err(_) => { panic!("{} is not a number", n_as_string); }
}
}
fn compute_count_array(log2:usize, n: usize) -> usize {
let mut z = HashSet::new();
let mut s:Vec<bool> = vec!(false; n);
loop {
let mut y:Vec<usize> = vec!();
for j in 0..log2+1 {
let p = find_period(&s[0..1<<j]);
y.push(p);
}
z.insert(y);
if !next(&mut s) {
break;
}
}
z.len()
}
#[inline]
fn find_period(s: &[bool]) -> usize {
let n=s.len();
let mut j=1;
while j<n {
if s[0..n-j] == s[j..n] {
return j;
}
j+=1;
}
n
}
#[inline]
fn next(s:&mut Vec<bool>) -> bool {
if s[0] {
s[0] = false;
for i in 1..s.len() {
if s[i] {
s[i] = false;
} else {
s[i] = true;
return true;
}
}
return false
} else {
s[0] = true;
}
true
}
Try it online!
Compilation: rustc -O <name>.rs
The string is implemented as a Bool vector.
The
next
function iterate through the combinations ;The
find_period
takes a Bool slice and returns the period ;The
compute_count_array
does the job for each "power of two" subsequence of each combinations of Bools.
Theoretically, no overflow is expected until 2^n
exceeds the u64 maximum value, ie n > 64
. This limit could be outreached with an expensive test on s=[true, true, ..., true].
Bad news is: it returns 317 for n=16, but I don't know why. I don't know either if it will make it in ten minutes for n=32, since the Vec<bool>
is not optimized for this kind of computation.
EDIT
I've managed to remove the limit of 64 for
n
. Now, it won't crash untiln
is greater than the max usize integer.I found why the previous code returned 317 for
n=32
. I was counting sets of periods and not arrays of periods. There were three arrays with the same elements:[1, 2, 3, 3, 8] -> {1, 2, 3, 8} [1, 2, 3, 8, 8] -> {1, 2, 3, 8} [1, 1, 3, 3, 7] -> {1, 3, 7} [1, 1, 3, 7, 7] -> {1, 3, 7} [1, 1, 3, 3, 8] -> {1, 3, 8} [1, 1, 3, 8, 8] -> {1, 3, 8}
Now it works. It is still slow but it works.