Spanning paths in a tournament on n nodes

C++ (gcc), \$N=10\$

#include <algorithm>
#include <iostream>
#include <vector>
#include <cstring>
#include <ctime>
//a rewrite for vector<int>
struct vi
{
	int l,a[9];
	const int& operator[](int u) const {return a[u];}
	int& operator[](int u) {return a[u];}
};
bool operator < (const vi&a,const vi&b)
{
	for(int i=0;i<a.l;++i) if(a[i]!=b[i]) return a[i]<b[i];
	return 0;
}
bool operator == (const vi&a,const vi&b)
{
	for(int i=0;i<a.l;++i) if(a[i]!=b[i]) return 0;
	return 1;
}
std::vector<vi> g[13];
void dfs(int x,int u,const vi&c,int*w)
{
	++w[u];
	if(c[x]&u)
		for(int o=c[x]&u;o;o^=o&-o)
			dfs(__builtin_ctz(o),u^(o&-o),c,w);
}
void dfsf(int x,int u,const vi&c,int*w)
{
	++w[u];
	if((~c[x])&u)
		for(int o=(~c[x])&u;o;o^=o&-o)
			dfsf(__builtin_ctz(o),u^(o&-o),c,w);
}
int main()
{
	g[0].push_back(vi());
	for(int i=0;i<9;++i)
	{
		for(auto t:g[i])
		{
			int m1=0,m=0;
			for(int j=0,r;j<i;++j)
			{
				if((r=__builtin_popcount(t[j]))>m1) m=0,m1=r;
				if(r==m1) m|=1<<j;
			}
			for(int e=0;e<(1<<i);++e) if(__builtin_popcount(e)>m1
			||(__builtin_popcount(e)==m1&&((e&m)==m))) //ensure the node has the highest degree
			{
				vi u; u.l=i+1;
				for(int p=0;p<=i;++p) //enumerate position
				{
					bool s=(p==0); int x;
					#define yield(l,c) do{if((x=(c))>u[l]&&!s) goto skip; else s|=x<u[l],u[l]=x;}while(0)
					for(int a=0;a<p;++a)
						yield(a,((t[a]>>p)<<(p+1))|(t[a]&((1<<p)-1))|((!((e>>a)&1))<<p));
					yield(p,((e>>p)<<(p+1))|(e&((1<<p)-1)));
					for(int a=p;a<i;++a)
						yield(a+1,((t[a]>>p)<<(p+1))|(t[a]&((1<<p)-1))|((!((e>>a)&1))<<p));
					skip:;
					#undef yield
				}
				g[i+1].push_back(u);
			}
		}
		std::sort(g[i+1].begin(),g[i+1].end());
		g[i+1].erase(std::unique(g[i+1].begin(),g[i+1].end()),g[i+1].end());
		int sz=g[i+1].size();
		std::vector<vi> h(sz);
		#pragma omp parallel for
		for(int w=0;w<sz;++w)
		{
			vi u=g[i+1][w],v;
			auto sw=[&](int x,int y) {
				std::swap(v[x],v[y]);
				for(int s=0;s<v.l;++s)
				{
					int&o=v[s];
					if(((o>>x)^(o>>y))&1) o^=(1<<x)|(1<<y);
				}
			};
			for(int x=0;x<=i;++x)
				for(int y=x+1;y<=i;++y)
				{
					{v=u;sw(x,y);u=std::min(u,v);}
					for(int z=y+1;z<=i;++z)
					{
						{v=u;sw(x,y);sw(y,z);u=std::min(u,v);}
						{v=u;sw(x,y);sw(x,z);u=std::min(u,v);}
						{v=u;sw(y,z);sw(x,z);u=std::min(u,v);}
						{v=u;sw(y,z);sw(x,y);u=std::min(u,v);}
						{v=u;sw(x,z);sw(x,y);u=std::min(u,v);}
						{v=u;sw(x,z);sw(y,z);u=std::min(u,v);}
					}
					{v=u;sw(x,y);u=std::min(u,v);}
				}
			h[w]=u;
		}
		std::sort(h.begin(),h.end());
		h.erase(std::unique(h.begin(),h.end()),h.end());
		g[i+1]=h;
	}
	std::cout<<"pre ("<<clock()*1./CLOCKS_PER_SEC<<"s)\n";
	for(int o=0;o<=9;++o)
	{
		int mx=0;
		const int s=g[o].size();
		#pragma omp parallel for reduction(max:mx)
		for(int t=0;t<s;++t)
		{
			int a[9<<9],b[9<<9];
			memset(a,0,sizeof(int)*(o<<o));
			memset(b,0,sizeof(int)*(o<<o));
			for(int i=0;i<o;++i)
				dfs(i,((1<<o)-1)^(1<<i),g[o][t],a+(i<<o)),
				dfsf(i,((1<<o)-1)^(1<<i),g[o][t],b+(i<<o));
			int r[9][9],c=(1<<o)-1,vx[1<<9];
			for(int p=0;p<o;++p)
			{
				int*A=a+(p<<o),vn=0;
				for(int x=0;x<=c;++x) if(A[x]) vx[vn++]=x;
				for(int q=0;q<o;++q) if(p!=q)
				{
					int*B=b+(q<<o),su=0;
					for(int u=0;u<vn;++u)
						su+=A[vx[u]]*B[c^vx[u]];
					r[p][q]=su;
				}
			}
			int m1=0,m=0;
			for(int j=0,pc;j<o;++j)
			{
				if((pc=__builtin_popcount(g[o][t][j]))>m1) m=0,m1=pc;
				if(pc==m1) m|=1<<j;
			}
			for(int i=0;i<(1<<o);++i) if(__builtin_popcount(i)>m1
			||(__builtin_popcount(i)==m1&&((i&m)==m)))
			{
				int tot=(o==0);
				for(int j=0;j<o;++j)
					if(i&(1<<j))
					{
						tot+=a[j<<o];
						for(int ii=c^i;ii;ii^=ii&-ii)
							tot+=r[j][__builtin_ctz(ii)];
					}
					else tot+=b[j<<o];
				mx=std::max(mx,tot);
			}
		}
		std::cout<<"a("<<o+1<<") = "<<mx<<" ("<<clock()*1./CLOCKS_PER_SEC<<"s)\n";
	}
}

Try it online!

Compile with g++ and -Ofast -funroll-all-loops -ffast-math -fno-stack-protector -march=native -fopenmp. Note that OpenMP is used for parallelism.

Output on my modest computer:

pre (5.217s)
a(1) = 1 (5.218s)
a(2) = 1 (5.218s)
a(3) = 3 (5.219s)
a(4) = 5 (5.221s)
a(5) = 15 (5.221s)
a(6) = 45 (5.222s)
a(7) = 189 (5.223s)
a(8) = 661 (5.225s)
a(9) = 3357 (5.318s)
a(10) = 15745 (18.634s)

The run time may differ a lot because of the use of OpenMP parallelism.


Explanation for \$N=9\$ version:

This code is two-fold. First, it will try to generate all tournament graphs up to isomorphism. Then, it will count the number of hamilton paths on these generated graphs. Adjacency matrices of graphs are stored as arrays of bitmasks (i.e. arrays of uint) for performance reasons.

The second part is relatively easier. We can just use depth-first-search to search for all possible paths and use bitmasks to speed up. The time complexity is \$O(\text{no. of paths in all graphs})\$. It's also possible to use bitmask dynamic programming to do it, with \$O(2^NN^2)\$. The above code used the former while the latter one should be faster when N goes higher. OpenMP parallelization is used to speed it up.

The first part is harder. My approach is incremental: first generating all tournament graphs of \$t\$ nodes, then add one node to be tournament graphs of \$t+1\$ nodes. The trick here is to find an order for adding nodes. Here, instead of adding any node, we restrict the added node to have the greatest degree in the result graph. Therefore, a graph won't be generated too many times.

Graph-isomorphism isn't a easy thing to check too. Therefore we're not really trying to make every graph unique, but to have some degree of tolerance. I have two strategies used here. First, the newest \$t+1\$-th node won't be directly assigned number \$t\$. Instead, we try all the possible numbers from \$0\$ to \$t\$ and take the result graph with the smallest lexicographic order (you can use early-stopping when comparing lexicographic order, so not much overhead). After generating all \$t+1\$-node graphs in this way, we remove the found duplicates by sorting and making them unique. This alone can cut down the number of found graphs for \$N=9\$ to around \$3\times 10^6\$.

The second strategy is that, after found such candidates, we further try to remove duplicates by trying to swap every two nodes and see if we can get a graph with smaller lexicographic order. We then proceed by sorting and making them unique. These two strategies cut down the number of found graphs for \$N=9\$ to around \$10^6\$. (This strategy alone is more powerful than I thought, cutting down to around \$1.1\times 10^6\$)


Explanation for \$N=10\$ version:

Some improvements are made upon the \$N=9\$ version. First, instead of swapping two positions, three positions are swapped to achieve a better result. This successfully cut the number of graphs generated at \$N=9\$ to less than \$5 \times 10^5\$ (~50% improvement). OpenMP parallelization is also introduced here to speed up.

Generating all graphs for \$N=10\$ sounds unfeasible, so instead I try to expand the last step. We still try to add one node for graphs with \$9\$ nodes, but instead of actually generating these new graphs, we can actually count the hamilton paths in them directly!

Consider the relationship between a hamilton path and the new node. The new node can be the first end of the path so we need to find an edge from the new node and continue the path from that node. The new node can also be the second end so we can find an edge to the new node and trace back the path starting from that node.

Of course, the new node can be in the middle of the path. In this case, we can enumerate the two nodes in front and on the back of it. We can then enumerate the subset of nodes both halves of the path contain and then multiply them to get the result.

Notice that the contribution is the same for every pair of nodes adjacent to the new node regardless of other edges to the new node, so we can get that precalculated for every two nodes.

In order to get the number of paths starting and ending on each node covering a certain subset of nodes, we can use depth-first-search from each starting and ending point. (A bitmask dynamic programming could work too, but I guess it's slower)


JavaScript (Node.js), \$N=8\$

Simple brute-force. Finds \$a(8)=661\$ in approximately 45 seconds on my laptop. Computing \$a(9)\$ with this code would take several hours.

function search(n) {
  let m = Array(n).fill(0),
      max = 0;

  function count(m) {
    let res = 0;

    for(let y = 0; y < n; y++) {
      (function path(y, visited) {
        if((visited |= 1 << y) == (1 << n) - 1) {
          res++;
          return;
        }
        for(let msk = m[y] & ~visited, b; msk; msk ^= b) {
          path(31 - Math.clz32(b = msk & -msk), visited);
        }
      })(y);
    }
    return res;
  }

  (function build(x, y) {
    if(y == n - 1) {
      max = Math.max(max, count(m));
    }
    else {
      for(let j = (x == y + 1 ? 1 : 0); j <= (x == n - 1 && !y ? 0 : 1); j++) {
        m[y] = m[y] & ~(1 << x) | j << x;
        m[x] = m[x] & ~(1 << y) | (j ^ 1) << y;
        x + 1 < n ? build(x + 1, y) : build(y + 2, y + 1);
      }
    }
  })(1, 0);

  return max;
}

Try it online!