How much can you quickly multiply?
Python 2.7
42.575 = ( 6,812,000 / 160,000 ) approx
Code:
import gmpy2
def fac1(n):
m=lambda(L):([]if len(L)%2==0 else[L.pop()])+map(lambda(l):l[0]*l[1],zip(L[1::2],L[-2::-2]))
L=map(gmpy2.mpz,xrange(1,n+1))
Number = (len(L)-1).bit_length()
while Number:Number-=1;L=m(L)
return L[0]
def fac2(n):
global E; E=0
def f(i):
global E; E+=i//2
return[]if i==1 else f(i//2)+range(3,i,2)+[[1,i][i%2]]
m=lambda(L):([]if len(L)%2==0 else[L.pop()])+map(lambda(l):l[0]*l[1],zip(L[1::2],L[-2::-2]))
L=map(gmpy2.mpz,f(n))
N=(len(L)-1).bit_length()
while N: N-=1;L=m(L)
return L[0]<<E
Test:
import time
start = time.time()
baseline(160000)
print time.time()-start
start = time.time()
fac1(6811000)
print time.time()-start
start = time.time()
fac2(6812000)
print time.time()-start
start = time.time()
gmpy2.fac(26000000)
print time.time()-start
Output:
10.0069999695
10.0729999542
10.0360000134
9.98699998856
How it works:
Bigger multiplications take more time, thus we want to do as many small multiplications as possible. This is especially true in Python where for numbers less that 2^64
we use hardware arithmetic, and above that we use software. So, in m(L)
, we start with a list L
; if it's odd length we remove one number from consideration to make it even again. Then we multiply element 1
with element -2
, element 3
with -4
, etc, so that
m([1,2,3,4,5,6,7,8]) = [2*7, 4*5, 6*3, 8*1] = [14, 20, 18, 8]
m([10,12,6]) = [360,112]
m([120,6]) = [40320]
This approach ensures we're using hardware arithmetic for as long as possible, following which we switch onto the efficient gmc arithmetic library.
In fac2
, we take a more classic divide and conquer approach as well, where we split out every multiple of 2 and bitshift them at the end for a trivial performance boost. I've included it here because it's usually around 0.5% faster than fac1
.
Golfed Version of fac1
(because I can), 220B
import gmpy2
def f(n):
m=lambda(L):([]if len(L)%2==0 else[L.pop()])+map(lambda(l):l[0]*l[1],zip(L[1::2],L[-2::-2]))
L=map(gmpy2.mpz,xrange(1,n+1));N=(len(L)-1).bit_length()
while N:N-=1;L=m(L)
return L[0]
C++ with GMP, score = 55.55 (10,000,000 / 180,000)
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <vector>
#include <iostream>
#include <queue>
#include <gmpxx.h>
int main(int argc, char *argv[]) {
uint64_t n = atoi(argv[1]);
// Iterate through 1..n. Strip off powers of 2. Multiply
// remainders together into <= 64 bit chunks.
uint64_t twos = 0;
std::vector<uint64_t> terms;
uint64_t m = 1;
for(uint64_t i = 1; i <= n; i++) {
uint64_t j = __builtin_ctzll(i);
twos += j;
uint64_t k = i >> j;
if(__builtin_clzll(m) + __builtin_clzll(k) >= 64) {
m *= k;
} else {
terms.push_back(m);
m = k;
}
}
if(m != 1) terms.push_back(m);
// convert to gmp
// why isn't there a 64-bit constructor?
std::queue<mpz_class> gmpterms;
for(int i = 0; i < terms.size(); i++) {
mpz_class x = (uint32_t)(terms[i] >> 32);
x <<= 32;
x += (uint32_t)terms[i];
gmpterms.push(x);
}
// pop two from the bottom, multiply them, push on the end.
while(gmpterms.size() > 1) {
mpz_class a = gmpterms.front();
gmpterms.pop();
mpz_class b = gmpterms.front();
gmpterms.pop();
gmpterms.push(a * b);
}
mpz_class r = gmpterms.front();
r <<= twos;
//std::cout << r << std::endl;
}
Java - 125.15 (21,400,000 / 171,000)
Also shamelessly copied from Peter Luschny's Github repo (thanks @semi-extrinsic) and licensed under the MIT license, this uses the "prime factorization nested squaring" algorithm as proposed by Albert Schönhage et al. (according to Luschny's factorial algorithms description page).
I slightly adapted the algorithm to use Java's BigInteger and to not use a lookup table for n < 20.
Compiled with gcj, which uses GMP for its BigInteger implementation, and ran on Linux 3.12.4 (Gentoo), on a Core i7 4700MQ at 2.40GHz
import java.math.BigInteger;
public class PrimeSieveFactorialSchoenhage {
private static int[] primeList, multiList;
public static BigInteger factorial(int n) {
int log2n = 31 - Integer.numberOfLeadingZeros(n);
int piN = log2n < 2 ? 1 : 2 + (15 * n) / (8 * (log2n - 1));
primeList = new int[piN];
multiList = new int[piN];
int len = primeFactors(n);
return nestedSquare(len).shiftLeft(n - Integer.bitCount(n));
}
private static BigInteger nestedSquare(int len) {
if (len == 0) {
return BigInteger.ONE;
}
int i = 0, mult = multiList[0];
while (mult > 1) {
if ((mult & 1) == 1) { // is mult odd ?
primeList[len++] = primeList[i];
}
multiList[i++] = mult / 2;
mult = multiList[i];
}
BigInteger ns = nestedSquare(i);
if (len <= i) {
return ns.multiply(ns);
}
return product(primeList, i, len - i).multiply(ns.multiply(ns));
}
private static BigInteger product(int[] a, int start, int length) {
if (length == 0) {
return BigInteger.ONE;
}
int len = (length + 1) / 2;
long[] b = new long[len];
int i, j, k;
for (k = 0, i = start, j = start + length - 1; i < j; i++, k++, j--) {
b[k] = a[i] * (long) a[j];
}
if (i == j) {
b[k++] = a[j];
}
return recProduct(b, 0, k - 1);
}
private static BigInteger recProduct(long[] s, int n, int m) {
if (n > m) {
return BigInteger.ONE;
}
if (n == m) {
return BigInteger.valueOf(s[n]);
}
int k = (n + m) >> 1;
return recProduct(s, n, k).multiply(recProduct(s, k + 1, m));
}
private static int primeFactors(int n) {
int[] primes = new int[n < 17 ? 6 : (int) Math.floor(n / (Math.log(n) - 1.5))];
int numPrimes = makePrimeList(n, primes);
int maxBound = n / 2, count = 0;
int start = indexOf(primes, 2, 0, numPrimes - 1);
int end = indexOf(primes, n, start, numPrimes);
for (int i = start; i < end; i++) {
int prime = primes[i];
int m = prime > maxBound ? 1 : 0;
if (prime <= maxBound) {
int q = n;
while (q >= prime) {
m += q /= prime;
}
}
primeList[count] = prime;
multiList[count++] = m;
}
return count;
}
private static int indexOf(final int[] data, int value, int low, int high) {
while (low < high) {
int mid = (low + high) >>> 1;
if (data[mid] < value) {
low = mid + 1;
} else {
high = mid;
}
}
if (low >= data.length) {
return low;
}
if (data[low] == value) {
low++;
}
return low;
}
private static int makePrimeList(int n, int[] prime) {
boolean[] composite = new boolean[n / 3];
sieveOfEratosthenes(composite);
boolean toggle = false;
int p = 5, i = 0, j = 2;
prime[0] = 2;
prime[1] = 3;
while (p <= n) {
if (!composite[i++]) {
prime[j++] = p;
}
// -- never mind, it's ok.
p += (toggle = !toggle) ? 2 : 4;
}
return j; // number of primes
}
private static void sieveOfEratosthenes(final boolean[] composite) {
int d1 = 8;
int d2 = 8;
int p1 = 3;
int p2 = 7;
int s1 = 7;
int s2 = 3;
int n = 0;
int len = composite.length;
boolean toggle = false;
while (s1 < len) { // -- scan sieve
if (!composite[n++]) { // -- if a prime is found, cancel its multiples
int inc = p1 + p2;
for (int k = s1; k < len; k += inc) {
composite[k] = true;
}
for (int k = s1 + s2; k < len; k += inc) {
composite[k] = true;
}
}
if (toggle = !toggle) { // Never mind, it's ok.
s1 += d2;
d1 += 16;
p1 += 2;
p2 += 2;
s2 = p2;
} else {
s1 += d1;
d2 += 8;
p1 += 2;
p2 += 6;
s2 = p1;
}
}
}
public static void main(String[] args) {
int n = Integer.parseInt(args[0]);
long nanos = System.nanoTime();
BigInteger fact = factorial(n);
nanos = System.nanoTime() - nanos;
// Commented out because it takes ages to print
//System.out.println(fact);
System.out.println(nanos / 1e9);
}
}