Testing if a number is a square
Score: 27462
About time I'd compete in a GOLF challenge :D
# First we look at the last 6 bits of the number. These bits must be
# one of the following:
#
# 0x00, 0x01, 0x04, 0x09, 0x10, 0x11,
# 0x19, 0x21, 0x24, 0x29, 0x31, 0x39
#
# That's 12/64, or a ~80% reduction in composites!
#
# Conveniently, a 64 bit number can hold 2**6 binary values. So we can
# use a single integer as a lookup table, by shifting. After shifting
# we check if the top bit is set by doing a signed comparison to 0.
and b, n, 0b111111
shl v, 0xc840c04048404040, b
le q, v, 0
jz no, q
jz yes, n
# Hacker's Delight algorithm - Newton-Raphson.
mov c, 1
sub x, n, 1
geu q, x, 2**32-1
jz skip32, q
add c, c, 16
shr x, x, 32
skip32:
geu q, x, 2**16-1
jz skip16, q
add c, c, 8
shr x, x, 16
skip16:
geu q, x, 2**8-1
jz skip8, q
add c, c, 4
shr x, x, 8
skip8:
geu q, x, 2**4-1
jz skip4, q
add c, c, 2
shr x, x, 4
skip4:
geu q, x, 2**2-1
add c, c, q
shl g, 1, c
shr t, n, c
add t, t, g
shr h, t, 1
leu q, h, g
jz newton_loop_done, q
newton_loop:
mov g, h
divu t, r, n, g
add t, t, g
shr h, t, 1
leu q, h, g
jnz newton_loop, q
newton_loop_done:
mulu u, h, g, g
cmp s, u, n
halt 0
yes:
mov s, 1
no:
halt 0
Score: 161558 227038 259038 260038 263068
I took the fastest integer square root algorithm I could find and return whether its remainder is zero.
# based on http://www.cc.utah.edu/~nahaj/factoring/isqrt.c.html
# converted to GOLF assembly for http://codegolf.stackexchange.com/questions/49356/testing-if-a-number-is-a-square
# unrolled for speed, original source commented out at bottom
start:
or u, t, 1 << 62
shr t, t, 1
gequ v, n, u
jz nope62, v
sub n, n, u
or t, t, 1 << 62
nope62:
or u, t, 1 << 60
shr t, t, 1
gequ v, n, u
jz nope60, v
sub n, n, u
or t, t, 1 << 60
nope60:
or u, t, 1 << 58
shr t, t, 1
gequ v, n, u
jz nope58, v
sub n, n, u
or t, t, 1 << 58
nope58:
or u, t, 1 << 56
shr t, t, 1
gequ v, n, u
jz nope56, v
sub n, n, u
or t, t, 1 << 56
nope56:
or u, t, 1 << 54
shr t, t, 1
gequ v, n, u
jz nope54, v
sub n, n, u
or t, t, 1 << 54
nope54:
or u, t, 1 << 52
shr t, t, 1
gequ v, n, u
jz nope52, v
sub n, n, u
or t, t, 1 << 52
nope52:
or u, t, 1 << 50
shr t, t, 1
gequ v, n, u
jz nope50, v
sub n, n, u
or t, t, 1 << 50
nope50:
or u, t, 1 << 48
shr t, t, 1
gequ v, n, u
jz nope48, v
sub n, n, u
or t, t, 1 << 48
nope48:
or u, t, 1 << 46
shr t, t, 1
gequ v, n, u
jz nope46, v
sub n, n, u
or t, t, 1 << 46
nope46:
or u, t, 1 << 44
shr t, t, 1
gequ v, n, u
jz nope44, v
sub n, n, u
or t, t, 1 << 44
nope44:
or u, t, 1 << 42
shr t, t, 1
gequ v, n, u
jz nope42, v
sub n, n, u
or t, t, 1 << 42
nope42:
or u, t, 1 << 40
shr t, t, 1
gequ v, n, u
jz nope40, v
sub n, n, u
or t, t, 1 << 40
nope40:
or u, t, 1 << 38
shr t, t, 1
gequ v, n, u
jz nope38, v
sub n, n, u
or t, t, 1 << 38
nope38:
or u, t, 1 << 36
shr t, t, 1
gequ v, n, u
jz nope36, v
sub n, n, u
or t, t, 1 << 36
nope36:
or u, t, 1 << 34
shr t, t, 1
gequ v, n, u
jz nope34, v
sub n, n, u
or t, t, 1 << 34
nope34:
or u, t, 1 << 32
shr t, t, 1
gequ v, n, u
jz nope32, v
sub n, n, u
or t, t, 1 << 32
nope32:
or u, t, 1 << 30
shr t, t, 1
gequ v, n, u
jz nope30, v
sub n, n, u
or t, t, 1 << 30
nope30:
or u, t, 1 << 28
shr t, t, 1
gequ v, n, u
jz nope28, v
sub n, n, u
or t, t, 1 << 28
nope28:
or u, t, 1 << 26
shr t, t, 1
gequ v, n, u
jz nope26, v
sub n, n, u
or t, t, 1 << 26
nope26:
or u, t, 1 << 24
shr t, t, 1
gequ v, n, u
jz nope24, v
sub n, n, u
or t, t, 1 << 24
nope24:
or u, t, 1 << 22
shr t, t, 1
gequ v, n, u
jz nope22, v
sub n, n, u
or t, t, 1 << 22
nope22:
or u, t, 1 << 20
shr t, t, 1
gequ v, n, u
jz nope20, v
sub n, n, u
or t, t, 1 << 20
nope20:
or u, t, 1 << 18
shr t, t, 1
gequ v, n, u
jz nope18, v
sub n, n, u
or t, t, 1 << 18
nope18:
or u, t, 1 << 16
shr t, t, 1
gequ v, n, u
jz nope16, v
sub n, n, u
or t, t, 1 << 16
nope16:
or u, t, 1 << 14
shr t, t, 1
gequ v, n, u
jz nope14, v
sub n, n, u
or t, t, 1 << 14
nope14:
or u, t, 1 << 12
shr t, t, 1
gequ v, n, u
jz nope12, v
sub n, n, u
or t, t, 1 << 12
nope12:
or u, t, 1 << 10
shr t, t, 1
gequ v, n, u
jz nope10, v
sub n, n, u
or t, t, 1 << 10
nope10:
or u, t, 1 << 8
shr t, t, 1
gequ v, n, u
jz nope8, v
sub n, n, u
or t, t, 1 << 8
nope8:
or u, t, 1 << 6
shr t, t, 1
gequ v, n, u
jz nope6, v
sub n, n, u
or t, t, 1 << 6
nope6:
or u, t, 1 << 4
shr t, t, 1
gequ v, n, u
jz nope4, v
sub n, n, u
or t, t, 1 << 4
nope4:
or u, t, 1 << 2
shr t, t, 1
gequ v, n, u
jz nope2, v
sub n, n, u
or t, t, 1 << 2
nope2:
or u, t, 1 << 0
shr t, t, 1
gequ v, n, u
jz nope0, v
sub n, n, u
nope0:
end:
not s, n # return !remainder
halt 0
# before unrolling...
#
# start:
# mov b, 1 << 62 # squaredbit = 01000000...
# loop: # do {
# or u, b, t # u = squaredbit | root
# shr t, t, 1 # root >>= 1
# gequ v, n, u # if remainder >= u:
# jz nope, v
# sub n, n, u # remainder = remainder - u
# or t, t, b # root = root | squaredbit
# nope:
# shr b, b, 2 # squaredbit >>= 2
# jnz loop, b # } while (squaredbit > 0)
# end:
# not s, n # return !remainder
# halt 0
EDIT 1: removed squaring test, return !remainder directly, save 3 ops per test
EDIT 2: use n as the remainder directly, save 1 op per test
EDIT 3: simplified the loop condition, save 32 ops per test
EDIT 4: unrolled the loop, save about 65 ops per test
Score: 344493
Does a simple binary search within the interval [1, 4294967296)
to approximate sqrt(n)
, then checks if n
is a perfect square.
mov b, 4294967296
mov c, -1
lesser:
add a, c, 1
start:
leu k, a, b
jz end, k
add c, a, b
shr c, c, 1
mulu d, e, c, c
leu e, d, n
jnz lesser, e
mov b, c
jmp start
end:
mulu d, e, b, b
cmp s, d, n
halt 0