Testing if a number is a square

Score: 27462

About time I'd compete in a GOLF challenge :D

    # First we look at the last 6 bits of the number. These bits must be
    # one of the following:
    #
    #     0x00, 0x01, 0x04, 0x09, 0x10, 0x11,
    #     0x19, 0x21, 0x24, 0x29, 0x31, 0x39
    #
    # That's 12/64, or a ~80% reduction in composites!
    #
    # Conveniently, a 64 bit number can hold 2**6 binary values. So we can
    # use a single integer as a lookup table, by shifting. After shifting
    # we check if the top bit is set by doing a signed comparison to 0.

    and b, n, 0b111111
    shl v, 0xc840c04048404040, b
    le q, v, 0
    jz no, q
    jz yes, n

    # Hacker's Delight algorithm - Newton-Raphson.
    mov c, 1
    sub x, n, 1
    geu q, x, 2**32-1
    jz skip32, q
    add c, c, 16
    shr x, x, 32
skip32:
    geu q, x, 2**16-1
    jz skip16, q
    add c, c, 8
    shr x, x, 16
skip16:
    geu q, x, 2**8-1
    jz skip8, q
    add c, c, 4
    shr x, x, 8
skip8:
    geu q, x, 2**4-1
    jz skip4, q
    add c, c, 2
    shr x, x, 4
skip4:
    geu q, x, 2**2-1
    add c, c, q

    shl g, 1, c
    shr t, n, c
    add t, t, g
    shr h, t, 1

    leu q, h, g
    jz newton_loop_done, q
newton_loop:
    mov g, h
    divu t, r, n, g
    add t, t, g
    shr h, t, 1
    leu q, h, g
    jnz newton_loop, q
newton_loop_done:
    
    mulu u, h, g, g
    cmp s, u, n 
    halt 0
yes:
    mov s, 1
no:
    halt 0

Score: 161558 227038 259038 260038 263068

I took the fastest integer square root algorithm I could find and return whether its remainder is zero.

# based on http://www.cc.utah.edu/~nahaj/factoring/isqrt.c.html
# converted to GOLF assembly for http://codegolf.stackexchange.com/questions/49356/testing-if-a-number-is-a-square

# unrolled for speed, original source commented out at bottom
start:
    or u, t, 1 << 62
    shr t, t, 1
    gequ v, n, u
    jz nope62, v
    sub n, n, u
    or t, t, 1 << 62
    nope62:

    or u, t, 1 << 60
    shr t, t, 1
    gequ v, n, u
    jz nope60, v
    sub n, n, u
    or t, t, 1 << 60
    nope60:

    or u, t, 1 << 58
    shr t, t, 1
    gequ v, n, u
    jz nope58, v
    sub n, n, u
    or t, t, 1 << 58
    nope58:

    or u, t, 1 << 56
    shr t, t, 1
    gequ v, n, u
    jz nope56, v
    sub n, n, u
    or t, t, 1 << 56
    nope56:

    or u, t, 1 << 54
    shr t, t, 1
    gequ v, n, u
    jz nope54, v
    sub n, n, u
    or t, t, 1 << 54
    nope54:

    or u, t, 1 << 52
    shr t, t, 1
    gequ v, n, u
    jz nope52, v
    sub n, n, u
    or t, t, 1 << 52
    nope52:

    or u, t, 1 << 50
    shr t, t, 1
    gequ v, n, u
    jz nope50, v
    sub n, n, u
    or t, t, 1 << 50
    nope50:

    or u, t, 1 << 48
    shr t, t, 1
    gequ v, n, u
    jz nope48, v
    sub n, n, u
    or t, t, 1 << 48
    nope48:

    or u, t, 1 << 46
    shr t, t, 1
    gequ v, n, u
    jz nope46, v
    sub n, n, u
    or t, t, 1 << 46
    nope46:

    or u, t, 1 << 44
    shr t, t, 1
    gequ v, n, u
    jz nope44, v
    sub n, n, u
    or t, t, 1 << 44
    nope44:

    or u, t, 1 << 42
    shr t, t, 1
    gequ v, n, u
    jz nope42, v
    sub n, n, u
    or t, t, 1 << 42
    nope42:

    or u, t, 1 << 40
    shr t, t, 1
    gequ v, n, u
    jz nope40, v
    sub n, n, u
    or t, t, 1 << 40
    nope40:

    or u, t, 1 << 38
    shr t, t, 1
    gequ v, n, u
    jz nope38, v
    sub n, n, u
    or t, t, 1 << 38
    nope38:

    or u, t, 1 << 36
    shr t, t, 1
    gequ v, n, u
    jz nope36, v
    sub n, n, u
    or t, t, 1 << 36
    nope36:

    or u, t, 1 << 34
    shr t, t, 1
    gequ v, n, u
    jz nope34, v
    sub n, n, u
    or t, t, 1 << 34
    nope34:

    or u, t, 1 << 32
    shr t, t, 1
    gequ v, n, u
    jz nope32, v
    sub n, n, u
    or t, t, 1 << 32
    nope32:

    or u, t, 1 << 30
    shr t, t, 1
    gequ v, n, u
    jz nope30, v
    sub n, n, u
    or t, t, 1 << 30
    nope30:

    or u, t, 1 << 28
    shr t, t, 1
    gequ v, n, u
    jz nope28, v
    sub n, n, u
    or t, t, 1 << 28
    nope28:

    or u, t, 1 << 26
    shr t, t, 1
    gequ v, n, u
    jz nope26, v
    sub n, n, u
    or t, t, 1 << 26
    nope26:

    or u, t, 1 << 24
    shr t, t, 1
    gequ v, n, u
    jz nope24, v
    sub n, n, u
    or t, t, 1 << 24
    nope24:

    or u, t, 1 << 22
    shr t, t, 1
    gequ v, n, u
    jz nope22, v
    sub n, n, u
    or t, t, 1 << 22
    nope22:

    or u, t, 1 << 20
    shr t, t, 1
    gequ v, n, u
    jz nope20, v
    sub n, n, u
    or t, t, 1 << 20
    nope20:

    or u, t, 1 << 18
    shr t, t, 1
    gequ v, n, u
    jz nope18, v
    sub n, n, u
    or t, t, 1 << 18
    nope18:

    or u, t, 1 << 16
    shr t, t, 1
    gequ v, n, u
    jz nope16, v
    sub n, n, u
    or t, t, 1 << 16
    nope16:

    or u, t, 1 << 14
    shr t, t, 1
    gequ v, n, u
    jz nope14, v
    sub n, n, u
    or t, t, 1 << 14
    nope14:

    or u, t, 1 << 12
    shr t, t, 1
    gequ v, n, u
    jz nope12, v
    sub n, n, u
    or t, t, 1 << 12
    nope12:

    or u, t, 1 << 10
    shr t, t, 1
    gequ v, n, u
    jz nope10, v
    sub n, n, u
    or t, t, 1 << 10
    nope10:

    or u, t, 1 << 8
    shr t, t, 1
    gequ v, n, u
    jz nope8, v
    sub n, n, u
    or t, t, 1 << 8
    nope8:

    or u, t, 1 << 6
    shr t, t, 1
    gequ v, n, u
    jz nope6, v
    sub n, n, u
    or t, t, 1 << 6
    nope6:

    or u, t, 1 << 4
    shr t, t, 1
    gequ v, n, u
    jz nope4, v
    sub n, n, u
    or t, t, 1 << 4
    nope4:

    or u, t, 1 << 2
    shr t, t, 1
    gequ v, n, u
    jz nope2, v
    sub n, n, u
    or t, t, 1 << 2
    nope2:

    or u, t, 1 << 0
    shr t, t, 1
    gequ v, n, u
    jz nope0, v
    sub n, n, u
    nope0:

end:
    not s, n        # return !remainder
    halt 0


# before unrolling...
#
# start:
#     mov b, 1 << 62  # squaredbit = 01000000...
# loop:               # do {
#     or u, b, t      #   u = squaredbit | root
#     shr t, t, 1     #   root >>= 1
#     gequ v, n, u    #   if remainder >= u:
#     jz nope, v
#     sub n, n, u     #       remainder = remainder - u
#     or t, t, b      #       root = root | squaredbit
# nope:
#     shr b, b, 2     #   squaredbit >>= 2
#     jnz loop, b      # } while (squaredbit > 0)
# end:
#     not s, n        # return !remainder
#     halt 0

EDIT 1: removed squaring test, return !remainder directly, save 3 ops per test

EDIT 2: use n as the remainder directly, save 1 op per test

EDIT 3: simplified the loop condition, save 32 ops per test

EDIT 4: unrolled the loop, save about 65 ops per test


Score: 344493

Does a simple binary search within the interval [1, 4294967296) to approximate sqrt(n), then checks if n is a perfect square.

mov b, 4294967296
mov c, -1

lesser:
    add a, c, 1

start:
    leu k, a, b
    jz end, k

    add c, a, b
    shr c, c, 1

    mulu d, e, c, c

    leu e, d, n
    jnz lesser, e
    mov b, c
    jmp start

end:
    mulu d, e, b, b
    cmp s, d, n

    halt 0