Smallest Bytecode Interpreter/VM
C, 752 (589+163 for define flags) * 0.5 (JIT) * 0.9 (extensions) * (0.75 optimization) * (0.64 seconds/2) = 81.216
C[S],J[S],i,j,k,y,c,X,Y,Z;char*R,a,b[9];main(x){R=mmap(0,S,6,34,-1,0);N=85;while(scanf("%c%s%*[ R]%d%*[ R]%d",&a,b,&x,&y)&&~getchar())a-=65,a-1?a-15?a-9?a?a-2?a-3?a-11?a-12?a-17?(N=41,v):(N=137,v):(N=137,u,N=247,g(H,4),N=139,u):(y?N=189+x,s(y):(N=51,g(G,G))):(N=137,u,N=247,g(H,6),N=139,u):(N=57,v,s(0xFC8A9F),--j):(N=1,v):(N=233,J[k++]=i,s(x)):b[1]-80?N=85+x:(N=93+x):(c=b[5],s(0x0F9EE78A),N=(c-69?c-71?c-76?1:8:11:0)+132,J[k++]=i,s(x)),C[++i]=j;U(E8,X)U(F0,Y)U(F8,Z)s(50013);i=j;while(k--)j=C[J[k]]+1,R[j-1]-233&&(j+=4),s(C[*(int*)(R+j)]-j-4);((int(*)())R)();printf("%u %u %u\n",X,Y,Z);}
Takes code (LOAD R0
, etc), no trailing characters, single space, no blank lines in the middle, no comments, etc. Trailing newline required.
This is then converted to 80386 bytecode and executed.
Loading 0
into a register is replaced by xor
ing the register with itself instead of mov
ing 0
into the register, which is three bytes shorter in the generated bytecode, and may be very marginally faster.
Compile with:
gcc -m32 -D"g(a,b)=(N=192|b<<3|a)"-D"s(b)=(*(int*)(R+j)=b,j+=4)"-DN=R[j++]-D"G=((x+1)|4)"
-D"H=((y+1)|4)"-DS=9999-D"u=g(0,G)"-D"v=g(G,H)"-D"U(b,c)=s(0xA3##b##89),--j,s(&c);"
bytecode.c -o bytecode
POSIX-compliant OS required.
Input is read from STDIN (use ./bytecode < file
to pipe from a file).
Resulting bytecode for the test program:
; start
0: 55 push %ebp
; LOAD R0 0
1: 33 ed xor %ebp,%ebp
; LOAD R2 0
3: 33 ff xor %edi,%edi
; LOAD R1 0
5: 33 f6 xor %esi,%esi
; PUSH $1
7: 56 push %esi
; MUL R1 R2
8: 89 f0 mov %esi,%eax
a: f7 e7 mul %edi
c: 8b f0 mov %eax,%esi
; PUSH R2
e: 57 push %edi
; LOAD R2 3
f: bf 03 00 00 00 mov $0x3,%edi
; DIV R1 R2
14: 89 f0 mov %esi,%eax
16: f7 f7 div %edi
18: 8b f0 mov %eax,%esi
; POP R2
1a: 5f pop %edi
; ADD R0 R1
1b: 01 f5 add %esi,%ebp
; POP R1
1d: 5e pop %esi
; PUSH R2
1e: 57 push %edi
; LOAD R2 1
1f: bf 01 00 00 00 mov $0x1,%edi
; ADD R1 R2
24: 01 fe add %edi,%esi
; POP R2
26: 5f pop %edi
; PUSH R2
27: 57 push %edi
; LOAD R2 10000
28: bf 10 27 00 00 mov $0x2710,%ed
; CMP R1 R2
2d: 39 fe cmp %edi,%esi
2f: 9f lahf
30: 8a fc mov %ah,%bh
; POP R2
32: 5f pop %edi
; BRANCHLT 3
33: 8a e7 mov %bh,%ah
35: 9e sahf
36: 0f 8c cb ff ff ff jl 0x7
; LOAD R1 1
3c: be 01 00 00 00 mov $0x1,%esi
; ADD R2 R1
41: 01 f7 add %esi,%edi
; LOAD R1 10000
43: be 10 27 00 00 mov $0x2710,%es
; CMP R2 R1
48: 39 f7 cmp %esi,%edi
4a: 9f lahf
4b: 8a fc mov %ah,%bh
; LOAD R1 0
4d: 33 f6 xor %esi,%esi
; BRANCHLT 2
4f: 8a e7 mov %bh,%ah
51: 9e sahf
52: 0f 8c ad ff ff ff jl 0x5
; copy R0 to X
58: 89 e8 mov %ebp,%eax
5a: a3 28 5b 42 00 mov %eax,0x425b
; copy R1 to Y
5f: 89 f0 mov %esi,%eax
61: a3 38 55 44 00 mov %eax,0x4455
; copy R2 to Z
66: 89 f8 mov %edi,%eax
68: a3 40 55 44 00 mov %eax,0x4455
; exit
6d: 5d pop %ebp
6e: c3 ret
Ungolfed:
C[9999],J[9999],i,j,k,y,c,X,Y,Z;
char *R,a,b[9];
main(x){
// 6 is PROC_WRITE|PROC_EXEC
// 34 is MAP_ANON|MAP_PRIVATE
R=mmap(0,'~~',6,34,-1,0);
N=0x55;
while(scanf("%c%s%*[ R]%d%*[ R]%d",&a,b,&x,&y)&&~getchar())
a-=65,
a-1? // B[RANCH**]
a-15? // P[USH/OP]
a-9? // J[MP]
a? // A[DD]
a-2? // C[MP]
a-3? // D[IV]
a-11? // L[OAD]
a-12? // M[UL]
a-17? // R[LOAD]
// SUB
(N=0x29,g(G,H))
:(N=0x89,g(G,H))
:(N=0x89,g(0,G),N=0xF7,g(H,4),N=0x8B,g(0,G))
:(y?N=0xBD+x,s(y):(N=0x33,g(G,G)))
:(N=0x89,g(0,G),N=0xF7,g(H,6),N=0x8B,g(0,G))
:(N=0x39,g(G,H),s(0xfc8a9f),--j)
:(N=0x1,g(G,H))
:(N=0xE9,J[k++]=i,s(x))
:b[1]-80?
N=0x55+x // PUSH
:(N=0x5D+x) // POP
:(c=b[5],s(0x0f9ee78a),N=(
c-69? // EQ
c-71? // GT
c-76? // LT
1 // NE
:8
:11
:0
)+0x84,J[k++]=i,s(x)),
C[++i]=j
;
// transfer registers to X,Y,Z
s(0xA3E889),--j,s(&X);
s(0xA3F089),--j,s(&Y);
s(0xA3F889),--j,s(&Z);
// pop and ret
s(0xC35D);
i=j;
// fix distances for jmp/branch**
while(k--)
j=C[J[k]]+1,R[j-1]-0xE9&&(j+=4),
s(C[*(int*)(R+j)]-j-4);
// call
((int(*)())R)();
// output
printf("%u %u %u\n",X,Y,Z);
}
C, Score = 854byte × (~0.8sec / 2) × 0.5 [JIT] × 0.9 [Extensions] = ~154byte sec
#define G getchar()
#define L for(i=0;i<3;++i)
#define N*(int*)
#define M(x)"P\x8a\xe7\x9e\xf"#x" KL"
*T[1<<20],**t=T,*F[1<<20],**f=F,R[3],r[]={1,6,7};char*I[]={"L\xb8 GGJH","I\x8b\xc0HHGH","H\x50GG","H\x58GG","I\3\xc0HHGH","I\53\xc0HHGH","M\x8b\xc0\xf7\xe0\x8b\xc0IHLGJ","O\63\xd2\x8b\xc0\xf7\xf0\x8b\xc0IJNGL","L\xe9 KH","L\73\xc0\x9f\x8a\xfcHHGH",M(\x82),M(\x84),M(\x87),M(\x85)},C[1<<24],*c=C;main(i,o,l,g){N c=0xb7ec8b60;c[4]=70;c+=5;while((o=G)>=0){char*s=I[o];l=*s-'G';memcpy(c,s+1,l);for(s+=l+1;o=*s++;){o-='G';if(o<3){g=r[G];c[*s++-'G']|=g<<3*(o&1);if(o>1)c[*s++-'G']|=g<<3;}else{if(o>3)*f++=c+*s-'G';for(i=4;i;--i)c[*s-'G'+i-1]=G;++s;}}*t++=c;c+=l;}*t=c;while(f>F)--f,**f=(int)T[**f]-(int)*f-4;L N&c[7*i]=0x5893e|r[i]<<19,N&c[3+7*i]=R+i;N&c[21]=0xc361e58b;mprotect((int)C>>12<<12,1<<24,7);((void(*)())C)();L printf("R%d %u\n",i,R[i]);}
Compile with gcc vm.c -ovm -m32 -w
on a x86 POSIX compliant OS.
Run with ./vm < program
, where program
is a binary program file.
Going for the speed. The program performs a pretty straight-forward translation of the input program to x86 machine code and lets the CPU do the rest.
For instance, here's the translation of the test program.
ecx
, esi
and edi
correspond to R0
, R1
and R2
, respectively; bh
holds the status flags; eax
and edx
are scratch registers; the call-stack corresponds to the VM's stack:
# Prologue
0: 60 pusha
1: 8b ec mov ebp,esp
3: b7 46 mov bh,0x46
# LOAD R0 0
5: b9 00 00 00 00 mov ecx,0x0
# LOAD R2 0 <--outer loop value
a: bf 00 00 00 00 mov edi,0x0
# LOAD R1 0 <--inner loop value
f: be 00 00 00 00 mov esi,0x0
# --Begin inner loop--
# PUSH R1 <--push inner loop value to the stack
14: 56 push esi
# MUL R1 R2 <--(i*j)
15: 8b c6 mov eax,esi
15: f7 e7 mul edi
19: 8b f0 mov esi,eax
# PUSH R2
1b: 57 push edi
# LOAD R2 3
1c: bf 03 00 00 00 mov edi,0x3
# DIV R1 R2 <-- / 3
21: 33 d2 xor edx,edx
23: 8b c6 mov eax,esi
25: f7 f7 div edi
27: 8b f0 mov esi,eax
# POP R2
29: 5f pop edi
# ADD R0 R1 <-- s+=
2a: 03 ce add ecx,esi
# POP R1
2c: 5e pop esi
# PUSH R2
2d: 57 push edi
# LOAD R2 1
2e: bf 01 00 00 00 mov edi,0x1
# ADD R1 R2 <--j++
33: 03 f7 add esi,edi
# POP R2
35: 5f pop edi
# PUSH R2
36: 57 push edi
# LOAD R2 10000
37: bf 10 27 00 00 mov edi,0x2710
# CMP R1 R2 <-- j < 10000
3c: 3b f7 cmp esi,edi
3e: 9f lahf
3f: 8a fc mov bh,ah
# POP R2
41: 5f pop edi
# BRANCHLT 4 <--Go back to beginning inner loop
42: 8a e7 mov ah,bh
44: 9e sahf
45: 0f 82 c9 ff ff ff jb 0x14
# --Drop To outer loop--
# LOAD R1 1
4b: be 01 00 00 00 mov esi,0x1
# ADD R2 R1 <--i++
50: 03 fe add edi,esi
# LOAD R1 10000
52: be 10 27 00 00 mov esi,0x2710
# CMP R2 R1 <-- i < 10000
57: 3b fe cmp edi,esi
59: 9f lahf
5a: 8a fc mov bh,ah
# LOAD R1 0 <--Reset inner loop
5c: be 00 00 00 00 mov esi,0x0
# BRANCHLT 3
61: 8a e7 mov ah,bh
63: 9e sahf
64: 0f 82 a5 ff ff ff jb 0xf
# Epilogue
6a: 3e 89 0d 60 ac 04 09 mov DWORD PTR ds:0x904ac60,ecx
71: 3e 89 35 64 ac 04 09 mov DWORD PTR ds:0x904ac64,esi
78: 3e 89 3d 68 ac 04 09 mov DWORD PTR ds:0x904ac68,edi
7f: 8b e5 mov esp,ebp
81: 61 popa
82: c3 ret
Ungolfed
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#define MAX_INST_COUNT (1 << 20)
#define MAX_CODE_SIZE (8 * MAX_INST_COUNT)
#define PAGE_SIZE (1 << 12)
uintptr_t *targets[MAX_INST_COUNT], **targets_ptr = targets;
int32_t *fixups[MAX_INST_COUNT], **fixups_ptr = fixups;
char code[MAX_CODE_SIZE], *code_ptr = code;
uint32_t R[3];
const char* inst_defs[] = {
/* LOAD */ "\5\xb8 \1\0\4\1",
/* RLOAD */ "\2\x8b\xc0\2\1\1\1",
/* PUSH */ "\1\x50\1\0",
/* POP */ "\1\x58\1\0",
/* ADD */ "\2\x03\xc0\2\1\1\1",
/* SUB */ "\2\x2b\xc0\2\1\1\1",
/* MUL */ "\6\x8b\xc0\xf7\xe0\x8b\xc0\3\1\5\1\3",
/* DIV */ "\x8\x33\xd2\x8b\xc0\xf7\xf0\x8b\xc0\3\3\7\1\5",
/* JMP */ "\5\xe9 \5\1",
/* CMP */ "\5\x3b\xc0\x9f\x8a\xfc\2\1\1\1",
/* BRANCHLT */ "\x9\x8a\xe7\x9e\x0f\x82 \5\5",
/* BRANCHEQ */ "\x9\x8a\xe7\x9e\x0f\x84 \5\5",
/* BRANCHGT */ "\x9\x8a\xe7\x9e\x0f\x87 \5\5",
/* BRANCHNE */ "\x9\x8a\xe7\x9e\x0f\x85 \5\5"
};
int main() {
int i;
{
const char prologue[] =
/* PUSHAD */ "\x60"
/* MOV EBP, ESP */ "\x8b\xec"
/* MOV BH, 46h */ "\xb7\x46"
;
memcpy(code_ptr, prologue, sizeof(prologue) - 1);
code_ptr += sizeof(prologue) - 1;
}
{
const char reg[] = {1, 6, 7};
char r;
int op;
while ((op = getchar()) != EOF) {
const char* def = inst_defs[op];
int len = def[0];
memcpy(code_ptr, def + 1, len);
for (def += len + 1; *def; ) {
switch (*def++) {
case 1: code_ptr[*def++] |= reg[getchar()]; break;
case 2: code_ptr[*def++] |= reg[getchar()] << 3; break;
case 3:
r = reg[getchar()];
code_ptr[*def++] |= r; code_ptr[*def++] |= r << 3;
break;
case 5: *fixups_ptr = code_ptr + *def; ++fixups_ptr;
case 4:
for (i = 4; i; --i) code_ptr[*def + i - 1] = getchar();
++def;
break;
}
}
*targets_ptr = code_ptr; ++targets_ptr;
code_ptr += len;
}
*targets_ptr = code_ptr; ++targets_ptr;
while (fixups_ptr != fixups) {
--fixups_ptr;
**fixups_ptr = (char*)targets[**fixups_ptr] - (char*)*fixups_ptr - 4;
}
}
{
const char epilogue[] =
/* MOV R[0], ECX */ "\x3e\x89\x0d "
/* MOV R[1], ESI */ "\x3e\x89\x35 "
/* MOV R[2], EDI */ "\x3e\x89\x3d "
/* MOV ESP, EBP */ "\x8b\xe5"
/* POPAD */ "\x61"
/* RET */ "\xc3"
;
memcpy(code_ptr, epilogue, sizeof(epilogue) - 1);
for (i = 0; i < 3; ++i) *(uintptr_t*)&code_ptr[3 + 7 * i] = &R[i];
code_ptr += sizeof(epilogue) - 1;
}
mprotect(
(uintptr_t)code / PAGE_SIZE * PAGE_SIZE, MAX_CODE_SIZE,
PROT_READ | PROT_WRITE | PROT_EXEC
);
{ void (*program)(void) = code; program(); }
for (i = 0; i < 3; ++i) printf("R%d %u\n", i, R[i]);
return 0;
}
CJam, 222 187 185 bytes * (too slow / 2)
I just wanted to see how short I can get a bytecode VM by writing it in CJam. Less than 200 bytes seems pretty decent. It's damn slow though, because CJam itself is interpreted. It takes ages to run the test program.
304402480 6b:P;q:iD-);{(_P=@/(\L*@@+\}h;]:P;TTT]:R;{_Rf=~}:Q;{4G#%R@0=@t:R;}:O;{TP=("R\(\GG*bt:R; ~R= R\~@t:R; Q+O Q4G#+-O Q*O Q/O ~(:T; Rf=~-:U; GG*bU0<{(:T}*;"S/=~T):TP,<}g3,{'R\_S\R=N}/
To run it, download the Java interpreter at this sourceforge link, save the code in vm.cjam
and run it with
java -jar cjam-0.6.2.jar vm.cjam
The program expects the bytecode on STDIN. I haven't found a way yet to pipe binary data into a program, without PowerShell adding a trailing line break and converting 0x0a
to 0x0d 0x0a
, which is really annoying. The code includes 4 bytes to fix that (D-);
), which I haven't included in the total count, because it's not something the program should need to do if it actually received the bytecode itself on STDIN, instead of some weirdly encoded version of it. If someone knows a fix for that, please let me know.
Slightly ungolfed:
304402480 6b:P; "Create lookup table for instruction sizes. Store in P.";
q:i "Read program and convert bytes to integers.";
D-); "Remove spurious carriage returns. This shouldn't be necessary.";
{(_P=@/(\L*@@+\}h;]:P; "Split into instructions. Store in P.";
"We'll use T for the instruction pointer as it's initialised to 0.";
"Likewise, we'll use U for the CMP flag.";
TTT]:R; "Store [0 0 0] in R for the registers.";
{_Rf=~}:Q; "Register lookup block.";
{4G#%R@0=@t:R;}:O; "Save in register block.";
{TP=("R\(\GG*bt:R;
~R=
R\~@t:R;
Q+O
Q4G#+-O
Q*O
Q/O
~(:T;
Rf=~-:U;
GG*bU0<{(:T}*;"N/=~T):TP,<}g "Run program.";
3,{'R\_S\R=N}/
I'll add a proper explanation tomorrow.
In short, I'm storing all the registers, the instruction pointer and the comparison flag in variables, so that I can keep CJam's stack free to use as the VM's stack.