Write a brainfuck compiler
Python, 1974 chars
import sys
s='\x11\x75\x30\xbc\x08\x4b\x03\x3c'
k=[]
for c in sys.stdin.read():
if'>'==c:s+='\x84\x01\x01'
if'<'==c:s+='\x84\x01\xff'
if'+'==c:s+='\x2a\x1b\x5c\x33\x04\x60\x91\x54'
if'-'==c:s+='\x2a\x1b\x5c\x33\x04\x64\x91\x54'
if'['==c:k+=[len(s)];s+='\x2a\x1b\x33\x99\x00\x00'
if']'==c:a=k[-1];k=k[:-1];d=len(s)-a;s=s[:a+4]+'%c%c'%(d>>8,d&255)+s[a+6:]+'\xa7%c%c'%(-d>>8&255,-d&255)
if','==c:s+='\x2a\x1b\xb2\x00\x02\xb6\x00\x03\x91\x54'
if'.'==c:s+='\xb2\x00\x04\x59\x2a\x1b\x33\xb6\x00\x05\xb6\x00\x06'
s+='\xb1'
n=len(s)
sys.stdout.write('\xca\xfe\xba\xbe\x00\x03\x00-\x00+\n\x00\x08\x00\x13\t\x00\x14\x00\x15\n\x00\x16\x00\x17\t\x00\x14\x00\x18\n\x00\x19\x00\x1a\n\x00\x19\x00\x1b\x07\x00\x1c\x07\x00\x1d\x01\x00\x06<init>\x01\x00\x03()V\x01\x00\x04Code\x01\x00\x0fLineNumberTable\x01\x00\x04main\x01\x00\x16([Ljava/lang/String;)V\x01\x00\nExceptions\x07\x00\x1e\x01\x00\nSourceFile\x01\x00\x06B.java\x0c\x00\t\x00\n\x07\x00\x1f\x0c\x00 \x00!\x07\x00"\x0c\x00#\x00$\x0c\x00%\x00&\x07\x00\'\x0c\x00(\x00)\x0c\x00*\x00\n\x01\x00\x01B\x01\x00\x10java/lang/Object\x01\x00\x13java/io/IOException\x01\x00\x10java/lang/System\x01\x00\x02in\x01\x00\x15Ljava/io/InputStream;\x01\x00\x13java/io/InputStream\x01\x00\x04read\x01\x00\x03()I\x01\x00\x03out\x01\x00\x15Ljava/io/PrintStream;\x01\x00\x13java/io/PrintStream\x01\x00\x05write\x01\x00\x04(I)V\x01\x00\x05flush\x00!\x00\x07\x00\x08\x00\x00\x00\x00\x00\x02\x00\x01\x00\t\x00\n\x00\x01\x00\x0b\x00\x00\x00\x1d\x00\x01\x00\x01\x00\x00\x00\x05*\xb7\x00\x01\xb1\x00\x00\x00\x01\x00\x0c\x00\x00\x00\x06\x00\x01\x00\x00\x00\x03\x00\t\x00\r\x00\x0e\x00\x02\x00\x0b\x00\x00'+'%c%c'%((n+60)>>8,(n+60)&255)+'\x00\x04\x00\x03\x00\x00'+'%c%c'%(n>>8,n&255)+s+'\x00\x00\x00\x01\x00\x0c\x00\x00\x00*\x00\n\x00\x00\x00\x05\x00\x06\x00\x06\x00\x08\x00\t\x00\x0b\x00\x0b\x00\x13\x00\r\x00\x1d\x00\x0f\x00&\x00\x11\x00,\x00\x12\x002\x00\x14\x008\x00\x15\x00\x0f\x00\x00\x00\x04\x00\x01\x00\x10\x00\x01\x00\x11\x00\x00\x00\x02\x00\x12')
Below are the translations to java bytecode. local 0 is a byte array representing the tape, local 1 is the data pointer.
> iinc 1,+1
< iinc 1,-1
+ aload_0;iload_1;dup2;baload;iconst_1;iadd;i2b;bastore
- aload_0;iload_1;dup2;baload;iconst_1;isub;i2b;bastore
[ aload_0;iload_1;baload;ifeq xx xx
] goto xx xx
, aload_0;iload_1;getstatic #2;invokevirtual #3;i2b;bastore
. getstatic #4;dup;aload_0;iload_1;baload;invokevirtual #5;invokevirtual #6
The xx xx
are offsets to reach the matching bracket. #2 is System.in
, #3 is read()
, #4 is System.out
, #5 is write()
, and #6 is flush()
.
The preamble allocates a 30000 byte array and initializes the tape position to 0.
The giant wrapper at the end was generated by compiling a dummy B.java
file with code for one of each opcode (to induce generation of the correct constant tables and other junk), then performing delicate surgery on it.
Run it like
python bfc.py < input.b > B.class
java B
Disassemble with
javap -c B
I'm sure it could be golfed some more. I'm just happy it works...
C, 866 783 bytes
Since my code outputs 32 bit ELF executable I can't promise that it will work on everyones setup. It took enough tweaking to get the executable to stop segfaulting on my computer.
For anyone trying to run this:
$ uname --all
Linux 4.4.0-24-generic #43-Ubuntu SMP Wed Jun 8 19:27:37 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux
A Brainfuck program is read from stdin and the compiled ELF is written to stdout.
#define P *(t++)
#define C case
#define B break
char a[30000],b[65535],f,*t=b;*c[100];**d=c;main(g){P=188;t+=4;while((f=getchar())!=-1)switch(f){C'>':P=68;B;C'<':P=76;B;C'+':P=254;P=4;P=36;B;C'-':P=254;P=12;P=36;B;C'.':P=187;t+=4;P=137;P=225;P=186;P=1;t+=3;P=184;P=4;t+=3;P=205;P=128;B;C',':P=187;P=1;t+=3;P=137;P=225;P=186;P=1;t+=3;P=184;P=3;t+=3;P=205;P=128;B;C'[':P=138;P=4;P=36;P=133;P=192;P=15;P=132;t+=4;*d=(int*)t-1;d++;B;C']':P=138;P=4;P=36;P=133;P=192;P=15;P=133;t+=4;d--;g=((char*)(*d+1))-t;*((int*)t-1)=g;**d=-g;B;}P=184;P=1;t+=3;P=187;t+=4;P=205;P=128;*(int*)(b+1)=0x8048054+t-b;long long z[]={282579962709375,0,4295163906,223472812116,0,4297064500,4294967296,577727389698621440,36412867248128,30064779550,140720308490240};write(1,&z,84);write(1,b,t-b);write(1,a,30000);}
Ungolfed
In the ungolfed version of the code, you can get a better idea of what's going on. The character array at the end of the golfed code is an encoding of the ELF and program header in the ungolfed code. This code also show how each Brainfuck instruction is translated into bytecode.
#include <linux/elf.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#define MAX_BIN_LEN 65535
#define MAX_JUMPS 100
unsigned int org = 0x08048000;
unsigned char move_right[] = {0x44}; /*inc esp */
unsigned char move_left[] = {0x4c}; /*dec esp */
unsigned char inc_cell[] = {0xfe,0x04,0x24}; /*inc [esp] */
unsigned char dec_cell[] = {0xfe,0x0c,0x24}; /*dec [esp] */
unsigned char read_char[] = {0xbb,0x00,0x00,0x00,0x00, /*mov ebx, 0 */
0x89,0xe1, /*mov ecx, esp */
0xba,0x01,0x00,0x00,0x00, /*mov edx, 1 */
0xb8,0x03,0x00,0x00,0x00, /*mov eax, 3 */
0xcd,0x80}; /*int 0x80 */
unsigned char print_char[] = {0xbb,0x01,0x00,0x00,0x00, /*mov ebx, 1 */
0x89,0xe1, /*mov ecx, esp */
0xba,0x01,0x00,0x00,0x00, /*mov edx, 1 */
0xb8,0x04,0x00,0x00,0x00, /*mov eax, 4 */
0xcd,0x80}; /*int 0x80 */
unsigned char loop_start[] = {0x8a,0x04,0x24, /*mov eax, [esp] */
0x85,0xc0, /*test eax, eax */
0x0f,0x84,0x00,0x00,0x00,0x00}; /*je int32_t */
unsigned char loop_end[] = {0x8a,0x04,0x24, /*mov eax, [esp] */
0x85,0xc0, /*test eax, eax */
0x0f,0x85,0x00,0x00,0x00,0x00}; /*jne int32_t */
unsigned char call_exit[] = {0xb8,0x01,0x00,0x00,0x00, /*mov eax, 1 */
0xbb,0x00,0x00,0x00,0x00, /*mov ebx, 0 */
0xcd,0x80}; /*int 0x80 */
unsigned char prelude[] = {0xbc,0x00,0x00,0x00,0x00}; /*mov esp, int32_t*/
unsigned char tape[100];
int main(){
unsigned char text[MAX_BIN_LEN];
unsigned char *txt_ptr = text;
int32_t *loop_jmps[MAX_JUMPS];
int32_t **loop_jmps_ptr = loop_jmps;
Elf32_Off entry;
entry = org + sizeof(Elf32_Ehdr) + 1 * sizeof(Elf32_Phdr);
memcpy(txt_ptr,prelude,sizeof(prelude));
txt_ptr += sizeof(prelude);
char input;
while((input = getchar()) != -1){
switch(input){
case '>':
memcpy(txt_ptr,move_right,sizeof(move_right));
txt_ptr += sizeof(move_right);
break;
case '<':
memcpy(txt_ptr,move_left,sizeof(move_left));
txt_ptr += sizeof(move_left);
break;
case '+':
memcpy(txt_ptr,inc_cell,sizeof(inc_cell));
txt_ptr += sizeof(inc_cell);
break;
case '-':
memcpy(txt_ptr,dec_cell,sizeof(dec_cell));
txt_ptr += sizeof(dec_cell);
break;
case '.':
memcpy(txt_ptr,print_char,sizeof(print_char));
txt_ptr += sizeof(print_char);
break;
case ',':
memcpy(txt_ptr,read_char,sizeof(read_char));
txt_ptr += sizeof(read_char);
break;
case '[':
memcpy(txt_ptr,loop_start,sizeof(loop_start));
txt_ptr += sizeof(loop_start);
*loop_jmps_ptr = (int32_t*) txt_ptr - 1;
loop_jmps_ptr++;
break;
case ']':
memcpy(txt_ptr,loop_end,sizeof(loop_end));
txt_ptr += sizeof(loop_end);
loop_jmps_ptr--;
int32_t offset = ((unsigned char*) (*loop_jmps_ptr + 1)) - txt_ptr;
*((int32_t*)txt_ptr - 1) = offset;
**loop_jmps_ptr = -offset;
break;
}
}
memcpy(txt_ptr,call_exit,sizeof(call_exit));
txt_ptr += sizeof(call_exit);
*(int32_t*)(text + 1) = entry + (txt_ptr - text);
Elf32_Ehdr ehdr = {
{0x7F,'E','L','F',ELFCLASS32,ELFDATA2LSB,EV_CURRENT,0,0,0,0,0,0,0,0,0},
ET_EXEC,
EM_386,
EV_CURRENT,
entry,
sizeof(Elf32_Ehdr),
0,
0,
sizeof(Elf32_Ehdr),
sizeof(Elf32_Phdr),
1,
0,
0,
SHN_UNDEF,
};
Elf32_Phdr phdr = {
PT_LOAD,
0,
org,
org,
sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) + (txt_ptr - text),
sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) + (txt_ptr - text),
PF_R | PF_X | PF_W,
0x1000,
};
int out = open("a.out",O_CREAT|O_TRUNC|O_WRONLY,S_IRWXU);
write(out,&ehdr,sizeof(Elf32_Ehdr));
write(out,&phdr,sizeof(Elf32_Phdr));
write(out,text,txt_ptr-text);
write(out,tape,sizeof(tape));
close(out);
}
Self Modifying BrainFuck
In order to save on bytes, the tape for my compiler isn't allocated in a .bss
section or anything fancy like that. Instead, the tape is 30,000 null bytes written directly after the compiled byte code of the Brainfuck program. Knowing this, and being aware of what byte code is generated by my compiler means that you can generate or modify byte code at runtime. A simple illustration of this 'feature' is a Brainfuck program that sets its own exit value.
<<<<<<+
The program goes off the left edge of the tape into the byte code to the point that the exit code is normally set 0. Incrementing this byte causes the exit code to be set to 1 instead of 0 when the program eventually exits. With persistence, this could be used to do system level programming in Brainfuck.
16-bit x86 assembly code, 104 bytes
This code is from 2014, but I just found the task.
;compliant version, non-commands are ignored, but 104 bytes long
[bits 16]
[org 0x100]
; assume bp=091e used
; assume di=fffe
; assume si=0100
; assume dx=cs (see here)
; assume cx=00ff
; assume bx=0000
; assume ax=0000 used (ah)
; assume sp=fffe
start:
mov al, code_nothing - start
code_start:
mov ch, 0x7f ; allow bigger programs
mov bx, cx
mov di, cx
rep stosb
mov bp, find_right + start - code_start ;cache loop head for smaller compiled programs
jmp code_start_end
find_right:
pop si
dec si
dec si ;point to loop head
cmp [bx], cl
jne loop_right_end
loop_right:
lodsb
cmp al, 0xD5 ; the "bp" part of "call bp" (because 0xFF is not unique, watch for additional '[')
jne loop_left
inc cx
loop_left:
cmp al, 0xC3 ; ret (watch for ']')
jne loop_right
loop loop_right ;all brackets matched when cx==0
db 0x3c ;cmp al, xx (mask push)
loop_right_end:
push si
lodsw ; skip "call" or dummy "dec" instruction, depending on context
push si
code_sqright:
ret
code_dec:
dec byte [bx]
code_start_end:
db '$' ;end DOS string, also "and al, xx"
code_inc:
inc byte [bx]
db '$'
code_right:
inc bx ;al -> 2
code_nothing:
db '$'
code_left:
dec bx
db '$'
code_sqleft:
call bp
db '$'
; create lookup table
real_start:
inc byte [bx+'<'] ;point to code_left
dec byte [bx+'>'] ;point to code_right
mov byte [bx+'['], code_sqleft - start
mov byte [bx+']'], code_sqright - start
lea sp, [bx+45+2] ;'+' + 4 (2b='+', 2c=',', 2d='-', 2e='.')
push (code_dec - start) + (code_dot - start) * 256
push (code_inc - start) + (code_comma - start) * 256
pre_write:
mov ah, code_start >> 8
xchg dx, ax
; write
mov ah, 9
int 0x21
; read
code_comma:
mov dl, 0xff
db 0x3d ; cmp ax, xxxx (mask mov)
code_dot:
mov dl, [bx]
mov ah, 6
int 0x21
mov [bx], al
db '$'
db 0xff ; parameter for '$', doubles as test for zero
; switch
xlatb
jne pre_write
; next two lines can also be removed
; if the program ends with extra ']'
; and then we are at 100 bytes... :-)
the_end:
mov dl, 0xC3
int 0x21
int 0x20