After slaving over a warm laptop (while getting lost on the train), I rewrote my Brainfuck interpreter to use a kind of pseudo-bytecode to optimise jumps and long runs of increment and movement operations. Below is the new code.


""" a simple (and hopefully easy to understand)
Brainfuck interpreter written in Python.

Official page:
Program archive: .

Copyright (C) 2007  Matthew Davey

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
"""

from time import time
import sys
import re
import os

class BytecodeInstruction(object):
    """A single instruction of the interpreter's pseudo-bytecode.

    Attributes:
        opcode: One of the OP_* constants defined on this class.
        value:  Operand whose meaning depends on the opcode:
                  OP_MOV        - signed number of cells to move the pointer
                  OP_INC        - signed amount to add to the current cell
                  OP_LOOP_START - bytecode index of the matching OP_LOOP_END
                  OP_LOOP_END   - bytecode index of the matching OP_LOOP_START
                  OP_INPUT / OP_OUTPUT - unused
    """

    OP_MOV        = 1
    OP_INC        = 2
    # OP_LOOP_START is used by __str__ and by the interpreter; it fills the
    # otherwise unused slot between OP_INC and OP_LOOP_END.
    OP_LOOP_START = 3
    OP_LOOP_END   = 4
    OP_INPUT      = 5
    OP_OUTPUT     = 6

    def __init__(self, opcode, value):
        self.opcode = opcode
        self.value  = value

    def __str__(self):
        """Return a human-readable form of the instruction (for debug output)."""
        if self.opcode == self.OP_MOV:
            return "MOVE %d" % (self.value,)
        elif self.opcode == self.OP_INC:
            return "INCREMENT %d" % (self.value,)
        elif self.opcode == self.OP_LOOP_START:
            return "OPEN LOOP (END: %d)" % (self.value,)
        elif self.opcode == self.OP_LOOP_END:
            return "CLOSE LOOP (START: %d)" % (self.value,)
        elif self.opcode == self.OP_INPUT:
            return "INPUT"
        elif self.opcode == self.OP_OUTPUT:
            return "OUTPUT"

        # __str__ must always return a string; falling off the end would
        # return None and make str()/print raise a TypeError.
        return "UNKNOWN OPCODE %r (VALUE: %r)" % (self.opcode, self.value)

    def __repr__(self):
        return "BytecodeInstruction(%r, %r)" % (self.opcode, self.value)

class BytecodeException(Exception):
    """Raised when a Brainfuck program cannot be compiled to bytecode."""

class BrainfuckedInterpreter(object):
    """Two-stage Brainfuck interpreter: compile the source, then run it.

    compile_to_bytecode() turns program text into a list of
    BytecodeInstruction objects; run_bytecode() executes such a list.
    """

    def compile_to_bytecode(self, program):
        """Turn the passed Brainfuck program into our pretend bytecode.

        Consecutive '+'/'-' characters are folded into one OP_INC carrying
        the net change, consecutive '>'/'<' into one OP_MOV carrying the net
        movement, and each bracket stores the bytecode index of its partner
        so that jumps need no searching at run time.

        Args:
            program: Brainfuck source text.  Any character that is not one
                of the eight instructions is ignored.

        Returns:
            A list of BytecodeInstruction objects.  The list always starts
            with the priming (OP_MOV, 0) instruction described below.

        Raises:
            BytecodeException: if the brackets are not balanced.

        TODO: Check for NO_OPs  (OP_MOV, 0)  or  (OP_INC, 0)
        TODO: Try to find some patterns that can be expressed more simply
        """

        # Remove all non-instructions.  The pattern is a raw string and the
        # '-' is escaped so it is a literal rather than a character range.
        program = re.findall(r"[\[\]<>+\-.,]", program)

        # Holds the generated bytecode
        bytecode = []

        # Prime the bytecode so the "what was the last instruction" checks
        # below never see an empty list.  It is a harmless no-op unless the
        # program starts with '>' / '<', in which case they fold into it.
        # It must stay in place: bracket targets are absolute indexes.
        bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_MOV, 0))

        # A stack of open brackets.  Holds bytecode indexes, not positions
        # in the program text.
        bracket_stack = []

        for instruction in program:

            if instruction in ('+', '-'):

                # If the last instruction wasn't an OP_INC, start a new one
                # initialised to 0; otherwise keep folding into it.
                if bytecode[-1].opcode != BytecodeInstruction.OP_INC:
                    bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_INC, 0))

                if instruction == '+':
                    bytecode[-1].value += 1
                else:
                    bytecode[-1].value -= 1

            elif instruction in ('>', '<'):

                # Same folding as above, but for pointer movement
                if bytecode[-1].opcode != BytecodeInstruction.OP_MOV:
                    bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_MOV, 0))

                if instruction == '>':
                    bytecode[-1].value += 1
                else:
                    bytecode[-1].value -= 1

            elif instruction == ',':
                bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_INPUT, False))

            elif instruction == '.':
                bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_OUTPUT, False))

            elif instruction == '[':

                # We don't know where to jump to yet, so just store False
                bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_LOOP_START, False))

                # Remember where this bracket lives so the matching ']' can
                # find it and fill in the jump target.
                bracket_stack.append(len(bytecode) - 1)

            elif instruction == ']':

                # The location (bytecode, not program) of the open bracket
                try:
                    bracket_location = bracket_stack.pop()
                except IndexError:
                    raise BytecodeException("Unmatched ']' encoutered")

                # Add the closing bracket and point it at the opening one
                bytecode.append(BytecodeInstruction(BytecodeInstruction.OP_LOOP_END, bracket_location))

                # Go back to the open bracket and fill in the location of
                # this closing bracket, which it jumps to when the cell is 0.
                bytecode[bracket_location].value = len(bytecode) - 1

        # Make sure all the brackets have been matched
        if bracket_stack:
            raise BytecodeException("Unmatched '[' encoutered")

        return bytecode

    def run_bytecode(self, bytecode, use_stdout = False, debug = False):
        """Execute bytecode produced by compile_to_bytecode().

        The tape starts as a single zero cell and grows to the right on
        demand.  Cells are plain Python integers: no 8-bit wrap-around is
        applied, so OP_OUTPUT on a value chr() cannot convert raises
        ValueError.

        Args:
            bytecode: list of BytecodeInstruction objects to execute.
            use_stdout: when true, each output character is also written to
                sys.stdout as it is produced.
            debug: when true, print every instruction before executing it
                and the whole tape after executing it.

        Returns:
            All program output as one string, whatever use_stdout is.

        Raises:
            Exception: if the program moves the pointer left of cell 0.
        """

        # Program output, collected as pieces and joined once at the end
        output = []

        # Our tape/memory
        memory         = [0]
        memory_pointer = 0

        # How far we are through the bytecode
        bytecode_length  = len(bytecode)
        bytecode_pointer = 0

        while bytecode_pointer < bytecode_length:

            instruction = bytecode[bytecode_pointer]

            if debug:
                print(instruction)

            if instruction.opcode == BytecodeInstruction.OP_INC:
                memory[memory_pointer] += instruction.value

            elif instruction.opcode == BytecodeInstruction.OP_MOV:

                target = memory_pointer + instruction.value

                # Can't move before cell 0
                if target < 0:
                    raise Exception("Tried to move before the start of the memory block")

                # Going past the end of the tape?  Grow it by exactly the
                # number of cells needed to make the target cell exist.
                shortfall = target - (len(memory) - 1)
                if shortfall > 0:
                    memory.extend([0] * shortfall)

                memory_pointer = target

            elif instruction.opcode == BytecodeInstruction.OP_LOOP_START:
                # Cell is zero: skip the loop.  We land on the matching
                # OP_LOOP_END and the increment below steps past it.
                if memory[memory_pointer] == 0:
                    bytecode_pointer = instruction.value

            elif instruction.opcode == BytecodeInstruction.OP_LOOP_END:
                # Cell is non-zero: go round again.  We land on the matching
                # OP_LOOP_START and the increment below enters the body.
                if memory[memory_pointer] != 0:
                    bytecode_pointer = instruction.value

            elif instruction.opcode == BytecodeInstruction.OP_INPUT:
                if use_stdout:
                    # Make sure any prompt is visible before we block
                    sys.stdout.flush()

                char = sys.stdin.read(1)

                if char == '':
                    # End of input is reported to the program as 0
                    memory[memory_pointer] = 0
                else:
                    # Remember to turn character 'A' into its ASCII number
                    memory[memory_pointer] = ord(char)

            elif instruction.opcode == BytecodeInstruction.OP_OUTPUT:
                char = chr(memory[memory_pointer])
                output.append(char)

                if use_stdout:
                    sys.stdout.write(char)

            if debug:
                print(memory)

            bytecode_pointer += 1

        return ''.join(output)

if __name__ == '__main__':

    # Command-line driver:  <script> <filename> [True | False]
    # Compiles the Brainfuck program in <filename>, runs it with output
    # going straight to stdout, and optionally reports timings.

    usage = "Usage: %s <filename> [enable_timer: True | False]" % (sys.argv[0],)
    timer = False

    # Every failed check below must stop the script; carrying on would only
    # crash later with an unrelated IndexError / NameError.
    if len(sys.argv) not in [2, 3] or sys.argv[1] == 'help':
        print(usage)
        # Asking for help is not an error; a wrong argument count is
        sys.exit(0 if sys.argv[1:2] == ['help'] else 1)

    if not os.path.exists(sys.argv[1]):
        print(usage)
        print("File not found")
        sys.exit(1)

    try:
        # 'with' closes the file even if read() fails
        with open(sys.argv[1]) as program_file:
            program = program_file.read()
    except (IOError, OSError) as e:
        print(usage)
        print("Unable to open file: %s" % (e,))
        sys.exit(1)

    if len(sys.argv) == 3:
        if sys.argv[2] not in ['True', 'False']:
            print(usage)
            print("Second argument must be 'True' or 'False'")
            sys.exit(1)

        timer = (sys.argv[2] == 'True')

    if timer:
        start_time = time()

    bf = BrainfuckedInterpreter()

    bytecode = bf.compile_to_bytecode(program)

    if timer:
        compile_time = time()

    bf.run_bytecode(bytecode, use_stdout=True)

    if timer:
        print("Elapsed time: %0.2f  Compile time: %0.2f" % (time() - start_time, compile_time - start_time))

Now, proof that all my work paid off…

[email protected]:~/brainfucked$ time echo 50 | python
Primes up to: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47

real    0m13.706s
user    0m13.653s
sys     0m0.028s
[email protected]:~/brainfucked$ time echo 50 | python
Primes up to: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47

real    0m16.305s
user    0m16.293s
sys     0m0.000s

I can assure you, that’s not what I expected either.

‘Optimising’ this program has actually taught me a valuable lesson: ‘Never assume you know what needs fixing or speeding up’. I added more complexity to this program trying to improve its speed without once profiling or even adding a single extra time() statement. I blindly assumed I knew what the problem was, and dived straight into fixing it without another thought.

Still, it will make it easier to implement an Ook!, Whitespace, or other Brainfuck derivatives now :-)