// JAMMER firmware for JAM3D dynamic translation
//    Chris Fallin <cfallin> 2008-02-14
        
.include defs.inc

// jump vectors at addr 0
// One jmp per slot, in this order: main, trap_handler, uflow_handler,
// oflow_handler.  The slot order is fixed by position, so do not reorder.
        jmp main
        jmp trap_handler
        jmp uflow_handler
        jmp oflow_handler

// global data
// malloc_ptr holds the address of the next free word for translated code.
// trap_handler loads it into r11 and xlate_done writes the advanced value
// back.  Initial value is 0x1000.
malloc_ptr:
        0x1000

// JAM3D regs: vars stays in r9, pc is implicit, other two are here
// Both words start at 0x7000, the same address main loads into r9 and
// jumps to.
frame:  0x7000
optop:  0x7000

// main: target of the first jump vector.  Programs the I/L register,
// points vars (r9) at 0x7000 and jumps to 0x7000.
main:
        // set up I, L
        ldim ra0
        0x0090   // I = 0, L = 9 (r0 - r8 is stack, r9 is vars)
        il ra0          // move the value into I/L (il is used as an exchange in the handlers below)
        addi ra0 0      // NOP -- ensure I/L update makes it through before
        addi ra0 0      // NOP -- the jmp below

        // start it off!
	ldim ra9	// vars
	0x7000
        jmp 0x7000

// xlate_insn: one word of scratch storage.  xlate_loop saves the jam3d insn
// word being translated here so that the xlate_<insn> tails can re-read it
// (e.g. to extract the low 4 bits).
xlate_insn:
        0               // tmp storage

// trap_handler: target of the second jump vector.  Translates up to 256
// jam3d insns starting at the address in r15, appending the generated code
// at [malloc_ptr], and overwrites each translated source word with a jmp to
// its translation.  Control flows trap_handler -> xlate_loop -> (table) ->
// xlate_<insn> -> xlate_loop ... -> xlate_done, which returns via jr ra15.
trap_handler:
        // we have r10 - r14 at our disposal; r15 is return addr

        // we have the following setup:
        // r10 = scratch
        // r11 = translated code ptr (ptr to next free mem loc) -
        //       save to malloc_ptr when done
        // r12 = jam3d code ptr (ptr to next insn to translate)
        // r13 = remaining insns in block to translate
        // r14 = scratch
        // r15 = return addr
        
        ldim ra14
        malloc_ptr
        ld ra11 rb14    // r11 = [malloc_ptr]
	mov ra12 rb15	// r12 = r15
        ldim ra13       // r13 = 256
        256

// xlate_loop: top of the per-insn translation loop; every xlate_<insn> tail
// jumps back here.
xlate_loop:
        skz ra13        // if(remaining == 0) goto xlate_done
        jmp xlate_notdone
        jmp xlate_done
xlate_notdone:  
        subi ra13 1     // remaining--

        // load insn word
        ld ra10 rb12

        // if opcode == 0 (ie, this might be the preamble data of a method),
        // just skip it
        // (the opcode field is bits 11..4 of the word, mask 0x0ff0)
        ldim ra14
        0x0ff0
        and ra14 rb10
        skz ra14
        jmp xlate_notzero
        addi ra12 1     // opcode field is zero: step over the word untouched
        jmp xlate_loop
xlate_notzero:  
        
        // replace with jmp to translated code (jmp r11)
        // (the word written is 0x8000 | r11)
        ldim ra14
        0x8000
        or ra14 rb11
        st ra14 rb12

        // save the insn (translator might need it)
        ldim ra14
        xlate_insn
        st ra10 rb14

        // vector into translation jump table
        // index = (insn >> 4) & 0xff, one table word per opcode
        srl ra10
        srl ra10
        srl ra10
        srl ra10
        ldim ra14
        0xff
        and ra10 rb14
        ldim ra14
        xlate_table
        add ra10 rb14
        jr ra10
        
// xlate_table: dispatch table indexed by (insn >> 4) & 0xff (computed in
// xlate_notzero).  Each entry is a single jmp, so entry N lives at
// xlate_table + N; entries must never be added or removed, only replaced.
// Opcodes with no translator jump straight back to xlate_loop.
// NOTE(review): those entries re-enter xlate_loop without advancing r12,
// and the source word has already been overwritten with a jmp by then --
// confirm this is the intended handling of unsupported opcodes.
xlate_table:    // translation table: 256 words long
        // 0x00        
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
         // 0x10
        jmp xlate_bipush        // bipush
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_iload         // iload
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_aload         // aload
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0x20
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0x30
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_istore        // istore
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0x40
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0x50
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_pop           // pop
        jmp xlate_loop
        jmp xlate_dup           // dup
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_swap          // swap
        // 0x60
        jmp xlate_iadd          // iadd
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_isub          // isub
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_imul          // imul
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_idiv          // idiv
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0x70
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_ineg          // ineg
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_iand          // iand
        jmp xlate_loop
        // 0x80
        jmp xlate_ior           // ior
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_iinc          // iinc
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0x90
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_ifeq          // ifeq
        jmp xlate_ifne          // ifne
        jmp xlate_iflt          // iflt
        jmp xlate_ifge          // ifge
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0xA0
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_goto          // goto
        jmp xlate_jsr           // jsr
        jmp xlate_ret           // ret
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_ireturn       // ireturn
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0xB0
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_invoke        // invoke
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0xC0
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_break         // break
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0xD0
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0xE0
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        // 0xF0
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_loop
        jmp xlate_out           // out (debug insn)
        jmp xlate_rload         // rload
        jmp xlate_rstore        // rstore

        // coming into xlate_<insn> tails, we have:
        // r10 = scratch
        // r11 = translated code ptr (ptr to next free mem loc) -
        //       save to malloc_ptr when done
        // r12 = jam3d code ptr (ptr to next insn to translate)
        // r13 = remaining insns in block to translate
        // r14 = scratch

        
// bipush: two source words (opcode, immediate) become two output words:
//     ldim inc ra0 / <immediate>
xlate_bipush:
        ldim ra14               // r14 = encoding of the insn on the next line
        ldim inc ra0            //   (data word for the ldim above, never executed here)
        st ra14 rb11            // out[0] = ldim inc ra0
        addi ra12 1             // src -> immediate operand
        ld ra10 rb12            // r10 = immediate operand
        addi ra11 1             // dest -> out[1]
        st ra10 rb11            // out[1] = immediate, copied verbatim
        addi ra12 1             // src -> next jam3d insn
        addi ra11 1             // dest -> next free word
        jmp xlate_loop

// iload: one source word becomes four output words:
//     movb inc ra0 rb9
//     subi rel ra15 (insn & 15)
//     high rel ra15
//     ld rel ra15 rb15
xlate_iload:
        addi ra12 1             // source insn consumed (its word is in xlate_insn)
        ldim ra10
        xlate_insn
        ld ra10 rb10            // r10 = saved jam3d insn word
        andi ra10 0xF           // r10 = insn & 15
        ldim ra14
        movb inc ra0 rb9        // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        subi rel ra15           // insn 2 template
        add ra14 rb10           // merge insn & 15 into the template
        st ra14 rb11
        addi ra11 1
        ldim ra14
        high rel ra15           // insn 3
        st ra14 rb11
        addi ra11 1
        ldim ra14
        ld rel ra15 rb15        // insn 4
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop

// aload: one source word becomes three output words (same as iload minus
// the final ld):
//     movb inc ra0 rb9
//     subi rel ra15 (insn & 15)
//     high rel ra15
xlate_aload:
        addi ra12 1             // source insn consumed (its word is in xlate_insn)
        ldim ra10
        xlate_insn
        ld ra10 rb10            // r10 = saved jam3d insn word
        andi ra10 0xF           // r10 = var index
        ldim ra14
        movb inc ra0 rb9        // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        subi rel ra15           // insn 2 template
        add ra14 rb10           // insert the var index
        st ra14 rb11
        addi ra11 1
        ldim ra14
        high rel ra15           // insn 3
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// istore: one source word becomes four output words:
//     movb inc ra0 rb9         push vars
//     subi dec ra15 (insn & 15)  turn it into the variable's address and
//                              drop it from the stack; the address is then
//                              in the slot just above TOS (rel r0)
//     high rel ra0             fix up that address
//     st dec ra15 rb0          store TOS to that address and pop it
// Fix: the final store template was written "st dec ra15 ra0".  Every other
// st in this file is "st ra<value> rb<address>", so the address operand
// must be rb0; as written the operand named the a-field a second time.
xlate_istore:
	ldim ra10
	movb inc ra0 rb9        // insn 1
	st ra10 rb11
	addi ra11 1
        ldim ra14
        xlate_insn
        ld ra14 rb14            // r14 = saved jam3d insn word
	andi ra14 0xF           // insn & 15
	ldim ra10
	subi dec ra15           // insn 2
	add ra10 rb14           // merge insn & 15 into the template
	st ra10 rb11
	addi ra11 1
        ldim ra10
        high rel ra0            // insn 3
        st ra10 rb11
        addi ra11 1
	ldim ra10
	st dec ra15 rb0         // insn 4 (address operand is rb0)
	st ra10 rb11
	addi ra11 1
	addi ra12 1             // source insn consumed
	jmp xlate_loop
	
// pop: one source word becomes one output word:
//     mova dec ra14 rb15
xlate_pop:
        addi ra12 1             // source insn consumed
        ldim ra14
        mova dec ra14 rb15      // the insn to emit
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// dup: one source word becomes one output word:
//     mov inc ra0 rb15
xlate_dup:
        addi ra12 1             // source insn consumed
        ldim ra14
        mov inc ra0 rb15        // the insn to emit
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop

// swap: one source word becomes three output words that exchange the two
// top stack entries through an absolute temporary register:
//     mova rel ra14 rb15       r14 = TOS
//     mov  rel ra15 rb14       TOS = TOS-1
//     movb rel ra14 rb14       TOS-1 = r14 (a is stack-relative, b is absolute r14)
// Fix: the temporary used to be r10.  The second generated insn touches
// the entry below TOS and can therefore raise a stack fault, and
// uflow_handler/oflow_handler clobber r10 and r11.  r14 is not touched by
// those handlers and is already used as a scratch target by generated code
// (see xlate_pop), so the temporary now lives there.
xlate_swap:
	ldim ra10
	mova rel ra14 rb15      // insn 1
	st ra10 rb11
	addi ra11 1
	ldim ra10
	mov rel ra15 rb14       // insn 2
	st ra10 rb11
	addi ra11 1
	ldim ra10
	movb rel ra14 rb14      // insn 3
	st ra10 rb11
	addi ra11 1
	addi ra12 1             // source insn consumed
	jmp xlate_loop
	
// iadd: one source word becomes one output word:
//     add dec ra14 rb15
xlate_iadd:
        addi ra12 1             // source insn consumed
        ldim ra14
        add dec ra14 rb15       // the insn to emit
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// isub: one source word becomes one output word:
//     sub dec ra14 rb15
xlate_isub:
        addi ra12 1             // source insn consumed
        ldim ra14
        sub dec ra14 rb15       // the insn to emit
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// imul: one source word becomes a two-word call stub:
//     ldpc ra15 / jmp imul_handler
xlate_imul:
        addi ra12 1             // source insn consumed
        ldim ra14
        ldpc ra15               // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        jmp imul_handler        // insn 2
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// idiv: one source word becomes a two-word call stub:
//     ldpc ra15 / jmp idiv_handler
xlate_idiv:
        addi ra12 1             // source insn consumed
        ldim ra14
        ldpc ra15               // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        jmp idiv_handler        // insn 2
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// ineg: one source word becomes two output words (two's-complement negate
// of TOS):
//     not rel ra15 / addi rel ra15 1
xlate_ineg:
        addi ra12 1             // source insn consumed
        ldim ra14
        not rel ra15            // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        addi rel ra15 1         // insn 2
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// iand: one source word becomes one output word:
//     and dec ra14 rb15
xlate_iand:
        addi ra12 1             // source insn consumed
        ldim ra14
        and dec ra14 rb15       // the insn to emit
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// ior: one source word becomes one output word:
//     or dec ra14 rb15
xlate_ior:
        addi ra12 1             // source insn consumed
        ldim ra14
        or dec ra14 rb15        // the insn to emit
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// iinc: two source words (opcode with var index in the low 4 bits, then the
// increment constant) become eight output words:
//     movb inc ra0 rb9
//     subi rel ra15 (insn & 0xF)
//     high rel ra15
//     ld inc ra0 rb15
//     ldim rel ra0 / <constant>
//     add dec ra15 rb0
//     st dec ra0 rb15
xlate_iinc:
        ldim ra14
        movb inc ra0 rb9        // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra10
        xlate_insn
        ld ra10 rb10            // r10 = saved jam3d insn word
        andi ra10 0xF           // r10 = insn & 0xF
        ldim ra14
        subi rel ra15           // insn 2 template
        add ra14 rb10           // merge insn & 0xF into the template
        st ra14 rb11
        addi ra11 1
        ldim ra14
        high rel ra15           // insn 3
        st ra14 rb11
        addi ra11 1
        ldim ra14
        ld inc ra0 rb15         // insn 4
        st ra14 rb11
        addi ra11 1
        ldim ra14
        ldim rel ra0            // insn 5
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> increment constant
        ld ra10 rb12
        st ra10 rb11            // insn 5 word 2 (const)
        addi ra11 1
        ldim ra14
        add dec ra15 rb0        // insn 6
        st ra14 rb11
        addi ra11 1
        ldim ra14
        st dec ra0 rb15         // insn 7
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> next jam3d insn
        jmp xlate_loop
	
// ifeq: two source words (opcode, branch target) become three output words:
//     skz dec ra15 / jmp L1 / jmp X / L1: ...
// where L1 is the word right after the generated "jmp X".
xlate_ifeq:
        ldim ra14
        skz dec ra15            // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        jmp                     // insn 2 template (no target yet)
        add ra14 rb11           // target = address of insn 2 ...
        addi ra14 2             // ... + 2 = L1
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> branch target word
        ld ra10 rb12            // r10 = dest addr for jmp
        ldim ra14
        jmp 0x7000              // insn 3 template: jmp 0x7000 (jam3d space)
        or ra14 rb10            // OR in the addr
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> next jam3d insn
        jmp xlate_loop
	
// ifne: two source words (opcode, branch target) become two output words:
//     skz dec ra15 / jmp X
xlate_ifne:
        ldim ra14
        skz dec ra15            // insn 1
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> branch target word
        ld ra10 rb12            // r10 = dest addr for jmp
        ldim ra14
        jmp 0x7000              // insn 2 template: jmp 0x7000 (jam3d space)
        or ra14 rb10            // OR in the addr
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> next jam3d insn
        jmp xlate_loop
	
// iflt: two source words (opcode, branch target) become three output words:
//     not rel ra15 / skn dec ra15 / jmp X
xlate_iflt:
        ldim ra14
        not rel ra15            // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        skn dec ra15            // insn 2
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> branch target word
        ld ra10 rb12            // r10 = dest
        ldim ra14
        jmp 0x7000              // insn 3 template
        or ra14 rb10            // OR in the addr
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> next jam3d insn
        jmp xlate_loop
	
// ifge: two source words (opcode, branch target) become two output words:
//     skn dec ra15 / jmp X
xlate_ifge:
        ldim ra14
        skn dec ra15            // insn 1
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> branch target word
        ld ra10 rb12            // r10 = dest
        ldim ra14
        jmp 0x7000              // insn 2 template
        or ra14 rb10            // OR in the addr
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> next jam3d insn
        jmp xlate_loop
	
// goto: two source words (opcode, branch target) become one output word:
//     jmp X
xlate_goto:
        addi ra12 1             // src -> branch target word
        ld ra10 rb12            // r10 = dest
        ldim ra14
        jmp 0x7000              // insn 1 template
        or ra14 rb10            // OR in the addr
        st ra14 rb11
        addi ra11 1
        addi ra12 1             // src -> next jam3d insn
        jmp xlate_loop
	
// jsr: two source words (opcode, target) become four output words:
//     ldpc ra14 / addi ra14 4 / movb inc ra0 rb14 / jmp X
// The generated code takes the address of its own first word, adds 4 (the
// length of the sequence) to form the return address, pushes it and jumps.
// Fix: the return address used to be built in r10.  The push can raise a
// stack overflow fault, and oflow_handler clobbers r10 and r11, so the value
// pushed after the fault would no longer be the return address.  r14 is
// not touched by the stack fault handlers and is already used as a scratch
// target by generated code (see xlate_pop).
xlate_jsr:
        ldim ra10
        ldpc  ra14      // insn 1
        st ra10 rb11
        addi ra11 1
        ldim ra10
        addi ra14 4     // insn 2
        st ra10 rb11
        addi ra11 1
        ldim ra10
        movb inc ra0 rb14 // insn 3
        st ra10 rb11
        addi ra11 1
        addi ra12 1     // src -> target word
        ld ra14 rb12    // get dest
        ldim ra10
        jmp 0x7000      // insn 4
        or ra10 rb14    // OR in dest
        st ra10 rb11
        addi ra11 1
        addi ra12 1     // src -> next jam3d insn
	jmp xlate_loop
	
// ret: one source word becomes one output word:
//     jr dec ra15
xlate_ret:
        addi ra12 1             // source insn consumed
        ldim ra14
        jr dec ra15             // insn 1
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// ireturn: one source word becomes a two-word stub:
//     ldpc ra15 / jmp ireturn_handler
xlate_ireturn:
        addi ra12 1             // source insn consumed
        ldim ra14
        ldpc ra15               // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        jmp ireturn_handler     // insn 2
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// invoke: two source words become a three-word stub:
//     ldim ra15 / <jam3d address of this invoke> / jmp invoke_handler
xlate_invoke:
        ldim ra14
        ldim ra15               // insn 1
        st ra14 rb11
        addi ra11 1
        st ra12 rb11            // insn 1 constant: current jam3d code ptr
        addi ra12 2             // invoke is 2-word insn
        addi ra11 1
        ldim ra14
        jmp invoke_handler      // insn 2
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// break: one source word becomes one output word:
//     jmp break_handler
xlate_break:
        addi ra12 1             // source insn consumed
        ldim ra14
        jmp break_handler       // insn 1
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// rload: one source word becomes two output words:
//     high rel ra15 / ld rel ra15 rb15
xlate_rload:
        addi ra12 1             // source insn consumed
        ldim ra14
        high rel ra15           // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        ld rel ra15 rb15        // insn 2
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
	
// rstore: one source word becomes three output words:
//     high rel ra14 / st dec ra15 rb14 / addi dec ra15 0
xlate_rstore:
        addi ra12 1             // source insn consumed
        ldim ra14
        high rel ra14           // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        st dec ra15 rb14        // insn 2
        st ra14 rb11
        addi ra11 1
        ldim ra14
        addi dec ra15 0         // insn 3
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop

// out: debug insn - jam3d op 0xfd0, writes TOS to port6000.
// One source word becomes a two-word stub:
//     ldpc ra15 / jmp out_handler
xlate_out:
        addi ra12 1             // source insn consumed
        ldim ra14
        ldpc ra15               // insn 1
        st ra14 rb11
        addi ra11 1
        ldim ra14
        jmp out_handler         // insn 2
        st ra14 rb11
        addi ra11 1
        jmp xlate_loop
        
// xlate_done: reached from xlate_loop when the insn budget (r13) hits zero.
// Terminates the translated block with a jmp back to the next untranslated
// jam3d insn, persists the code pointer, and resumes at the trap address.
xlate_done:
        ldim ra14
        0x8000
        or ra14 rb12            // r14 = 0x8000 | next jam3d addr  (a jmp)
        st ra14 rb11            // append it to the translated code
        addi ra11 1
        ldim ra10
        malloc_ptr
        st ra11 rb10            // [malloc_ptr] = next free translated word
        jr ra15                 // return through the replaced vector

// stack management:
// everything at optop and above is in mem
// r0 to r8 hold the top 9 words on the operand stack
// invoke flushes reg stack to mem, for keeping our sanity
// optop points to the word above r0, so optop[-1] <-> r0
// stack page flips happen in 4-word units

        // NOTE TODO: high the addr after every sub; only worry about
	// below regs, not above them (one-way shift)
        
// uflow_handler: target of the third jump vector.  Refills the register
// stack from memory: the five lowest stack registers move up by four and
// the four words at optop .. optop+3 are loaded into r3 .. r0.
// Returns with jr ra15.
uflow_handler:
        //     I += 4
        //     optop += 4
        //     r8, r7, r6, r5, r4 = r4, r3, r2, r1, r0
        //     r0, r1, r2, r3 = optop[-1, -2, -3, -4]
        //     (indices relative to the NEW optop)

        // r10, r11 are clobbered
        mov ra8 rb4     // shift regs
        mov ra7 rb3
        mov ra6 rb2
        mov ra5 rb1
        mov ra4 rb0
        ldim ra10       // r10 = &optop
        optop
        ld ra11 rb10    // r11 = optop
        ld ra3 rb11     // ld r3
        addi ra11 1
        high ra11
        ld ra2 rb11     // ld r2
        addi ra11 1
        high ra11
        ld ra1 rb11     // ld r1
        addi ra11 1
        high ra11
        ld ra0 rb11     // ld r0
        addi ra11 1
        high ra11
        st ra11 rb10    // save optop
        il ra11         // I += 4  (il used as an exchange: r11 <-> I/L)
        addi ra11 4
        il ra11         // write the updated I/L back
        jr ra15
        
// oflow_handler: target of the fourth jump vector.  Spills the four oldest
// stack registers (r0 .. r3) to memory just below optop, then moves
// r4 .. r8 down into r0 .. r4.  Returns with jr ra15.
oflow_handler:
        // optop[-1] is r0, optop[-2] is r1, ...
        // so: optop[-1, -2, -3, -4] = r0, r1, r2, r3
        //     r0, r1, r2, r3, r4 = r4, r5, r6, r7, r8
        //     optop -= 4
        //     I -= 4
        
        // r10, r11 are clobbered
        ldim ra10       // r10 = &optop
        optop
        ld ra11 rb10    // r11 = optop
        subi ra11 1
        high ra11
        st ra0 rb11     // st r0
        subi ra11 1
        high ra11
        st ra1 rb11     // st r1
        subi ra11 1
        high ra11
        st ra2 rb11     // st r2
        subi ra11 1
        high ra11
        st ra3 rb11     // st r3
        mov ra0 rb4     // shift regs...
        mov ra1 rb5
        mov ra2 rb6
        mov ra3 rb7
        mov ra4 rb8
        st ra11 rb10    // store optop
        il ra11         // il used as an exchange: r11 <-> I/L
        subi ra11 4     // I -= 4
        il ra11         // write the updated I/L back
        jr ra15

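// The handlers below are entered from generated (translated) code.
// Upon entry r9 = vars.  For the ldpc-based stubs (imul, idiv, ireturn, out)
// r15 = address of the stub's ldpc, i.e. return address - 2.  For invoke,
// r15 is loaded by ldim with the jam3d address of the invoke insn itself
// (see xlate_invoke), so it is not an offset from the stub's return address.
// Watch out: registers are clobbered if we take a stack trap -- the
// uflow/oflow handlers overwrite r10 and r11, and the trap overwrites r15
// with its own return address.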
// imul_handler: placeholder for multiply -- for now it only drops one stack
// entry and returns to the word after the two-word call stub (r15 + 2).
// Fix: the pop below can raise a stack fault, which overwrites r15 with the
// fault's own return address; returning through that value would land back
// inside this handler.  Save/restore r15 through r14 the same way
// out_handler does.
imul_handler:
        mov ra14 rb15   // save ret addr (may be clobbered by possible stack fault)
	addi dec ra15 0 // just dec I for now
        mov ra15 rb14   // restore ret addr
        addi ra15 2     // step over the ldpc / jmp stub
	jr ra15

// idiv_handler: placeholder for divide -- for now it only drops one stack
// entry and returns to the word after the two-word call stub (r15 + 2).
// Fix: the pop below can raise a stack fault, which overwrites r15 with the
// fault's own return address; returning through that value would land back
// inside this handler.  Save/restore r15 through r14 the same way
// out_handler does.
idiv_handler:
        mov ra14 rb15   // save ret addr (may be clobbered by possible stack fault)
	addi dec ra15 0 // just dec I for now
        mov ra15 rb14   // restore ret addr
        addi ra15 2     // step over the ldpc / jmp stub
	jr ra15

// invoke_handler: entered from the stub emitted by xlate_invoke with
// r15 = jam3d address of the invoke insn.  Spills the live stack registers
// to memory, resets I to 0, builds a three-word frame (saved frame, saved
// vars, return pc) below the callee's area, and jumps to the callee.
invoke_handler:
        // flush stack regs to stack; I = 0
        
        // first, flush stack regs to stack
        ldim ra10       // r10 = &optop
        optop
        ld ra11 rb10    // r11 = optop
        il ra13         // r13 <-> I/L: read the current I/L ...
        mov ra12 rb13
        il ra13         // ... and put it back unchanged
        andi ra12 15    // r12 = I
        sub ra11 rb12   // r11 = optop - I
        high ra11
        mov ra13 rb11   // r13 = optop - I (this one is saved)
        // compute offset
        // each register takes 3 insns in the save sequence below, so skipping
        // the (9 - I) unused registers means skipping 3*(9 - I) words
        ldim ra10
        9
        sub ra10 rb12   // r10 = 9 - I
        mov ra14 rb10
        sll ra14 1      // r14 = 2*r10
        add ra14 rb10   // r14 = 3*r10 = 3*(9-I)
        ldim ra10
        invoke_handler_regsave
        add ra10 rb14
        jr ra10         // jump to the appropriate offset in the sequence below
        
// Register save sequence, newest possible entry (r8) first.  Entered at a
// computed offset so that only the I live registers are written, ending
// with r0 at optop[-1].  Each group must stay exactly 3 insns long.
invoke_handler_regsave:
        st ra8 rb11
        addi ra11 1
        high ra11
        st ra7 rb11
        addi ra11 1
        high ra11
        st ra6 rb11
        addi ra11 1
        high ra11
        st ra5 rb11
        addi ra11 1
        high ra11
        st ra4 rb11
        addi ra11 1
        high ra11
        st ra3 rb11
        addi ra11 1
        high ra11
        st ra2 rb11
        addi ra11 1
        high ra11
        st ra1 rb11
        addi ra11 1
        high ra11
        st ra0 rb11
        addi ra11 1
        high ra11

        // now save optop
        ldim ra10
        optop
        st ra13 rb10    // optop = old optop - I

        // I = 0 (and L = 9)
        ldim ra10
        0x90
        il ra10

        // t = M[R15 + 1] ; high(t)
        // optop -= M[t]  ; high(optop)
        // M[optop] = R15 + 2
        // M[optop - 1] = vars
        // vars = optop + M[t+1] ; high(vars)
        // M[optop - 2] = frame
        // optop -= 2; high(optop)
        // frame = optop
        // jmp high(t + 2)
        //
        // at this point we have: R13 = optop, R9 = vars, R15 = jam3d addr of
        // the invoke insn.  (R10 no longer holds &optop: it was exchanged
        // with I/L just above, so &optop is reloaded via ldim when needed.)

        mov ra11 rb15
        addi ra11 1     // R11 = R15 + 1
        ld ra12 rb11    // R12 = M[R15 + 1] = t
        high ra12
        ld ra14 rb12    // R14 = M[t]
        sub ra13 rb14   // optop -= M[t]
        high ra13
        addi ra11 1     // R11 = R15 + 2
        st ra11 rb13    // M[optop] = R15 + 2
        subi ra13 1
        st ra9 rb13     // M[optop - 1] = vars
        addi ra13 1
        addi ra12 1     // R12 = t + 1
        ld ra9 rb12     // vars = M[t + 1]
        add ra9 rb13    // vars = optop + M[t + 1]
        high ra9
        subi ra13 2     // optop -= 2
        high ra13
        ldim ra14
        frame
        ld ra15 rb14    // r15 = frame
        st ra15 rb13    // M[optop] = frame
        st ra13 rb14    // frame = optop
        ldim ra14
        optop
        st ra13 rb14    // save optop
        addi ra12 1      // ra12 = t + 2
        high ra12
        jr ra12         // jr high(t + 2)

// ireturn_handler: entered from the stub emitted by xlate_ireturn.  Undoes
// the frame built by invoke_handler (M[frame] = caller's frame,
// M[frame+1] = caller's vars, M[frame+2] = return pc), leaves the return
// value in memory at the new optop, and jumps to the return pc.
ireturn_handler:
        // optop = vars; I = 0
        // retpc = M[frame + 2] 
        // M[vars] = <TOS>
        // vars = M[frame + 1]
        // frame = M[frame]
        // jr retpc

        mova rel ra14 rb15      // R14 = <TOS> = retval
                                // do that first because possible stack fault
                                // would clobber r10/r11, which are used below
        ldim ra10
        optop
        st ra9 rb10             // optop = vars
        ldim ra10
        0x90
        il ra10                 // I = 0 (and L = 9)
        ldim ra10
        frame
        ld ra10 rb10            // R10 = frame
        ld ra11 rb10            // R11 = M[frame]
        addi ra10 2
        high ra10
        ld ra15 rb10             // R15 = M[frame + 2]
        st ra14 rb9             // M[vars] = retval  (r9 is still the callee's vars here)
        subi ra10 1
        high ra10
        ld ra9 rb10             // vars = M[frame + 1]
        ldim ra10
        frame
        st ra11 rb10            // frame = M[frame]
        jr ra15                 // jr retpc

// break_handler: entered by a plain jmp from generated code (xlate_break).
// Writes 0xffff to port6000 and then never returns: it loops at break_done.
break_handler:
        ldim ra10
        0xffff
        ldim ra11
        0x6000
        st ra10 rb11            // write 0xffff to port6000
break_done:
        0x7000                  // raw word in the insn stream, executed on every pass
                                // NOTE(review): its effect as an insn is not
                                // visible from this file -- confirm intent
        jmp break_done

// out_handler: entered from the stub emitted by xlate_out with
// r15 = address of the stub's ldpc.  Pops TOS, writes it to port6000 and
// returns to the word after the two-word stub.  Clobbers r10, r11, r14.
out_handler:
        // save ret addr (may be clobbered by possible stack fault)
        mov ra14 rb15
        mova dec ra10 rb15 // pop TOS into ra10
        mov ra15 rb14      // restore ret addr
        ldim ra11
        0x6000
        st ra10 rb11       // write to port6000
        addi ra15 2        // step over the ldpc / jmp stub
        jr ra15
