// pipe2 -- pipeline stage 2 (ALU / Execute).
//
// Purely combinational: decodes the opcode in in_insn[15:12], selects the two
// ALU operands and the ALU operation, and resolves branches.
//
// Inputs:
//   in_insn  instruction word; bits [15:12] are the opcode
//   in_A     operand A
//   in_B     operand B
//   in_C     operand C; tested by jmpz/jmpn and forwarded to out_C
//   in_K     immediate constant
//            NOTE(review): the immediate opcodes (0, 1, 3) feed in_K to the ALU
//            unmodified, so it is presumably already extended by the previous
//            stage -- confirm against the decode stage.
//   in_Pred  branch prediction made for this instruction (1 = taken)
//
// Outputs:
//   out_insn        in_insn, passed through unchanged
//   out_Q           ALU result
//   out_C           in_C, passed through unchanged
//   Branch          1 when this instruction actually branches
//   BranchWrong     1 when Branch disagrees with in_Pred
//   BranchWrongOff  low 8 bits of in_K, sign-extended to 16 bits
//
// The ALU itself (module `alu`) is defined elsewhere; the op encodings used
// below follow the per-opcode comments in this file.
module pipe2(in_insn, in_A, in_B, in_C, in_K, in_Pred,
             out_insn, out_Q, out_C,
             BranchWrong, BranchWrongOff, Branch);

  input[15:0] in_insn, in_A, in_B, in_C, in_K;
  input in_Pred;
  output[15:0] out_insn, out_Q, out_C;
  output BranchWrong;
  output[15:0] BranchWrongOff;
  output reg Branch;

  // stage 2: ALU/Execute

  // Values that simply travel down the pipeline untouched.
  assign out_insn = in_insn;
  assign out_C = in_C;

  // sign-extended immediate constants
  wire[15:0] K_sign8;
  wire[15:0] K_sign4;
  wire[15:0] K;
  assign K = in_K;
  assign K_sign8 = {{8{K[7]}}, K[7:0]};    // 8-bit immediate -> 16 bits
  assign K_sign4 = {{12{K[3]}}, K[3:0]};   // 4-bit immediate -> 16 bits

  // A misprediction is any disagreement between the actual branch outcome
  // and the prediction.
  assign BranchWrong = Branch ^ in_Pred;
  assign BranchWrongOff = K_sign8;

  // ALU instance
  reg[15:0] ALU_A, ALU_B;
  reg[2:0] ALU_op;
  wire[15:0] ALU_Q;
  wire ALU_Z, ALU_N;

  alu alu_0(.A(ALU_A),
            .B(ALU_B),
            .D(ALU_Q),
            .op(ALU_op),
            .Z(ALU_Z),
            .LT(ALU_N)
            );

  assign out_Q = ALU_Q;

  wire[3:0] op;
  assign op = in_insn[15:12];

  // ALU input muxes and branch-offset mux are implied here.
  //
  // @* keeps the sensitivity list complete (it previously omitted K_sign4 and
  // in_insn, so simulation could evaluate ldr/str with a stale offset).
  always @*
    begin
      // Defaults for every output of this block. Without them, opcodes that
      // do not drive the ALU (jmp and the undecoded opcodes 11, 12, 15) would
      // infer latches on ALU_A / ALU_B.
      Branch = 1'b0;
      ALU_A  = 16'h0000;
      ALU_B  = 16'h0000;
      ALU_op = 3'b000;

      case(op)
        4'd0: // load-at-immediate
          begin
            // compute address from the immediate
            ALU_A = in_K;
            ALU_B = 16'h0000;
            ALU_op = 3'b000; // pass-A
          end
        4'd1: // store-at-immediate
          begin
            // compute address from the immediate
            ALU_A = in_K;
            ALU_B = 16'h0000;
            ALU_op = 3'b000; // pass-A
          end
        4'd2: // add
          begin
            ALU_A = in_A;
            ALU_B = in_B;
            ALU_op = 3'b001; // add
          end
        4'd3: // load-constant
          begin
            ALU_A = in_K;
            ALU_B = 16'h0000;
            ALU_op = 3'b000; // pass-A
          end
        4'd4: // subtract
          begin
            // simulation-only trace of the operands and destination field
            $display("SUB: A = %4h, B = %4h, rC = %h", in_A, in_B, in_insn[11:8]);
            ALU_A = in_A;
            ALU_B = in_B;
            ALU_op = 3'b010; // sub
          end
        4'd5: // jmpz
          begin
            // pass in_C through the ALU so its Z flag reflects in_C
            ALU_A = in_C;
            ALU_B = 16'h0000;
            ALU_op = 3'b000; // pass-A
            Branch = ALU_Z;
          end
        4'd6: // jmpn
          begin
            // pass in_C through the ALU so its LT flag reflects in_C
            ALU_A = in_C;
            ALU_B = 16'h0000;
            ALU_op = 3'b000; // pass-A
            Branch = ALU_N;
          end
        4'd7: // jmp
          begin
            // unconditional; ALU inputs stay at their defaults
            Branch = 1'b1;
          end
        4'd8: // mul
          begin
            ALU_A = in_A;
            ALU_B = in_B;
            ALU_op = 3'b100; // mul
          end
        4'd9: // div
          begin
            ALU_A = in_A;
            ALU_B = in_B;
            ALU_op = 3'b101; // div
          end
        4'd10: // mod
          begin
            ALU_A = in_A;
            ALU_B = in_B;
            ALU_op = 3'b110; // mod
          end
        4'd13: // ldr
          begin
            // compute address: base register + sign-extended 4-bit offset
            ALU_A = in_A;
            ALU_B = K_sign4;
            ALU_op = 3'b001; // add
          end
        4'd14: // str
          begin
            // compute address: base register + sign-extended 4-bit offset
            ALU_A = in_A;
            ALU_B = K_sign4;
            ALU_op = 3'b001; // add
          end
        default: // opcodes 11, 12, 15 are not decoded here
          begin
            // keep the defaults assigned above
          end
      endcase
    end

endmodule
