`timescale 1ns / 1ps

// cpu: top level of a five-stage pipelined 16-bit processor.
//
// Pipeline stages (each one is a separate module instantiated below):
//   p0 fetch -> p1 regread -> p2 ALU -> p3 mem -> p4 writeback
// The stages are separated by preg16/preg1 pipeline registers which
// take per-boundary "stall" and "bubble" controls.
//
// Instruction fields, as decoded by the forwarding logic in this file:
//   [15:12] opcode
//   [11:8]  rC (source C for p2; destination register R for p3/p4)
//   [7:4]   rA
//   [3:0]   rB
//
// This module also contains:
//   - the program counter and its update rules,
//   - the stall/bubble control for the pipeline registers,
//   - the data-forwarding muxes in front of the ALU stage,
//   - simulation-only tick/instruction counters.
//
// Ports:
//   clk   - clock; all state here updates on the rising edge
//   reset - sampled on the rising clock edge; sets pc to 0
module cpu(clk, reset);
  input clk;
  input reset;

  // --- memory instances ---

  // instruction memory (address/data are owned by stage 0)
  wire[15:0] IM_addr;
  wire[15:0] IM_data;
  inst_mem IM(.clka(clk),
              .addra(IM_addr),
              .douta(IM_data)
              );

  // data memory (all ports are owned by stage 3)
  wire[15:0] DM_addr;
  wire[15:0] DM_data;
  wire[15:0] DM_wdata;
  wire DM_write;
  data_mem DM(.clka(clk),
              .addra(DM_addr),
              .douta(DM_data),
              .dina(DM_wdata),
              .wea(DM_write)
              );

  // register file
  wire[3:0] RF_A_addr, RF_B_addr, RF_C_addr; // read ports
  wire[15:0] RF_A_data, RF_B_data, RF_C_data;
  wire[3:0] RF_W_addr; // write port
  wire[15:0] RF_W_data;
  wire RF_W_write;
  wire branch_taken; // connected to p2's Branch port; not used in this module
  reg_file RF(.clk(clk),
              .A_addr(RF_A_addr),
              .B_addr(RF_B_addr),
              .A_data(RF_A_data),
              .B_data(RF_B_data),
              .C_addr(RF_C_addr),
              .C_data(RF_C_data),
              .W_addr(RF_W_addr),
              .W_data(RF_W_data),
              .W_write(RF_W_write)
              );

  // PC
  reg[15:0] pc;



  // --- PIPELINE ---

  // feedback to control logic coming from various stages
  wire BranchPred;
  wire[15:0] BranchPredOff;
  wire BranchWrong;
  wire[15:0] BranchWrongOff;

  // -- stage 0: fetch

  // in: pc
  // (reg defined above)
  // out: insn
  wire[15:0] pipe0_out_insn;

  pipe0 p0(.in_pc_addr(pc),
           .out_insn(pipe0_out_insn),
           // stage 0 owns IM
           .IM_addr(IM_addr),
           .IM_data(IM_data)
           );

  // -- stage 1: regread

  // in: insn
  wire[15:0] pipe1_in_insn;
  // out: insn, A, B, C, K, Pred
  wire[15:0] pipe1_out_insn, pipe1_out_A, pipe1_out_B,
             pipe1_out_C, pipe1_out_K;
  wire pipe1_out_Pred;

  // Stage 1's own branch-prediction request. Branch prediction is
  // stubbed out (BranchPred is tied to 0 further down), so this net is
  // deliberately left unused. It is kept separate from BranchPred so
  // that BranchPred has exactly one driver.
  // NOTE(review): assumes pipe1's BranchPred port is an output, as the
  // "to control logic" grouping suggests -- confirm against pipe1.
  wire pipe1_BranchPred;

  pipe1 p1(.in_insn(pipe1_in_insn),
           .out_insn(pipe1_out_insn),
           .out_A(pipe1_out_A),
           .out_B(pipe1_out_B),
           .out_C(pipe1_out_C),
           .out_K(pipe1_out_K),
           .out_Pred(pipe1_out_Pred),
           // stage 1 owns read-ports of RF
           .RF_A_addr(RF_A_addr),
           .RF_B_addr(RF_B_addr),
           .RF_A_data(RF_A_data),
           .RF_B_data(RF_B_data),
           .RF_C_addr(RF_C_addr),
           .RF_C_data(RF_C_data),
           // to control logic
           .BranchPred(pipe1_BranchPred),
           .BranchPredOff(BranchPredOff)
           );

  // -- stage 2: ALU

  // in: insn, A, B, C, K, Pred
  wire[15:0] pipe2_in_insn, pipe2_in_A, pipe2_in_B,
             pipe2_in_C, pipe2_in_K;
  wire pipe2_in_Pred;
  // out: insn, Q, C
  wire[15:0] pipe2_out_insn, pipe2_out_Q, pipe2_out_C;

  // data forwarding muxes
  // (reg so that always block below can assign)
  reg[1:0] fwd_Asrc, fwd_Bsrc, fwd_Csrc; // 00=normal, 01=p3_in,
                                         // 10=p4_in

  wire[3:0] fwd_p2_op, fwd_p3_op, fwd_p4_op;
  wire[3:0] fwd_p2_rA, fwd_p2_rB, fwd_p2_rC, fwd_p3_R, fwd_p4_R;
  wire fwd_needA, fwd_needB, fwd_needC;
  wire fwd_p3_have, fwd_p3_stall;
  wire fwd_p4_have;
  wire[15:0] fwd_p3_data, fwd_p4_data;

  wire[15:0] pipe2_in_A_mux, pipe2_in_B_mux, pipe2_in_C_mux;

  // Each operand mux picks the registered operand (00), the value at
  // the input of p3 (01), or the value at the input of p4 (any other
  // select value; the control logic only ever produces 10).
  assign pipe2_in_A_mux = (fwd_Asrc == 2'b00) ?
                          pipe2_in_A :
                          (fwd_Asrc == 2'b01 ?
                           fwd_p3_data :
                           fwd_p4_data);

  assign pipe2_in_B_mux = (fwd_Bsrc == 2'b00) ?
                          pipe2_in_B :
                          (fwd_Bsrc == 2'b01 ?
                           fwd_p3_data :
                           fwd_p4_data);

  assign pipe2_in_C_mux = (fwd_Csrc == 2'b00) ?
                          pipe2_in_C :
                          (fwd_Csrc == 2'b01 ?
                           fwd_p3_data :
                           fwd_p4_data);


  pipe2 p2(.in_insn(pipe2_in_insn),
           .in_A(pipe2_in_A_mux),
           .in_B(pipe2_in_B_mux),
           .in_C(pipe2_in_C_mux),
           .in_K(pipe2_in_K),
           .in_Pred(pipe2_in_Pred),
           .out_insn(pipe2_out_insn),
           .out_Q(pipe2_out_Q),
           .out_C(pipe2_out_C),
           // to control logic
           .BranchWrong(BranchWrong),
           .BranchWrongOff(BranchWrongOff),
           .Branch(branch_taken)
           );

  // -- stage 3: mem

  // in: insn, Q, C
  wire[15:0] pipe3_in_insn, pipe3_in_Q, pipe3_in_C;
  // out: insn, Q
  wire[15:0] pipe3_out_insn, pipe3_out_Q;
  // out to data forwarding
  assign fwd_p3_data = pipe3_in_Q;

  pipe3 p3(.in_insn(pipe3_in_insn),
           .in_Q(pipe3_in_Q),
           .in_C(pipe3_in_C),
           .out_insn(pipe3_out_insn),
           .out_Q(pipe3_out_Q),
           // stage 3 owns DM
           .DM_addr(DM_addr),
           .DM_data(DM_data),
           .DM_wdata(DM_wdata),
           .DM_write(DM_write)
           );

  // -- stage 4: writeback

  // in: insn, Q
  wire[15:0] pipe4_in_insn, pipe4_in_Q;
  // out to data forwarding
  assign fwd_p4_data = pipe4_in_Q;

  pipe4 p4(.in_insn(pipe4_in_insn),
           .in_Q(pipe4_in_Q),
           // stage 4 owns RF's write port
           .RF_W_addr(RF_W_addr),
           .RF_W_data(RF_W_data),
           .RF_W_write(RF_W_write)
           );


  // --- PIPELINE REGISTERS ---
  parameter nop_insn = 16'hF000; // used for pipeline bubbles

  // pipeN_stall / pipeN_bubble control the registers that sit between
  // stage N and stage N+1. The instruction register of each boundary is
  // given nop_insn as its "nop" value; the data registers are given 0.
  wire pipe0_stall, pipe0_bubble;
  wire pipe1_stall, pipe1_bubble;
  wire pipe2_stall, pipe2_bubble;
  wire pipe3_stall, pipe3_bubble;

  // stage 0 -> stage 1

  preg16 p0_1(.clk(clk),
              .reset(reset),
              .in(pipe0_out_insn),
              .out(pipe1_in_insn),
              .stall(pipe0_stall),
              .bubble(pipe0_bubble),
              .nop(nop_insn)
              );

  // stage 1 -> stage 2

  preg16 p1_1(.clk(clk),
              .reset(reset),
              .in(pipe1_out_insn),
              .out(pipe2_in_insn),
              .stall(pipe1_stall),
              .bubble(pipe1_bubble),
              .nop(nop_insn)
              );
  preg16 p1_2(.clk(clk),
              .reset(reset),
              .in(pipe1_out_A),
              .out(pipe2_in_A),
              .stall(pipe1_stall),
              .bubble(pipe1_bubble),
              .nop(16'h0000)
              );
  preg16 p1_3(.clk(clk),
              .reset(reset),
              .in(pipe1_out_B),
              .out(pipe2_in_B),
              .stall(pipe1_stall),
              .bubble(pipe1_bubble),
              .nop(16'h0000)
              );
  preg16 p1_4(.clk(clk),
              .reset(reset),
              .in(pipe1_out_C),
              .out(pipe2_in_C),
              .stall(pipe1_stall),
              .bubble(pipe1_bubble),
              .nop(16'h0000)
              );
  preg16 p1_5(.clk(clk),
              .reset(reset),
              .in(pipe1_out_K),
              .out(pipe2_in_K),
              .stall(pipe1_stall),
              .bubble(pipe1_bubble),
              .nop(16'h0000)
              );
  preg1 p1_6(.clk(clk),
             .reset(reset),
             .in(pipe1_out_Pred),
             .out(pipe2_in_Pred),
             .stall(pipe1_stall),
             .bubble(pipe1_bubble),
             .nop(1'b0)
             );

  // stage 2 -> stage 3

  preg16 p2_1(.clk(clk),
              .reset(reset),
              .in(pipe2_out_insn),
              .out(pipe3_in_insn),
              .stall(pipe2_stall),
              .bubble(pipe2_bubble),
              .nop(nop_insn)
              );
  preg16 p2_2(.clk(clk),
              .reset(reset),
              .in(pipe2_out_Q),
              .out(pipe3_in_Q),
              .stall(pipe2_stall),
              .bubble(pipe2_bubble),
              .nop(16'h0000)
              );
  preg16 p2_3(.clk(clk),
              .reset(reset),
              .in(pipe2_out_C),
              .out(pipe3_in_C),
              .stall(pipe2_stall),
              .bubble(pipe2_bubble),
              .nop(16'h0000)
              );

  // stage 3 -> stage 4

  preg16 p3_1(.clk(clk),
              .reset(reset),
              .in(pipe3_out_insn),
              .out(pipe4_in_insn),
              .stall(pipe3_stall),
              .bubble(pipe3_bubble),
              .nop(nop_insn)
              );
  preg16 p3_2(.clk(clk),
              .reset(reset),
              .in(pipe3_out_Q),
              .out(pipe4_in_Q),
              .stall(pipe3_stall),
              .bubble(pipe3_bubble),
              .nop(16'h0000)
              );

  // Branch prediction is stubbed out: the control logic never sees a
  // predicted branch. This assign is the only driver of BranchPred.
  assign BranchPred = 1'b0;

  // --- CONTROL ---

  // PC management

  // we have: BranchPred, BranchPredOff, BranchWrong, BranchWrongOff

  // carry PC through stage 2 as well
  wire[15:0] pc_stage1, pc_stage2;
  preg16 pc_1(.clk(clk),
              .reset(reset),
              .in(pc),
              .out(pc_stage1),
              .stall(pipe0_stall),
              .bubble(pipe0_bubble),
              .nop(16'h0000)
              );
  preg16 pc_2(.clk(clk),
              .reset(reset),
              .in(pc_stage1),
              .out(pc_stage2),
              .stall(pipe1_stall),
              .bubble(pipe1_bubble),
              .nop(16'h0000)
              );

  // PC register. Priority of updates, highest first:
  //   1. reset                -> 0
  //   2. BranchWrong          -> redirect relative to the PC carried to p2
  //   3. BranchPred           -> redirect relative to the PC carried to p1
  //   4. no stall, no bubble  -> pc + 1
  //   5. otherwise            -> hold
  //
  // Non-blocking assignments are used so that other logic clocked on the
  // same edge (pc_1 above, and whatever samples the fetch address) sees
  // the old pc value regardless of simulator process ordering.
  always @(posedge clk)
    begin
      if(reset)
        pc <= 16'h0000;
      else
        begin
          // per-cycle trace (simulation only)
          $display("---");
          $display("PC is %4h", pc);
          $display("p1: insn = %4h", pipe1_in_insn);
          $display("p2: insn = %4h", pipe2_in_insn);
          $display("p3: insn = %4h", pipe3_in_insn);
          $display("p4: insn = %4h", pipe4_in_insn);

          if(BranchWrong)
            // could be wrong two ways: branch predicted but not taken
            // (resume at the insn after the branch), or branch not
            // predicted but actually taken (apply the real offset)
            pc <= pc_stage2 + (pipe2_in_Pred ? 16'd1 : BranchWrongOff);
          else if(BranchPred)
            pc <= pc_stage1 + BranchPredOff;
          else if(!pipe0_stall && !pipe0_bubble)
            pc <= pc + 16'd1;
          // else: pc holds its value
        end
    end

  // STALL/BUBBLE LOGIC

  reg HazardStall; // ALU stall -- bubble after p2, stall before it

  // on BranchPred or BranchWrong, bubble after p0
  assign pipe0_bubble = BranchPred || BranchWrong;
  assign pipe0_stall = HazardStall;

  // on BranchWrong, bubble after p1
  assign pipe1_bubble = BranchWrong;

  assign pipe1_stall = HazardStall;
  assign pipe2_bubble = HazardStall;
  assign pipe2_stall = 1'b0;
  assign pipe3_bubble = 1'b0;
  assign pipe3_stall = 1'b0;

  // DATA FORWARDING

  // we derive the signals below from insns in p2, p3, p4
  // we output HazardStall and fwd_{A,B,C}src
  // src selects: 2'b00 is normal, 2'b01 is p3_in, 2'b10 is p4_in

  // opcode of the instruction at the input of each stage
  assign fwd_p2_op = pipe2_in_insn[15:12];
  assign fwd_p3_op = pipe3_in_insn[15:12];
  assign fwd_p4_op = pipe4_in_insn[15:12];

  // source register numbers of the instruction entering p2
  assign fwd_p2_rA = pipe2_in_insn[7:4];
  assign fwd_p2_rB = pipe2_in_insn[3:0];
  assign fwd_p2_rC = pipe2_in_insn[11:8];

  // destination register numbers of the instructions entering p3, p4
  assign fwd_p3_R = pipe3_in_insn[11:8];
  assign fwd_p4_R = pipe4_in_insn[11:8];

  // need* are instructions that need A,B,C in p2 (ALU stage)

  assign fwd_needA = fwd_p2_op == 4'd2  || // ADD
                     fwd_p2_op == 4'd4  || // SUB
                     fwd_p2_op == 4'd8  || // MUL
                     fwd_p2_op == 4'd9  || // DIV
                     fwd_p2_op == 4'd10;   // MOD

  assign fwd_needB = fwd_p2_op == 4'd2  || // ADD
                     fwd_p2_op == 4'd4  || // SUB
                     fwd_p2_op == 4'd8  || // MUL
                     fwd_p2_op == 4'd9  || // DIV
                     fwd_p2_op == 4'd10 || // MOD
                     fwd_p2_op == 4'd13 || // LDR
                     fwd_p2_op == 4'd14;   // STR

  assign fwd_needC = fwd_p2_op == 4'd1  || // ST
                     fwd_p2_op == 4'd5  || // JMPZ
                     fwd_p2_op == 4'd6  || // JMPN
                     fwd_p2_op == 4'd14;   // STR

  // p3_have are instructions that have Q ready *before* or *after* p3 (mem)
  // (p3_stall below differentiates the insns that need a stall -- ie, ld)
  assign fwd_p3_have = fwd_p3_op == 4'd0  || // LD
                       fwd_p3_op == 4'd2  || // ADD
                       fwd_p3_op == 4'd3  || // LDC
                       fwd_p3_op == 4'd4  || // SUB
                       fwd_p3_op == 4'd8  || // MUL
                       fwd_p3_op == 4'd9  || // DIV
                       fwd_p3_op == 4'd10 || // MOD
                       fwd_p3_op == 4'd13;   // LDR

  // p3_stall are instructions that don't have Q ready until *after* p3
  assign fwd_p3_stall = fwd_p3_op == 4'd0  || // LD
                        fwd_p3_op == 4'd13;   // LDR

  // p4_have are instructions that have Q ready *before* p4 (writeback)
  // these are *all* insns that write back to regfile
  assign fwd_p4_have = fwd_p4_op == 4'd0  || // LD
                       fwd_p4_op == 4'd2  || // ADD
                       fwd_p4_op == 4'd3  || // LDC
                       fwd_p4_op == 4'd4  || // SUB
                       fwd_p4_op == 4'd8  || // MUL
                       fwd_p4_op == 4'd9  || // DIV
                       fwd_p4_op == 4'd10 || // MOD
                       fwd_p4_op == 4'd13;   // LDR

  // now the fun part! assigning muxes and stalls based on input above.
  // This is the Magic Box(tm)
  //
  // Combinational: every output is given a default at the top of the
  // block, so no latches are inferred.
  always @(fwd_p3_have or fwd_p3_stall or fwd_p4_have or
           fwd_needA or fwd_needB or fwd_needC or
           fwd_p2_rA or fwd_p2_rB or fwd_p2_rC or
           fwd_p3_R or fwd_p4_R)
    begin
      HazardStall = 1'b0;
      fwd_Asrc = 2'b00;
      fwd_Bsrc = 2'b00;
      fwd_Csrc = 2'b00;

      // precedence:
      // p3 first, then p4 (later result preferred over earlier)

      // also, NB:
      // load stall always overrides others
      // (load's second cycle is handled naturally when the load
      // reaches p4 with the loaded data; nothing special needs to be
      // done except stall the pipeline when a load with a needed
      // result is in p3.)

      // handle inputs one at a time.

      if(fwd_needA)
        begin
          if(fwd_p3_have && (fwd_p3_R == fwd_p2_rA))
            begin
              if(fwd_p3_stall)
                HazardStall = 1'b1;
              else
                fwd_Asrc = 2'b01; // p3
            end
          else if(fwd_p4_have && (fwd_p4_R == fwd_p2_rA))
            fwd_Asrc = 2'b10; // p4
        end

      if(fwd_needB)
        begin
          if(fwd_p3_have && (fwd_p3_R == fwd_p2_rB))
            begin
              if(fwd_p3_stall)
                HazardStall = 1'b1;
              else
                fwd_Bsrc = 2'b01; // p3
            end
          else if(fwd_p4_have && (fwd_p4_R == fwd_p2_rB))
            fwd_Bsrc = 2'b10; // p4
        end

      if(fwd_needC)
        begin
          if(fwd_p3_have && (fwd_p3_R == fwd_p2_rC))
            begin
              if(fwd_p3_stall)
                HazardStall = 1'b1;
              else
                fwd_Csrc = 2'b01; // p3
            end
          else if(fwd_p4_have && (fwd_p4_R == fwd_p2_rC))
            fwd_Csrc = 2'b10; // p4
        end
    end

  // --- INSTRUMENTATION ---
  // Simulation-only counters: clock ticks, and instructions reaching
  // the writeback stage whose opcode is not 4'hF (the opcode of
  // nop_insn, i.e. pipeline bubbles are not counted).
  reg[31:0] count_tick;
  reg[31:0] count_insn;

  initial count_tick = 0;
  initial count_insn = 0;

  always @(posedge clk)
    begin
      count_tick <= count_tick + 1;
      if(pipe4_in_insn[15:12] != 4'hF)
        count_insn <= count_insn + 1;

      // prints the values from before this edge's increments
      $display("Ticks: %d ; Instructions: %d", count_tick, count_insn);
    end

endmodule
