10 files changed, 231 insertions, 54 deletions
diff --git a/instructions.md b/instructions.md
index 87d4dd5..fbd5559 100644
--- a/instructions.md
+++ b/instructions.md
@@ -33,20 +33,20 @@ CPU Arithmetic Instruction
 
 CPU Branch and Jump Instructions
 --------------------------------
-* [ ] B
+* [x] B
 * [ ] BAL
-* [ ] BEQ
-* [ ] BGEZ
+* [x] BEQ
+* [x] BGEZ
 * [ ] BGEZAL
-* [ ] BGTZ
-* [ ] BLEZ
-* [ ] BLTZ
+* [x] BGTZ
+* [x] BLEZ
+* [x] BLTZ
 * [ ] BLTZAL
-* [ ] BNE
-* [ ] J
+* [x] BNE
+* [x] J
 * [ ] JAL
 * [ ] JALR
-* [ ] JR
+* [x] JR
 
 CPU No Instructions
 -------------------
diff --git a/qtmips_machine/alu.cpp b/qtmips_machine/alu.cpp
index 0350412..73c49e4 100644
--- a/qtmips_machine/alu.cpp
+++ b/qtmips_machine/alu.cpp
@@ -17,6 +17,10 @@ std::uint32_t alu_operate(enum AluOp operation, std::uint32_t s, std::uint32_t t
         case ALU_OP_SRAV:
             // TODO is this correct implementation? (Should we be masking top most bit?)
             return ((t & 0x7fffffff) >> s) | (t & 0x80000000);
+        case ALU_OP_JR:
+        case ALU_OP_JALR:
+            // Do nothing as we solve this when we are handling program counter in instruction decode (handle_pc)
+            return 0;
         case ALU_OP_MOVZ:
             // We do this just to implement valid alu operation but we have to evaluate comparison way before this to disable register write
             return t == 0 ? s : 0;
diff --git a/qtmips_machine/alu.h b/qtmips_machine/alu.h
index 4818bb1..95042f4 100644
--- a/qtmips_machine/alu.h
+++ b/qtmips_machine/alu.h
@@ -14,6 +14,8 @@ enum AluOp : std::uint8_t {
     ALU_OP_SLLV,
     ALU_OP_SRLV = 6,
     ALU_OP_SRAV,
+    ALU_OP_JR,
+    ALU_OP_JALR,
     ALU_OP_MOVZ = 10,
     ALU_OP_MOVN,
     ALU_OP_MFHI = 16,
diff --git a/qtmips_machine/core.cpp b/qtmips_machine/core.cpp
index 20c1b74..93b83dd 100644
--- a/qtmips_machine/core.cpp
+++ b/qtmips_machine/core.cpp
@@ -7,7 +7,6 @@
 #define DM_ALUSRC (1L<<3)
 #define DM_REGD (1L<<4)
 #define DM_REGWRITE (1L<<5)
-#define DM_BRANCH (1L<<6)
 
  struct DecodeMap {
     long flags;
@@ -16,20 +15,21 @@
 
 
 // This is temporally operation place holder
-#define NOPE { .flags = 0, .alu = ALU_OP_SLL }
+#define NOALU .alu = ALU_OP_SLL
+#define NOPE { .flags = 0, NOALU }
 
 #define FLAGS_ALU_I (DM_SUPPORTED | DM_ALUSRC | DM_REGWRITE)
 
 // This is map from opcode to signals.
 static const struct DecodeMap dmap[]  = {
-    { .flags = DM_SUPPORTED | DM_REGD | DM_REGWRITE, .alu = ALU_OP_SLL }, // Alu operations
-    NOPE, // Branch on alu operations
-    NOPE, // J
+    { .flags = DM_SUPPORTED | DM_REGD | DM_REGWRITE, NOALU }, // Alu operations (aluop is decoded from function explicitly)
+    { .flags = DM_SUPPORTED, NOALU }, // REGIMM (BLTZ, BGEZ)
+    { .flags = DM_SUPPORTED, NOALU }, // J
     NOPE, // JAL
-    NOPE, // BEQ
-    NOPE, // BNE
-    NOPE, // BLEZ
-    NOPE, // BGTZ
+    { .flags = DM_SUPPORTED, NOALU }, // BEQ
+    { .flags = DM_SUPPORTED, NOALU }, // BNE
+    { .flags = DM_SUPPORTED, NOALU }, // BLEZ
+    { .flags = DM_SUPPORTED, NOALU }, // BGTZ
     { .flags = FLAGS_ALU_I, .alu = ALU_OP_ADD }, // ADDI
     { .flags = FLAGS_ALU_I, .alu = ALU_OP_ADDU }, // ADDIU
     { .flags = FLAGS_ALU_I, .alu = ALU_OP_SLT }, // SLTI
@@ -96,7 +96,6 @@ Core::Core(Registers *regs, MemoryAccess *mem) {
 struct Core::dtFetch Core::fetch() {
     // TODO signals
     Instruction inst(mem->read_word(regs->read_pc()));
-    regs->pc_inc();
     return {
         .inst = inst
     };
@@ -104,10 +103,14 @@ struct Core::dtFetch Core::fetch() {
 
 struct Core::dtDecode Core::decode(struct dtFetch dt) {
     struct DecodeMap dec = dmap[dt.inst.opcode()];
-    if (!dec.flags & DM_SUPPORTED)
+    if (!(dec.flags & DM_SUPPORTED))
         // TODO message
         throw QTMIPS_EXCEPTION(UnsupportedInstruction, "", "");
 
+    // TODO solve forwarding somehow in here
+    std::uint32_t rs = regs->read_gp(dt.inst.rs());
+    std::uint32_t rt = regs->read_gp(dt.inst.rt());
+
     return {
         .inst = dt.inst,
         .mem2reg = dec.flags & DM_MEM2REG,
@@ -115,10 +118,9 @@ struct Core::dtDecode Core::decode(struct dtFetch dt) {
         .alusrc = dec.flags & DM_ALUSRC,
         .regd = dec.flags & DM_REGD,
         .regwrite = dec.flags & DM_REGWRITE,
-        .branch = dec.flags & DM_BRANCH,
         .aluop = dt.inst.opcode() == 0 ? (enum AluOp)dt.inst.funct() : dec.alu,
-        .val_rs = regs->read_gp(dt.inst.rs()),
-        .val_rt = regs->read_gp(dt.inst.rt()),
+        .val_rs = rs,
+        .val_rt = rt,
     };
     // TODO on jump there should be delay slot. Does processor addes it or compiler. And do we care?
 }
@@ -154,9 +156,88 @@ void Core::writeback(struct dtMemory dt) {
     }
 }
 
+// Move the program counter according to the decoded instruction in dt: JR/JALR and J/JAL jump
+// absolutely, a taken branch jumps relatively and anything else just steps to the next word.
+void Core::handle_pc(struct dtDecode dt) {
+    // TODO signals
+    bool branch = false;
+    bool link = false; // TODO implement link
+    switch (dt.inst.opcode()) {
+    case 0: // JR (JALR)
+        if (dt.inst.funct() == ALU_OP_JR || dt.inst.funct() == ALU_OP_JALR) {
+            regs->pc_abs_jmp(dt.val_rs);
+            return;
+        }
+        break;
+    case 1: // REGIMM instruction
+        // Switch on (rt & 0xF) once linking is supported; until then the linking variants are rejected
+        switch (dt.inst.rt()) {
+        case 0: // BLTZ(AL)
+            branch = (std::int32_t)dt.val_rs < 0;
+            break;
+        case 1: // BGEZ(AL)
+            branch = (std::int32_t)dt.val_rs >= 0;
+            break;
+        default:
+            throw QTMIPS_EXCEPTION(UnsupportedInstruction, "REGIMM instruction with unknown rt code", QString::number(dt.inst.rt(), 16));
+        }
+        link = dt.inst.rt() & 0x10; // The link bit of REGIMM lives in rt (rs is the compared register)
+        break;
+    case 2: // J
+    case 3: // JAL
+        regs->pc_abs_jmp_28(dt.inst.address() << 2);
+        return;
+    case 4: // BEQ
+        branch = dt.val_rs == dt.val_rt;
+        break;
+    case 5: // BNE
+        branch = dt.val_rs != dt.val_rt;
+        break;
+    case 6: // BLEZ
+        branch = (std::int32_t)dt.val_rs <= 0;
+        break;
+    case 7: // BGTZ
+        branch = (std::int32_t)dt.val_rs > 0;
+        break;
+    }
+
+    if (branch) // Immediate is a two's complement word offset: sign extend it, then scale to bytes
+        regs->pc_jmp(static_cast<std::int32_t>(static_cast<std::int16_t>(dt.inst.immediate())) * 4);
+    else
+        regs->pc_inc();
+}
+
+void Core::dtFetchInit(struct dtFetch &dt) { // Reset fetch stage data to an all-zero instruction word
+    dt.inst = Instruction(0x00);
+}
+
+void Core::dtDecodeInit(struct dtDecode &dt) { // Reset decode stage data: zero instruction, every control flag off
+    dt.inst = Instruction(0x00);
+    dt.mem2reg = false;
+    dt.memwrite = false;
+    dt.alusrc = false;
+    dt.regd = false;
+    dt.regwrite = false; // No register is written back for this placeholder
+    dt.aluop = ALU_OP_SLL; // Same ALU operation the NOALU placeholder uses
+    dt.val_rs = 0;
+    dt.val_rt = 0;
+}
+
+void Core::dtExecuteInit(struct dtExecute &dt) { // Reset execute stage data: write back disabled, zero ALU value
+    dt.regwrite = false;
+    dt.rwrite = false;
+    dt.alu_val = 0;
+}
+
+void Core::dtMemoryInit(struct dtMemory &dt) { // Reset memory stage data: write back disabled, zero ALU value
+    dt.regwrite = false;
+    dt.rwrite = false;
+    dt.alu_val = 0;
+}
+
 CoreSingle::CoreSingle(Registers *regs, MemoryAccess *mem) : \
     Core(regs, mem) {
-    // Nothing to do
+    dtDecodeInit(jmp_delay_decode);
 }
 
 void CoreSingle::step() {
@@ -165,32 +246,16 @@ void CoreSingle::step() {
     struct dtExecute e = execute(d);
     struct dtMemory m = memory(e);
     writeback(m);
+    handle_pc(jmp_delay_decode);
+    jmp_delay_decode = d; // Copy current decode
 }
 
 CorePipelined::CorePipelined(Registers *regs, MemoryAccess *mem) : \
     Core(regs, mem) {
-    // Initialize to NOPE //
-    // dtFetch
-    dt_f.inst = Instruction(0x00);
-    // dtDecode
-    dt_d.inst = dt_f.inst;
-    dt_d.mem2reg = false;
-    dt_d.memwrite = false;
-    dt_d.alusrc = false;
-    dt_d.regd = false;
-    dt_d.regwrite = false;
-    dt_d.branch = false;
-    dt_d.aluop = ALU_OP_SLL;
-    dt_d.val_rs = 0;
-    dt_d.val_rt = 0;
-    // dtExecute
-    dt_e.regwrite = dt_d.regwrite;
-    dt_e.rwrite = dt_d.regwrite;
-    dt_e.alu_val = 0;
-    // dtMemory
-    dt_m.regwrite = dt_e.regwrite;
-    dt_m.rwrite = dt_e.rwrite;
-    dt_m.alu_val = dt_e.alu_val;
+    dtFetchInit(dt_f);
+    dtDecodeInit(dt_d);
+    dtExecuteInit(dt_e);
+    dtMemoryInit(dt_m);
 }
 
 void CorePipelined::step() {
@@ -200,4 +265,5 @@ void CorePipelined::step() {
     dt_e = execute(dt_d);
     dt_d = decode(dt_f);
     dt_f = fetch();
+    handle_pc(dt_d);
 }
diff --git a/qtmips_machine/core.h b/qtmips_machine/core.h
index b566a38..6366088 100644
--- a/qtmips_machine/core.h
+++ b/qtmips_machine/core.h
@@ -31,7 +31,6 @@ protected:
         bool alusrc; // If second value to alu is immediate value (rt used otherwise)
         bool regd; // If rd is used (otherwise rt is used for write target)
         bool regwrite; // If output should be written back to register (which one depends on regd)
-        bool branch; // If this is branch instruction
         enum AluOp aluop; // Decoded ALU operation
         std::uint32_t val_rs; // Value from register rs
         std::uint32_t val_rt; // Value from register rt
@@ -54,7 +53,13 @@ protected:
     struct dtExecute execute(struct dtDecode);
     struct dtMemory memory(struct dtExecute);
     void writeback(struct dtMemory);
+    void handle_pc(struct dtDecode);
 
+    // Initialize structures to NOPE instruction
+    void dtFetchInit(struct dtFetch &dt);
+    void dtDecodeInit(struct dtDecode &dt);
+    void dtExecuteInit(struct dtExecute &dt);
+    void dtMemoryInit(struct dtMemory &dt);
 };
 
 class CoreSingle : public Core {
@@ -62,6 +67,9 @@ public:
     CoreSingle(Registers *regs, MemoryAccess *mem);
 
     void step();
+
+private:
+    struct Core::dtDecode jmp_delay_decode;
 };
 
 class CorePipelined : public Core {
diff --git a/qtmips_machine/memory.cpp b/qtmips_machine/memory.cpp
index ef4c7e2..282a9e9 100644
--- a/qtmips_machine/memory.cpp
+++ b/qtmips_machine/memory.cpp
@@ -240,8 +240,9 @@ union MemoryTree *Memory::copy_section_tree(const union MemoryTree *mt, size_t d
     union MemoryTree *nmt = allocate_section_tree();
     if (depth < (MEMORY_TREE_H - 1))  { // Following level is memory tree
         for (int i = 0; i < MEMORY_TREE_LEN; i++) {
-            if (mt[i].mt != nullptr)
+            if (mt[i].mt != nullptr) {
                 nmt[i].mt = copy_section_tree(mt[i].mt, depth + 1);
+            }
         }
     } else { // Following level is memory section
         for (int i = 0; i < MEMORY_TREE_LEN; i++) {
diff --git a/qtmips_machine/registers.cpp b/qtmips_machine/registers.cpp
index 5bb852e..a18421e 100644
--- a/qtmips_machine/registers.cpp
+++ b/qtmips_machine/registers.cpp
@@ -46,6 +46,10 @@ void Registers::pc_abs_jmp(std::uint32_t address) {
     this->pc = address;
 }
 
+void Registers::pc_abs_jmp_28(std::uint32_t address) { // Top four bits come from current pc, low 28 bits from address
+    pc_abs_jmp((address & 0x0FFFFFFF) | (this->pc & 0xF0000000));
+}
+
 std::uint32_t Registers::read_gp(std::uint8_t i) const {
     SANITY_ASSERT(i < 32, QString("Trying to read from register ") + QString(i));
     if (!i) // $0 always reads as 0
diff --git a/qtmips_machine/registers.h b/qtmips_machine/registers.h
index 387b59d..dd7e393 100644
--- a/qtmips_machine/registers.h
+++ b/qtmips_machine/registers.h
@@ -14,6 +14,7 @@ public:
     std::uint32_t pc_inc(); // Increment program counter by four bytes
     std::uint32_t pc_jmp(std::int32_t offset); // Relative jump from current location in program counter
     void pc_abs_jmp(std::uint32_t address); // Absolute jump in program counter (write to pc)
+    void pc_abs_jmp_28(std::uint32_t address); // Absolute jump in current 256MB section (basically J implementation)
 
     std::uint32_t read_gp(std::uint8_t i) const; // Read general-purpose register
     void write_gp(std::uint8_t i, std::uint32_t value); // Write general-purpose register
diff --git a/qtmips_machine/tests/testcore.cpp b/qtmips_machine/tests/testcore.cpp
index 27a49b4..593483a 100644
--- a/qtmips_machine/tests/testcore.cpp
+++ b/qtmips_machine/tests/testcore.cpp
@@ -175,9 +175,6 @@ void MachineTests::pipecore_regs_data() {
     core_regs_data();
 }
 
-#include <iostream>
-using namespace std;
-
 void MachineTests::singlecore_regs() {
     QFETCH(Instruction, i);
     QFETCH(Registers, init);
@@ -208,16 +205,106 @@ void MachineTests::pipecore_regs() {
     res.pc_jmp(0x14);
 
     CorePipelined core(&init, &mem_used);
-    for (int i = 0; i < 4; i++) {
+    for (int i = 0; i < 5; i++)
         core.step(); // Fire steps for five pipelines stages
-    }
-    core.step();
 
     //cout << "well:" << init.read_gp(26) << ":" << regs_used.read_gp(26) << endl;
     QCOMPARE(init, res); // After doing changes from initial state this should be same state as in case of passed expected result
     QCOMPARE(mem, mem_used); // There should be no change in memory
 }
 
+static void core_jmp_data() { // Shared data rows for the single cycle and pipelined jump tests
+    QTest::addColumn<Instruction>("i"); // Instruction under test
+    QTest::addColumn<Registers>("regs"); // Register state the core starts with
+    QTest::addColumn<std::uint32_t>("pc"); // Program counter expected once the jump is taken
+
+    Registers regs;
+    regs.write_gp(14, -22); // Negative value
+    regs.write_gp(15, 22); // Positive value
+    regs.write_gp(16, -22); // Same value as register 14
+    QTest::newRow("B") << Instruction(4, 0, 0, 61) \
+                         << regs \
+                         << regs.read_pc() + 4 + (61 << 2);
+    QTest::newRow("BEQ") << Instruction(4, 14, 16, 61) \
+                         << regs \
+                         << regs.read_pc() + 4 + (61 << 2);
+    QTest::newRow("BNE") << Instruction(5, 14, 15, 61) \
+                         << regs \
+                         << regs.read_pc() + 4 + (61 << 2);
+    QTest::newRow("BGEZ") << Instruction(1, 15, 1, 61) \
+                         << regs \
+                         << regs.read_pc() + 4 + (61 << 2);
+    QTest::newRow("BGTZ") << Instruction(7, 15, 0, 61) \
+                         << regs \
+                         << regs.read_pc() + 4 + (61 << 2);
+    QTest::newRow("BLEZ") << Instruction(6, 14, 0, 61) \
+                         << regs \
+                         << regs.read_pc() + 4 + (61 << 2);
+    QTest::newRow("BLTZ") << Instruction(1, 14, 0, 61) \
+                         << regs \
+                         << regs.read_pc() + 4 + (61 << 2);
+    QTest::newRow("J") << Instruction(2, 24) \
+                         << regs \
+                         << 0x80000000 + (24 << 2);
+    /*
+    QTest::newRow("JR") << Instruction(1, 15, 0, 61) \
+                         << regs \
+                         << regs.read_pc() + (24 << 2);
+                         */
+}
+
+void MachineTests::singlecore_jmp_data() {
+    core_jmp_data(); // Rows are shared with the pipelined core variant
+}
+
+void MachineTests::pipecore_jmp_data() {
+    core_jmp_data(); // Rows are shared with the single cycle core variant
+}
+
+void MachineTests::singlecore_jmp() { // Check program counter handling of one jump/branch on CoreSingle
+    QFETCH(Instruction, i);
+    QFETCH(Registers, regs);
+    QFETCH(std::uint32_t, pc);
+
+    Memory mem;
+    mem.write_word(regs.read_pc(), i.data()); // Place the tested instruction where pc points
+    Memory mem_used(mem); // Core works on copies so mem and regs stay as reference
+    Registers regs_used(regs);
+
+    CoreSingle core(&regs_used, &mem_used);
+    core.step();
+    QCOMPARE(regs.read_pc() + 4, regs_used.read_pc()); // First execute delay slot
+    core.step();
+    QCOMPARE(pc, regs_used.read_pc()); // Now do jump
+
+    QCOMPARE(mem, mem_used); // There should be no change in memory
+    regs_used.pc_abs_jmp(regs.read_pc()); // Reset program counter before we do registers compare
+    QCOMPARE(regs, regs_used); // There should be no change in registers now
+}
+
+void MachineTests::pipecore_jmp() { // Check program counter handling of one jump/branch on CorePipelined
+    QFETCH(Instruction, i);
+    QFETCH(Registers, regs);
+    QFETCH(std::uint32_t, pc);
+
+    Memory mem;
+    mem.write_word(regs.read_pc(), i.data()); // Place the tested instruction where pc points
+    Memory mem_used(mem); // Core works on copies so mem and regs stay as reference
+    Registers regs_used(regs);
+
+    CorePipelined core(&regs_used, &mem_used);
+    core.step();
+    QCOMPARE(regs.read_pc() + 4, regs_used.read_pc()); // First just fetch
+    core.step();
+    QCOMPARE(pc, regs_used.read_pc()); // Now do jump
+    for (int i = 0; i < 3; i++)
+        core.step(); // Follow up with three other steps to complete pipeline to be sure that instruction has no side effects
+
+    QCOMPARE(mem, mem_used); // There should be no change in memory
+    regs.pc_abs_jmp(pc + 12); // Set reference pc to three more instructions later (where regs_used should be)
+    QCOMPARE(regs, regs_used); // There should be no change in registers now (except pc)
+}
+
 void MachineTests::core_mem_data() {
 
 }
diff --git a/qtmips_machine/tests/tst_machine.h b/qtmips_machine/tests/tst_machine.h
index 3feaddd..9b33e68 100644
--- a/qtmips_machine/tests/tst_machine.h
+++ b/qtmips_machine/tests/tst_machine.h
@@ -34,6 +34,10 @@ private Q_SLOTS:
     void singlecore_regs_data();
     void pipecore_regs();
     void pipecore_regs_data();
+    void singlecore_jmp();
+    void singlecore_jmp_data();
+    void pipecore_jmp();
+    void pipecore_jmp_data();
     void core_mem();
     void core_mem_data();
 };