From c72e3ad5824eb9385b62a51633cff817b825f06d Mon Sep 17 00:00:00 2001
From: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Date: Tue, 26 Mar 2019 23:40:21 +0100
Subject: Change single cycle core with delay slot to use a separate fetch stage.

When instructions are visualized, it is even more misleading
to keep the old instruction in the decode-phase delay buffer.

The single cycle core with delay slot is upgraded
to a variant with separate fetch and execute phases.
This way the structure is logical and the delay slot
has a clear purpose.

Signed-off-by: Pavel Pisa <pisa@cmp.felk.cvut.cz>
---
 qtmips_machine/core.cpp           | 67 ++++++++++++++++++++++-----------------
 qtmips_machine/core.h             |  5 +--
 qtmips_machine/tests/testcore.cpp |  4 +++
 3 files changed, 45 insertions(+), 31 deletions(-)

(limited to 'qtmips_machine')

diff --git a/qtmips_machine/core.cpp b/qtmips_machine/core.cpp
index 64c309b..187ca2f 100644
--- a/qtmips_machine/core.cpp
+++ b/qtmips_machine/core.cpp
@@ -554,7 +554,7 @@ void Core::writeback(const struct dtMemory &dt) {
         regs->write_gp(dt.rwrite, dt.towrite_val);
 }
 
-bool Core::handle_pc(const struct dtDecode &dt, int32_t rel_adj) {
+bool Core::handle_pc(const struct dtDecode &dt) {
     bool branch = false;
     emit instruction_program_counter(dt.inst, dt.inst_addr, EXCAUSE_NONE);
 
@@ -593,7 +593,7 @@ bool Core::handle_pc(const struct dtDecode &dt, int32_t rel_adj) {
         std::int32_t rel_offset = dt.inst.immediate() << 2;
         if (rel_offset & (1 << 17))
             rel_offset -= 1 << 18;
-        regs->pc_jmp(rel_offset + rel_adj);
+        regs->pc_abs_jmp(dt.inst_addr + rel_offset + 4);
     } else {
         regs->pc_inc();
     }
@@ -672,57 +672,66 @@ CoreSingle::CoreSingle(Registers *regs, MemoryAccess *mem_program, MemoryAccess
                        bool jmp_delay_slot, unsigned int min_cache_row_size, Cop0State *cop0state) :
     Core(regs, mem_program, mem_data, min_cache_row_size, cop0state) {
     if (jmp_delay_slot)
-        jmp_delay_decode = new struct Core::dtDecode();
+        dt_f = new struct Core::dtFetch();
     else
-        jmp_delay_decode = nullptr;
+        dt_f = nullptr;
     reset();
 }
 
 CoreSingle::~CoreSingle() {
-    if (jmp_delay_decode != nullptr)
-        delete jmp_delay_decode;
+    if (dt_f != nullptr)
+        delete dt_f;
 }
 
 void CoreSingle::do_step(bool skip_break) {
-    bool in_delay_slot = false;
-    std::uint32_t jump_branch_pc;
+    bool branch_taken = false;
 
     struct dtFetch f = fetch(skip_break);
+    if (dt_f != nullptr) {
+        struct dtFetch f_swap = *dt_f;
+        *dt_f = f;
+        f = f_swap;
+    }
     struct dtDecode d = decode(f);
+    struct dtExecute e = execute(d);
+    struct dtMemory m = memory(e);
+    writeback(m);
 
     // Handle PC before instruction following jump leaves decode stage
-    if (jmp_delay_decode != nullptr) {
-        in_delay_slot = handle_pc(*jmp_delay_decode);
-        if (jmp_delay_decode->nb_skip_ds && !in_delay_slot) {
-            // Discard processing of instruction in delay slot
-            // for BEQL, BNEL, BLEZL, BGTZL, BLTZL, BGEZL, BLTZALL, BGEZALL
-            dtDecodeInit(d);
-        }
-        jump_branch_pc = jmp_delay_decode->inst_addr;
-        *jmp_delay_decode = d; // Copy current decode
+
+    if ((m.stop_if || (m.excause != EXCAUSE_NONE)) && dt_f != nullptr) {
+        dtFetchInit(*dt_f);
+        emit instruction_fetched(dt_f->inst, dt_f->inst_addr, dt_f->excause);
+        emit fetch_inst_addr_value(STAGEADDR_NONE);
     } else {
-        handle_pc(d, 4);
-        jump_branch_pc = d.inst_addr;
+        branch_taken = handle_pc(d);
+        if (dt_f != nullptr) {
+            dt_f->in_delay_slot = branch_taken;
+            if (d.nb_skip_ds && !branch_taken) {
+                // Discard processing of instruction in delay slot
+                // for BEQL, BNEL, BLEZL, BGTZL, BLTZL, BGEZL, BLTZALL, BGEZALL
+                dtFetchInit(*dt_f);
+            }
+        }
     }
 
-    struct dtExecute e = execute(d);
-    struct dtMemory m = memory(e);
-    writeback(m);
-
     if (m.excause != EXCAUSE_NONE) {
-        if (jmp_delay_decode != nullptr)
-            dtDecodeInit(*jmp_delay_decode);
+        if (dt_f != nullptr) {
+            regs->pc_abs_jmp(dt_f->inst_addr);
+        }
         handle_exception(this, regs, m.excause, m.inst_addr, regs->read_pc(),
-                         jump_branch_pc, in_delay_slot, m.mem_addr);
+                         prev_inst_addr, m.in_delay_slot, m.mem_addr);
         return;
     }
+    prev_inst_addr = m.inst_addr;
 }
 
 void CoreSingle::do_reset() {
-    if (jmp_delay_decode != nullptr) {
-        Core::dtDecodeInit(*jmp_delay_decode);
-        jmp_delay_decode->inst_addr = 0;
+    if (dt_f != nullptr) {
+        Core::dtFetchInit(*dt_f);
+        dt_f->inst_addr = 0;
     }
+    prev_inst_addr = 0;
 }
 
 CorePipelined::CorePipelined(Registers *regs, MemoryAccess *mem_program, MemoryAccess *mem_data,
diff --git a/qtmips_machine/core.h b/qtmips_machine/core.h
index 5658726..6b025db 100644
--- a/qtmips_machine/core.h
+++ b/qtmips_machine/core.h
@@ -259,7 +259,7 @@ protected:
     struct dtExecute execute(const struct dtDecode&);
     struct dtMemory memory(const struct dtExecute&);
     void writeback(const struct dtMemory&);
-    bool handle_pc(const struct dtDecode&, std::int32_t rel_adj = 0);
+    bool handle_pc(const struct dtDecode&);
 
     enum ExceptionCause memory_special(enum AccessControl memctl,
                            int mode, bool memread, bool memwrite,
@@ -298,7 +298,8 @@ protected:
     void do_reset();
 
 private:
-    struct Core::dtDecode *jmp_delay_decode;
+    struct Core::dtFetch *dt_f;
+    std::uint32_t prev_inst_addr;
 };
 
 class CorePipelined : public Core {
diff --git a/qtmips_machine/tests/testcore.cpp b/qtmips_machine/tests/testcore.cpp
index 68da14c..7ba185d 100644
--- a/qtmips_machine/tests/testcore.cpp
+++ b/qtmips_machine/tests/testcore.cpp
@@ -230,7 +230,9 @@ void MachineTests::singlecore_regs() {
 
     CoreSingle core(&init, &mem_used, &mem_used, true);
     core.step(); // Single step should be enought as this is risc without pipeline
+    core.step();
 
+    res.pc_inc();
     res.pc_inc(); // We did single step	so increment program counter accordingly
     QCOMPARE(init, res); // After doing changes from initial state this should be same state as in case of passed expected result
     QCOMPARE(mem, mem_used); // There should be no change in memory
@@ -444,7 +446,9 @@ void MachineTests::singlecore_mem() {
 
     CoreSingle core(&regs_init, &mem_init, &mem_init, true);
     core.step();
+    core.step();
 
+    regs_res.pc_inc();
     regs_res.pc_inc();
     QCOMPARE(regs_init, regs_res);
     QCOMPARE(mem_init, mem_res);
-- 
cgit v1.3