Multiple LSU pipeline support #216


Open
wants to merge 36 commits into base: master
Changes from 28 commits
Commits
36 commits
8538bdc
adding store buffer
MayneMei Nov 1, 2024
22dda9f
modified return type of some store functions, and reordered the constructor
MayneMei Nov 1, 2024
9037295
change forwarding at handleCacheLookupReq_, roll back handleCacheRead_
MayneMei Nov 1, 2024
8819e27
passed regression test, more test cases needed
MayneMei Nov 18, 2024
14a3a70
modified test case, also disable store send cache lookup req when retire
MayneMei Nov 25, 2024
dc379a1
modified test to check forwarding cycle
MayneMei Nov 25, 2024
0eb3a6a
Update Lsu_test.cpp
MayneMei Nov 29, 2024
5404063
Update LSU.hpp, add parameter for data forwarding, set default value …
MayneMei Nov 29, 2024
5cf7454
Update Lsu_test.cpp
MayneMei Nov 29, 2024
622a0d2
data forwarding parameterized, modified test case
MayneMei Nov 29, 2024
5bdeefa
typo fixing
MayneMei Nov 29, 2024
60d5fba
typo fixing
MayneMei Nov 29, 2024
0b94a35
test typo fixing
MayneMei Nov 29, 2024
662f00c
add helper function for test
MayneMei Nov 29, 2024
0d52a0e
syntax error
MayneMei Nov 29, 2024
a27fa71
modified test_case and minor syntax in LSU
MayneMei Nov 29, 2024
d7df290
change erase to pop_front(), don't have local machine right now so on…
MayneMei Dec 2, 2024
8084f56
change store buffer to deque
MayneMei Dec 2, 2024
d6c92b5
added debug info
MayneMei Dec 2, 2024
b4b60d3
different debug info
MayneMei Dec 2, 2024
575256d
store_buffer_initialization
MayneMei Dec 2, 2024
67bb0cc
initialization sequence change
MayneMei Dec 2, 2024
b3980ec
comment cout
MayneMei Dec 2, 2024
020b493
cout debugging
MayneMei Dec 2, 2024
8034e14
roll back to using buffer instead of deque
MayneMei Dec 2, 2024
e949057
modified test, cycle matched expectation
MayneMei Dec 6, 2024
b01d7e2
added documentation
MayneMei Dec 13, 2024
7db7fcc
rebased again and resolved comments
MayneMei Mar 23, 2025
423ba58
Update core/lsu/LSU.hpp
MayneMei Apr 7, 2025
69a3e41
Update Lsu_test.cpp
MayneMei Apr 7, 2025
82ce2e7
modifying forward logic
MayneMei May 18, 2025
411ef1c
grammar issue fixed
MayneMei May 18, 2025
4994499
grammar issue fixed
MayneMei May 18, 2025
711f66a
grammar issue fixed
MayneMei May 18, 2025
287b102
fixed syntax error
MayneMei May 20, 2025
4d1270a
deleted unused variable
MayneMei May 20, 2025
98 changes: 91 additions & 7 deletions core/lsu/LSU.cpp
@@ -20,6 +20,8 @@ namespace olympia
replay_buffer_("replay_buffer", p->replay_buffer_size, getClock()),
replay_buffer_size_(p->replay_buffer_size),
replay_issue_delay_(p->replay_issue_delay),
store_buffer_("store_buffer", p->ldst_inst_queue_size, getClock()), // Add this line
store_buffer_size_(p->ldst_inst_queue_size),
ready_queue_(),
load_store_info_allocator_(sparta::notNull(OlympiaAllocators::getOlympiaAllocators(node))
->load_store_info_allocator),
@@ -35,7 +37,8 @@ namespace olympia
+ p->cache_read_stage_length), // Complete stage is after the cache read stage
ldst_pipeline_("LoadStorePipeline", (complete_stage_ + 1),
getClock()), // complete_stage_ + 1 is number of stages
allow_speculative_load_exec_(p->allow_speculative_load_exec)
allow_speculative_load_exec_(p->allow_speculative_load_exec),
allow_data_forwarding_(p->allow_data_forwarding)
{
sparta_assert(p->mmu_lookup_stage_length > 0,
"MMU lookup stage should atleast be one cycle");
@@ -48,6 +51,7 @@ namespace olympia
ldst_pipeline_.enableCollection(node);
ldst_inst_queue_.enableCollection(node);
replay_buffer_.enableCollection(node);
store_buffer_.enableCollection(node);

// Startup handler for sending initial credits
sparta::StartupEvent(node, CREATE_SPARTA_HANDLER(LSU, sendInitialCredits_));
@@ -177,6 +181,12 @@ namespace olympia
{
ILOG("New instruction added to the ldst queue " << inst_ptr);
allocateInstToIssueQueue_(inst_ptr);
// allocate to Store buffer
if (inst_ptr->isStoreInst())
{
allocateInstToStoreBuffer_(inst_ptr);
}

handleOperandIssueCheck_(inst_ptr);
lsu_insts_dispatched_++;
}
@@ -265,7 +275,19 @@ namespace olympia
sparta_assert(inst_ptr->getStatus() == Inst::Status::RETIRED,
"Get ROB Ack, but the store inst hasn't retired yet!");

++stores_retired_;
if (inst_ptr->isStoreInst())
{
auto oldest_store = getOldestStore_();
sparta_assert(oldest_store && oldest_store->getInstPtr()->getUniqueID() == inst_ptr->getUniqueID(),
"Attempting to retire store out of order! Expected: "
<< (oldest_store ? oldest_store->getInstPtr()->getUniqueID() : 0)
<< " Got: " << inst_ptr->getUniqueID());

// Remove from store buffer -> don't actually need to send cache request
store_buffer_.erase(store_buffer_.begin());
++stores_retired_;
}


updateIssuePriorityAfterStoreInstRetire_(inst_ptr);
if (isReadyToIssueInsts_())
@@ -447,7 +469,7 @@
{
updateInstReplayReady_(load_store_info_ptr);
}
// There might not be a wake up because the cache cannot handle nay more instruction
// There might not be a wake up because the cache cannot handle any more instruction
// Change to nack wakeup when implemented
if (!load_store_info_ptr->isInReadyQueue())
{
@@ -468,7 +490,7 @@
// If have passed translation and the instruction is a store,
// then it's good to be retired (i.e. mark it completed).
// Stores typically do not cause a flush after a successful
// translation. We now wait for the Retire block to "retire"
// translation. We now wait for the Retire block to "retire"
// it, meaning it's good to go to the cache
if (inst_ptr->isStoreInst() && (inst_ptr->getStatus() == Inst::Status::SCHEDULED))
{
@@ -483,21 +505,36 @@
return;
}

// Loads dont perform a cache lookup if there are older stores present in the load store
// queue
if (!inst_ptr->isStoreInst() && olderStoresExists_(inst_ptr)
// Loads don't perform a cache lookup if older stores in the load store queue haven't issued yet
if (!inst_ptr->isStoreInst() && !allOlderStoresIssued_(inst_ptr)
&& allow_speculative_load_exec_)
{
ILOG("Dropping speculative load " << inst_ptr);
load_store_info_ptr->setState(LoadStoreInstInfo::IssueState::READY);
ldst_pipeline_.invalidateStage(cache_lookup_stage_);
// TODO: double check whether "allow_speculative_load_exec_" means not allow
if (allow_speculative_load_exec_)
{
updateInstReplayReady_(load_store_info_ptr);
}
return;
}

// Add store forwarding check here for loads
if (!inst_ptr->isStoreInst() && allow_data_forwarding_)
{
const uint64_t load_addr = inst_ptr->getTargetVAddr();
auto forwarding_store = findYoungestMatchingStore_(load_addr);

if (forwarding_store)
{
ILOG("Found forwarding store for load " << inst_ptr);
mem_access_info_ptr->setDataReady(true);
mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::HIT);
return;
}
}

const bool is_already_hit =
(mem_access_info_ptr->getCacheState() == MemoryAccessInfo::CacheState::HIT);
const bool is_unretired_store =
@@ -790,6 +827,7 @@ namespace olympia
flushIssueQueue_(criteria);
flushReplayBuffer_(criteria);
flushReadyQueue_(criteria);
flushStoreBuffer_(criteria);

// Cancel replay events
auto flush = [&criteria](const LoadStoreInstInfoPtr & ldst_info_ptr) -> bool
@@ -894,6 +932,36 @@ namespace olympia
ILOG("Append new load/store instruction to issue queue!");
}

void LSU::allocateInstToStoreBuffer_(const InstPtr & inst_ptr)
{
const auto & store_info_ptr = createLoadStoreInst_(inst_ptr);

sparta_assert(store_buffer_.size() < ldst_inst_queue_size_,
"Appending store buffer causes overflows!");

store_buffer_.push_back(store_info_ptr);
ILOG("Store added to store buffer: " << inst_ptr);
}

LoadStoreInstInfoPtr LSU::findYoungestMatchingStore_(const uint64_t addr) const
{
LoadStoreInstInfoPtr matching_store = nullptr;

auto it = std::find_if(store_buffer_.rbegin(), store_buffer_.rend(),
[addr](const auto& store) {
return store->getInstPtr()->getTargetVAddr() == addr;
Collaborator

This is actually incorrect. You're missing the size. If the store is only 1 byte and the load is expecting 4... boom.

The way this should be done is with a proper store queue or combining buffer that can mask which bytes overlap. If the overlap is partial, it's a miss.
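
For reference, a minimal sketch of the full-coverage rule being described here — the MemOp struct and storeCoversLoad name are hypothetical, not olympia types; a real check would pull the address and access size off the instruction objects:

#include <cstdint>

// Hypothetical byte-range view of a memory access: base virtual address plus size in bytes.
struct MemOp
{
    uint64_t addr;
    uint32_t size_bytes;
};

// Forwarding is only safe when the store's byte range fully covers the load's byte range.
// Any partial overlap has to be treated as a miss and serviced by the cache instead.
inline bool storeCoversLoad(const MemOp & st, const MemOp & ld)
{
    return (st.addr <= ld.addr) && (st.addr + st.size_bytes >= ld.addr + ld.size_bytes);
}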

Collaborator Author

Hi @klingaard, is there any attribute I can use for the load/store inst's size? Below is my idea for modifying this function.

LoadStoreInstInfoPtr LSU::findYoungestMatchingStore_(const uint64_t load_addr, const uint32_t load_size) const
    {
        // Implementation with find_if
        auto it = std::find_if(store_buffer_.rbegin(), store_buffer_.rend(),
            [load_addr, load_size](const auto& store) {
                const auto& store_inst = store->getInstPtr();
                uint64_t store_addr = store_inst->getTargetVAddr();
                uint32_t store_size = store_inst->getMemAccessSize();

                // Check if store fully covers the load
                return (store_addr <= load_addr) && 
                    (store_addr + store_size >= load_addr + load_size);
            });

        return (it != store_buffer_.rend()) ? *it : nullptr;
    }

Collaborator

You will have to add the method to Inst.hpp that gets the size:

    // Get the data size in bytes
    uint32_t getMemAccessSize() const { return opcode_info->getDataSize() / 8; }  // opcode_info's data size is in bits

Your implementation almost works. 😄

What if I have two stores that can supply the data to one load?

(Diagram: Store A of 2 bytes and Store B of 2 bytes together supplying Load C of 4 bytes.)

Collaborator Author

Hi @klingaard, I added a new function called tryStoreToLoadForwarding. Basically it adds a mask so that we make sure all the bits the load needs are covered by stores in the buffer; only then do we mark it as forwardable. I also added a paragraph under LSU.adoc to explain how it works.
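
For illustration, a hedged sketch of that masking idea — StoreEntry, the plain std::deque, and the byte-granular 64-bit mask are placeholders, not necessarily how tryStoreToLoadForwarding in the PR is written:

#include <algorithm>
#include <cstdint>
#include <deque>

// Placeholder store-buffer entry: just an address and a size in bytes.
struct StoreEntry
{
    uint64_t addr;
    uint32_t size_bytes;
};

// Accumulate, byte by byte, which parts of the load the buffered stores cover.
// Forwarding is allowed only if every byte the load needs is covered.
inline bool tryStoreToLoadForwardingSketch(const std::deque<StoreEntry> & store_buffer,
                                           uint64_t load_addr, uint32_t load_size)
{
    // One mask bit per load byte (assumes loads of at most 64 bytes)
    const uint64_t needed = (load_size >= 64) ? ~uint64_t(0)
                                              : ((uint64_t(1) << load_size) - 1);
    uint64_t covered = 0;
    const uint64_t load_end = load_addr + load_size;

    for (const auto & st : store_buffer)
    {
        const uint64_t st_begin = st.addr;
        const uint64_t st_end   = st.addr + st.size_bytes;
        if (st_end <= load_addr || st_begin >= load_end)
        {
            continue; // no overlap with the load at all
        }
        // Mark the overlapping bytes, expressed as offsets within the load
        const uint64_t first = std::max(st_begin, load_addr) - load_addr;
        const uint64_t last  = std::min(st_end, load_end) - load_addr;
        for (uint64_t b = first; b < last; ++b)
        {
            covered |= (uint64_t(1) << b);
        }
    }
    return (covered & needed) == needed;
}

A full implementation would also track, per byte, which youngest store supplies the data; the coverage check alone only decides whether forwarding is legal at all.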

However, there is a build error,
"make[2]: Leaving directory '/home/runner/work/riscv-perf-model/riscv-perf-model/Release'
make[1]: *** [CMakeFiles/Makefile2:2159: test/CMakeFiles/regress.dir/rule] Error 2
make[1]: Leaving directory '/home/runner/work/riscv-perf-model/riscv-perf-model/Release'
make: *** [Makefile:839: regress] Error 2

BUILD_OLYMPIA=2
'[' 2 -ne 0 ']'
echo 'ERROR: build/regress of olympia FAILED!!!'
exit 1
ERROR: build/regress of olympia FAILED!!!"
Could you point me to where I could check this and try to fix it? I checked the saved artifacts but don't know what to look at. Really appreciate it!

Contributor

Collaborator Author

Thanks! Everything should be fixed now. :)

});
return (it != store_buffer_.rend()) ? *it : nullptr;
}

LoadStoreInstInfoPtr LSU::getOldestStore_() const
{
if(store_buffer_.empty()) {
return nullptr;
}
return store_buffer_.read(0);
}

bool LSU::allOlderStoresIssued_(const InstPtr & inst_ptr)
{
for (const auto & ldst_info_ptr : ldst_inst_queue_)
@@ -1368,4 +1436,20 @@ namespace olympia
}
}

void LSU::flushStoreBuffer_(const FlushCriteria & criteria)
{
auto sb_iter = store_buffer_.begin();
while(sb_iter != store_buffer_.end()) {
auto inst_ptr = (*sb_iter)->getInstPtr();
if(criteria.includedInFlush(inst_ptr)) {
auto delete_iter = sb_iter++;
// Buffer::erase() doesn't return an iterator, so advance before erasing
store_buffer_.erase(delete_iter);
ILOG("Flushed store from store buffer: " << inst_ptr);
} else {
++sb_iter;
}
}
}

} // namespace olympia
27 changes: 26 additions & 1 deletion core/lsu/LSU.hpp
@@ -50,10 +50,14 @@ namespace olympia
PARAMETER(uint32_t, ldst_inst_queue_size, 8, "LSU ldst inst queue size")
PARAMETER(uint32_t, replay_buffer_size, ldst_inst_queue_size, "Replay buffer size")
PARAMETER(uint32_t, replay_issue_delay, 3, "Replay Issue delay")
// PARAMETER(uint32_t, store_buffer_size, ldst_inst_queue_size, "Size of the store buffer")
// LSU microarchitecture parameters
PARAMETER(
bool, allow_speculative_load_exec, true,
bool, allow_speculative_load_exec, false,
"Allow loads to proceed speculatively before all older store addresses are known")
PARAMETER(
bool, allow_data_forwarding, true,
"Allow data forwarding to bypass the cache look up / memory access")
// Pipeline length
PARAMETER(uint32_t, mmu_lookup_stage_length, 1, "Length of the mmu lookup stage")
PARAMETER(uint32_t, cache_lookup_stage_length, 1, "Length of the cache lookup stage")
@@ -73,6 +77,11 @@ namespace olympia
//! name of this resource.
static const char name[];

// Return allow_data_forwarding_ for tests
bool allow_data_forwarding_ex() {
return allow_data_forwarding_;
}

////////////////////////////////////////////////////////////////////////////////
// Type Name/Alias Declaration
////////////////////////////////////////////////////////////////////////////////
@@ -137,6 +146,10 @@ namespace olympia
const uint32_t replay_buffer_size_;
const uint32_t replay_issue_delay_;

// Store Buffer
sparta::Buffer<LoadStoreInstInfoPtr> store_buffer_;
const uint32_t store_buffer_size_;

sparta::PriorityQueue<LoadStoreInstInfoPtr> ready_queue_;
// MMU unit
bool mmu_busy_ = false;
@@ -169,6 +182,7 @@ namespace olympia

// LSU Microarchitecture parameters
const bool allow_speculative_load_exec_;
const bool allow_data_forwarding_;

// ROB stopped simulation early, transactions could still be inflight.
bool rob_stopped_simulation_ = false;
@@ -258,6 +272,15 @@ namespace olympia

void allocateInstToIssueQueue_(const InstPtr & inst_ptr);

// allocate store inst to store buffer
void allocateInstToStoreBuffer_(const InstPtr & inst_ptr);

// Search the store buffer from youngest to oldest for a store matching the address
LoadStoreInstInfoPtr findYoungestMatchingStore_(const uint64_t addr) const;

// get oldest store
LoadStoreInstInfoPtr getOldestStore_() const;

bool olderStoresExists_(const InstPtr & inst_ptr);

bool allOlderStoresIssued_(const InstPtr & inst_ptr);
@@ -315,6 +338,8 @@ namespace olympia
// Flush Replay Buffer
void flushReplayBuffer_(const FlushCriteria &);

void flushStoreBuffer_(const FlushCriteria &);

// Counters
sparta::Counter lsu_insts_dispatched_{getStatisticSet(), "lsu_insts_dispatched",
"Number of LSU instructions dispatched",