Multiple LSU pipeline support #216

Open · wants to merge 36 commits into base: master

Commits (36)
8538bdc
adding store buffer
MayneMei Nov 1, 2024
22dda9f
modified return type of some store function, and reorder the constructor
MayneMei Nov 1, 2024
9037295
change forwarding at handleCacheLookupReq_, roll back handleCacheRead_
MayneMei Nov 1, 2024
8819e27
passed regression test, more test cases needed
MayneMei Nov 18, 2024
14a3a70
modified test case, also disable store sending cache lookup req when retiring
MayneMei Nov 25, 2024
dc379a1
modified test to check forwarding cycle
MayneMei Nov 25, 2024
0eb3a6a
Update Lsu_test.cpp
MayneMei Nov 29, 2024
5404063
Update LSU.hpp, add parameter for data forwarding, set default value …
MayneMei Nov 29, 2024
5cf7454
Update Lsu_test.cpp
MayneMei Nov 29, 2024
622a0d2
data forwarding parameterize, modified testcase
MayneMei Nov 29, 2024
5bdeefa
typo fixing
MayneMei Nov 29, 2024
60d5fba
typo fixing
MayneMei Nov 29, 2024
0b94a35
test typo fixing
MayneMei Nov 29, 2024
662f00c
add helper function for test
MayneMei Nov 29, 2024
0d52a0e
syntax error
MayneMei Nov 29, 2024
a27fa71
modified test_case and little syntax in lsu
MayneMei Nov 29, 2024
d7df290
change erase to pop_front(), don't have local machine right now so on…
MayneMei Dec 2, 2024
8084f56
change store buffer to deque
MayneMei Dec 2, 2024
d6c92b5
added debug info
MayneMei Dec 2, 2024
b4b60d3
different debug info
MayneMei Dec 2, 2024
575256d
store_buffer_initialization
MayneMei Dec 2, 2024
67bb0cc
initialization sequence change
MayneMei Dec 2, 2024
b3980ec
comment out cout
MayneMei Dec 2, 2024
020b493
cout debugging
MayneMei Dec 2, 2024
8034e14
roll back to using buffer instead of deque
MayneMei Dec 2, 2024
e949057
modified test, cycle matched expectation
MayneMei Dec 6, 2024
b01d7e2
added documentation
MayneMei Dec 13, 2024
7db7fcc
rebased again and resolved comments
MayneMei Mar 23, 2025
423ba58
Update core/lsu/LSU.hpp
MayneMei Apr 7, 2025
69a3e41
Update Lsu_test.cpp
MayneMei Apr 7, 2025
82ce2e7
modifying forward logic
MayneMei May 18, 2025
411ef1c
grammar issue fixed
MayneMei May 18, 2025
4994499
grammar issue fixed
MayneMei May 18, 2025
711f66a
grammar issue fixed
MayneMei May 18, 2025
287b102
fixed syntax error
MayneMei May 20, 2025
4d1270a
deleted unused variable
MayneMei May 20, 2025
Files changed (3)
3 changes: 3 additions & 0 deletions core/Inst.hpp
@@ -259,6 +259,9 @@ namespace olympia

uint32_t getOpCode() const { return static_cast<uint32_t>(opcode_info_->getOpcode()); }

// Get the data size in bytes
uint32_t getMemAccessSize() const { return static_cast<uint32_t>(opcode_info_->getDataSize() / 8); } // opcode_info's data size is in bits
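// e.g., lw reports a 32-bit data size, so getMemAccessSize() returns 32 / 8 = 4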

mavis::InstructionUniqueID getMavisUid() const
{
return opcode_info_->getInstructionUniqueID();
151 changes: 144 additions & 7 deletions core/lsu/LSU.cpp
@@ -20,6 +20,8 @@ namespace olympia
replay_buffer_("replay_buffer", p->replay_buffer_size, getClock()),
replay_buffer_size_(p->replay_buffer_size),
replay_issue_delay_(p->replay_issue_delay),
store_buffer_("store_buffer", p->ldst_inst_queue_size, getClock()), // store buffer sized to match the ldst inst queue
store_buffer_size_(p->ldst_inst_queue_size),
ready_queue_(),
load_store_info_allocator_(sparta::notNull(OlympiaAllocators::getOlympiaAllocators(node))
->load_store_info_allocator),
@@ -35,7 +37,8 @@
+ p->cache_read_stage_length), // Complete stage is after the cache read stage
ldst_pipeline_("LoadStorePipeline", (complete_stage_ + 1),
getClock()), // complete_stage_ + 1 is number of stages
allow_speculative_load_exec_(p->allow_speculative_load_exec),
allow_data_forwarding_(p->allow_data_forwarding)
{
sparta_assert(p->mmu_lookup_stage_length > 0,
"MMU lookup stage should atleast be one cycle");
@@ -48,6 +51,7 @@
ldst_pipeline_.enableCollection(node);
ldst_inst_queue_.enableCollection(node);
replay_buffer_.enableCollection(node);
store_buffer_.enableCollection(node);

// Startup handler for sending initial credits
sparta::StartupEvent(node, CREATE_SPARTA_HANDLER(LSU, sendInitialCredits_));
@@ -177,6 +181,12 @@
{
ILOG("New instruction added to the ldst queue " << inst_ptr);
allocateInstToIssueQueue_(inst_ptr);
// allocate to Store buffer
if (inst_ptr->isStoreInst())
{
allocateInstToStoreBuffer_(inst_ptr);
}

handleOperandIssueCheck_(inst_ptr);
lsu_insts_dispatched_++;
}
@@ -265,7 +275,19 @@
sparta_assert(inst_ptr->getStatus() == Inst::Status::RETIRED,
"Get ROB Ack, but the store inst hasn't retired yet!");

if (inst_ptr->isStoreInst())
{
auto oldest_store = getOldestStore_();
sparta_assert(oldest_store && oldest_store->getInstPtr()->getUniqueID() == inst_ptr->getUniqueID(),
"Attempting to retire store out of order! Expected: "
<< (oldest_store ? oldest_store->getInstPtr()->getUniqueID() : 0)
<< " Got: " << inst_ptr->getUniqueID());

// Remove from store buffer; a retired store does not need to send another cache lookup request
store_buffer_.erase(store_buffer_.begin());
++stores_retired_;
}


updateIssuePriorityAfterStoreInstRetire_(inst_ptr);
if (isReadyToIssueInsts_())
@@ -447,7 +469,7 @@
{
updateInstReplayReady_(load_store_info_ptr);
}
// There might not be a wake up because the cache cannot handle any more instructions
// Change to nack wakeup when implemented
if (!load_store_info_ptr->isInReadyQueue())
{
@@ -468,7 +490,7 @@
// If have passed translation and the instruction is a store,
// then it's good to be retired (i.e. mark it completed).
// Stores typically do not cause a flush after a successful
// translation. We now wait for the Retire block to "retire"
// it, meaning it's good to go to the cache
if (inst_ptr->isStoreInst() && (inst_ptr->getStatus() == Inst::Status::SCHEDULED))
{
@@ -483,21 +505,36 @@
return;
}

// Loads don't perform a cache lookup if older stores in the load store
// queue haven't yet been issued
if (!inst_ptr->isStoreInst() && !allOlderStoresIssued_(inst_ptr)
&& allow_speculative_load_exec_)
{
ILOG("Dropping speculative load " << inst_ptr);
load_store_info_ptr->setState(LoadStoreInstInfo::IssueState::READY);
ldst_pipeline_.invalidateStage(cache_lookup_stage_);
// TODO: double check whether "allow_speculative_load_exec_" means "do not allow" here
if (allow_speculative_load_exec_)
{
updateInstReplayReady_(load_store_info_ptr);
}
return;
}

// Add store forwarding check here for loads
if (!inst_ptr->isStoreInst() && allow_data_forwarding_)
{
// checks the load's address and access size against the buffered stores
auto forwarding_store = tryStoreToLoadForwarding(inst_ptr);

if (forwarding_store)
{
ILOG("Found forwarding store for load " << inst_ptr);
mem_access_info_ptr->setDataReady(true);
mem_access_info_ptr->setCacheState(MemoryAccessInfo::CacheState::HIT);
return;
}
}

const bool is_already_hit =
(mem_access_info_ptr->getCacheState() == MemoryAccessInfo::CacheState::HIT);
const bool is_unretired_store =
@@ -790,6 +827,7 @@
flushIssueQueue_(criteria);
flushReplayBuffer_(criteria);
flushReadyQueue_(criteria);
flushStoreBuffer_(criteria);

// Cancel replay events
auto flush = [&criteria](const LoadStoreInstInfoPtr & ldst_info_ptr) -> bool
@@ -894,6 +932,89 @@
ILOG("Append new load/store instruction to issue queue!");
}

void LSU::allocateInstToStoreBuffer_(const InstPtr & inst_ptr)
{
const auto & store_info_ptr = createLoadStoreInst_(inst_ptr);

sparta_assert(store_buffer_.size() < ldst_inst_queue_size_,
"Appending store buffer causes overflows!");

store_buffer_.push_back(store_info_ptr);
ILOG("Store added to store buffer: " << inst_ptr);
}

bool LSU::tryStoreToLoadForwarding(const InstPtr& load_inst_ptr) const
{
const uint64_t load_addr = load_inst_ptr->getTargetVAddr();
const uint32_t load_size = load_inst_ptr->getMemAccessSize();

// A load must have a non-zero size to access memory.
if (load_size == 0) {
return false;
}

std::vector<bool> coverage_mask(load_size, false);
Collaborator:
This defaults to a specialization of std::vector which is really a dynamic std::bitset. Suggest using a boost::dynamic_bitset for the implementation. See my next points...

Since we don't have a concept of a store buffer class, which could maintain the valid bits per cache line, you can kinda simulate that here.

  1. First, you can make a rule that store to load forwarding cannot cross a cache line. Makes things a little easier.
  2. If the load addr + size is good, create a bitmask of the bytes that are needed by the load based on the size of the cache line
  3. Iterate the store buffer like you're doing
  4. Create a bitmask for the store (like you did for the load) and clear the bits in the load bit mask that match:
         load_bytes_needed &= ~store_bytes;
         if (load_bytes_needed.none()) {
             return true; // we found all the stores that can forward to this load within the cache line
         }
    
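A minimal sketch of that suggestion, assuming a 64-byte cache line and a plain (addr, size) view of the store buffer; kLineSize, byteMask, and canForwardToLoad are illustrative names, not existing Olympia APIs:

    #include <boost/dynamic_bitset.hpp>
    #include <cstdint>
    #include <deque>
    #include <utility>

    static constexpr uint64_t kLineSize = 64; // assumed cache-line size

    // Mask of the bytes that [addr, addr + size) touches within addr's cache line
    static boost::dynamic_bitset<> byteMask(uint64_t addr, uint32_t size)
    {
        boost::dynamic_bitset<> mask(kLineSize);
        const uint64_t off = addr % kLineSize;
        for (uint64_t i = 0; i < size && (off + i) < kLineSize; ++i) {
            mask.set(off + i);
        }
        return mask;
    }

    // stores holds (addr, size) pairs, oldest first
    bool canForwardToLoad(uint64_t load_addr, uint32_t load_size,
                          const std::deque<std::pair<uint64_t, uint32_t>> & stores)
    {
        if (load_size == 0) {
            return false; // nothing to forward
        }
        // Rule 1: forwarding may not cross a cache line
        if ((load_addr % kLineSize) + load_size > kLineSize) {
            return false;
        }
        auto load_bytes_needed = byteMask(load_addr, load_size);
        // Walk youngest -> oldest, clearing the bytes each store can supply
        for (auto it = stores.rbegin(); it != stores.rend(); ++it) {
            if (it->first / kLineSize != load_addr / kLineSize) {
                continue; // store is to a different cache line
            }
            load_bytes_needed &= ~byteMask(it->first, it->second);
            if (load_bytes_needed.none()) {
                return true; // every byte of the load is covered
            }
        }
        return false;
    }

As in the std::vector<bool> version below, the walk goes youngest to oldest so each byte is credited to the most recent store that wrote it.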

uint32_t bytes_covered_count = 0;

// Iterate through the store_buffer_ from youngest to oldest.
// This ensures that if multiple stores write to the same byte,
// the data from the youngest store is effectively used.
for (auto it = store_buffer_.rbegin(); it != store_buffer_.rend(); ++it)
{
const auto& store_info_ptr = *it; // LoadStoreInstInfoPtr
const InstPtr& store_inst_ptr = store_info_ptr->getInstPtr();

const uint64_t store_addr = store_inst_ptr->getTargetVAddr();
const uint32_t store_size = store_inst_ptr->getMemAccessSize();

if (store_size == 0) {
continue; // Skip stores that don't actually write data.
Collaborator:
Which ones are those?

}

// Determine the overlapping region [overlap_start_addr, overlap_end_addr)
// The overlap is in terms of global memory addresses.
uint64_t overlap_start_addr = std::max(load_addr, store_addr);
uint64_t overlap_end_addr = std::min(load_addr + load_size, store_addr + store_size);

// If there's an actual overlap (i.e., the range is not empty)
if (overlap_start_addr < overlap_end_addr)
{
// Iterate over the bytes *within the load's address range* that this store covers.
for (uint64_t current_byte_global_addr = overlap_start_addr; current_byte_global_addr < overlap_end_addr; ++current_byte_global_addr)
{
// Calculate the index of this byte relative to the load's start address.
// This index is used for the coverage_mask.
uint32_t load_byte_idx = static_cast<uint32_t>(current_byte_global_addr - load_addr);

// If this byte within the load's coverage_mask hasn't been marked true yet
// (meaning it hasn't been covered by an even younger store), mark it.
if (!coverage_mask[load_byte_idx])
{
coverage_mask[load_byte_idx] = true;
bytes_covered_count++;
}
}
}

// If all bytes of the load are now covered, no need to check even older stores
if (bytes_covered_count == load_size) {
break;
}
}

// Check if all bytes required by the load were covered by stores in the buffer.
return bytes_covered_count == load_size;
}
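// Worked example for the routine above: a 4-byte load at 0x1000 is fully
// covered by a younger 2-byte store at 0x1000 (bytes 0-1) plus an older
// 4-byte store at 0x1002 (bytes 2-3), so the check succeeds; either store
// alone would leave bytes uncovered and the load would fall through to the
// cache lookup.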


LoadStoreInstInfoPtr LSU::getOldestStore_() const
{
if(store_buffer_.empty()) {
return nullptr;
}
return store_buffer_.read(0);
}

bool LSU::allOlderStoresIssued_(const InstPtr & inst_ptr)
{
for (const auto & ldst_info_ptr : ldst_inst_queue_)
@@ -1368,4 +1489,20 @@
}
}

void LSU::flushStoreBuffer_(const FlushCriteria & criteria)
{
auto sb_iter = store_buffer_.begin();
while(sb_iter != store_buffer_.end()) {
auto inst_ptr = (*sb_iter)->getInstPtr();
if(criteria.includedInFlush(inst_ptr)) {
auto delete_iter = sb_iter++;
// sparta::Buffer::erase() does not return an iterator, so advance before erasing
store_buffer_.erase(delete_iter);
ILOG("Flushed store from store buffer: " << inst_ptr);
} else {
++sb_iter;
}
}
}

} // namespace olympia
27 changes: 26 additions & 1 deletion core/lsu/LSU.hpp
@@ -50,10 +50,14 @@ namespace olympia
PARAMETER(uint32_t, ldst_inst_queue_size, 8, "LSU ldst inst queue size")
PARAMETER(uint32_t, replay_buffer_size, ldst_inst_queue_size, "Replay buffer size")
PARAMETER(uint32_t, replay_issue_delay, 3, "Replay Issue delay")
// PARAMETER(uint32_t, store_buffer_size, ldst_inst_queue_size, "Size of the store buffer")
// LSU microarchitecture parameters
PARAMETER(
bool, allow_speculative_load_exec, false,
"Allow loads to proceed speculatively before all older store addresses are known")
PARAMETER(
bool, allow_data_forwarding, true,
"Allow data forwarding to bypass the cache look up / memory access")
// Pipeline length
PARAMETER(uint32_t, mmu_lookup_stage_length, 1, "Length of the mmu lookup stage")
PARAMETER(uint32_t, cache_lookup_stage_length, 1, "Length of the cache lookup stage")
@@ -73,6 +77,11 @@
//! name of this resource.
static const char name[];

// Returns allow_data_forwarding_; exposed for the LSU test
bool allowDataForwardingEX() const {
return allow_data_forwarding_;
}

////////////////////////////////////////////////////////////////////////////////
// Type Name/Alias Declaration
////////////////////////////////////////////////////////////////////////////////
@@ -137,6 +146,10 @@
const uint32_t replay_buffer_size_;
const uint32_t replay_issue_delay_;

// Store Buffer
sparta::Buffer<LoadStoreInstInfoPtr> store_buffer_;
const uint32_t store_buffer_size_;

sparta::PriorityQueue<LoadStoreInstInfoPtr> ready_queue_;
// MMU unit
bool mmu_busy_ = false;
@@ -169,6 +182,7 @@

// LSU Microarchitecture parameters
const bool allow_speculative_load_exec_;
const bool allow_data_forwarding_;

// ROB stopped simulation early, transactions could still be inflight.
bool rob_stopped_simulation_ = false;
@@ -258,6 +272,15 @@

void allocateInstToIssueQueue_(const InstPtr & inst_ptr);

// allocate store inst to store buffer
void allocateInstToStoreBuffer_(const InstPtr & inst_ptr);

// check whether load inst could be forwarded by store
bool tryStoreToLoadForwarding(const InstPtr & load_inst_ptr) const;

// get oldest store
LoadStoreInstInfoPtr getOldestStore_() const;

bool olderStoresExists_(const InstPtr & inst_ptr);

bool allOlderStoresIssued_(const InstPtr & inst_ptr);
@@ -315,6 +338,8 @@
// Flush Replay Buffer
void flushReplayBuffer_(const FlushCriteria &);

void flushStoreBuffer_(const FlushCriteria &);

// Counters
sparta::Counter lsu_insts_dispatched_{getStatisticSet(), "lsu_insts_dispatched",
"Number of LSU instructions dispatched",