Skip to content

Commit

Permalink
mem: add writebuffer stall; a writebuffer entry must be released only after receiving its response
Browse files Browse the repository at this point in the history
  • Loading branch information
tastynoob committed Feb 11, 2025
1 parent 5944748 commit b6340ae
Show file tree
Hide file tree
Showing 15 changed files with 162 additions and 61 deletions.
10 changes: 8 additions & 2 deletions configs/common/Caches.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ class L1_DCache(L1Cache):

enable_wayprediction = False

write_buffers = 16

class L2Cache(Cache):
mshrs = 64
tgts_per_mshr = 20
Expand All @@ -106,6 +108,8 @@ class L2Cache(Cache):

slice_num = 4

write_buffers = 16

class L3Cache(Cache):
mshrs = 64
tgts_per_mshr = 20
Expand All @@ -122,6 +126,8 @@ class L3Cache(Cache):
cache_level = 3
enable_wayprediction = False

write_buffers = 16

class IOCache(Cache):
assoc = 8
tag_latency = 50
Expand Down Expand Up @@ -201,8 +207,8 @@ class L3ToMemBus(CoherentXBar):
# A handful pipeline stages for each portion of the latency
# contributions.
frontend_latency = 0
forward_latency = 30
response_latency = 30
forward_latency = 48
response_latency = 48
snoop_response_latency = 4

# Use a snoop-filter by default
Expand Down
3 changes: 2 additions & 1 deletion src/cpu/o3/commit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,8 @@ Commit::tick()

if (cpu->curCycle() - lastCommitCycle > 20000) {
if (maybeStucked) {
warn("[sn:%s] %s", rob->head->get()->seqNum, rob->head->get()->staticInst->disassemble(0));
if (rob->numInstsInROB)
warn("[sn:%s] %s", rob->head->get()->seqNum, rob->head->get()->staticInst->disassemble(0));
panic("cpu stucked!!\n");
}
warn("cpu may be stucked\n");
Expand Down
3 changes: 2 additions & 1 deletion src/mem/abstract_mem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,8 @@ AbstractMemory::access(PacketPtr pkt)
if (pkt->cmd == MemCmd::CleanEvict || pkt->cmd == MemCmd::WritebackClean) {
DPRINTF(MemoryAccess, "CleanEvict on 0x%x: not responding\n",
pkt->getAddr());
return;
pkt->makeResponse();
return;
}

assert(pkt->getAddrRange().isSubset(range));
Expand Down
61 changes: 52 additions & 9 deletions src/mem/cache/base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,8 @@ BaseCache::handleTimingReqHit(PacketPtr pkt, CacheBlk *blk, Tick request_time, b
// It is a force hit
assert(pkt->isResponse());
}
DPRINTF(Cache, "Making timing response for %s, schedule it at %llu, is force hit: %i\n",
pkt->print(), request_time, pkt->isResponse());
DPRINTF(Cache, "Making timing response for %s, schedule it at %llu\n",
pkt->print(), request_time);

if (pkt->isRead() && first_acc_after_pf && prefetcher && prefetcher->hasHintDownStream()) {
DPRINTF(Cache, "Notify down stream on pf hit\n");
Expand Down Expand Up @@ -619,7 +619,7 @@ BaseCache::recvTimingReq(PacketPtr pkt)
if (!satisfied && forceHit && !pkt->req->isInstFetch() && pkt->isRead() && pkt->req->hasPC() &&
forceHitPCs.count(pkt->req->getPC())) {
bool mshr_hit = mshrQueue.findMatch(pkt->getAddr(), pkt->isSecure()) != nullptr;
bool wb_hit = writeBuffer.findMatch(pkt->getBlockAddr(blkSize), pkt->isSecure()) != nullptr;
bool wb_hit = writeBuffer.findMatchNoService(pkt->getBlockAddr(blkSize), pkt->isSecure()) != nullptr;

if (!(mshr_hit || wb_hit)) {
DPRINTF(Cache, "%s: generate functional access for PC %#lx\n", __func__, pkt->req->getPC());
Expand Down Expand Up @@ -852,6 +852,32 @@ BaseCache::recvTimingResp(PacketPtr pkt)
DPRINTF(Cache, "%s: Handling response %s\n", __func__,
pkt->print());

if (pkt->isWriteBackResp()) {
DPRINTF(Cache, "Writeback response for addr %s\n", pkt->print());
WriteQueueEntry* wbentry = dynamic_cast<WriteQueueEntry*>(pkt->popSenderState());
panic_if(!wbentry, "Writeback response without sender state\n");
bool wasfull = writeBuffer.isFull();

stats.cmdStats(pkt)
.missLatencyDist.sample(ticksToCycles(curTick() - wbentry->getTarget()->recvTime));

wbentry->popTarget();
writeBuffer.deallocate(wbentry);
if (wasfull && !writeBuffer.isFull()) {
clearBlocked(Blocked_NoWBBuffers);
}

if (pkt->cmd==MemCmd::WritebackResp) {
if (pkt->senderState) {
cpuSidePort.schedTimingResp(pkt, curTick() + pkt->headerDelay);
pkt->headerDelay = 0;
return;
}
delete pkt;
return;
}
}

// if this is a write, we should be looking at an uncacheable
// write
if (pkt->isWrite() && pkt->cmd != MemCmd::LockedRMWWriteResp) {
Expand Down Expand Up @@ -1266,7 +1292,7 @@ BaseCache::getNextQueueEntry()
} else if (miss_mshr) {
// need to check for conflicting earlier writeback
WriteQueueEntry *conflict_mshr = writeBuffer.findPending(miss_mshr);
if (conflict_mshr) {
if (conflict_mshr && !conflict_mshr->inService) {
// not sure why we don't check order here... it was in the
// original code but commented out.

Expand Down Expand Up @@ -1323,7 +1349,7 @@ BaseCache::getNextQueueEntry()
prefetcher->streamPflate();
// free the request and packet
delete pkt;
} else if (writeBuffer.findMatch(pf_addr, pkt->isSecure())) {
} else if (writeBuffer.findMatchNoService(pf_addr, pkt->isSecure())) {
DPRINTF(HWPrefetch, "Prefetch %#x has hit in the "
"Write Buffer, dropped.\n", pf_addr);
prefetcher->pfHitInWB(pf_type);
Expand Down Expand Up @@ -1746,7 +1772,7 @@ BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
// generating CleanEvict and Writeback or simply CleanEvict and
// CleanEvict almost simultaneously will be caught by snoops sent out
// by crossbar.
WriteQueueEntry *wb_entry = writeBuffer.findMatch(pkt->getAddr(),
WriteQueueEntry *wb_entry = writeBuffer.findMatchNoService(pkt->getAddr(),
pkt->isSecure());
if (wb_entry) {
assert(wb_entry->getNumTargets() == 1);
Expand All @@ -1773,7 +1799,8 @@ BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
// Dirty writeback from above trumps our clean
// writeback... discard here
// Note: markInService will remove entry from writeback buffer.
markInService(wb_entry);
wb_entry->popTarget();
writeBuffer.deallocate(wb_entry);
delete wbPkt;
}
}
Expand Down Expand Up @@ -1843,7 +1870,7 @@ BaseCache::access(PacketPtr pkt, CacheBlk *&blk, Cycles &lat,
blk->setCoherenceBits(CacheBlk::WritableBit);
}
// nothing else to do; writeback doesn't expect response
assert(!pkt->needsResponse());
// assert(!pkt->needsResponse());

updateBlockData(blk, pkt, has_old_data);
DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
Expand Down Expand Up @@ -2033,7 +2060,7 @@ BaseCache::handleFill(PacketPtr pkt, CacheBlk *blk, PacketList &writebacks,

// When handling a fill, we should have no writes to this line.
assert(addr == pkt->getBlockAddr(blkSize));
assert(!writeBuffer.findMatch(addr, is_secure));
auto entry = writeBuffer.findMatchNoService(addr, is_secure);

if (!blk) {
// better have read new data...
Expand Down Expand Up @@ -2535,7 +2562,20 @@ BaseCache::sendWriteQueuePacket(WriteQueueEntry* wq_entry)
// it gets retried
return true;
} else {
bool full = writeBuffer.isFull();
assert(tgt_pkt->cmd != MemCmd::ReadReq);
tgt_pkt->pushSenderState(wq_entry);
tgt_pkt->setWriteBackResp();
markInService(wq_entry);
if ((tgt_pkt->isCleanEviction() && tgt_pkt->isBlockCached())
|| (tgt_pkt->cacheResponding() &&
(!tgt_pkt->needsWritable() || tgt_pkt->responderHadWritable()))) {
wq_entry->popTarget();
writeBuffer.deallocate(wq_entry);
}
if (full && !writeBuffer.isFull()) {
clearBlocked(Blocked_NoWBBuffers);
}
return false;
}
}
Expand Down Expand Up @@ -2993,18 +3033,21 @@ BaseCache::CacheStats::regStats()
blockedCycles.init(NUM_BLOCKED_CAUSES);
blockedCycles
.subname(Blocked_NoMSHRs, "no_mshrs")
.subname(Blocked_NoWBBuffers, "no_WBBuffer")
.subname(Blocked_NoTargets, "no_targets")
;


blockedCauses.init(NUM_BLOCKED_CAUSES);
blockedCauses
.subname(Blocked_NoMSHRs, "no_mshrs")
.subname(Blocked_NoWBBuffers, "no_WBBuffer")
.subname(Blocked_NoTargets, "no_targets")
;

avgBlocked
.subname(Blocked_NoMSHRs, "no_mshrs")
.subname(Blocked_NoWBBuffers, "no_WBBuffer")
.subname(Blocked_NoTargets, "no_targets")
;
avgBlocked = blockedCycles / blockedCauses;
Expand Down
7 changes: 2 additions & 5 deletions src/mem/cache/base.hh
Original file line number Diff line number Diff line change
Expand Up @@ -466,12 +466,7 @@ class BaseCache : public ClockedObject, CacheAccessor

void markInService(WriteQueueEntry *entry)
{
bool wasFull = writeBuffer.isFull();
writeBuffer.markInService(entry);

if (wasFull && !writeBuffer.isFull()) {
clearBlocked(Blocked_NoWBBuffers);
}
}

/**
Expand Down Expand Up @@ -1368,6 +1363,8 @@ class BaseCache : public ClockedObject, CacheAccessor
// should only see writes or clean evicts here
assert(pkt->isWrite() || pkt->cmd == MemCmd::CleanEvict);

DPRINTF(Cache, "Write buffer allocation for addr %s\n", pkt->print());

Addr blk_addr = pkt->getBlockAddr(blkSize);

// If using compression, on evictions the block is decompressed and
Expand Down
13 changes: 8 additions & 5 deletions src/mem/cache/cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -813,8 +813,6 @@ Cache::serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt, CacheBlk *blk)
stats.cmdStats(tgt_pkt)
.missLatency[tgt_pkt->req->requestorId()] +=
completion_time - target.recvTime;
stats.cmdStats(tgt_pkt)
.missLatencyDist.sample((completion_time - target.recvTime)/500);

if (tgt_pkt->cmd == MemCmd::LockedRMWReadReq) {
// We're going to leave a target in the MSHR until the
Expand Down Expand Up @@ -886,6 +884,10 @@ Cache::serviceMSHRTargets(MSHR *mshr, const PacketPtr pkt, CacheBlk *blk)
// carried over to cache above
tgt_pkt->copyResponderFlags(pkt);
}

stats.cmdStats(tgt_pkt)
.missLatencyDist.sample(ticksToCycles(completion_time - target.recvTime));

tgt_pkt->makeTimingResponse();
// if this packet is an error copy that to the new packet
if (is_error)
Expand Down Expand Up @@ -986,7 +988,7 @@ Cache::sendHintViaMSHRTargets(MSHR *mshr, const PacketPtr pkt)
firstTgtDelayed = transfer_offset != 0 && pkt->payloadDelay != 0;
}
Tick sendHintTime = curTick() + ((transfer_offset || firstTgtDelayed) ? pkt->payloadDelay : 0);
DPRINTF(Cache, "sendHintViaMSHRTargets: pkt: %#lx, sendHintTime: %ld", tgt_pkt->getAddr(), sendHintTime);
DPRINTF(Cache, "sendHintViaMSHRTargets: pkt: %#lx, sendHintTime: %ld\n", tgt_pkt->getAddr(), sendHintTime);
if (sendHintTime == curTick()) {
BaseCache::cpuSidePort.sendCustomSignal(tgt_pkt, DcacheRespType::Hint);
} else {
Expand Down Expand Up @@ -1370,7 +1372,7 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
}

//We also need to check the writeback buffers and handle those
WriteQueueEntry *wb_entry = writeBuffer.findMatch(blk_addr, is_secure);
WriteQueueEntry *wb_entry = writeBuffer.findMatchNoService(blk_addr, is_secure);
if (wb_entry) {
DPRINTF(Cache, "Snoop hit in writeback to addr %#llx (%s)\n",
pkt->getAddr(), is_secure ? "s" : "ns");
Expand Down Expand Up @@ -1426,7 +1428,8 @@ Cache::recvTimingSnoopReq(PacketPtr pkt)
if (invalidate && wb_pkt->cmd != MemCmd::WriteClean) {
// Invalidation trumps our writeback... discard here
// Note: markInService will remove entry from writeback buffer.
markInService(wb_entry);
wb_entry->popTarget();
writeBuffer.deallocate(wb_entry);
delete wb_pkt;
}
}
Expand Down
26 changes: 24 additions & 2 deletions src/mem/cache/write_queue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,37 @@ WriteQueue::allocate(Addr blk_addr, unsigned blk_size, PacketPtr pkt,
return entry;
}


WriteQueueEntry*
WriteQueue::findMatchNoService(Addr blk_addr, bool is_secure,
                               bool ignore_uncacheable) const
{
    // Scan the allocated entries for one matching this block address
    // that has not yet been sent downstream. Entries already in
    // service are skipped: they are awaiting a response and must not
    // be treated as a live match. Uncacheable entries are likewise
    // skipped (when requested), since cacheable accesses must never
    // be folded into an entry serving an uncacheable access.
    for (const auto& candidate : allocatedList) {
        if (ignore_uncacheable && candidate->isUncacheable())
            continue;
        if (candidate->inService)
            continue;
        if (candidate->matchBlockAddr(blk_addr, is_secure))
            return candidate;
    }
    // No eligible match found.
    return nullptr;
}

void
WriteQueue::markInService(WriteQueueEntry *entry)
{
// for a normal eviction, such as a writeback or a clean evict,
// there is no more to do as we are done from the perspective of
// this cache, and for uncacheable write we do not need the entry
// as part of the response handling
entry->popTarget();
deallocate(entry);

entry->inService = true;
readyList.erase(entry->readyIter);
_numInService += 1;
}

} // namespace gem5
3 changes: 3 additions & 0 deletions src/mem/cache/write_queue.hh
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ class WriteQueue : public Queue<WriteQueueEntry>
WriteQueueEntry *allocate(Addr blk_addr, unsigned blk_size,
PacketPtr pkt, Tick when_ready, Counter order);

WriteQueueEntry* findMatchNoService(Addr blk_addr, bool is_secure,
bool ignore_uncacheable = true) const;

/**
* Mark the given entry as in service. This removes the entry from
* the readyList or deallocates the entry if it does not expect a
Expand Down
1 change: 1 addition & 0 deletions src/mem/coherent_xbar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID cpu_side_port_id)
if (sink_packet) {
DPRINTF(CoherentXBar, "%s: Not forwarding %s\n", __func__,
pkt->print());
expect_response = false;
} else {
// determine if we are forwarding the packet, or responding to
// it
Expand Down
Loading

0 comments on commit b6340ae

Please sign in to comment.