Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relaxed Swarm implementation from Hive #3

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Setup
Dependencies:
- Linux on x86_64: We've tested with Ubuntu 14.04, 16.04, and 18.04.
If you want to run this in a VM, see Vagrant setup below.
- GCC: Version 4.8 or newer will suffice to build the simulator itself, which
- GCC: Any version from 4.8 through 9.5 will suffice to build the simulator itself, which
is written in C++11 and depends on a particular GCC ABI. (Clang won't work.)
Test applications are written in C++14 so they can build with GCC 5+ or Clang.
- Pin version 2.14: Download this from
Expand Down
2 changes: 2 additions & 0 deletions sim/init/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,8 @@ static void InitSystem(const Config& config) {
initinfo->usePreciseAddressSets = ("Precise" == std::string(
config.get<const char*>("sys.robs.addressSet.type", "Bloom")));

initinfo->relaxed = config.get<bool>("sys.robs.relaxedOrder", false);

uint32_t maxFrameDepth =
config.get<uint32_t>("sys.robs.maxFrameDepth", UINT32_MAX);
//FIXME(victory): Remove this hack when we have a less-broken
Expand Down
7 changes: 4 additions & 3 deletions sim/rob.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1017,10 +1017,10 @@ TaskPtr ROB::removeUntiedTaskImpl(const uint64_t taskFn,
// FIXME(dsm): Use multi-index::range()
auto start =
boost::make_reverse_iterator(runQueue.lower_bound(
std::make_tuple(std::cref(maxTS), false)));
std::make_tuple(std::cref(maxTS), 0, false)));
auto end =
boost::make_reverse_iterator(runQueue.lower_bound(
std::make_tuple(std::cref(minTS), false)));
std::make_tuple(std::cref(minTS), 0, false)));

// For liveness: we don't want to spill the minimum task.
// Spilling that task would certainly induce a swarm::requeuer(...) of
Expand Down Expand Up @@ -1284,7 +1284,8 @@ std::pair<TaskPtr, rob::Stall> ROB::taskToRun(ThreadID tid) {
// Find all tasks with the same timestamp, including all producers
TimeStamp ubTs = runQueue.min()->lts();
ubTs.clearTieBreaker();
auto ub = runQueue.upper_bound(std::make_tuple(std::cref(ubTs), true));
auto ub = runQueue.upper_bound(std::make_tuple(std::cref(ubTs),
runQueue.min()->softTs, true));
ub = std::prev(ub);

// Manually check if std::distance(rq.begin(), ub) < underflow.
Expand Down
4 changes: 2 additions & 2 deletions sim/robtypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ using ExecQ_ty = fixed_capacity_ordered_set<

// The RunQ sorts tasks by their timestamp, then by their soft timestamp, and
// deprioritizes producers. It uses a global_fun key extractor to allow calls
// to lower/upper_bound to use keys.
using RunQ_key_ty = std::tuple<const TimeStamp&, uint8_t>;
using RunQ_key_ty = std::tuple<const TimeStamp&, uint64_t, uint8_t>;
inline RunQ_key_ty getRunQKey(const TaskPtr& t) {
// Given an equal choice between running a programmer-defined producer and a
// requeuer, choose the normal producer
return std::make_tuple(std::cref(t->lts()),
return std::make_tuple(std::cref(t->lts()), t->softTs,
(t->isProducer() << 1) | t->isRequeuer());
}
using RunQ_ty = ordered_pointer_set<TaskPtr, ordered_non_unique<global_fun<const TaskPtr&, RunQ_key_ty, getRunQKey>>, RunQ_key_ty>;
Expand Down
8 changes: 5 additions & 3 deletions sim/sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,8 @@ spin::ThreadId HandleSetGvtMagicOp(spin::ThreadId tid, uint64_t cycle, spin::Thr
}

static void DispatchTaskToContext(const Task& task, spin::ThreadContext* ctxt) {
spin::setReg(ctxt, REG::REG_RDI, task.ts.app());
if (task.softTs) spin::setReg(ctxt, REG::REG_RDI, task.softTs);
else spin::setReg(ctxt, REG::REG_RDI, task.ts.app());

const uint32_t numArgs = task.args.size();
constexpr REG regs[] = {REG::REG_RSI, REG::REG_RDX, REG::REG_RCX,
Expand Down Expand Up @@ -875,7 +876,7 @@ spin::ThreadId HandleEnqueueMagicOp(const uint64_t op,
const bool requeuer = op & EnqFlags::REQUEUER;
const bool maySpec = op & EnqFlags::MAYSPEC;
const bool cantSpec = op & EnqFlags::CANTSPEC;
const bool isSoftPrio = op & EnqFlags::ISSOFTPRIO;
const bool isSoftPrio = op & EnqFlags::ISSOFTPRIO || ossinfo->relaxed;
const bool runOnAbort = op & EnqFlags::RUNONABORT;
const bool noTimestamp = op & EnqFlags::NOTIMESTAMP || runOnAbort;
const bool nonSerialHint = op & EnqFlags::NONSERIALHINT;
Expand Down Expand Up @@ -1016,13 +1017,14 @@ spin::ThreadId HandleEnqueueMagicOp(const uint64_t op,
assert(curThread->tid == parent->runningTid);
assert(curThread->state != BLOCKED);
curThread->core->finishTask(curThread->tid);
if (ossinfo->relaxed) parent->softTs = tsApp;
GetCurRob().yieldTask(parent,
// Advance the requeuer's timestamp if its next minimum
// child is timestamped, otherwise be conservative and reuse
// the requeuer's old timestamp.
// N.B. if an ordinary requeuer is yielding while enqueuing
// a frame requeuer, we are really using the tsApp value.
noTimestamp ? parent->ts.app() : tsApp);
noTimestamp || isSoftPrio ? parent->ts.app() : tsApp);
curThread->task = nullptr;

assert(curThread->rspCheckpoint);
Expand Down
2 changes: 2 additions & 0 deletions sim/sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ struct GlobSimInfo {
std::vector<ROB*> robs;
std::vector<TSB*> tsbs;
std::vector<ThreadThrottlerUnit*> throttlers;

bool relaxed;
};

extern const GlobSimInfo* ossinfo;
Expand Down
2 changes: 1 addition & 1 deletion sim/task.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ class Task : public std::enable_shared_from_this<Task> {
// Unordered tasks indicated as using soft priority will take on the programmer-
// specified timestamp as their soft timestamp, meaning that the tasks will be
// dequeued in the soft timestamp order.
const uint64_t softTs;
uint64_t softTs;

// Must this task be executed speculatively? Can it be run speculatively,
// but also non-speculatively when its parent commits and assuming perfect
Expand Down
42 changes: 28 additions & 14 deletions sim/virt/virt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
/* This file was adapted from zsim. */

#include <syscall.h>
#include <errno.h>
#include "sim/log.h"
#include "sim/sim.h"
#include "sim/virt/virt.h"
Expand Down Expand Up @@ -57,26 +58,38 @@ bool syscallEnter(spin::ThreadId tid, spin::ThreadContext* ctxt) {
uint64_t syscall = spin::getReg(ctxt, REG_RAX);
DEBUG("[%d] syscall %ld", tid, syscall);

// glibc version 2.28+, if built with GCC's -fcf-protection, will have
// init_cpu_features() (which runs early on during the execution of any
// process) attempt to call the nonexisting ARCH_CET_STATUS (0x3001)
// subfunction of arch_prctl. See:
// https://sourceware.org/git/?p=glibc.git;a=commit;h=394df3815e8ceec750fd06583eee4896174ce808
// This became the default in Ubuntu 19.10+. See:
// https://wiki.ubuntu.com/ToolChain/CompilerFlags#A-fcf-protection
// Pin v2.14 crashes when it sees this unexpected arch_prctl subfunction.
// Avoid the crash by just pretending to execute the syscall instruction
// while skipping over it.
if (syscall == SYS_arch_prctl && spin::getReg(ctxt, REG_RDI) == 0x3001) {
spin::setReg(ctxt, REG::REG_RIP,
spin::getReg(ctxt, REG::REG_RIP) +
2/*bytes in fast system call instruction*/);
// glibc version 2.28+, if built with GCC's -fcf-protection, will have
// init_cpu_features() (which runs early on during the execution of any
// process) attempt to call the nonexisting ARCH_CET_STATUS (0x3001)
// subfunction of arch_prctl. See:
// https://sourceware.org/git/?p=glibc.git;a=commit;h=394df3815e8ceec750fd06583eee4896174ce808
// This became the default in Ubuntu 19.10+. See:
// https://wiki.ubuntu.com/ToolChain/CompilerFlags#A-fcf-protection
// Pin v2.14 crashes when it sees this unexpected arch_prctl subfunction.
// Avoid the crash by just pretending to execute the syscall instruction
// while skipping over it.
if (syscall == SYS_arch_prctl && spin::getReg(ctxt, REG_RDI) == 0x3001) {
DEBUG("[%d] ignoring prtcl", tid);
spin::setReg(ctxt, REG::REG_RIP,
spin::getReg(ctxt, REG::REG_RIP) +
2/*bytes in fast system call instruction*/);
spin::setReg(ctxt, REG::REG_RAX,
-1UL/*indicates failure of syscall, as glibc expects*/);
return false;
}

// The clone3 syscall, which glibc on Ubuntu 22.04 uses when spawning threads,
// fails here, but glibc will fall back to clone if errno is ENOSYS.
//
// So pretend the syscall failed with that errno, similar to above.
if (syscall == SYS_clone3) {
spin::setReg(ctxt, REG_RAX, -ENOSYS);
spin::setReg(ctxt, REG_RIP, spin::getReg(ctxt, REG_RIP) + 2);
return false;
}

if (!IsInFastForward()) {
DEBUG("[%d] non-ff syscall", tid);
// Perform reads/writes to syscall input/output data to reflect its memory
// behavior. This avoids conflicts on syscall data.
if (syscall == SYS_read) {
Expand Down Expand Up @@ -117,6 +130,7 @@ bool syscallEnter(spin::ThreadId tid, spin::ThreadContext* ctxt) {
default: break;
}
if (keepThreadCaptured) syncSyscallTid = tid;
DEBUG("[%d] returning %d", tid, !keepThreadCaptured);
return !keepThreadCaptured;
}

Expand Down