Skip to content

Commit

Permalink
updates to UQ driver
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeremiah Wilke committed Mar 6, 2016
1 parent 1750550 commit 656d935
Show file tree
Hide file tree
Showing 20 changed files with 369 additions and 58 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,7 @@ bin/missing
bin/py-compile
bin/ar-lib
bin/compile

tests/reference/upd
sstmac/common/config.h.in
sstmac/common/config.h.in~
4 changes: 2 additions & 2 deletions configurations/debug.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ congestion_model = simple
amm_model = amm1
accuracy_parameter = 4096
network_bandwidth = 6GB/s
network_hop_latency = 200ns
network_hop_latency = 50ns
injection_bandwidth = 10GB/s
injection_latency = 2us
injection_latency = 0.1us
memory_bandwidth = 10GB/s
memory_latency = 15ns

Expand Down
2 changes: 1 addition & 1 deletion dumpi
Submodule dumpi updated from 1888f3 to 6de6c5
2 changes: 1 addition & 1 deletion sprockit
1 change: 0 additions & 1 deletion sstmac/common/event_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ event_manager::register_stat(
if (stat->registered())
return;


stats_entry& entry = stats_[stat->fileroot()];
entry.collectors.push_back(stat);
entry.reduce_all = reduce_all;
Expand Down
119 changes: 112 additions & 7 deletions sstmac/libraries/uq/uq.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,129 @@ sstmac_uq_finalize(void* queue)
delete q;
}

/**
 Release a 2D table created by tmpl_allocate_values.
 The element storage is a single contiguous array reachable through
 vals[0]; the row table itself is a second array.
 @param vals The row table to free. A null pointer is accepted and ignored,
             mirroring the behaviour of delete[] on a null pointer.
*/
template <class T>
void
tmpl_free_values(T** vals)
{
  // Without this guard vals[0] would dereference a null pointer
  if (!vals)
    return;

  delete[] vals[0]; // contiguous element storage
  delete[] vals;    // row pointer table
}

/**
 Allocate an nrows x ncols table whose elements live in one contiguous
 array. Row i of the returned table points at element i*ncols of that array.
 @param nrows Number of rows
 @param ncols Number of elements per row
 @return The row table. Slot 0 always points at the element storage,
         even when nrows is 0, because tmpl_free_values releases the
         storage through slot 0.
*/
template <class T>
T**
tmpl_allocate_values(int nrows, int ncols)
{
  T* storage = new T[nrows*ncols];
  // Keep at least one slot so that rows[0] exists for an empty table;
  // the free routine reads rows[0] unconditionally.
  T** rows = new T*[nrows > 0 ? nrows : 1];
  rows[0] = storage;
  for (int r=1; r < nrows; ++r){
    rows[r] = rows[r-1] + ncols;
  }
  return rows;
}

// Allocate an nrows x ncols table of doubles for parameter values.
// Pair with free_values() to release it.
double**
allocate_values(int nrows, int ncols)
{
  double** table = tmpl_allocate_values<double>(nrows, ncols);
  return table;
}

// Allocate an nrows x ncols table of doubles for job results.
// Pair with free_results() to release it.
double**
allocate_results(int nrows, int ncols)
{
  double** table = tmpl_allocate_values<double>(nrows, ncols);
  return table;
}

// Allocate an nrows x ncols table of uq_param_t structs.
// The structs are not initialized here; pair with free_params() to release.
uq_param_t**
allocate_params(int nrows, int ncols)
{
  uq_param_t** table = tmpl_allocate_values<uq_param_t>(nrows, ncols);
  return table;
}

// Release a table obtained from allocate_params().
void
free_params(uq_param_t** params)
{
  // element type is deduced from the argument
  tmpl_free_values(params);
}

// Release a table obtained from allocate_results().
void
free_results(double** results)
{
  // element type is deduced from the argument
  tmpl_free_values(results);
}

// Release a table obtained from allocate_values().
void
free_values(double** results)
{
  // element type is deduced from the argument
  tmpl_free_values(results);
}

/**
 Wait for a batch of forked simulations and collect their results.
 Each simulation is deleted once its results pointer has been recorded.
 @param sims     The simulations to wait on
 @param nsims    How many entries of sims to process
 @param results  results[i] is set to the result array of sims[i]
 @param nresults The number of results every simulation must report;
                 a mismatch aborts with a message naming the simulation
*/
static void
wait_sims(Simulation** sims, int nsims, double** results, int nresults)
{
  for (int i=0; i < nsims; ++i){
    Simulation* sim = sims[i];
    sim->wait();
    // NOTE(review): this overwrites the caller's row pointer instead of
    // copying into the row. If `results` came from allocate_results(),
    // free_results() would later delete[] the simulation's buffer rather
    // than the table's own storage - confirm the intended ownership.
    results[i] = sim->results();
    if (sim->numResults() != nresults){
      spkt_abort_printf("got wrong number of results for sim %d: expected %d, got %d",
        i, nresults, sim->numResults());
    }
    delete sim;
  }
}

/**
 Store one UQ parameter into the simulation parameters under `name`,
 using the setter that matches the parameter's declared type.
 @param params The parameter set to modify
 @param name   The key of the parameter to set
 @param p      The parameter description. p.type selects the setter;
               String reads p.cstr, every other type reads p.value and p.units.
 Aborts if p.type is not one of the known enumerators, which is the
 usual symptom of an uninitialized uq_param_t.
*/
static void
set_param(sprockit::sim_parameters& params, const char* name, uq_param_t& p)
{
  switch(p.type){
    case ByteLength:
      params[name].setByteLength(p.value, p.units);
      break;
    case Bandwidth:
      params[name].setBandwidth(p.value, p.units);
      break;
    case Frequency:
      params[name].setFrequency(p.value, p.units);
      break;
    case Latency: // latencies are stored as times
    case Time:
      params[name].setTime(p.value, p.units);
      break;
    case String:
      params[name].set(p.cstr);
      break;
    case ValueWithUnits:
      params[name].setValue(p.value, p.units);
      break;
    default:
      // report the offending value and key so the bad entry can be located
      spkt_abort_printf("invalid parameter type %d for %s - make sure param.type is initialized",
        static_cast<int>(p.type), name);
  }
}

extern "C" void
sstmac_uq_run(void* queue,
sstmac_uq_run_units(void* queue,
int njobs, int nparams, int nresults, int max_nthread,
const char* param_names[], double* param_values[],
const char* param_units[],
double* results[])
{
uq_param_t** params = allocate_params(njobs, nparams);
for (int j=0; j < njobs; ++j){
for (int p=0; p < nparams; ++p){
params[j][p].value = param_values[j][p];
params[j][p].type = ValueWithUnits;
params[j][p].units = param_units[p];
}
}
sstmac_uq_run(queue, njobs, nparams, nresults, max_nthread,
param_names, params, results);
free_params(params);
}

extern "C" void
sstmac_uq_run(void* queue,
int njobs, int nparams, int nresults, int max_nthread,
const char* param_names[], uq_param_t* param_values[],
double* results[])
{
SimulationQueue* q = (SimulationQueue*) queue;
Expand All @@ -45,15 +150,15 @@ sstmac_uq_run(void* queue,

int num_running = 0;

for (int j=0; j < njobs; ++j, ++num_running){
for (int j=0; j < njobs; ++j){
if (num_running == max_nthread){
int result_offset = j - max_nthread;
wait_sims(sims, max_nthread, results+result_offset, nresults);
num_running = 0;
}
double* param_vals = param_values[j];
uq_param_t* param_vals = param_values[j];
for (int param=0; param < nparams; ++param){
params[param_names[param]] = param_vals[param];
set_param(params, param_names[param], param_vals[param]);
}
sims[num_running++] = q->fork(params);
}
Expand Down
88 changes: 87 additions & 1 deletion sstmac/libraries/uq/uq.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,99 @@
#ifdef __cplusplus
extern "C" {
#endif

/**
 Tag describing how a uq_param_t should be applied to the simulation
 parameters. set_param() in uq.cc switches on this value.
*/
typedef enum {
ByteLength, /* value + units, applied with setByteLength */
Bandwidth, /* value + units, applied with setBandwidth */
Latency, /* value + units, applied with setTime (same handling as Time) */
Time, /* value + units, applied with setTime */
Frequency, /* value + units, applied with setFrequency */
String, /* cstr only, applied with set */
ValueWithUnits /* value + units, applied with setValue */
} uq_param_type_t;

/**
 One parameter setting for one job. The type field decides which of the
 other fields are read when the parameter is applied.
*/
typedef struct
{
double value; /* numeric magnitude; read for every type except String */
const char* units; /* unit string passed along with value */
const char* cstr; /* string value; read only when type is String */
uq_param_type_t type; /* must be initialized - an unknown value aborts the run */
} uq_param_t;

/**
Allocate a 2D double array
@param njobs
@param nresults
*/
double** allocate_results(int njobs, int nresults);

/**
Allocate a 2D double array
@param njobs
@param nparams
*/
double** allocate_values(int njobs, int nparams);

/**
Allocate a 2D array of param structs
@param njobs
@param nparams
*/
uq_param_t** allocate_params(int njobs, int nparams);

/**
Free a 2D array allocated by allocate_results function
@param results
*/
void free_results(double** results);

/**
Free a 2D array allocated by allocate_values function
@param values
*/
void free_values(double** values);

/**
Free a 2D array allocated by allocate_params function
@param params
*/
void free_params(uq_param_t** params);

/**
@param argc The argc that would be used by a standalone SST/macro simulation
@param argv The argv that would be used by a standalone SST/macro simulation
@return A void* pointer to the simulation queue object. This pointer
should NOT be freed. Value is void* for C compatibility.
*/
void* sstmac_uq_init(int argc, char** argv);

/**
Run a set of jobs with particular parameters
@param queue A pointer to a queue object created by sstmac_uq_init
@param njobs The number of jobs (simulations) to run
@param nparams The number of parameters to set for each job
@param nresults The number of results returned by each job
@param max_nthread The maximum number of threads, i.e.
the max number of jobs that can run simultaneously
@param param_names An array of size nparams. The name of each parameter
to configure for each job
@param param_values A 2D array of size njobs X nparams
The value corresponding to each parameter for all jobs
Indexed as p[jobID][paramID]
@param results A 2D array of size njobs X nresults
Will hold the result values for each job
Indexed as p[jobID][resultID]
*/
void sstmac_uq_run(void* queue,
int njobs, int nparams, int nresults, int max_nthread,
const char* param_names[], double* param_values[],
const char* param_names[], uq_param_t* param_values[],
double* results[]);

/**
@param queue A pointer to a queue object created by sstmac_uq_init
After finalize, the queue is no longer usable and all
resources used by the queue are freed
*/
void sstmac_uq_finalize(void* queue);
#ifdef __cplusplus
}
Expand Down
56 changes: 41 additions & 15 deletions sstmac/main/driver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,15 @@
#define READ 0
#define WRITE 1

MakeDebugSlot(driver)

namespace sstmac {

double* SimulationQueue::results_ = 0;
int SimulationQueue::num_results_ = 0;

#define driver_debug(...) \
debug_printf(sprockit::dbg::driver, __VA_ARGS__)

void
Simulation::setParameters(sprockit::sim_parameters *params)
Expand All @@ -28,8 +32,42 @@ Simulation::setParameters(sprockit::sim_parameters *params)
// Block until the forked simulation process exits, then pull its
// statistics and results out of the pipe via finalize().
// Does nothing once the simulation has been marked complete, so
// calling it repeatedly is harmless.
void
Simulation::wait()
{
  if (!complete_){
    driver_debug("wait on pid=%d", pid_);
    // NOTE(review): the child is reaped before the pipe is drained; if a
    // child ever writes more than the pipe can buffer this could stall -
    // confirm result sizes stay small.
    int exit_status;
    pid_t reaped = waitpid(pid_, &exit_status, 0);
    finalize();
  }
}

// Read exactly nbytes from fd into buf. read() may legitimately return
// fewer bytes than requested, so keep reading until the buffer is full.
// Returns false if read() reports end-of-file or an error first.
static bool
read_sim_pipe_fully(int fd, void* buf, size_t nbytes)
{
  char* dst = static_cast<char*>(buf);
  while (nbytes > 0){
    ssize_t got = read(fd, dst, nbytes);
    if (got <= 0)
      return false;
    dst += got;
    nbytes -= got;
  }
  return true;
}

// Collect the output of a finished simulation from its pipe:
// first a sim_stats header, then stats.numResults doubles (if any).
// Stores the results, simulated time and wall time on this object,
// closes the read end of the pipe and marks the simulation complete.
// Throws sprockit::value_error if the pipe cannot be read in full;
// the pipe is closed before throwing.
void
Simulation::finalize()
{
  const int fd = readPipe();

  sim_stats stats;
  if (!read_sim_pipe_fully(fd, &stats, sizeof(sim_stats))){
    close(fd);
    spkt_throw(sprockit::value_error,
      "failed reading pipe from simulation");
  }

  if (stats.numResults > 0){
    double* results = new double[stats.numResults];
    if (!read_sim_pipe_fully(fd, results, stats.numResults*sizeof(double))){
      // do not hand a partially filled buffer to the caller
      delete[] results;
      close(fd);
      spkt_throw(sprockit::value_error,
        "failed reading results from simulation pipe");
    }
    setResults(results, stats.numResults);
    driver_debug("finalize nresults=%d", num_results_);
  }

  close(fd);
  setSimulatedTime(stats.simulatedTime);
  setWallTime(stats.wallTime);

  complete_ = true;
}

// Record the result array (and its length) in the queue's static slots.
// The pointer is stored as-is; no copy of the data is made here.
void
SimulationQueue::publishResults(double* results, int nresults)
{
  num_results_ = nresults;
  results_ = results;
}

void
Expand Down Expand Up @@ -74,13 +112,14 @@ SimulationQueue::fork(sprockit::sim_parameters* params)
close(pfd[READ]);
write(pfd[WRITE], &stats, sizeof(sim_stats));
if (results_)
write(pfd[WRITE], results_, num_results_);
write(pfd[WRITE], results_, num_results_*sizeof(double));
close(pfd[WRITE]);
exit(0);
return 0;
} else {
close(pfd[WRITE]);
Simulation* sim = new Simulation;
driver_debug("forked process %d", pid);
sim->setPid(pid);
sim->setParameters(&template_params_);
sim->setPipe(pfd);
Expand All @@ -99,21 +138,8 @@ SimulationQueue::waitForCompleted()
int status;
pid_t result = waitpid(sim->pid(), &status, WNOHANG);
if (result > 0){
sim_stats stats;
int bytes = read(sim->readPipe(), &stats, sizeof(sim_stats));
if (bytes <= 0){
spkt_throw(sprockit::value_error,
"failed reading pipe from simulation");
}
if (stats.numResults){
double* results = new double[stats.numResults];
bytes = read(sim->readPipe(), results, stats.numResults*sizeof(double));
sim->setResults(results, stats.numResults);
}
close(sim->readPipe());
sim->setSimulatedTime(stats.simulatedTime);
sim->setWallTime(stats.wallTime);
pending_.erase(it);
sim->finalize();
return sim;
}
}
Expand Down
Loading

0 comments on commit 656d935

Please sign in to comment.