Skip to content

Commit

Permalink
WIP permissive
Browse files Browse the repository at this point in the history
  • Loading branch information
davexparker committed Aug 27, 2021
1 parent 8e42c26 commit 5a95348
Show file tree
Hide file tree
Showing 2 changed files with 353 additions and 0 deletions.
316 changes: 316 additions & 0 deletions prism/src/explicit/MDPModelChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,146 @@ public BitSet prob1(MDPGeneric<?> mdp, BitSet remain, BitSet target, boolean min
return u;
}

/**
 * Zero total rewards precomputation algorithm,
 * i.e. determine the states of an MDP from which the min/max expected total
 * cumulative reward is zero.
 * Only the max case is currently implemented; {@code min}=true results in an
 * {@link UnsupportedOperationException} from the inner fixed point step.
 * Optionally, for max only, store optimal (memoryless) strategy info
 * (NOTE(review): strategy generation is not yet implemented - see TODO below).
 * @param mdp The MDP
 * @param mdpRewards The rewards
 * @param mindoc Min or max rewards (true=min, false=max)
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null; currently unused)
 */
public BitSet rew0(MDPGeneric<?> mdp, MDPRewards mdpRewards, boolean min, int strat[])
{
	int n, iters;
	BitSet v, soln;
	boolean v_done;
	long timer;

	// Start precomputation
	timer = System.currentTimeMillis();
	if (!silentPrecomputations)
		mainLog.println("Starting Rew0 (" + (min ? "min" : "max") + ")...");

	// Initialise vectors
	n = mdp.getNumStates();
	v = new BitSet(n);
	soln = new BitSet(n);

	// Find states with positive state reward:
	// these certainly have positive expected total reward
	BitSet srPos = new BitSet(n);
	for (int s = 0; s < n; s++) {
		if (mdpRewards.getStateReward(s) > 0) {
			srPos.set(s);
		}
	}

	// Fixed point loop: compute (as a least fixed point) the set of states
	// from which the max expected total reward is positive.
	// Should start from the empty set, but we optimise by
	// starting from srPos, thus bypassing the first iteration.
	iters = 0;
	v_done = false;
	v.clear();
	v.or(srPos);
	soln.clear();
	soln.or(srPos);
	while (!v_done) {
		iters++;
		// Single step of the fixed point
		if (min) {
			// Min case not yet supported
			throw new UnsupportedOperationException();
		} else {
			rew0Astep(mdp, mdpRewards, v, soln);
		}
		// Check termination
		v_done = soln.equals(v);
		// v = soln
		v.clear();
		v.or(soln);
	}

	// Negate: the rew0 states are exactly those NOT in the fixed point
	v.flip(0, n);

	// TODO: If we need to generate a strategy, do another iteration of the
	// inner loop for this (restricted to the states where the answer is known),
	// analogously to the strategy generation in prob1. Not yet implemented.

	// Finished precomputation
	timer = System.currentTimeMillis() - timer;
	if (!silentPrecomputations) {
		mainLog.print("Rew0 (" + (min ? "min" : "max") + ")");
		mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
	}

	return v;
}

/**
 * One step of the rew0 fixed point (max case): for every state s, set
 * {@code result(s)} iff the max expected total reward from s is provably
 * positive given the set {@code v} of states already known to be positive,
 * i.e. iff s has positive state reward, or some choice of s has positive
 * transition reward or can reach a state in {@code v}.
 * @param mdp The MDP
 * @param mdpRewards The rewards
 * @param v States already known to have positive max expected total reward
 * @param result Storage for the updated set (overwritten for every state)
 */
private void rew0Astep(MDPGeneric<?> mdp, MDPRewards mdpRewards, BitSet v, BitSet result)
{
	int numStates = mdp.getNumStates();
	for (int s = 0; s < numStates; s++) {
		// A positive state reward alone guarantees positive total reward
		if (mdpRewards.getStateReward(s) > 0) {
			result.set(s);
			continue;
		}
		// Otherwise, look for some choice with positive transition reward,
		// or with a successor already marked in v
		boolean positive = false;
		int numChoices = mdp.getNumChoices(s);
		for (int ch = 0; ch < numChoices && !positive; ch++) {
			positive = mdpRewards.getTransitionReward(s, ch) > 0 || mdp.someSuccessorsInSet(s, ch, v);
		}
		result.set(s, positive);
	}
}

/**
* Compute reachability probabilities using value iteration.
* Optionally, store optimal (memoryless) strategy info.
Expand Down Expand Up @@ -2979,6 +3119,182 @@ public List<Integer> expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state,
return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val);
}

/**
 * Synthesise a (permissive) multi-strategy for an MDP with respect to a lower
 * bound on the min expected total reward, by building and solving a mixed
 * integer linear program (MILP) with the Gurobi solver.
 * The MILP has a continuous variable x_s per state (expected reward) and a
 * binary variable y_{s,a} per state-choice pair (choice allowed or not).
 * NOTE(review): work in progress - results are printed to stdout but not
 * returned; an empty ModelCheckerResult is returned for now.
 * @param mdp The MDP
 * @param mdpRewards The rewards
 * @param bound Lower bound required on the expected total reward from the initial state
 */
public ModelCheckerResult computeMultiStrategy(MDP mdp, MDPRewards mdpRewards, double bound) throws PrismException
{
	// Only minimisation is supported here (bound is a lower bound on min reward)
	boolean min = true;

	// Start solution
	long timer = System.currentTimeMillis();
	mainLog.println("Starting linear programming (" + (min ? "min" : "max") + ")...");

	// Store num states
	int n = mdp.getNumStates();
	int sInit = mdp.getFirstInitialState();

	// States whose max expected total reward is zero; their x variables are fixed to 0 below
	BitSet maxRew0 = rew0(mdp, mdpRewards, false, null);
	// NOTE(review): debug print to stdout, not mainLog - presumably temporary
	System.out.println("maxRew0 = " + maxRew0);

	try {
		// Initialise LP solver
		GRBEnv env = new GRBEnv("gurobi.log");
		env.set(GRB.IntParam.OutputFlag, 0);
		GRBModel model = new GRBModel(env);


		// x_s: expected total reward from state s; forced to 0 for maxRew0 states
		GRBVar xVars[] = new GRBVar[n];
		for (int s = 0; s < n; s++) {
			xVars[s] = model.addVar(0.0, maxRew0.get(s) ? 0.0 : GRB.INFINITY, 0.0, GRB.CONTINUOUS, "x" + s);
		}
		// y_{s,j}: binary, 1 iff choice j is allowed in state s by the multi-strategy
		// int nc = mdp.getNumChoices();
		GRBVar yaVars[][] = new GRBVar[n][];
		for (int s = 0; s < n; s++) {
			int nc = mdp.getNumChoices(s);
			yaVars[s] = new GRBVar[nc];
			for (int j = 0; j < nc; j++) {
				yaVars[s][j] = model.addVar(0.0, 1.0, 0.0, GRB.BINARY, "y" + s + "_" + j + mdp.getAction(s, j));
			}
		}

		// Big-M style constant used both as penalty weight and constraint relaxation
		double scale = 100.0;

		// Objective: minimise -x_init - sum of (scaled) penalties for allowed choices,
		// i.e. maximise reward plus permissiveness

		GRBLinExpr exprObj = new GRBLinExpr();
		exprObj.addTerm(-1.0, xVars[sInit]);
		//double rhs = 0.0;
		for (int s = 0; s < n; s++) {
			int nc = mdp.getNumChoices(s);
			for (int j = 0; j < nc; j++) {
				// phi_s_j: penalty weight for choice j of s (uniform for now)
				double phi_s_j = 1.0 * scale;
				exprObj.addTerm(-phi_s_j, yaVars[s][j]);
				//rhs -= phi_s_j;
			}
		}
		model.setObjective(exprObj, GRB.MINIMIZE);

		// Counter used to generate unique constraint names
		int counter = 0;

		// Constraint: expected reward from the initial state meets the bound
		double b = bound;
		GRBLinExpr expr = new GRBLinExpr();
		expr.addTerm(1.0, xVars[sInit]);
		model.addConstr(expr, GRB.GREATER_EQUAL, b, "c" + counter++);

		// Constraint: every state must keep at least one allowed choice
		for (int s = 0; s < n; s++) {
			expr = new GRBLinExpr();
			int nc = mdp.getNumChoices(s);
			for (int j = 0; j < nc; j++) {
				expr.addTerm(1.0, yaVars[s][j]);
			}
			model.addConstr(expr, GRB.GREATER_EQUAL, 1.0, "c" + counter++);
		}


		// Set up arrays for passing LP to solver
		// (row/colno hold the coefficients of one Bellman constraint; at most
		// n successors plus the state itself, hence size n + 1)
		double row[] = new double[n + 1];
		int colno[] = new int[n + 1];

		// Add constraints
		for (int s = 0; s < n; s++) {

			// solver.setBounds(s + 1, 0, 1);

			// if (yes.get(s)) {
			//// row[0] = 1.0;
			//// colno[0] = s + 1;
			// GRBLinExpr expr = new GRBLinExpr();
			// expr.addTerm(1.0, xVars[s]);
			// model.addConstr(expr, GRB.EQUAL, 1.0, "c" + counter++);
			// } else if (no.get(s)) {
			//// row[0] = 1.0;
			//// colno[0] = s + 1;
			//// solver.addConstraintex(1, row, colno, LpSolve.EQ, 0.0);
			// GRBLinExpr expr = new GRBLinExpr();
			// expr.addTerm(1.0, xVars[s]);
			// model.addConstr(expr, GRB.EQUAL, 0.0, "c" + counter++);
			// } else {

			// Bellman constraint per state-choice pair:
			// x_s - sum_t p(s,i,t)*x_t <= r(s,i) + scale*(1 - y_{s,i}),
			// i.e. binding only when choice i is allowed (y = 1)
			int numChoices = mdp.getNumChoices(s);
			for (int i = 0; i < numChoices; i++) {
				int count = 0;
				// Coefficient 1 for x_s itself (colno entries are 1-based)
				row[count] = 1.0;
				colno[count] = s + 1;
				count++;
				Iterator<Map.Entry<Integer, Double>> iter = mdp.getTransitionsIterator(s, i);
				while (iter.hasNext()) {
					Map.Entry<Integer, Double> e = iter.next();
					int t = e.getKey();
					double p = e.getValue();
					if (t == s) {
						// Self-loop: fold -p into the coefficient of x_s
						row[0] -= p;
					} else {
						row[count] = -p;
						colno[count] = t + 1;
						count++;
					}
				}
				// TODO state rewards?
				double r = mdpRewards.getTransitionReward(s, i);
				expr = new GRBLinExpr();
				for (int j = 0; j < count; j++) {
					// Convert 1-based colno back to 0-based variable index
					expr.addTerm(row[j], xVars[colno[j] - 1]);
				}
				// scale*y on the LHS and +scale on the RHS relax the
				// constraint by 'scale' when the choice is disallowed (y = 0)
				expr.addTerm(scale, yaVars[s][i]);
				model.addConstr(expr, GRB.LESS_EQUAL, r + scale, "c" + counter++);
				// solver.addConstraintex(count, row, colno, min ? LpSolve.LE : LpSolve.GE, 0.0);
			}
		}
		// }
		// Solve LP
		// NOTE(review): always writes a debug file to the working directory - presumably temporary
		model.write("gurobi.lp");
		model.optimize();

		// Check solver status; anything other than OPTIMAL is an error
		if (model.get(GRB.IntAttr.Status) == GRB.Status.INFEASIBLE) {
			// model.computeIIS();
			// System.out.println("\nThe following constraint(s) " + "cannot be satisfied:");
			// for (GRBConstr c : model.getConstrs()) {
			// if (c.get(GRB.IntAttr.IISConstr) == 1) {
			// System.out.println(c.get(GRB.StringAttr.ConstrName));
			// }
			// }
			throw new PrismException("Error solving LP: " + "infeasible");
		}
		if (model.get(GRB.IntAttr.Status) == GRB.Status.UNBOUNDED) {
			throw new PrismException("Error solving LP: " + "unbounded");
		}
		if (model.get(GRB.IntAttr.Status) != GRB.Status.OPTIMAL) {
			throw new PrismException("Error solving LP: " + "non-optimal " + model.get(GRB.IntAttr.Status));
		}
		// Extract and print the solution
		// NOTE(review): soln is filled but never used/returned - WIP
		double[] soln = new double[n];
		for (int s = 0; s < n; s++) {
			System.out.println(xVars[s].get(GRB.StringAttr.VarName) + " = "+xVars[s].get(GRB.DoubleAttr.X));
			soln[s] = xVars[s].get(GRB.DoubleAttr.X);
		}
		for (int s = 0; s < n; s++) {
			int numChoices = mdp.getNumChoices(s);
			for (int i = 0; i < numChoices; i++) {
				System.out.println(yaVars[s][i].get(GRB.StringAttr.VarName) + " = " + yaVars[s][i].get(GRB.DoubleAttr.X));
			}
		}

		// int lpRes = solver.solve();
		// if (lpRes == lpsolve.LpSolve.OPTIMAL) {
		// soln = solver.getPtrVariables();
		// } else {
		// throw new PrismException("Error solving LP" + (lpRes == lpsolve.LpSolve.INFEASIBLE ? " (infeasible)" : ""));
		// }
		// Clean up
		// solver.deleteLp();
		model.dispose();
		env.dispose();
	} catch (GRBException e) {
		// NOTE(review): original cause is dropped (PrismException may not accept one) - TODO confirm
		throw new PrismException("Error solving LP: " +e.getMessage());
	}



	// NOTE(review): no strategy/values are returned yet - WIP
	return new ModelCheckerResult();
}

/**
* Restrict a (memoryless) strategy for an MDP, stored as an integer array of choice indices,
* to the states of the MDP that are reachable under that strategy.
Expand Down
37 changes: 37 additions & 0 deletions prism/src/explicit/ProbModelChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,11 @@ protected StateValues checkExpressionStrategy(Model model, ExpressionStrategy ex
coalition = null;
}

if (1 == 1) {
// should check it is "exists"
return checkExpressionMultiStrategy(model, expr);
}

// For now, just support a single expression (which may encode a Boolean combination of objectives)
List<Expression> exprs = expr.getOperands();
if (exprs.size() > 1) {
Expand All @@ -577,6 +582,38 @@ else if (exprSub instanceof ExpressionReward) {
}
}

/**
 * Model check a multi-strategy synthesis query: an {@code <<.>>} operator
 * whose single operand is an unbounded expected total reward property R[C].
 * NOTE(review): work in progress - the synthesised multi-strategy is computed
 * but not yet returned; an empty StateValues object is returned for now.
 * @param model The model (must be an MDP)
 * @param expr The strategy operator expression
 * @throws PrismException if the operand is not of the supported R[C] form
 */
protected StateValues checkExpressionMultiStrategy(Model model, ExpressionStrategy expr) throws PrismException
{
	// Only support R[C] currently
	Expression exprSub = expr.getOperands().get(0);
	if (!(exprSub instanceof ExpressionReward)) {
		throw new PrismException("Multi-strategy synthesis only supports R[C] properties currently");
	}
	ExpressionReward exprRew = (ExpressionReward) exprSub;
	if (!(exprRew.getExpression() instanceof ExpressionTemporal)) {
		throw new PrismException("Multi-strategy synthesis only supports R[C] properties currently");
	}
	ExpressionTemporal exprTemp = (ExpressionTemporal) exprRew.getExpression();
	// Reject anything that is not an unbounded cumulative reward (R[C]):
	// a different temporal operator, or a time-bounded variant such as R[C<=t].
	// (Fix: the original condition only rejected when BOTH the operator was
	// wrong AND there were no bounds, so it accepted e.g. R[C<=t].)
	if (exprTemp.getOperator() != ExpressionTemporal.R_C || exprTemp.hasBounds()) {
		throw new PrismException("Multi-strategy synthesis only supports R[C] properties currently");
	}

	// Get info from R operator (relational operator and reward bound)
	OpRelOpBound opInfo = exprRew.getRelopBoundInfo(constantValues);
	// NOTE(review): minMax is currently unused, but getMinMax also validates
	// the operator for this model type, so the call is kept
	MinMax minMax = opInfo.getMinMax(model.getModelType(), false);

	// Build rewards
	int r = exprRew.getRewardStructIndexByIndexObject(rewardGen, constantValues);
	mainLog.println("Building reward structure...");
	Rewards modelRewards = constructRewards(model, r);

	// Synthesise the multi-strategy (result currently unused - WIP)
	ModelCheckerResult res = ((MDPModelChecker) this).computeMultiStrategy((MDP) model, (MDPRewards) modelRewards, opInfo.getBound());

	return new StateValues();
}

/**
* Model check a P operator expression and return the values for the statesOfInterest.
* @param statesOfInterest the states of interest, see checkExpression()
Expand Down

0 comments on commit 5a95348

Please sign in to comment.