Skip to content

Commit

Permalink
WIP permissive
Browse files Browse the repository at this point in the history
  • Loading branch information
davexparker committed Aug 27, 2021
1 parent 8e42c26 commit 5a95348
Show file tree
Hide file tree
Showing 2 changed files with 353 additions and 0 deletions.
316 changes: 316 additions & 0 deletions prism/src/explicit/MDPModelChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,146 @@ public BitSet prob1(MDPGeneric<?> mdp, BitSet remain, BitSet target, boolean min
return u;
}

/**
 * Zero total rewards precomputation algorithm,
 * i.e. determine the states of an MDP from which the min/max expected total
 * cumulative reward is zero.
 * Only the max case is currently implemented; {@code min}=true results in an
 * {@link UnsupportedOperationException} from the inner fixed point step.
 * Optionally, for max only, store optimal (memoryless) strategy info
 * (NOTE(review): strategy generation is not yet implemented - see TODO below).
 * @param mdp The MDP
 * @param mdpRewards The rewards
 * @param mindoc Min or max rewards (true=min, false=max)
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null; currently unused)
 */
public BitSet rew0(MDPGeneric<?> mdp, MDPRewards mdpRewards, boolean min, int strat[])
{
	int n, iters;
	BitSet v, soln;
	boolean v_done;
	long timer;

	// Start precomputation
	timer = System.currentTimeMillis();
	if (!silentPrecomputations)
		mainLog.println("Starting Rew0 (" + (min ? "min" : "max") + ")...");

	// Initialise vectors
	n = mdp.getNumStates();
	v = new BitSet(n);
	soln = new BitSet(n);

	// Find states with positive state reward:
	// these certainly have positive expected total reward
	BitSet srPos = new BitSet(n);
	for (int s = 0; s < n; s++) {
		if (mdpRewards.getStateReward(s) > 0) {
			srPos.set(s);
		}
	}

	// Fixed point loop: compute (as a least fixed point) the set of states
	// from which the max expected total reward is positive.
	// Should start from the empty set, but we optimise by
	// starting from srPos, thus bypassing the first iteration.
	iters = 0;
	v_done = false;
	v.clear();
	v.or(srPos);
	soln.clear();
	soln.or(srPos);
	while (!v_done) {
		iters++;
		// Single step of the fixed point
		if (min) {
			// Min case not yet supported
			throw new UnsupportedOperationException();
		} else {
			rew0Astep(mdp, mdpRewards, v, soln);
		}
		// Check termination
		v_done = soln.equals(v);
		// v = soln
		v.clear();
		v.or(soln);
	}

	// Negate: the rew0 states are exactly those NOT in the fixed point
	v.flip(0, n);

	// TODO: If we need to generate a strategy, do another iteration of the
	// inner loop for this (restricted to the states where the answer is known),
	// analogously to the strategy generation in prob1. Not yet implemented.

	// Finished precomputation
	timer = System.currentTimeMillis() - timer;
	if (!silentPrecomputations) {
		mainLog.print("Rew0 (" + (min ? "min" : "max") + ")");
		mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
	}

	return v;
}

/**
 * One step of the rew0 fixed point (max case): for every state s, set
 * {@code result(s)} iff the max expected total reward from s is provably
 * positive given the set {@code v} of states already known to be positive,
 * i.e. iff s has positive state reward, or some choice of s has positive
 * transition reward or can reach a state in {@code v}.
 * @param mdp The MDP
 * @param mdpRewards The rewards
 * @param v States already known to have positive max expected total reward
 * @param result Storage for the updated set (overwritten for every state)
 */
private void rew0Astep(MDPGeneric<?> mdp, MDPRewards mdpRewards, BitSet v, BitSet result)
{
	int numStates = mdp.getNumStates();
	for (int s = 0; s < numStates; s++) {
		// A positive state reward alone guarantees positive total reward
		if (mdpRewards.getStateReward(s) > 0) {
			result.set(s);
			continue;
		}
		// Otherwise, look for some choice with positive transition reward,
		// or with a successor already marked in v
		boolean positive = false;
		int numChoices = mdp.getNumChoices(s);
		for (int ch = 0; ch < numChoices && !positive; ch++) {
			positive = mdpRewards.getTransitionReward(s, ch) > 0 || mdp.someSuccessorsInSet(s, ch, v);
		}
		result.set(s, positive);
	}
}

/**
* Compute reachability probabilities using value iteration.
* Optionally, store optimal (memoryless) strategy info.
Expand Down Expand Up @@ -2979,6 +3119,182 @@ public List<Integer> expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state,
return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val);
}

/**
 * Synthesise a (permissive) multi-strategy for an MDP with respect to a lower
 * bound on the min expected total reward, by building and solving a mixed
 * integer linear program (MILP) with the Gurobi solver.
 * The MILP has a continuous variable x_s per state (expected reward) and a
 * binary variable y_{s,a} per state-choice pair (choice allowed or not).
 * NOTE(review): work in progress - results are printed to stdout but not
 * returned; an empty ModelCheckerResult is returned for now.
 * @param mdp The MDP
 * @param mdpRewards The rewards
 * @param bound Lower bound required on the expected total reward from the initial state
 */
public ModelCheckerResult computeMultiStrategy(MDP mdp, MDPRewards mdpRewards, double bound) throws PrismException
{
	// Only minimisation is supported here (bound is a lower bound on min reward)
	boolean min = true;

	// Start solution
	long timer = System.currentTimeMillis();
	mainLog.println("Starting linear programming (" + (min ? "min" : "max") + ")...");

	// Store num states
	int n = mdp.getNumStates();
	int sInit = mdp.getFirstInitialState();

	// States whose max expected total reward is zero; their x variables are fixed to 0 below
	BitSet maxRew0 = rew0(mdp, mdpRewards, false, null);
	// NOTE(review): debug print to stdout, not mainLog - presumably temporary
	System.out.println("maxRew0 = " + maxRew0);

	try {
		// Initialise LP solver
		GRBEnv env = new GRBEnv("gurobi.log");
		env.set(GRB.IntParam.OutputFlag, 0);
		GRBModel model = new GRBModel(env);


		// x_s: expected total reward from state s; forced to 0 for maxRew0 states
		GRBVar xVars[] = new GRBVar[n];
		for (int s = 0; s < n; s++) {
			xVars[s] = model.addVar(0.0, maxRew0.get(s) ? 0.0 : GRB.INFINITY, 0.0, GRB.CONTINUOUS, "x" + s);
		}
		// y_{s,j}: binary, 1 iff choice j is allowed in state s by the multi-strategy
		// int nc = mdp.getNumChoices();
		GRBVar yaVars[][] = new GRBVar[n][];
		for (int s = 0; s < n; s++) {
			int nc = mdp.getNumChoices(s);
			yaVars[s] = new GRBVar[nc];
			for (int j = 0; j < nc; j++) {
				yaVars[s][j] = model.addVar(0.0, 1.0, 0.0, GRB.BINARY, "y" + s + "_" + j + mdp.getAction(s, j));
			}
		}

		// Big-M style constant used both as penalty weight and constraint relaxation
		double scale = 100.0;

		// Objective: minimise -x_init - sum of (scaled) penalties for allowed choices,
		// i.e. maximise reward plus permissiveness

		GRBLinExpr exprObj = new GRBLinExpr();
		exprObj.addTerm(-1.0, xVars[sInit]);
		//double rhs = 0.0;
		for (int s = 0; s < n; s++) {
			int nc = mdp.getNumChoices(s);
			for (int j = 0; j < nc; j++) {
				// phi_s_j: penalty weight for choice j of s (uniform for now)
				double phi_s_j = 1.0 * scale;
				exprObj.addTerm(-phi_s_j, yaVars[s][j]);
				//rhs -= phi_s_j;
			}
		}
		model.setObjective(exprObj, GRB.MINIMIZE);

		// Counter used to generate unique constraint names
		int counter = 0;

		// Constraint: expected reward from the initial state meets the bound
		double b = bound;
		GRBLinExpr expr = new GRBLinExpr();
		expr.addTerm(1.0, xVars[sInit]);
		model.addConstr(expr, GRB.GREATER_EQUAL, b, "c" + counter++);

		// Constraint: every state must keep at least one allowed choice
		for (int s = 0; s < n; s++) {
			expr = new GRBLinExpr();
			int nc = mdp.getNumChoices(s);
			for (int j = 0; j < nc; j++) {
				expr.addTerm(1.0, yaVars[s][j]);
			}
			model.addConstr(expr, GRB.GREATER_EQUAL, 1.0, "c" + counter++);
		}


		// Set up arrays for passing LP to solver
		// (row/colno hold the coefficients of one Bellman constraint; at most
		// n successors plus the state itself, hence size n + 1)
		double row[] = new double[n + 1];
		int colno[] = new int[n + 1];

		// Add constraints
		for (int s = 0; s < n; s++) {

			// solver.setBounds(s + 1, 0, 1);

			// if (yes.get(s)) {
			//// row[0] = 1.0;
			//// colno[0] = s + 1;
			// GRBLinExpr expr = new GRBLinExpr();
			// expr.addTerm(1.0, xVars[s]);
			// model.addConstr(expr, GRB.EQUAL, 1.0, "c" + counter++);
			// } else if (no.get(s)) {
			//// row[0] = 1.0;
			//// colno[0] = s + 1;
			//// solver.addConstraintex(1, row, colno, LpSolve.EQ, 0.0);
			// GRBLinExpr expr = new GRBLinExpr();
			// expr.addTerm(1.0, xVars[s]);
			// model.addConstr(expr, GRB.EQUAL, 0.0, "c" + counter++);
			// } else {

			// Bellman constraint per state-choice pair:
			// x_s - sum_t p(s,i,t)*x_t <= r(s,i) + scale*(1 - y_{s,i}),
			// i.e. binding only when choice i is allowed (y = 1)
			int numChoices = mdp.getNumChoices(s);
			for (int i = 0; i < numChoices; i++) {
				int count = 0;
				// Coefficient 1 for x_s itself (colno entries are 1-based)
				row[count] = 1.0;
				colno[count] = s + 1;
				count++;
				Iterator<Map.Entry<Integer, Double>> iter = mdp.getTransitionsIterator(s, i);
				while (iter.hasNext()) {
					Map.Entry<Integer, Double> e = iter.next();
					int t = e.getKey();
					double p = e.getValue();
					if (t == s) {
						// Self-loop: fold -p into the coefficient of x_s
						row[0] -= p;
					} else {
						row[count] = -p;
						colno[count] = t + 1;
						count++;
					}
				}
				// TODO state rewards?
				double r = mdpRewards.getTransitionReward(s, i);
				expr = new GRBLinExpr();
				for (int j = 0; j < count; j++) {
					// Convert 1-based colno back to 0-based variable index
					expr.addTerm(row[j], xVars[colno[j] - 1]);
				}
				// scale*y on the LHS and +scale on the RHS relax the
				// constraint by 'scale' when the choice is disallowed (y = 0)
				expr.addTerm(scale, yaVars[s][i]);
				model.addConstr(expr, GRB.LESS_EQUAL, r + scale, "c" + counter++);
				// solver.addConstraintex(count, row, colno, min ? LpSolve.LE : LpSolve.GE, 0.0);
			}
		}
		// }
		// Solve LP
		// NOTE(review): always writes a debug file to the working directory - presumably temporary
		model.write("gurobi.lp");
		model.optimize();

		// Check solver status; anything other than OPTIMAL is an error
		if (model.get(GRB.IntAttr.Status) == GRB.Status.INFEASIBLE) {
			// model.computeIIS();
			// System.out.println("\nThe following constraint(s) " + "cannot be satisfied:");
			// for (GRBConstr c : model.getConstrs()) {
			// if (c.get(GRB.IntAttr.IISConstr) == 1) {
			// System.out.println(c.get(GRB.StringAttr.ConstrName));
			// }
			// }
			throw new PrismException("Error solving LP: " + "infeasible");
		}
		if (model.get(GRB.IntAttr.Status) == GRB.Status.UNBOUNDED) {
			throw new PrismException("Error solving LP: " + "unbounded");
		}
		if (model.get(GRB.IntAttr.Status) != GRB.Status.OPTIMAL) {
			throw new PrismException("Error solving LP: " + "non-optimal " + model.get(GRB.IntAttr.Status));
		}
		// Extract and print the solution
		// NOTE(review): soln is filled but never used/returned - WIP
		double[] soln = new double[n];
		for (int s = 0; s < n; s++) {
			System.out.println(xVars[s].get(GRB.StringAttr.VarName) + " = "+xVars[s].get(GRB.DoubleAttr.X));
			soln[s] = xVars[s].get(GRB.DoubleAttr.X);
		}
		for (int s = 0; s < n; s++) {
			int numChoices = mdp.getNumChoices(s);
			for (int i = 0; i < numChoices; i++) {
				System.out.println(yaVars[s][i].get(GRB.StringAttr.VarName) + " = " + yaVars[s][i].get(GRB.DoubleAttr.X));
			}
		}

		// int lpRes = solver.solve();
		// if (lpRes == lpsolve.LpSolve.OPTIMAL) {
		// soln = solver.getPtrVariables();
		// } else {
		// throw new PrismException("Error solving LP" + (lpRes == lpsolve.LpSolve.INFEASIBLE ? " (infeasible)" : ""));
		// }
		// Clean up
		// solver.deleteLp();
		model.dispose();
		env.dispose();
	} catch (GRBException e) {
		// NOTE(review): original cause is dropped (PrismException may not accept one) - TODO confirm
		throw new PrismException("Error solving LP: " +e.getMessage());
	}



	// NOTE(review): no strategy/values are returned yet - WIP
	return new ModelCheckerResult();
}

/**
* Restrict a (memoryless) strategy for an MDP, stored as an integer array of choice indices,
* to the states of the MDP that are reachable under that strategy.
Expand Down
37 changes: 37 additions & 0 deletions prism/src/explicit/ProbModelChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,11 @@ protected StateValues checkExpressionStrategy(Model model, ExpressionStrategy ex
coalition = null;
}

if (1 == 1) {
// should check it is "exists"
return checkExpressionMultiStrategy(model, expr);
}

// For now, just support a single expression (which may encode a Boolean combination of objectives)
List<Expression> exprs = expr.getOperands();
if (exprs.size() > 1) {
Expand All @@ -577,6 +582,38 @@ else if (exprSub instanceof ExpressionReward) {
}
}

/**
 * Model check a multi-strategy synthesis query: an {@code <<.>>} operator
 * whose single operand is an unbounded expected total reward property R[C].
 * NOTE(review): work in progress - the synthesised multi-strategy is computed
 * but not yet returned; an empty StateValues object is returned for now.
 * @param model The model (must be an MDP)
 * @param expr The strategy operator expression
 * @throws PrismException if the operand is not of the supported R[C] form
 */
protected StateValues checkExpressionMultiStrategy(Model model, ExpressionStrategy expr) throws PrismException
{
	// Only support R[C] currently
	Expression exprSub = expr.getOperands().get(0);
	if (!(exprSub instanceof ExpressionReward)) {
		throw new PrismException("Multi-strategy synthesis only supports R[C] properties currently");
	}
	ExpressionReward exprRew = (ExpressionReward) exprSub;
	if (!(exprRew.getExpression() instanceof ExpressionTemporal)) {
		throw new PrismException("Multi-strategy synthesis only supports R[C] properties currently");
	}
	ExpressionTemporal exprTemp = (ExpressionTemporal) exprRew.getExpression();
	// Reject anything that is not an unbounded cumulative reward (R[C]):
	// a different temporal operator, or a time-bounded variant such as R[C<=t].
	// (Fix: the original condition only rejected when BOTH the operator was
	// wrong AND there were no bounds, so it accepted e.g. R[C<=t].)
	if (exprTemp.getOperator() != ExpressionTemporal.R_C || exprTemp.hasBounds()) {
		throw new PrismException("Multi-strategy synthesis only supports R[C] properties currently");
	}

	// Get info from R operator (relational operator and reward bound)
	OpRelOpBound opInfo = exprRew.getRelopBoundInfo(constantValues);
	// NOTE(review): minMax is currently unused, but getMinMax also validates
	// the operator for this model type, so the call is kept
	MinMax minMax = opInfo.getMinMax(model.getModelType(), false);

	// Build rewards
	int r = exprRew.getRewardStructIndexByIndexObject(rewardGen, constantValues);
	mainLog.println("Building reward structure...");
	Rewards modelRewards = constructRewards(model, r);

	// Synthesise the multi-strategy (result currently unused - WIP)
	ModelCheckerResult res = ((MDPModelChecker) this).computeMultiStrategy((MDP) model, (MDPRewards) modelRewards, opInfo.getBound());

	return new StateValues();
}

/**
* Model check a P operator expression and return the values for the statesOfInterest.
* @param statesOfInterest the states of interest, see checkExpression()
Expand Down

0 comments on commit 5a95348

Please sign in to comment.