diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css new file mode 100644 index 0000000..7c0a218 --- /dev/null +++ b/docs/_static/css/custom.css @@ -0,0 +1,4 @@ +.vis-network { + width: 46em; + border: 1px solid lightgray; +} diff --git a/docs/conf.py b/docs/conf.py index c7d030a..eae2519 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -32,3 +32,4 @@ html_theme = "furo" html_static_path = ["_static"] +html_css_files = ["css/custom.css"] diff --git a/docs/getting_started/die.ipynb b/docs/getting_started/00_die.ipynb similarity index 99% rename from docs/getting_started/die.ipynb rename to docs/getting_started/00_die.ipynb index 828d37d..55bf8bf 100644 --- a/docs/getting_started/die.ipynb +++ b/docs/getting_started/00_die.ipynb @@ -39,7 +39,7 @@ "text/html": [ "\n", " fetch('http://127.0.0.1:8892/ifNagYWiMW/MESSAGE/' + 'test message')" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ - "\n", - " " + "" ], "text/plain": [ "" @@ -411,11 +198,59 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fdbfe0ff407a4a308031b5e7e783cf40", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "78d798387dbc4a6b8896a4d6125884ee", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(Output(), Output()))" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ "vis = show(mdp, layout=Layout(\"layouts/pinkgreen.json\"), name=\"study\", save_and_embed=True)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "260bfa16-9971-4ee9-9744-0478ee31dbe4", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -434,7 +269,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.12.6" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/docs/getting_started/naive_value_iteration.ipynb b/docs/getting_started/02_naive_value_iteration.ipynb similarity index 99% rename from docs/getting_started/naive_value_iteration.ipynb rename to docs/getting_started/02_naive_value_iteration.ipynb index 7e8f0da..316705a 100644 --- a/docs/getting_started/naive_value_iteration.ipynb +++ b/docs/getting_started/02_naive_value_iteration.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "7f3692d1-ad64-449d-809c-01ceb84e6bb4", "metadata": {}, "outputs": [ @@ -20,7 +20,7 @@ "text/html": [ "\n", " " + "" ] }, "execution_count": 2, @@ -174,12 +174,12 @@ "Transitions: \t243\n", "Reward Models: (default)\n", "State Labels: \t8 labels\n", + " * deadlock -> 0 item(s)\n", " * init -> 1 item(s)\n", " * whichx -> 42 item(s)\n", + " * target -> 1 item(s)\n", " * selection -> 39 item(s)\n", " * nextnand -> 42 item(s)\n", - " * deadlock -> 0 item(s)\n", - " * target -> 1 item(s)\n", " * gate -> 52 item(s)\n", " * end -> 3 item(s)\n", "Choice Labels: \tnone\n", @@ -246,50 +246,12 @@ "id": "59196c3e-aa2d-4fa6-abc7-b04bf3b5f1f4", "metadata": {}, "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "fcad0cee47184961b79ad066728c85b7", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ "\n", " " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%prism leader\n", + "// asynchronous leader election\n", + "// 4 processes\n", + "// gxn/dxp 29/01/01\n", + "\n", + "mdp\n", + "\n", + "const int N = 3; // number of processes\n", + "\n", + "//----------------------------------------------------------------------------------------------------------------------------\n", + "module process1\n", + "\t\n", + "\t// COUNTER\n", + "\tc1 : [0..3-1];\n", + "\t\n", + "\t// STATES\n", + "\ts1 : [0..4];\n", + "\t// 0 make choice\n", + "\t// 1 have not received neighbours choice\n", + "\t// 2 active\n", + "\t// 3 inactive\n", + "\t// 4 leader\n", + "\t\n", + "\t// PREFERENCE\n", + "\tp1 : [0..1];\n", + "\t\n", + "\t// VARIABLES FOR SENDING AND RECEIVING\n", + "\treceive1 : [0..2];\n", + "\t// not received anything\n", + "\t// received choice\n", + "\t// received counter\n", + "\tsent1 : [0..2];\n", + "\t// not send anything\n", + "\t// sent choice\n", + "\t// sent counter\n", + "\t\n", + "\t// pick value\n", + "\t[pick] (s1=0) -> 0.5 : (s1'=1) & (p1'=0) + 0.5 : (s1'=1) & (p1'=1);\n", + "\n", + "\t// send preference\n", + "\t[p12] (s1=1) & (sent1=0) -> (sent1'=1);\n", + "\t// receive preference\n", + "\t// stay active\n", + "\t[p31] (s1=1) & (receive1=0) & !( (p1=0) & (p3=1) ) -> (s1'=2) & (receive1'=1);\n", + "\t// become inactive\n", + "\t[p31] (s1=1) & (receive1=0) & (p1=0) & (p3=1) -> (s1'=3) & (receive1'=1);\n", + "\t\n", + "\t// send preference (can now reset preference)\n", + "\t[p12] (s1=2) & (sent1=0) -> (sent1'=1) & (p1'=0);\n", + "\t// send counter (already sent preference)\n", + "\t// not received counter yet\n", + "\t[c12] (s1=2) & (sent1=1) & (receive1=1) -> (sent1'=2);\n", + "\t// received counter (pick again)\n", + "\t[c12] (s1=2) & (sent1=1) & (receive1=2) -> (s1'=0) & (p1'=0) & (c1'=0) & (sent1'=0) & (receive1'=0);\n", + "\t\n", + "\t// receive counter and not sent yet (note in this case do not pass it on as will send own counter)\n", + "\t[c31] (s1=2) & (receive1=1) & (sent1<2) -> (receive1'=2);\n", + "\t// receive counter and sent counter\n", + "\t// only active process (decide)\n", + "\t[c31] (s1=2) & (receive1=1) & (sent1=2) & (c3=N-1) -> (s1'=4) & (p1'=0) & (c1'=0) & (sent1'=0) & (receive1'=0);\n", + "\t// other active process (pick again)\n", + "\t[c31] (s1=2) & (receive1=1) & (sent1=2) & (c3 (s1'=0) & (p1'=0) & (c1'=0) & (sent1'=0) & (receive1'=0);\n", + "\t\n", + "\t// send preference (must have received preference) and can now reset\n", + "\t[p12] (s1=3) & (receive1>0) & (sent1=0) -> (sent1'=1) & (p1'=0);\n", + "\t// send counter (must have received counter first) and can now reset\n", + "\t[c12] (s1=3) & (receive1=2) & (sent1=1) -> (s1'=3) & (p1'=0) & (c1'=0) & (sent1'=0) & (receive1'=0);\n", + "\t\n", + "\t// receive preference\n", + "\t[p31] (s1=3) & (receive1=0) -> (p1'=p3) & (receive1'=1);\n", + "\t// receive counter\n", + "\t[c31] (s1=3) & (receive1=1) & (c3 (c1'=c3+1) & (receive1'=2);\n", + "\t\t\n", + "\t// done\n", + "\t[done] (s1=4) -> (s1'=s1);\n", + "\t// add loop for processes who are inactive\n", + "\t[done] (s1=3) -> (s1'=s1);\n", + "\n", + "endmodule\n", + "\n", + "//----------------------------------------------------------------------------------------------------------------------------\n", + "\n", + "// construct further stations through renaming\n", + "module process2=process1[s1=s2,p1=p2,c1=c2,sent1=sent2,receive1=receive2,p12=p23,p31=p12,c12=c23,c31=c12,p3=p1,c3=c1] endmodule\n", + "module process3=process1[s1=s3,p1=p3,c1=c3,sent1=sent3,receive1=receive3,p12=p31,p31=p23,c12=c31,c31=c23,p3=p2,c3=c2] endmodule\n", + "\n", + "//----------------------------------------------------------------------------------------------------------------------------\n", + "rewards \"rounds\"\n", + " [c12] true : 1;\n", + "endrewards\n", + "\n", + "//----------------------------------------------------------------------------------------------------------------------------\n", + "formula leaders = (s1=4?1:0)+(s2=4?1:0)+(s3=4?1:0);\n", + "label \"elected\" = s1=4|s2=4|s3=4;\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "leader_model = stormpy.build_model(leader)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------------------------------------------- \n", + "Model type: \tMDP (sparse)\n", + "States: \t190\n", + "Transitions: \t323\n", + "Choices: \t316\n", + "Reward Models: rounds\n", + "State Labels: \t3 labels\n", + " * deadlock -> 3 item(s)\n", + " * init -> 1 item(s)\n", + " * elected -> 0 item(s)\n", + "Choice Labels: \tnone\n", + "-------------------------------------------------------------- \n", + "\n", + "['__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '_as_sparse_ctmc', '_as_sparse_dtmc', '_as_sparse_exact_dtmc', '_as_sparse_exact_mdp', '_as_sparse_imdp', '_as_sparse_ma', '_as_sparse_mdp', '_as_sparse_pctmc', '_as_sparse_pdtmc', '_as_sparse_pma', '_as_sparse_pmdp', '_as_sparse_pomdp', '_as_sparse_ppomdp', '_as_sparse_smg', '_as_symbolic_ctmc', '_as_symbolic_dtmc', '_as_symbolic_ma', '_as_symbolic_mdp', '_as_symbolic_pctmc', '_as_symbolic_pdtmc', '_as_symbolic_pma', '_as_symbolic_pmdp', 'add_reward_model', 'apply_scheduler', 'backward_transition_matrix', 'choice_labeling', 'choice_origins', 'get_choice_index', 'get_nr_available_actions', 'get_reward_model', 'has_choice_labeling', 'has_choice_origins', 'has_parameters', 'has_reward_model', 'has_state_valuations', 'initial_states', 'initial_states_as_bitvector', 'is_discrete_time_model', 'is_exact', 'is_nondeterministic_model', 'is_partially_observable', 'is_sink_state', 'is_sparse_model', 'is_symbolic_model', 'labeling', 'labels_state', 'model_type', 'nondeterministic_choice_indices', 'nr_choices', 'nr_states', 'nr_transitions', 'reduce_to_state_based_rewards', 'reward_models', 'set_initial_states', 'state_valuations', 'states', 'supports_parameters', 'supports_uncertainty', 'to_dot', 'transition_matrix']\n" + ] + } + ], + "source": [ + "print(leader_model)\n", + "\n", + "print(dir(leader_model))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "prop = stormpy.parse_properties(\"Rmax=? [F \\\"elected\\\"]\", leader)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "stormpy_result = stormpy.model_checking(leader_model, prop[0], extract_scheduler=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf]\n" + ] + } + ], + "source": [ + "print(stormpy_result.get_values())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------------------------------------------- \n", + "Model type: \tDTMC (sparse)\n", + "States: \t49\n", + "Transitions: \t56\n", + "Reward Models: rounds\n", + "State Labels: \t3 labels\n", + " * deadlock -> 3 item(s)\n", + " * init -> 1 item(s)\n", + " * elected -> 0 item(s)\n", + "Choice Labels: \tnone\n", + "-------------------------------------------------------------- \n", + "\n" + ] + } + ], + "source": [ + "scheduler = stormpy_result.scheduler\n", + "\n", + "print(leader_model.apply_scheduler(scheduler))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 -> 0, 1 -> 0, 2 -> 0, 3 -> 0, 4 -> 0, 5 -> 0, 6 -> 0, 7 -> 0, 8 -> 0, 9 -> 0, 10 -> 0, 11 -> 0, 12 -> 0, 13 -> 0, 14 -> 0, 15 -> 0, 16 -> 0, 17 -> 0, 18 -> 0, 19 -> 0, 20 -> 0, 21 -> 0, 22 -> 0, 23 -> 0, 24 -> 0, 25 -> 0, 26 -> 0, 27 -> 0, 28 -> 0, 29 -> 0, 30 -> 0, 31 -> 0, 32 -> 0, 33 -> 0, 34 -> 0, 35 -> 0, 36 -> 0, 37 -> 0, 38 -> 0, 39 -> 0, 40 -> 0, 41 -> 0, 42 -> 0, 43 -> 0, 44 -> 0, 45 -> 0, 46 -> 0, 47 -> 0, 48 -> 0, 49 -> 0, 50 -> 0, 51 -> 0, 52 -> 0, 53 -> 0, 54 -> 0, 55 -> 0, 56 -> 0, 57 -> 0, 58 -> 0, 59 -> 0, 60 -> 0, 61 -> 0, 62 -> 0, 63 -> 0, 64 -> 0, 65 -> 0, 66 -> 0, 67 -> 0, 68 -> 0, 69 -> 0, 70 -> 0, 71 -> 0, 72 -> 0, 73 -> 0, 74 -> 0, 75 -> 0, 76 -> 0, 77 -> 0, 78 -> 0, 79 -> 0, 80 -> 0, 81 -> 0, 82 -> 0, 83 -> 0, 84 -> 0, 85 -> 0, 86 -> 0, 87 -> 0, 88 -> 0, 89 -> 0, 90 -> 0, 91 -> 0, 92 -> 0, 93 -> 0, 94 -> 0, 95 -> 0, 96 -> 0, 97 -> 0, 98 -> 0, 99 -> 0, 100 -> 0, 101 -> 0, 102 -> 0, 103 -> 0, 104 -> 0, 105 -> 0, 106 -> 0, 107 -> 0, 108 -> 0, 109 -> 0, 110 -> 0, 111 -> 0, 112 -> 0, 113 -> 0, 114 -> 0, 115 -> 0, 116 -> 0, 117 -> 0, 118 -> 0, 119 -> 0, 120 -> 0, 121 -> 0, 122 -> 0, 123 -> 0, 124 -> 0, 125 -> 0, 126 -> 0, 127 -> 0, 128 -> 0, 129 -> 0, 130 -> 0, 131 -> 0, 132 -> 0, 133 -> 0, 134 -> 0, 135 -> 0, 136 -> 0, 137 -> 0, 138 -> 0, 139 -> 0, 140 -> 0, 141 -> 0, 142 -> 0, 143 -> 0, 144 -> 0, 145 -> 0, 146 -> 0, 147 -> 0, 148 -> 0, 149 -> 0, 150 -> 0, 151 -> 0, 152 -> 0, 153 -> 0, 154 -> 0, 155 -> 0, 156 -> 0, 157 -> 0, 158 -> 0, 159 -> 0, 160 -> 0, 161 -> 0, 162 -> 0, 163 -> 0, 164 -> 0, 165 -> 0, 166 -> 0, 167 -> 0, 168 -> 0, 169 -> 0, 170 -> 0, 171 -> 0, 172 -> 0, 173 -> 0, 174 -> 0, 175 -> 0, 176 -> 0, 177 -> 0, 178 -> 0, 179 -> 0, 180 -> 0, 181 -> 0, 182 -> 0, 183 -> 0, 184 -> 0, 185 -> 0, 186 -> 0, 187 -> 0, 188 -> 0, 189 -> 0\n" + ] + } + ], + "source": [ + "scheduler_str = []\n", + "for state in leader_model.states:\n", + " choice = scheduler.get_choice(state)\n", + " action = choice.get_deterministic_choice()\n", + " scheduler_str.append(f\"{state} -> {action}\")\n", + "print(\", \".join(scheduler_str))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "stormvogel_model = mapping.stormpy_to_stormvogel(leader_model)\n", + "\n", + "stormvogel_result = result.convert_model_checking_result(stormvogel_model, stormpy_result)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import stormvogel.layout\n", + "\n", + "vis1 = show.show(stormvogel_model, stormvogel_result, layout=stormvogel.layout.EXPLORE(), show_editor=False, save_and_embed=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "induced_dtmc = stormvogel_result.generate_induced_dtmc()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "vis2 = show.show(induced_dtmc, layout=stormvogel.layout.EXPLORE(), show_editor=False, save_and_embed=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/getting_started/05_simulator.ipynb b/docs/getting_started/05_simulator.ipynb new file mode 100644 index 0000000..b4fc037 --- /dev/null +++ b/docs/getting_started/05_simulator.ipynb @@ -0,0 +1,2923 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a7245ed2", + "metadata": {}, + "source": [ + "# The simulator" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a8ddc37c-66d2-43e4-8162-6be19a1d70a1", + "metadata": {}, + "outputs": [], + "source": [ + "from stormvogel.show import show\n", + "from stormvogel.layout import Layout\n", + "import stormvogel.model\n", + "import stormvogel.simulator" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cab40f99-3460-4497-8b9f-3d669eee1e11", + "metadata": {}, + "outputs": [], + "source": [ + "# We create the monty hall mdp\n", + "mdp = stormvogel.model.new_mdp(\"Monty Hall\")\n", + "\n", + "init = mdp.get_initial_state()\n", + "\n", + "# first choose car position\n", + "init.set_transitions(\n", + " [(1 / 3, mdp.new_state(\"carchosen\", {\"car_pos\": i})) for i in range(3)]\n", + ")\n", + "\n", + "# we choose a door in each case\n", + "for s in mdp.get_states_with_label(\"carchosen\"):\n", + " s.set_transitions(\n", + " [\n", + " (\n", + " mdp.action(f\"open{i}\"),\n", + " mdp.new_state(\"open\", s.features | {\"chosen_pos\": i}),\n", + " )\n", + " for i in range(3)\n", + " ]\n", + " )\n", + "\n", + "# the other goat is revealed\n", + "for s in mdp.get_states_with_label(\"open\"):\n", + " car_pos = s.features[\"car_pos\"]\n", + " chosen_pos = s.features[\"chosen_pos\"]\n", + " other_pos = {0, 1, 2} - {car_pos, chosen_pos}\n", + " s.set_transitions(\n", + " [\n", + " (\n", + " 1 / len(other_pos),\n", + " mdp.new_state(\"goatrevealed\", s.features | {\"reveal_pos\": i}),\n", + " )\n", + " for i in other_pos\n", + " ]\n", + " )\n", + "\n", + "# we must choose whether we want to switch\n", + "for s in mdp.get_states_with_label(\"goatrevealed\"):\n", + " car_pos = s.features[\"car_pos\"]\n", + " chosen_pos = s.features[\"chosen_pos\"]\n", + " reveal_pos = s.features[\"reveal_pos\"]\n", + " other_pos = list({0, 1, 2} - {reveal_pos, chosen_pos})[0]\n", + " s.set_transitions(\n", + " [\n", + " (\n", + " mdp.action(\"stay\"),\n", + " mdp.new_state(\n", + " [\"done\"] + ([\"target\"] if chosen_pos == car_pos else []),\n", + " s.features | {\"chosen_pos\": chosen_pos},\n", + " ),\n", + " ),\n", + " (\n", + " mdp.action(\"switch\"),\n", + " mdp.new_state(\n", + " [\"done\"] + ([\"target\"] if other_pos == car_pos else []),\n", + " s.features | {\"chosen_pos\": other_pos},\n", + " ),\n", + " ),\n", + " ]\n", + " )\n", + "\n", + "# we add self loops to all states with no outgoing transitions\n", + "mdp.add_self_loops()" + ] + }, + { + "cell_type": "markdown", + "id": "d1f90374-dc85-4f31-b59f-aaf5e48a32f7", + "metadata": {}, + "source": [ + "We show what our mdp model looks like." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c129cf62-40ca-4246-8718-5c859744e7f8", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "vis = show(mdp, layout=Layout(\"layouts/monty.json\"), save_and_embed=True)" + ] + }, + { + "cell_type": "markdown", + "id": "b5b2990c-65ed-4d7b-a4b8-f303843622e5", + "metadata": {}, + "source": [ + "We want to simulate this model. That is, we start at the initial state and then we walk through the model by choosing random actions.\n", + "\n", + "When we do this, we get a partial model as a result that contains everything we discovered during this walk. \n", + "\n", + "Try running this multiple times, and observe that sometimes we get to the target and sometimes we do not." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "eb0fadc0-7bb6-4c1d-ae3e-9e16527726ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# we can choose how many steps we take:\n", + "steps = 4\n", + "\n", + "# and we can specify a seed if we want:\n", + "seed = 12345676346\n", + "\n", + "# then we run the simulator:\n", + "partial_model = stormvogel.simulator.simulate(mdp, steps=steps, seed=seed)\n", + "# We could also provide a seed.\n", + "#partial_model = stormvogel.simulator.simulate(mdp, steps=steps, seed=seed)\n", + "\n", + "vis = show(partial_model, save_and_embed=True, layout=Layout(\"layouts/small_monty.json\"))" + ] + }, + { + "cell_type": "markdown", + "id": "49e3893d-bc35-4648-87eb-74a6a222ebf0", + "metadata": {}, + "source": [ + "We can also provide a scheduler (i.e. policy) which chooses what actions we should take at all time.\n", + "\n", + "In this case, we always take the first action, which means that we open door 0, and don't switch doors." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "59ac1e34-866c-42c4-b19b-c2a15c830e2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#it still chooses random actions but we can prevent this by providing a scheduler:\n", + "taken_actions = {}\n", + "for id, state in mdp.states.items():\n", + " taken_actions[id] = state.available_actions()[0]\n", + "scheduler = stormvogel.result.Scheduler(mdp, taken_actions)\n", + "\n", + "partial_model = stormvogel.simulator.simulate(mdp, steps=steps, scheduler=scheduler, seed=seed)\n", + "vis = show(partial_model, save_and_embed=True, layout=Layout(\"layouts/small_monty.json\"))" + ] + }, + { + "cell_type": "markdown", + "id": "57a9b77d-4a75-42e4-8006-0bb11f2b345c", + "metadata": {}, + "source": [ + "We can highlight the scheduled states in the visualization of the entire model." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7e23fc38-b2af-4f02-b0a2-5d06151d2ca5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "vis = show(mdp, show_editor=True, layout=Layout(\"layouts/monty.json\"), scheduler=scheduler, save_and_embed=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "191d5284-33b2-4ab3-8a1a-7c0c3334d250", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "e4f388d8-d08b-40f5-a61b-1f5f29d004c9", + "metadata": {}, + "source": [ + "We can also get a path from the simulator function." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "34d0c293-d090-4e3d-9e80-4351f5fcba62", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "initial state --(action: empty)--> state: 2 --(action: open0)--> state: 7 --(action: empty)--> state: 17 --(action: stay)--> state: 33\n" + ] + } + ], + "source": [ + "#we can also use another simulator function that returns a path instead of a partial model:\n", + "path = stormvogel.simulator.simulate_path(mdp, steps=4, scheduler=scheduler, seed=123456)\n", + "\n", + "print(path)" + ] + }, + { + "cell_type": "markdown", + "id": "1e0f6fea-6cd3-43e0-beea-84dc26eeca0b", + "metadata": {}, + "source": [ + "We can even visualize this path interactively! This works with any Path, not just a scheduler path. TODO." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "afbb3234-99e4-49d0-b259-f598e895f600", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "ename": "AttributeError", + "evalue": "'Visualization' object has no attribute 'show_path'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtime\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m sleep\n\u001b[1;32m 4\u001b[0m vis \u001b[38;5;241m=\u001b[39m show(mdp, save_and_embed\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, layout\u001b[38;5;241m=\u001b[39mLayout(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlayouts/monty.json\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[0;32m----> 5\u001b[0m \u001b[43mvis\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow_path\u001b[49m(path)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m state \u001b[38;5;129;01min\u001b[39;00m path:\n\u001b[1;32m 7\u001b[0m vis\u001b[38;5;241m.\u001b[39mhighlight_state(state, color\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mred\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mAttributeError\u001b[0m: 'Visualization' object has no attribute 'show_path'" + ] + } + ], + "source": [ + "from stormvogel.show import show\n", + "from time import sleep\n", + "\n", + "vis = show(mdp, save_and_embed=True, layout=Layout(\"layouts/monty.json\"))\n", + "vis.show_path(path)\n", + "for state in path:\n", + " vis.highlight_state(state, color=\"red\")\n", + " sleep(1)\n", + " # TODO should crash\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6fe7f4-cc9e-4c1d-9850-3799ca47a903", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/getting_started/simulator.ipynb b/docs/getting_started/07_pomdp.ipynb similarity index 54% rename from docs/getting_started/simulator.ipynb rename to docs/getting_started/07_pomdp.ipynb index f7fafbe..85bc035 100644 --- a/docs/getting_started/simulator.ipynb +++ b/docs/getting_started/07_pomdp.ipynb @@ -1,185 +1,34 @@ { "cells": [ - { - "cell_type": "markdown", - "id": "a7245ed2", - "metadata": {}, - "source": [ - "# The simulator" - ] - }, { "cell_type": "code", - "execution_count": 7, - "id": "a8ddc37c-66d2-43e4-8162-6be19a1d70a1", + "execution_count": 1, + "id": "8013f1f2-f771-43b4-ba2a-c584d05d21b2", "metadata": {}, "outputs": [], "source": [ - "from stormvogel import show\n", - "import stormvogel.model" + "import stormvogel.model\n", + "from stormvogel.layout import Layout\n", + "from stormvogel.show import show" ] }, { - "cell_type": "code", - "execution_count": 8, - "id": "cab40f99-3460-4497-8b9f-3d669eee1e11", - "metadata": {}, - "outputs": [], - "source": [ - "# We create the monty hall mdp\n", - "mdp = stormvogel.model.new_mdp(\"Monty Hall\")\n", - "\n", - "init = mdp.get_initial_state()\n", - "\n", - "# first choose car position\n", - "init.set_transitions(\n", - " [(1 / 3, mdp.new_state(\"carchosen\", {\"car_pos\": i})) for i in range(3)]\n", - ")\n", - "\n", - "# we choose a door in each case\n", - "for s in mdp.get_states_with_label(\"carchosen\"):\n", - " s.set_transitions(\n", - " [\n", - " (\n", - " mdp.action(f\"open{i}\"),\n", - " mdp.new_state(\"open\", s.features | {\"chosen_pos\": i}),\n", - " )\n", - " for i in range(3)\n", - " ]\n", - " )\n", - "\n", - "# the other goat is revealed\n", - "for s in mdp.get_states_with_label(\"open\"):\n", - " car_pos = s.features[\"car_pos\"]\n", - " chosen_pos = s.features[\"chosen_pos\"]\n", - " other_pos = {0, 1, 2} - {car_pos, chosen_pos}\n", - " s.set_transitions(\n", - " [\n", - " (\n", - " 1 / len(other_pos),\n", - " mdp.new_state(\"goatrevealed\", s.features | {\"reveal_pos\": i}),\n", - " )\n", - " for i in other_pos\n", - " ]\n", - " )\n", - "\n", - "# we must choose whether we want to switch\n", - "for s in mdp.get_states_with_label(\"goatrevealed\"):\n", - " car_pos = s.features[\"car_pos\"]\n", - " chosen_pos = s.features[\"chosen_pos\"]\n", - " reveal_pos = s.features[\"reveal_pos\"]\n", - " other_pos = list({0, 1, 2} - {reveal_pos, chosen_pos})[0]\n", - " s.set_transitions(\n", - " [\n", - " (\n", - " mdp.action(\"stay\"),\n", - " mdp.new_state(\n", - " [\"done\"] + ([\"target\"] if chosen_pos == car_pos else []),\n", - " s.features | {\"chosen_pos\": chosen_pos},\n", - " ),\n", - " ),\n", - " (\n", - " mdp.action(\"switch\"),\n", - " mdp.new_state(\n", - " [\"done\"] + ([\"target\"] if other_pos == car_pos else []),\n", - " s.features | {\"chosen_pos\": other_pos},\n", - " ),\n", - " ),\n", - " ]\n", - " )\n", - "\n", - "# we add self loops to all states with no outgoing transitions\n", - "mdp.add_self_loops()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "eb0fadc0-7bb6-4c1d-ae3e-9e16527726ab", + "cell_type": "markdown", + "id": "dbc502e0-a2f6-49c8-b9b1-638666906e27", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ModelType.MDP with name None\n", - "\n", - "States:\n", - "State 0 with labels ['init'] and features {}\n", - "State 1 with labels ['carchosen'] and features {}\n", - "State 2 with labels ['open'] and features {}\n", - "State 3 with labels ['goatrevealed'] and features {}\n", - "State 4 with labels ['done', 'target'] and features {}\n", - "\n", - "Transitions:\n", - "0.3333333333333333 -> State 1 with labels ['carchosen'] and features {}\n", - "1.0 -> State 2 with labels ['open'] and features {}\n", - "1.0 -> State 3 with labels ['goatrevealed'] and features {}\n", - "1.0 -> State 4 with labels ['done', 'target'] and features {}\n" - ] - } - ], - "source": [ - "#we want to simulate this model. That is, we start at the initial state and then\n", - "#we walk through the model according to transition probabilities.\n", - "#When we do this, we get a partial model as a result that contains everything we discovered\n", - "#during this walk.\n", - "\n", - "#we can choose how many steps we take:\n", - "steps = 4\n", - "\n", - "#and we can specify a seed if we want:\n", - "seed = 123456\n", - "\n", - "#then we run the simulator:\n", - "partial_model = stormvogel.simulator.simulate(mdp, steps=steps, seed=seed)\n", - "print(partial_model)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "59ac1e34-866c-42c4-b19b-c2a15c830e2e", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ModelType.MDP with name None\n", - "\n", - "States:\n", - "State 0 with labels ['init'] and features {}\n", - "State 1 with labels ['carchosen'] and features {}\n", - "State 2 with labels ['open'] and features {}\n", - "State 3 with labels ['goatrevealed'] and features {}\n", - "State 4 with labels ['done'] and features {}\n", - "\n", - "Transitions:\n", - "0.3333333333333333 -> State 1 with labels ['carchosen'] and features {}\n", - "1.0 -> State 2 with labels ['open'] and features {}\n", - "1.0 -> State 3 with labels ['goatrevealed'] and features {}\n", - "1.0 -> State 4 with labels ['done'] and features {}\n" - ] - } - ], "source": [ - "#it still chooses random actions but we can prevent this by providing a scheduler:\n", - "taken_actions = {}\n", - "for id, state in mdp.states.items():\n", - " taken_actions[id] = state.available_actions()[0]\n", - "scheduler = stormvogel.result.Scheduler(mdp, taken_actions)\n", + "## What is a POMDP?\n", + "A Partially Observed MDP is an MDP where you do not know what state you are currently in.\n", + "You can obtain information about what state you might be in using Observations.\n", + "In a POMDP (every?) state has an Observation. The policy can use the current and previous observations for the decision making.\n", "\n", - "partial_model = stormvogel.simulator.simulate(mdp, steps=steps, scheduler=scheduler, seed=seed)\n", - "print(partial_model)" + "The scenario is as follows:You are dropped in some city, but you are not sure which city. By walking and seeing landmarks, you learn where you are." ] }, { "cell_type": "code", - "execution_count": 11, - "id": "22871288-755c-463f-9150-f207c2f5c211", + "execution_count": 2, + "id": "887c917f-6df3-4379-b1e0-9471d76b0180", "metadata": {}, "outputs": [ { @@ -187,7 +36,7 @@ "text/html": [ "\n", " state: 2 --(action: open0)--> state: 7 --(action: empty)--> state: 17 --(action: stay)--> state: 33\n" - ] - } - ], - "source": [ - "#we can also use another simulator function that returns a path instead of a partial model:\n", - "path = stormvogel.simulator.simulate_path(mdp, steps=4, scheduler=scheduler, seed=123456)\n", + "pomdp = stormvogel.model.new_pomdp(create_initial_state=False)\n", + "\n", + "nijmegen = pomdp.new_state(\"nijmegen\")\n", + "eindhoven = pomdp.new_state(\"eindhoven\")\n", + "aachen = pomdp.new_state(\"aachen\")\n", + "cities = [nijmegen, eindhoven, aachen]\n", + "\n", + "saw_nijmegen = pomdp.new_state(\"saw nijmegen\")\n", + "saw_eindhoven = pomdp.new_state(\"saw eindhoven\")\n", + "saw_aachen = pomdp.new_state(\"saw aachen\")\n", + "saw_cities = [saw_nijmegen, saw_eindhoven, saw_aachen]\n", + "\n", + "walk = pomdp.new_action(\"walk\")\n", + "stay_put = pomdp.new_action(\"stay put\")\n", "\n", - "print(path)" + "for city, saw_city in zip(cities, saw_cities):\n", + " city.set_transitions([\n", + " (walk, saw_city),\n", + " (stay_put, city)\n", + " ])\n", + " saw_city.set_transitions([(1, city)])\n", + "\n", + "# We add an observation to the saw states. The observed states are marked with an O.\n", + "for saw_city in saw_cities:\n", + " saw_city.set_observation(saw_city.id)\n", + "\n", + "vis = show(pomdp, save_and_embed=True)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "99c763fa-82ea-42ff-8833-79c640f14518", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/docs/getting_started/08_pomdp-maze.ipynb b/docs/getting_started/08_pomdp-maze.ipynb new file mode 100644 index 0000000..ddc5996 --- /dev/null +++ b/docs/getting_started/08_pomdp-maze.ipynb @@ -0,0 +1,822 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6ab964ed-c2a0-4979-beb1-b780cc47bc35", + "metadata": {}, + "source": [ + "# POMDP Mazes\n", + "In this notebook, we will show off the power of POMDPs using mazes.\n", + "The idea is that you start on some tile, without knowing which tile. You can walk in four directions. If you run into a wall, you observe this. Based on this you can determine where in the maze you are and escape." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8a36e98c-af1c-46be-852c-a966558a92ab", + "metadata": {}, + "outputs": [], + "source": [ + "import stormvogel.model\n", + "from stormvogel.model import EmptyAction\n", + "from stormvogel.layout import Layout\n", + "from stormvogel.show import show" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2f4d4394-8a2a-424e-b16e-8ed202bdf493", + "metadata": {}, + "outputs": [], + "source": [ + "# HERE FOR DEVELOPMENT, MOVE TO LIB\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "\n", + "WALL_SYM = \"X\"\n", + "EMPTY_SYM = \".\"\n", + "EXIT_SYM = \"O\"\n", + "\n", + "def parse_level(level):\n", + " d = {EXIT_SYM: 2, WALL_SYM: 1, EMPTY_SYM: 0}\n", + " level_list = list(filter(lambda x: x != \"\", level.splitlines()))\n", + " level_list = list(map(\n", + " lambda x: list(map(lambda y: d[y], x)), \n", + " level_list))\n", + " width = len(level_list[0])\n", + " height = len(level_list)\n", + " return (level_list, width, height)\n", + "\n", + "\n", + "def show_grid(level, hor_size):\n", + " # https://stackoverflow.com/questions/19586828/drawing-grid-pattern-in-matplotlib\n", + " level_list, width, height = parse_level(level)\n", + " \n", + " # make a figure + axes\n", + " fig, ax = plt.subplots(1, 1, tight_layout=True)\n", + " fig.set_size_inches(hor_size, hor_size)\n", + " # make color map\n", + " my_cmap = matplotlib.colors.ListedColormap(['#8FCB9B', '#8F8073', '#0096FF'])\n", + " # draw the grid\n", + " for x in range(width + 1):\n", + " ax.axvline(x, lw=2, color='k', zorder=5)\n", + " for y in range(height + 1):\n", + " ax.axhline(y, lw=2, color='k', zorder=5)\n", + " \n", + " # draw the boxes\n", + " ax.imshow(level_list, interpolation='none', cmap=my_cmap, extent=[0, width, 0, height], zorder=0, )\n", + " # turn off the axis labels\n", + " ax.axis('off')\n", + "\n", + "END = 4\n", + "EXIT = 2\n", + "WALL = 1\n", + "WALKABLE = 0\n", + "OUT_OF_BOUNDS = -1\n", + "UNKNOWN = 3\n", + "\n", + "LEFT = \"\u2190\"\n", + "RIGHT = \"\u2192\"\n", + "UP = \"\u2191\"\n", + "DOWN = \"\u2193\"\n", + "\n", + "def out_of_bounds(x, y, width, height):\n", + " return x < 0 or y < 0 or x > width-1 or y > height -1\n", + "\n", + "def direction_result(x: int, y: int, direction: str, level_list: list, width: int, height: int):\n", + " d = {UP: (0,-1), RIGHT: (1,0), DOWN: (0,1), LEFT:(-1,0)}\n", + " res_x = x + d[direction][0]\n", + " res_y = y + d[direction][1]\n", + " if out_of_bounds(res_x,res_y,width,height):\n", + " return ((res_x,res_y), OUT_OF_BOUNDS)\n", + " if level_list[res_y][res_x] == WALL:\n", + " return ((x,y), WALL)\n", + " else:\n", + " return ((res_x, res_y), WALKABLE)\n", + "\n", + "def grid_world(level:list, position_scalar:int=200):\n", + " \"\"\"Create a grid world with an actor.\"\"\"\n", + " level_list, width, height = parse_level(level)\n", + " pomdp = stormvogel.model.new_pomdp(create_initial_state=False)\n", + " reward_model = pomdp.add_rewards(\"\")\n", + " \n", + " escaped = pomdp.new_state(\"escaped\")\n", + " escaped.set_observation(END)\n", + " #reward_model.set(escaped, 0)\n", + " \n", + " grid = [[None for x in range(width)] for y in range(height)]\n", + " for x in range(width):\n", + " for y in range(height):\n", + " if level_list[y][x] == WALKABLE:\n", + " grid[y][x] = pomdp.new_state([\"t\", f\"({x},{y})\"])\n", + " grid[y][x].set_observation(UNKNOWN)\n", + " #reward_model.set_state_action_reward(grid[y][x], -1)\n", + " if level_list[y][x] == EXIT:\n", + " grid[y][x] = pomdp.new_state([\"e\", f\"({x},{y})\"])\n", + " grid[y][x].set_observation(UNKNOWN)\n", + " #reward_model.set_state_action_reward(grid[y][x], 100)\n", + " grid[y][x].add_transitions([(1, escaped)])\n", + " dirs = {d: pomdp.new_action(d) for d in [UP, DOWN, LEFT, RIGHT]}\n", + " positions = {}\n", + " # Add movement\n", + " for x in range(width):\n", + " for y in range(height):\n", + " if level_list[y][x] == WALKABLE:\n", + " for d,action in dirs.items():\n", + " positions[str(grid[y][x].id)] = {\"x\": x * position_scalar, \"y\": y * position_scalar}\n", + " ((res_x, res_y), observation) = direction_result(x,y,d,level_list,width,height)\n", + "\n", + " \n", + " \n", + " if not observation == OUT_OF_BOUNDS:\n", + " took_dir = pomdp.new_state([d, f\"({x},{y})\"])\n", + " grid[y][x].add_transitions([(action, took_dir)])\n", + " #reward_model.set_state_action_reward(grid[y][x], action, -1)\n", + " # print(took_dir)\n", + " took_dir.add_transitions([(1, grid[res_y][res_x])])\n", + " reward_model.set_state_action_reward(took_dir, EmptyAction, -1)\n", + " took_dir.set_observation(observation)\n", + " pomdp.add_self_loops()\n", + " reward_model.set_unset_rewards(0)\n", + " return pomdp, positions" + ] + }, + { + "cell_type": "markdown", + "id": "6e428cc8-ed8b-4830-a1ae-eea45b663642", + "metadata": {}, + "source": [ + "First, we define a simple maze in ASSCI. 'X' are walls, '.' are walkable tiles, 'O' is the exit. We visualize it as a grid where walls are brown, walkable tiles are green, and the exit is blue." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ebdc5973-ff3f-4056-9593-511837966101", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAE8CAYAAADE0Rb2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAGd0lEQVR4nO3cMWpjWRaA4ftMZTZyUknD7KgXUgsoOplI0SSNF+CF9I5maGgqKKukVHeCnir4E+tqwNimvi++iMPh8n49C7zNOecAgP+5ee0BAHhbhAGAEAYAQhgACGEAIIQBgBAGAEIYAAhhACA+rB7ctu0l5wDgha3+o4vlMIzxdxx2d7f/10A/g8PxNOac9nSBPa2xpzX2tOZwPC2fvSoMu7vbsf/86eqBfhb7h8fx9O1oTxfY0xp7WmNPa/YPj8tn/cYAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAsc0559LBbRvbto3d3e1Lz/RuHY6nMecc2802dh/vX3ucN+vw5WnMsz1dYk9rfuzJ8+lZh+NpnM/npbNXhQGA92vxcT8+XPOhivw8bwxrfBNeY09rvDGsORxPy2evCsPu7nbsP3+6eqCfxf7hcTx9O47dx/ux/+P31x7nzdr/+tt4+uurPV1gT2t+7Mnz6Vn7h8fls358BiCEAYAQBgBCGAAIYQAghAGAEAYAQhgACGEAIIQBgBAGAEIYAAhhACCEAYAQBgBCGAAIYQAghAGAEAYAQhgACGEAIIQBgBAGAEIYAAhhACCEAYAQBgBCGAAIYQAghAGAEAYAQhgACGEAIIQBgBAGAEIYAAhhACCEAYAQBgBCGAAIYQAghAGAEAYAQhgACGEAIIQBgBAGAEIYAAhhACCEAYAQBgBCGAAIYQAghAGAEAYAQhgACGEAIIQBgBAGAEIYAAhhACCEAYAQBgBCGAAIYQAghAGAEAYAQhgACGEAIIQBgNjmnHPp4LaNbdvG7u72pWd6tw7H05hzjrHdjHH/y2uP83Y9/TnGPLtPF3y/T9vNNnYf7197nDfr8OVpzPN0ny44HE/jfD4vnb0qDAC8X4uP+/Hhmg9V5Od5Y1jkjWGJN4Y13hjWHI6n5bNXhWF3dzv2nz9dPdDPYv/wOJ6+Hf+Owr/+/drjvF3//McYX//jPl3w/T7tPt6P/R+/v/Y4b9b+19/G019f3acL9g+Py2f9+AxACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwCxzTnn0sFtG9u2jd3d7UvP9G4djqcx5xxjuxnj/pfXHuftevpzjHl2ny74fp+2m23sPt6/9jhv1uHL05jn6T5dcDiexvl8Xjp7VRgAeL8WH/fjwzUfqsjP+/ENz56eZU9r7GmNPa05HE/LZ68Kw+7uduw/f7p6oJ/F/uFxPH072tMF9rTGntbY05r9w+PyWT8+AxDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQAgDACEMAIQwABDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEMIAQGxzzrl0cNvGtm1jd3f70jO9W4fjacw57ekCe1pjT2vsac3heBrn83np7IfVD13sBwDvnD8lARDCAEAIAwAhDACEMAAQwgBACAMAIQwAhDAAEP8FOZN53RGeeDkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "LEVEL =\"\"\"\n", + "XXXXX\n", + "X...X\n", + "XOX.X\n", + "XXXXX\n", + "\"\"\"\n", + "\n", + "show_grid(LEVEL, hor_size=4)" + ] + }, + { + "cell_type": "markdown", + "id": "150834b0-8bbf-4e10-bd76-9a53a6930524", + "metadata": {}, + "source": [ + "Now we create a POMDP from the level. Can you see how it corresponds to the maze?" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d0400922-7b90-43c5-a01e-cc051089521d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pomdp, positions = grid_world(LEVEL)\n", + "\n", + "vis = show(pomdp, layout=Layout(\"layouts/grid.json\"), separate_labels=[\"t\", \"e\"], save_and_embed=True)" + ] + }, + { + "cell_type": "markdown", + "id": "a40aedf2-5973-4fb6-8bed-6146dd6db7ca", + "metadata": {}, + "source": [ + "Some things to keep in mind.\n", + "* The tiles in the grid that aren't walls have a state t,(x,y)\n", + "* From such a tile, you can try to go left, right, top or bottom (these are actions)\n", + "* After taking an action, you go to a state with an observation (symbol \u2299). After this you proceed to another tile state.\n", + " + If you try to go into a wall, you observe that you hit a wall (1)\n", + " + If you do not hit a wall, you observe this as well (0)\n", + "* Every state has a reward according to our reward model (euro sign). Solving the maze will give you 100 reward but taking a step will cost 1.\n", + "\n", + "For example, if you try to go left in tile (3,2), you observe that you hit a wall and you go back to (3,2). However, if you try to go up, you observe that you don't hit a wall and proceed to (3,1)." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8d5b0e8e-47db-4dfb-a533-a8181ec04751", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------------------------------------------- \n", + "Model type: \tPOMDP (sparse)\n", + "States: \t22\n", + "Transitions: \t34\n", + "Choices: \t34\n", + "Observations: \t5\n", + "Reward Models: (default)\n", + "State Labels: \t12 labels\n", + " * (3,1) -> 5 item(s)\n", + " * (1,2) -> 1 item(s)\n", + " * escaped -> 1 item(s)\n", + " * (1,1) -> 5 item(s)\n", + " * \u2192 -> 4 item(s)\n", + " * e -> 1 item(s)\n", + " * \u2191 -> 4 item(s)\n", + " * \u2190 -> 4 item(s)\n", + " * t -> 4 item(s)\n", + " * (3,2) -> 5 item(s)\n", + " * \u2193 -> 4 item(s)\n", + " * (2,1) -> 5 item(s)\n", + "Choice Labels: \t0 labels\n", + "-------------------------------------------------------------- \n", + "\n" + ] + } + ], + "source": [ + "from stormvogel.mapping import stormvogel_to_stormpy, stormpy_to_stormvogel\n", + "\n", + "stormpy_model = stormvogel_to_stormpy(pomdp)\n", + "print(stormpy_model)\n", + "pomdp2 = stormpy_to_stormvogel(stormpy_model)\n", + "#vis2 = show(pomdp2, layout=Layout(\"layouts/grid.json\"), separate_labels=[\"t\", \"e\"], show_editor=True)\n", + "\n", + "\n", + "# TODO use stormpy to find the best policy/schedule, i.e. escape the maze as quickly as possible.\n", + "# Ask Pim or Linus for help?" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b1171d26-20f6-40e5-af57-721db56be040", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'0': Action(name='0', labels=frozenset()), '1': Action(name='1', labels=frozenset()), '2': Action(name='2', labels=frozenset()), '3': Action(name='3', labels=frozenset()), '4': Action(name='4', labels=frozenset()), '5': Action(name='5', labels=frozenset()), '6': Action(name='6', labels=frozenset()), '7': Action(name='7', labels=frozenset()), '8': Action(name='8', labels=frozenset()), '9': Action(name='9', labels=frozenset()), '10': Action(name='10', labels=frozenset()), '11': Action(name='11', labels=frozenset()), '12': Action(name='12', labels=frozenset()), '13': Action(name='13', labels=frozenset()), '14': Action(name='14', labels=frozenset()), '15': Action(name='15', labels=frozenset()), '16': Action(name='16', labels=frozenset()), '17': Action(name='17', labels=frozenset()), '18': Action(name='18', labels=frozenset()), '19': Action(name='19', labels=frozenset()), '20': Action(name='20', labels=frozenset()), '21': Action(name='21', labels=frozenset()), '22': Action(name='22', labels=frozenset()), '23': Action(name='23', labels=frozenset()), '24': Action(name='24', labels=frozenset()), '25': Action(name='25', labels=frozenset()), '26': Action(name='26', labels=frozenset()), '27': Action(name='27', labels=frozenset()), '28': Action(name='28', labels=frozenset()), '29': Action(name='29', labels=frozenset()), '30': Action(name='30', labels=frozenset()), '31': Action(name='31', labels=frozenset()), '32': Action(name='32', labels=frozenset()), '33': Action(name='33', labels=frozenset())}\n" + ] + } + ], + "source": [ + "print(pomdp2.actions)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/getting_started/die.html b/docs/getting_started/die.html index 7dd5298..43d594b 100644 --- a/docs/getting_started/die.html +++ b/docs/getting_started/die.html @@ -1,6 +1,6 @@