forked from eaplatanios/jelly-bean-world
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenvironments.py
94 lines (85 loc) · 5.23 KB
/
environments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Copyright 2019, The Jelly Bean World Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Collection of JBW environments for OpenAI gym."""
from __future__ import absolute_import, division, print_function
# gym is an optional dependency: the environments further down in this module
# are only registered when its registration API can be imported.
try:
    from gym.envs.registration import register
except ImportError:
    # Only a failed import means "gym unavailable". Other exceptions
    # (including KeyboardInterrupt/SystemExit, which a bare ``except`` would
    # swallow) are allowed to propagate.
    modules_loaded = False
else:
    modules_loaded = True
from .agent import Agent
from .direction import RelativeDirection
from .item import *
from .simulator import *
from .visualizer import MapVisualizer
def make_config():
    """Build the simulator configuration used by the JBW gym environments.

    Four item types are defined, in this order: banana (0), onion (1),
    jellybean (2) and wall (3). Every item carries one interaction-function
    entry per item type, listed in that same order.

    Returns:
        The ``SimulatorConfig`` built from those items and the fixed
        simulation parameters below.
    """
    # Neither ``pi`` nor ``jbw`` is bound by an explicit import at the top of
    # this module, so bring both into scope here. ``jbw`` is the package name
    # used by the entry points registered in this module.
    from math import pi
    import jbw

    allowed = jbw.ActionPolicy.ALLOWED
    disallowed = jbw.ActionPolicy.DISALLOWED
    constant = IntensityFunction.CONSTANT
    box = InteractionFunction.PIECEWISE_BOX
    zero = InteractionFunction.ZERO
    cross = InteractionFunction.CROSS

    # Specify the item types.
    # NOTE(review): the positional arguments follow ``Item``'s constructor
    # order, which is defined outside this file - confirm their meaning there.
    items = [
        Item("banana", [0.0, 1.0, 0.0], [0.0, 1.0, 0.0], [1, 0, 0, 0], [0, 0, 0, 0], False, 0.0,
             intensity_fn=constant, intensity_fn_args=[-5.3],
             interaction_fns=[
                 [box, 10.0, 200.0, 0.0, -6.0],     # interaction between item 0 and item 0
                 [box, 200.0, 0.0, -6.0, -6.0],     # interaction between item 0 and item 1
                 [box, 10.0, 200.0, 2.0, -100.0],   # interaction between item 0 and item 2
                 [zero],                            # interaction between item 0 and item 3
             ]),
        Item("onion", [1.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0, 1, 0, 0], [0, 0, 0, 0], False, 0.0,
             intensity_fn=constant, intensity_fn_args=[-5.0],
             interaction_fns=[
                 [box, 200.0, 0.0, -6.0, -6.0],       # interaction between item 1 and item 0
                 [zero],                              # interaction between item 1 and item 1
                 [box, 200.0, 0.0, -100.0, -100.0],   # interaction between item 1 and item 2
                 [zero],                              # interaction between item 1 and item 3
             ]),
        Item("jellybean", [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0, 0, 0, 0], [0, 0, 0, 0], False, 0.0,
             intensity_fn=constant, intensity_fn_args=[-5.3],
             interaction_fns=[
                 [box, 10.0, 200.0, 2.0, -100.0],     # interaction between item 2 and item 0
                 [box, 200.0, 0.0, -100.0, -100.0],   # interaction between item 2 and item 1
                 [box, 10.0, 200.0, 0.0, -6.0],       # interaction between item 2 and item 2
                 [zero],                              # interaction between item 2 and item 3
             ]),
        Item("wall", [0.0, 0.0, 0.0], [0.5, 0.5, 0.5], [0, 0, 0, 1], [0, 0, 0, 0], True, 0.0,
             intensity_fn=constant, intensity_fn_args=[0.0],
             interaction_fns=[
                 [zero],                                           # interaction between item 3 and item 0
                 [zero],                                           # interaction between item 3 and item 1
                 [zero],                                           # interaction between item 3 and item 2
                 [cross, 10.0, 15.0, 20.0, -200.0, -20.0, 1.0],    # interaction between item 3 and item 3
             ]),
    ]

    # Construct the simulator configuration.
    # NOTE(review): the two direction lists are presumably indexed by
    # RelativeDirection - confirm the ordering against that enum.
    return SimulatorConfig(
        max_steps_per_movement=1,
        vision_range=5,
        allowed_movement_directions=[allowed, disallowed, disallowed, disallowed],
        allowed_turn_directions=[disallowed, disallowed, allowed, allowed],
        no_op_allowed=False,
        patch_size=32,
        mcmc_num_iter=4000,
        items=items,
        agent_color=[0.0, 0.0, 1.0],
        agent_field_of_view=2 * pi,
        collision_policy=MovementConflictPolicy.FIRST_COME_FIRST_SERVED,
        decay_param=0.4,
        diffusion_param=0.14,
        deleted_item_lifetime=2000)
if modules_loaded:
    # Construct the simulator configuration shared by both environments.
    sim_config = make_config()

    # A named function (rather than a lambda bound to a name) gives the reward
    # function a proper ``__name__`` in tracebacks and a place for its docs.
    def reward_fn(prev_items, items):
        """Return ``len(items) - len(prev_items)``.

        NOTE(review): the arguments are presumably the agent's item
        collections before and after a step - confirm against ``JBWEnv``.
        """
        return len(items) - len(prev_items)

    # Register the same environment twice: once without and once with
    # rendering. The loop variables are underscore-prefixed so a star import
    # of this module does not pick them up.
    for _env_id, _render in (('JBW-v0', False), ('JBW-render-v0', True)):
        register(
            id=_env_id,
            entry_point='jbw.environment:JBWEnv',
            kwargs={
                'sim_config': sim_config,
                'reward_fn': reward_fn,
                'render': _render})