diff --git a/habitat/core/batched_env.py b/habitat/core/batched_env.py
index 368afe9562..5a1275959a 100644
--- a/habitat/core/batched_env.py
+++ b/habitat/core/batched_env.py
@@ -46,6 +46,17 @@ def __init__(
         include_depth = "DEPTH_SENSOR" in config.SENSORS
         include_rgb = "RGB_SENSOR" in config.SENSORS
+
+        self.include_point_goal_gps_compass = "POINTGOAL_WITH_GPS_COMPASS_SENSOR" in config.SENSORS
+        # This key is a hard-coded string; it will not work with any other value.
+        # See https://github.com/eundersander/habitat-lab/blob/eundersander/gala_kinematic/habitat_baselines/rl/ppo/policy.py#L206
+        self.gps_compass_key = "pointgoal_with_gps_compass"
+        gps_compass_sensor_shape = 4
+        self.include_ee_pos = "EE_POS_SENSOR" in config.SENSORS
+        self.ee_pos_key = "ee_pos"
+        ee_pos_shape = 3
+
+
         assert include_depth or include_rgb
 
         self._num_envs = config.NUM_ENVIRONMENTS
@@ -72,10 +83,11 @@
             bsim_config.sensor0.width = sensor_width
             bsim_config.sensor0.height = sensor_height
             bsim_config.sensor0.hfov = 60.0
-            bsim_config.force_random_actions = True
+            bsim_config.force_random_actions = False
             bsim_config.do_async_physics_step = self._config.OVERLAP_PHYSICS
-            bsim_config.max_episode_length = 100
             bsim_config.num_physics_substeps = self._config.NUM_PHYSICS_SUBSTEPS
+            bsim_config.do_procedural_episode_set = True
+            # bsim_config.episode_set_filepath = "../data/episode_sets/train.episode_set.json"
             self._bsim = BatchedSimulator(bsim_config)
         else:
             self._bsim = None
@@ -106,10 +118,13 @@
         else:
             observations["rgb"] = torch.rand([self._num_envs, sensor_height, sensor_width, 3], dtype=torch.float32) * 255
             observations["depth"] = torch.rand([self._num_envs, sensor_height, sensor_width, 1], dtype=torch.float32) * 255
+        if self.include_point_goal_gps_compass:
+            observations[self.gps_compass_key] = torch.empty([self._num_envs, gps_compass_sensor_shape], dtype=torch.float32)
+        if self.include_ee_pos:
+            observations[self.ee_pos_key] = torch.empty([self._num_envs, ee_pos_shape], dtype=torch.float32)
+
         self._observations = observations
 
-        # print('observations["rgb"].shape: ', observations["rgb"].shape)
-
         self._is_closed = False
 
         num_other_actions = 1  # doAttemptGrip/doAttemptDrop
@@ -144,6 +159,20 @@
                 dtype=np.float32,
             )
             obs_dict["depth"] = depth_obs
+        if self.include_point_goal_gps_compass:
+            obs_dict[self.gps_compass_key] = spaces.Box(
+                low=-np.inf,
+                high=np.inf,  # todo: investigate appropriate min/max
+                shape=(gps_compass_sensor_shape,),
+                dtype=np.float32,
+            )
+        if self.include_ee_pos:
+            obs_dict[self.ee_pos_key] = spaces.Box(
+                low=-np.inf,
+                high=np.inf,  # todo: investigate appropriate min/max
+                shape=(ee_pos_shape,),
+                dtype=np.float32,
+            )
 
         self.observation_spaces = [obs_dict] * 1  # config.NUM_ENVIRONMENTS
         # note we only ever read element #0 of this array
@@ -156,6 +185,7 @@
         # self.number_of_episodes = []
         self._paused: List[int] = []
 
+
     @property
     def num_envs(self):
         r"""number of individual environments."""
@@ -183,26 +213,33 @@ def get_metrics(self):
         return results
 
     def get_nonpixel_observations(self, env_states, observations):
-        for state in env_states:
-            robot_pos = state.robot_position
-            robot_yaw = state.robot_yaw
-            # todo: update observations here
-
-
-    def get_dones_and_rewards_and_fix_actions(self, env_states, actions):
+        # TODO: update observations here
+        for (b, state) in enumerate(env_states):
+            if self.include_point_goal_gps_compass:
+                robot_pos = state.robot_position
+                robot_yaw = state.robot_yaw
+
+                observations[self.gps_compass_key][b, 0] = robot_pos[0]
+                observations[self.gps_compass_key][b, 1] = robot_pos[1]
+                observations[self.gps_compass_key][b, 2] = robot_pos[2]
+                observations[self.gps_compass_key][b, 3] = robot_yaw
+            if self.include_ee_pos:
+                for i in range(3):
+                    observations[self.ee_pos_key][b, i] = state.ee_pos[i]
+
+
+
+    def get_dones_rewards_resets(self, env_states, actions):
         for (b, state) in enumerate(env_states):
-            if state.did_finish_episode_and_reset:
+            max_episode_len = 500
+            if state.did_collide or state.episode_step_idx >= max_episode_len:
                 self.dones[b] = True
-                self.rewards[b] = 100.0 if state.finished_episode_success else 0.0
-                # The previously-computed action shouldn't be used for the next step because
-                # it was computed from a stale observation from the just-ended episode.
-                for i in range(self.action_dim):
-                    actions[b * self.action_dim + i] = 0.0
-            else:
-                self.dones[b] = False
-                self.rewards[b] = -1.0 if state.did_collide else 0.0
-
-        return actions
+                # for now, if we want to reset an env, we must reset it to the same
+                # episode index (this is a temporary restriction)
+                self.resets[b] = state.episode_idx
+                self.rewards[b] = 100.0 if not state.did_collide else 0.0
+            else:
+                self.resets[b] = -1
 
     def reset(self):
         r"""Reset all the vectorized environments
@@ -214,10 +251,11 @@ def reset(self):
             self._bsim.start_render()
             env_states = self._bsim.get_environment_states()
             self.get_nonpixel_observations(env_states, self._observations)
-            self._bsim.wait_for_frame()
+            self._bsim.wait_render()
 
         self.rewards = [0.0] * self._num_envs
         self.dones = [True] * self._num_envs
+        self.resets = [-1] * self._num_envs
 
         return self._observations
 
@@ -234,17 +272,21 @@ def async_step(
         assert len(actions_flat_list) == self.num_envs * self.action_dim
        if self._bsim:
            if self._config.OVERLAP_PHYSICS:
-                self._bsim.wait_async_step_physics()
+                self._bsim.wait_step_physics_or_reset()
                 self._bsim.start_render()
                 env_states = self._bsim.get_environment_states()
-                # todo: decide if Python gets a copy of env_states vs direct access to C++ memory,
-                # and then decide whether to start async physics step *before* processing env_states
-                actions_flat_list = self.get_dones_and_rewards_and_fix_actions(env_states, actions_flat_list)
-                self._bsim.start_async_step_physics(actions_flat_list)
+
+                self.get_nonpixel_observations(env_states, self._observations)
+                self.get_dones_rewards_resets(env_states, actions_flat_list)
+                self._bsim.start_step_physics_or_reset(actions_flat_list, self.resets)
+
            else:
-                self._bsim.set_actions(actions_flat_list)  # note possible wasted (unused) actions
-                self._bsim.auto_reset_or_step_physics()
+                # note: this path is untested
+                self._bsim.start_step_physics_or_reset(actions_flat_list, self.resets)
+                self._bsim.wait_step_physics_or_reset()
                 self._bsim.start_render()
+                env_states = self._bsim.get_environment_states()
+                self.get_dones_rewards_resets(env_states, actions_flat_list)
 
     @profiling_wrapper.RangeContext("wait_step")
     def wait_step(self) -> List[Any]:
@@ -254,7 +296,8 @@ def wait_step(self) -> List[Any]:
 
             # this updates self._observations["depth"] (and rgb) tensors
            # perf todo: ensure we're getting here before rendering finishes (issue a warning otherwise)
-            self._bsim.wait_for_frame()
+            self._bsim.wait_render()
+
            # these are "one frame behind" like the observations (i.e. computed from
            # the same earlier env state).
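With `OVERLAP_PHYSICS: True`, each `async_step` call now waits for the physics step (or reset) launched on the *previous* call, starts rendering, reads the per-env states, fills the non-pixel observations, computes dones/rewards plus a per-env reset list, and immediately launches the next physics step or reset so it overlaps with rendering and the learner's update. The sketch below restates that ordering as a standalone helper; only the method names come from this diff, the `overlapped_step` wrapper itself is hypothetical, and profiling plus the non-overlap branch are omitted.

```python
# Simplified, illustrative sketch of one overlapped step through the new
# BatchedEnv/BatchedSimulator API (OVERLAP_PHYSICS assumed True).
def overlapped_step(env, actions_flat_list):
    # 1. Wait for the physics step (or reset) launched on the previous call.
    env._bsim.wait_step_physics_or_reset()

    # 2. Kick off rendering of the state that step produced.
    env._bsim.start_render()

    # 3. Read per-env states and fill the non-pixel observations
    #    (robot position/yaw, end-effector position).
    env_states = env._bsim.get_environment_states()
    env.get_nonpixel_observations(env_states, env._observations)

    # 4. Compute dones/rewards and decide which envs to reset. In this diff,
    #    resets[b] == -1 appears to mean "just step env b", while a
    #    non-negative value asks for a reset to that episode index
    #    (the same episode, for now).
    env.get_dones_rewards_resets(env_states, actions_flat_list)

    # 5. Launch the next physics step (or reset) asynchronously; it overlaps
    #    with rendering and with the policy's forward/backward pass.
    env._bsim.start_step_physics_or_reset(actions_flat_list, env.resets)

    # 6. Later, wait_step() calls env._bsim.wait_render() before reading the
    #    rgb/depth tensors, which are "one frame behind" the launched step.
```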
diff --git a/habitat_baselines/config/default.py b/habitat_baselines/config/default.py
index 7b0851eed5..2c19c3b6ea 100644
--- a/habitat_baselines/config/default.py
+++ b/habitat_baselines/config/default.py
@@ -98,7 +98,7 @@
 _C.RL.POLICY.action_distribution_type = "categorical"  # or 'gaussian'
 # For gaussian action distribution:
 _C.RL.POLICY.ACTION_DIST = CN()
-_C.RL.POLICY.ACTION_DIST.use_log_std = False
+_C.RL.POLICY.ACTION_DIST.use_log_std = True
 _C.RL.POLICY.ACTION_DIST.use_softplus = False
 _C.RL.POLICY.ACTION_DIST.min_std = 1e-6
 _C.RL.POLICY.ACTION_DIST.max_std = 1
diff --git a/habitat_baselines/config/rearrange/gala_kinematic.yaml b/habitat_baselines/config/rearrange/gala_kinematic.yaml
index 7fa55034a0..51d99936e6 100644
--- a/habitat_baselines/config/rearrange/gala_kinematic.yaml
+++ b/habitat_baselines/config/rearrange/gala_kinematic.yaml
@@ -11,7 +11,7 @@ NUM_PHYSICS_SUBSTEPS: 1
 SAVE_VIDEOS_INTERVAL: -1
 NUM_UPDATES: 60
 NUM_ENVIRONMENTS: 512
-SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR"]
+# SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
 SIMULATOR:
   AGENTS: ['AGENT_0']
   AGENT_0:
diff --git a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
index 8a6f0b93d7..295ba9aa55 100644
--- a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
+++ b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
@@ -6,8 +6,9 @@ NUM_CHECKPOINTS: 0
 BATCHED_ENV: True
 OVERLAP_PHYSICS: True
 SAVE_VIDEOS_INTERVAL: 500
-NUM_UPDATES: 61
+NUM_UPDATES: 6100
 NUM_ENVIRONMENTS: 512
+SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR", "EE_POS_SENSOR"]
 SIMULATOR:
   AGENTS: ['AGENT_0']
   AGENT_0:
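Config-wise, the base `gala_kinematic.yaml` now comments out its `SENSORS` override, and `gala_kinematic_ddppo.yaml` becomes the file that enables the two new non-pixel sensors alongside depth and RGB while raising `NUM_UPDATES` from 61 to 6100; `use_log_std = True` makes the Gaussian action head parameterize its standard deviation in log space rather than directly. A quick way to see what the merged config resolves to is to load the yaml through the usual `habitat_baselines` config helper; the snippet below is only a sanity-check sketch (it assumes `habitat_baselines.config.default.get_config` and repo-root-relative paths), and the expected values in the comments come from this diff.

```python
# Sanity-check sketch (not part of the diff): load the DD-PPO config and print
# the values this change relies on.
from habitat_baselines.config.default import get_config

config = get_config(
    "habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml"
)

print(config.SENSORS)
# expected: ['DEPTH_SENSOR', 'RGB_SENSOR', 'POINTGOAL_WITH_GPS_COMPASS_SENSOR', 'EE_POS_SENSOR']
print(config.NUM_UPDATES)                        # expected: 6100
print(config.RL.POLICY.ACTION_DIST.use_log_std)  # expected: True (new default)
```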
diff --git a/habitat_baselines/rl/ppo/policy.py b/habitat_baselines/rl/ppo/policy.py
index 46c4dfa78d..1968d1c421 100644
--- a/habitat_baselines/rl/ppo/policy.py
+++ b/habitat_baselines/rl/ppo/policy.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
 import abc
+from re import X
 import torch
 from gym import spaces
@@ -202,34 +203,41 @@
     ):
         super().__init__()
 
-        if (
-            IntegratedPointGoalGPSAndCompassSensor.cls_uuid
-            in observation_space.spaces
-        ):
-            self._n_input_goal = observation_space.spaces[
-                IntegratedPointGoalGPSAndCompassSensor.cls_uuid
-            ].shape[0]
-        elif PointGoalSensor.cls_uuid in observation_space.spaces:
-            self._n_input_goal = observation_space.spaces[
-                PointGoalSensor.cls_uuid
-            ].shape[0]
-        elif ImageGoalSensor.cls_uuid in observation_space.spaces:
-            goal_observation_space = spaces.Dict(
-                {"rgb": observation_space.spaces[ImageGoalSensor.cls_uuid]}
-            )
-            self.goal_visual_encoder = SimpleCNN(
-                goal_observation_space, hidden_size
-            )
-            self._n_input_goal = hidden_size
-        else:
-            self._n_input_goal = 0
+        #### [gala_kinematic] Manually adding sensors here
+        self.observation_space = observation_space
+        self._n_state = 0
+        if "pointgoal_with_gps_compass" in self.observation_space.spaces:
+            self._n_state += 4
+        if "ee_pos" in self.observation_space.spaces:
+            self._n_state += 3
+
+        # if (
+        #     IntegratedPointGoalGPSAndCompassSensor.cls_uuid
+        #     in observation_space.spaces
+        # ):
+        #     self._n_input_goal = observation_space.spaces[
+        #         IntegratedPointGoalGPSAndCompassSensor.cls_uuid
+        #     ].shape[0]
+        # elif PointGoalSensor.cls_uuid in observation_space.spaces:
+        #     self._n_input_goal = observation_space.spaces[
+        #         PointGoalSensor.cls_uuid
+        #     ].shape[0]
+        # elif ImageGoalSensor.cls_uuid in observation_space.spaces:
+        #     goal_observation_space = spaces.Dict(
+        #         {"rgb": observation_space.spaces[ImageGoalSensor.cls_uuid]}
+        #     )
+        #     self.goal_visual_encoder = SimpleCNN(
+        #         goal_observation_space, hidden_size
+        #     )
+        #     self._n_input_goal = hidden_size
+        #### [gala_kinematic] End of manually adding sensors here
 
         self._hidden_size = hidden_size
 
         self.visual_encoder = SimpleCNN(observation_space, hidden_size)
 
         self.state_encoder = build_rnn_state_encoder(
-            (0 if self.is_blind else self._hidden_size) + self._n_input_goal,
+            (0 if self.is_blind else self._hidden_size) + self._n_state,
             self._hidden_size,
         )
 
@@ -248,24 +256,31 @@ def num_recurrent_layers(self):
         return self.state_encoder.num_recurrent_layers
 
     def forward(self, observations, rnn_hidden_states, prev_actions, masks):
-        if IntegratedPointGoalGPSAndCompassSensor.cls_uuid in observations:
-            target_encoding = observations[
-                IntegratedPointGoalGPSAndCompassSensor.cls_uuid
-            ]
-
-        elif PointGoalSensor.cls_uuid in observations:
-            target_encoding = observations[PointGoalSensor.cls_uuid]
-        elif ImageGoalSensor.cls_uuid in observations:
-            image_goal = observations[ImageGoalSensor.cls_uuid]
-            target_encoding = self.goal_visual_encoder({"rgb": image_goal})
-        else:
-            target_encoding = None
+        #### [gala_kinematic] Manually adding sensors here
+        x = [self.visual_encoder(observations)]
+        if "pointgoal_with_gps_compass" in self.observation_space.spaces:
+            x += [observations["pointgoal_with_gps_compass"]]
+        if "ee_pos" in self.observation_space.spaces:
+            x += [observations["ee_pos"]]
+        # if IntegratedPointGoalGPSAndCompassSensor.cls_uuid in observations:
+        #     target_encoding = observations[
+        #         IntegratedPointGoalGPSAndCompassSensor.cls_uuid
+        #     ]
+
+        # elif PointGoalSensor.cls_uuid in observations:
+        #     target_encoding = observations[PointGoalSensor.cls_uuid]
+        # elif ImageGoalSensor.cls_uuid in observations:
+        #     image_goal = observations[ImageGoalSensor.cls_uuid]
+        #     target_encoding = self.goal_visual_encoder({"rgb": image_goal})
+        # else:
+        #     target_encoding = None
+        # if not self.is_blind:
+        #     perception_embed = self.visual_encoder(observations)
+        #     x = [perception_embed]
+        #     if target_encoding is not None:
+        #         x += [target_encoding]
+        #### [gala_kinematic] End of manually adding sensors here
 
-        if not self.is_blind:
-            perception_embed = self.visual_encoder(observations)
-            x = [perception_embed]
-            if target_encoding:
-                x += [target_encoding]
 
         x_out = torch.cat(x, dim=1)
         x_out, rnn_hidden_states = self.state_encoder(
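The net change in the policy is that the goal-sensor branching is replaced by straight concatenation: the CNN embedding is extended with the raw 4-d `pointgoal_with_gps_compass` and 3-d `ee_pos` vectors (the same hard-coded keys that `BatchedEnv` writes), so the recurrent state encoder's input width becomes `hidden_size + 7` when both sensors are enabled. The stand-alone sketch below mirrors that tensor bookkeeping with stand-ins for `SimpleCNN` and `build_rnn_state_encoder`; it is illustrative only, not the code in this diff.

```python
# Illustrative sketch of the sensor fusion performed by the modified net.
# SimpleCNN and the RNN state encoder are replaced by simple stand-ins
# (a Linear layer and a GRU) so the shapes are easy to follow.
import torch
import torch.nn as nn


class TinyFusionNet(nn.Module):
    def __init__(self, hidden_size=512, n_state=4 + 3):  # gps_compass (4) + ee_pos (3)
        super().__init__()
        self.visual_encoder = nn.Linear(64, hidden_size)  # stand-in for SimpleCNN
        self.state_encoder = nn.GRU(hidden_size + n_state, hidden_size, batch_first=True)

    def forward(self, observations):
        x = [self.visual_encoder(observations["visual_features"])]
        # Non-pixel sensors are concatenated as-is; there is no learned embedding for them.
        if "pointgoal_with_gps_compass" in observations:
            x.append(observations["pointgoal_with_gps_compass"])
        if "ee_pos" in observations:
            x.append(observations["ee_pos"])
        x_out = torch.cat(x, dim=1)  # [batch, hidden_size + 7]
        out, _ = self.state_encoder(x_out.unsqueeze(1))  # add a time dimension for the GRU
        return out.squeeze(1)


if __name__ == "__main__":
    batch = 8
    obs = {
        "visual_features": torch.rand(batch, 64),  # stand-in for a depth/RGB embedding
        "pointgoal_with_gps_compass": torch.rand(batch, 4),
        "ee_pos": torch.rand(batch, 3),
    }
    print(TinyFusionNet()(obs).shape)  # torch.Size([8, 512])
```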