From 78ee91407bc0eb3cfea9405a358efb70edabf606 Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Wed, 23 Mar 2022 11:18:31 -0700
Subject: [PATCH 01/12] initial commit

---
 configs/tasks/rearrangepick_replica_cad.yaml |  2 +-
 habitat/core/batched_env.py                  | 52 +++++++++++++++++--
 .../config/rearrange/gala_kinematic.yaml     |  2 +-
 .../rearrange/gala_kinematic_ddppo.yaml      |  5 +-
 4 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/configs/tasks/rearrangepick_replica_cad.yaml b/configs/tasks/rearrangepick_replica_cad.yaml
index 113fbdd204..853a359d16 100644
--- a/configs/tasks/rearrangepick_replica_cad.yaml
+++ b/configs/tasks/rearrangepick_replica_cad.yaml
@@ -3,7 +3,7 @@ ENVIRONMENT:
 DATASET:
   TYPE: RearrangeDataset-v0
   SPLIT: train
-  DATA_PATH: data/datasets/rearrange_pick/replica_cad/v0/{split}/{split}_counter_L_analysis_5000_500.json.gz
+  DATA_PATH: data/datasets/rearrange_pick/replica_cad/v0/rearrange_pick_replica_cad_v0/{split}/{split}_counter_L_analysis_5000_500.json.gz
   SCENES_DIR: "data/replica_cad/"
 TASK:
   TYPE: RearrangePickTask-v0
diff --git a/habitat/core/batched_env.py b/habitat/core/batched_env.py
index 368afe9562..8b664e8861 100644
--- a/habitat/core/batched_env.py
+++ b/habitat/core/batched_env.py
@@ -13,6 +13,7 @@ from gym.spaces import Box
 import numpy as np
+import quaternion
 from gym import spaces
 from habitat.utils import profiling_wrapper
 from collections import OrderedDict
@@ -20,6 +21,11 @@
 import torch  # isort:skip # noqa: F401 must import torch before importing bps_pytorch
 
+from habitat.utils.geometry_utils import (
+    quaternion_rotate_vector,
+)
+from habitat.tasks.utils import cartesian_to_polar
+
 
 class BatchedEnv:
     r"""Todo
     """
@@ -46,6 +52,11 @@ def __init__(
         include_depth = "DEPTH_SENSOR" in config.SENSORS
         include_rgb = "RGB_SENSOR" in config.SENSORS
+        # include_gps = "GPS_SENSOR" in config.SENSORS
+        # include_compass = "COMPASS_SENSOR" in config.SENSORS
+        self.include_point_goal_gps_compass = "POINTGOAL_WITH_GPS_COMPASS_SENSOR" in config.SENSORS
+        gps_compass_sensor_shape= 4
+        print(self.include_point_goal_gps_compass, config.SENSORS)
         assert include_depth or include_rgb
 
         self._num_envs = config.NUM_ENVIRONMENTS
@@ -106,6 +117,12 @@ def __init__(
         else:
             observations["rgb"] = torch.rand([self._num_envs, sensor_height, sensor_width, 3], dtype=torch.float32) * 255
             observations["depth"] = torch.rand([self._num_envs, sensor_height, sensor_width, 1], dtype=torch.float32) * 255
+        # if include_gps:
+        #     observations["gps"] = torch.empty([self._num_envs, 3], dtype=torch.float32)
+        # if include_compass:
+        #     observations["compass"] = torch.empty([self._num_envs, 3], dtype=torch.float32)
+        if self.include_point_goal_gps_compass:
+            observations["goal_gps_compass"] = torch.empty([self._num_envs, gps_compass_sensor_shape], dtype=torch.float32)
         self._observations = observations
 
         # print('observations["rgb"].shape: ', observations["rgb"].shape)
@@ -144,6 +161,15 @@ def __init__(
             dtype=np.float32,
         )
         obs_dict["depth"] = depth_obs
+        # if include_gps:
+        # if include_compass:
+        if self.include_point_goal_gps_compass:
+            obs_dict["goal_gps_compass"] = spaces.Box(
+                low=0.0,
+                high=np.inf,  # todo: investigate depth min/max
+                shape=(gps_compass_sensor_shape,),
+                dtype=np.float32,
+            )
 
         self.observation_spaces = [obs_dict] * 1  # config.NUM_ENVIRONMENTS
         # note we only ever read element #0 of this array
@@ -156,6 +182,7 @@ def __init__(
         # self.number_of_episodes = []
         self._paused: List[int] = []
 
+
     @property
     def num_envs(self):
         r"""number of individual environments."""
@@ -183,10 +210,25 @@ def get_metrics(self):
         return results
 
     def get_nonpixel_observations(self, env_states, observations):
-        for state in env_states:
-            robot_pos = state.robot_position
-            robot_yaw = state.robot_yaw
-            # todo: update observations here
+        # TODO: update observations here
+        for (b, state) in enumerate(env_states):
+            if self.include_point_goal_gps_compass:
+                robot_pos = state.robot_position
+                robot_yaw = state.robot_yaw
+
+                # direction_vector = state.goal_pos - robot_pos
+                # source_rotation = quaternion.quaternion(0, 0, 0, 0) #TODO:get actual rotation
+                # direction_vector_agent = quaternion_rotate_vector(
+                #     source_rotation.inverse(), direction_vector
+                # )
+                # rho, phi = cartesian_to_polar(
+                #     -direction_vector_agent[2], direction_vector_agent[0]
+                # )
+                observations["goal_gps_compass"] [b, 0] = robot_pos[0]
+                observations["goal_gps_compass"] [b, 1] = robot_pos[1]
+                observations["goal_gps_compass"] [b, 2] = robot_pos[2]
+                observations["goal_gps_compass"] [b, 3] = robot_yaw
+
 
     def get_dones_and_rewards_and_fix_actions(self, env_states, actions):
@@ -239,6 +281,7 @@ def async_step(
             env_states = self._bsim.get_environment_states()
             # todo: decide if Python gets a copy of env_states vs direct access to C++ memory,
             # and then decide whether to start async physics step *before* processing env_states
+            self.get_nonpixel_observations(env_states, self._observations)
             actions_flat_list = self.get_dones_and_rewards_and_fix_actions(env_states, actions_flat_list)
             self._bsim.start_async_step_physics(actions_flat_list)
         else:
@@ -254,6 +297,7 @@ def wait_step(self) -> List[Any]:
 
         # this updates self._observations["depth"] (and rgb) tensors
         # perf todo: ensure we're getting here before rendering finishes (issue a warning otherwise)
+        self._bsim.wait_for_frame()
 
         # these are "one frame behind" like the observations (i.e. computed from
diff --git a/habitat_baselines/config/rearrange/gala_kinematic.yaml b/habitat_baselines/config/rearrange/gala_kinematic.yaml
index 7fa55034a0..6ac5992d2c 100644
--- a/habitat_baselines/config/rearrange/gala_kinematic.yaml
+++ b/habitat_baselines/config/rearrange/gala_kinematic.yaml
@@ -11,7 +11,7 @@ NUM_PHYSICS_SUBSTEPS: 1
 SAVE_VIDEOS_INTERVAL: -1
 NUM_UPDATES: 60
 NUM_ENVIRONMENTS: 512
-SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR"]
+SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
 SIMULATOR:
   AGENTS: ['AGENT_0']
   AGENT_0:
diff --git a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
index 8a6f0b93d7..58480e4935 100644
--- a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
+++ b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
@@ -1,5 +1,5 @@
-TENSORBOARD_DIR: "/checkpoint/eundersander/gala_kinematic/tb/gala_kinematic_ddppo"
-CHECKPOINT_FOLDER: "/checkpoint/eundersander/gala_kinematic/ckpt/gala_kinematic_ddppo"
+TENSORBOARD_DIR: "data/tb/gala_kinematic_ddppo"
+CHECKPOINT_FOLDER: "data/ckpt/gala_kinematic_ddppo"
 VIDEO_DIR: "../videos"
 REWARD_SCALE: 0.01
 NUM_CHECKPOINTS: 0
@@ -8,6 +8,7 @@ OVERLAP_PHYSICS: True
 SAVE_VIDEOS_INTERVAL: 500
 NUM_UPDATES: 61
 NUM_ENVIRONMENTS: 512
+SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
 SIMULATOR:
   AGENTS: ['AGENT_0']
   AGENT_0:

From 4b0c3129d3d381f103f8afa8f8c096b0898c0eb1 Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Thu, 24 Mar 2022 14:00:20 -0700
Subject: [PATCH 02/12] removing print statement

---
 habitat/core/batched_env.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/habitat/core/batched_env.py b/habitat/core/batched_env.py
index 8b664e8861..897e728e72 100644
--- a/habitat/core/batched_env.py
+++ b/habitat/core/batched_env.py
@@ -56,7 +56,6 @@ def __init__(
         # include_compass = "COMPASS_SENSOR" in config.SENSORS
         self.include_point_goal_gps_compass = "POINTGOAL_WITH_GPS_COMPASS_SENSOR" in config.SENSORS
         gps_compass_sensor_shape= 4
-        print(self.include_point_goal_gps_compass, config.SENSORS)
         assert include_depth or include_rgb
 
         self._num_envs = config.NUM_ENVIRONMENTS

From c12ba3acc2ee027404123b6a6ed34cea3087044f Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Mon, 28 Mar 2022 13:51:18 -0700
Subject: [PATCH 03/12] Fixing the key of the sensors for learning

---
 habitat/core/batched_env.py         | 18 +++++++++++-------
 habitat_baselines/config/default.py |  2 +-
 habitat_baselines/rl/ppo/policy.py  |  2 +-
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/habitat/core/batched_env.py b/habitat/core/batched_env.py
index 897e728e72..3b533f691b 100644
--- a/habitat/core/batched_env.py
+++ b/habitat/core/batched_env.py
@@ -55,6 +55,10 @@ def __init__(
         # include_gps = "GPS_SENSOR" in config.SENSORS
         # include_compass = "COMPASS_SENSOR" in config.SENSORS
         self.include_point_goal_gps_compass = "POINTGOAL_WITH_GPS_COMPASS_SENSOR" in config.SENSORS
+
+        # This key is a hard_coded_string. Will not work with any other value:
+        # see this line : https://github.com/eundersander/habitat-lab/blob/eundersander/gala_kinematic/habitat_baselines/rl/ppo/policy.py#L206
+        self.gps_compass_key = "pointgoal_with_gps_compass"
         gps_compass_sensor_shape= 4
         assert include_depth or include_rgb
@@ -82,7 +86,7 @@ def __init__(
             bsim_config.sensor0.width = sensor_width
             bsim_config.sensor0.height = sensor_height
             bsim_config.sensor0.hfov = 60.0
-            bsim_config.force_random_actions = True
+            bsim_config.force_random_actions = False
             bsim_config.do_async_physics_step = self._config.OVERLAP_PHYSICS
             bsim_config.max_episode_length = 100
             bsim_config.num_physics_substeps = self._config.NUM_PHYSICS_SUBSTEPS
@@ -121,7 +125,7 @@ def __init__(
        # if include_compass:
        #     observations["compass"] = torch.empty([self._num_envs, 3], dtype=torch.float32)
         if self.include_point_goal_gps_compass:
-            observations["goal_gps_compass"] = torch.empty([self._num_envs, gps_compass_sensor_shape], dtype=torch.float32)
+            observations[self.gps_compass_key] = torch.empty([self._num_envs, gps_compass_sensor_shape], dtype=torch.float32)
         self._observations = observations
 
         # print('observations["rgb"].shape: ', observations["rgb"].shape)
@@ -163,7 +167,7 @@ def __init__(
         # if include_gps:
         # if include_compass:
         if self.include_point_goal_gps_compass:
-            obs_dict["goal_gps_compass"] = spaces.Box(
+            obs_dict[self.gps_compass_key] = spaces.Box(
                 low=0.0,
                 high=np.inf,  # todo: investigate depth min/max
                 shape=(gps_compass_sensor_shape,),
@@ -223,10 +227,10 @@ def get_nonpixel_observations(self, env_states, observations):
                 # rho, phi = cartesian_to_polar(
                 #     -direction_vector_agent[2], direction_vector_agent[0]
                 # )
-                observations["goal_gps_compass"] [b, 0] = robot_pos[0]
-                observations["goal_gps_compass"] [b, 1] = robot_pos[1]
-                observations["goal_gps_compass"] [b, 2] = robot_pos[2]
-                observations["goal_gps_compass"] [b, 3] = robot_yaw
+                observations[self.gps_compass_key] [b, 0] = robot_pos[0]
+                observations[self.gps_compass_key] [b, 1] = robot_pos[1]
+                observations[self.gps_compass_key] [b, 2] = robot_pos[2]
+                observations[self.gps_compass_key] [b, 3] = robot_yaw
diff --git a/habitat_baselines/config/default.py b/habitat_baselines/config/default.py
index 7b0851eed5..2c19c3b6ea 100644
--- a/habitat_baselines/config/default.py
+++ b/habitat_baselines/config/default.py
@@ -98,7 +98,7 @@
 _C.RL.POLICY.action_distribution_type = "categorical"  # or 'gaussian'
 # For gaussian action distribution:
 _C.RL.POLICY.ACTION_DIST = CN()
-_C.RL.POLICY.ACTION_DIST.use_log_std = False
+_C.RL.POLICY.ACTION_DIST.use_log_std = True
 _C.RL.POLICY.ACTION_DIST.use_softplus = False
 _C.RL.POLICY.ACTION_DIST.min_std = 1e-6
 _C.RL.POLICY.ACTION_DIST.max_std = 1
diff --git a/habitat_baselines/rl/ppo/policy.py b/habitat_baselines/rl/ppo/policy.py
index 46c4dfa78d..7c276ade61 100644
--- a/habitat_baselines/rl/ppo/policy.py
+++ b/habitat_baselines/rl/ppo/policy.py
@@ -264,7 +264,7 @@ def forward(self, observations, rnn_hidden_states, prev_actions, masks):
         if not self.is_blind:
             perception_embed = self.visual_encoder(observations)
             x = [perception_embed]
-        if target_encoding:
+        if target_encoding is not None:
             x += [target_encoding]
 
         x_out = torch.cat(x, dim=1)

From 5b850b396341bec281ffa49140ef32f090ac8e2b Mon Sep 17 00:00:00 2001
From: Eric Undersander
Date: Tue, 29 Mar 2022 00:01:03 -0400
Subject: [PATCH 04/12] reference code for loading an episode set by filepath

---
 habitat/core/batched_env.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/habitat/core/batched_env.py b/habitat/core/batched_env.py
index 368afe9562..bd52f305b0 100644
--- a/habitat/core/batched_env.py
+++ b/habitat/core/batched_env.py
@@ -76,6 +76,8 @@ def __init__(
             bsim_config.do_async_physics_step = self._config.OVERLAP_PHYSICS
             bsim_config.max_episode_length = 100
             bsim_config.num_physics_substeps = self._config.NUM_PHYSICS_SUBSTEPS
+            bsim_config.do_procedural_episode_set = True
+            # bsim_config.episode_set_filepath = "../data/episode_sets/train.episode_set.json"
             self._bsim = BatchedSimulator(bsim_config)
         else:
             self._bsim = None

From 5cc6afef0406ebec4702248b6029daad167bb447 Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Tue, 29 Mar 2022 12:34:29 -0700
Subject: [PATCH 05/12] adding the ee_pos to the non-visual observations

---
 habitat/core/batched_env.py                   | 26 +++++--
 .../config/rearrange/gala_kinematic.yaml      |  2 +-
 .../rearrange/gala_kinematic_ddppo.yaml       |  2 +-
 habitat_baselines/rl/ppo/policy.py            | 67 +++++++++++++------
 4 files changed, 69 insertions(+), 28 deletions(-)

diff --git a/habitat/core/batched_env.py b/habitat/core/batched_env.py
index 3b533f691b..f34b3cf314 100644
--- a/habitat/core/batched_env.py
+++ b/habitat/core/batched_env.py
@@ -4,12 +4,14 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from turtle import shape
 from typing import (
     Dict,
     Optional,
     List,
     Any,
 )
+import uuid
 
 from gym.spaces import Box
 import numpy as np
@@ -52,14 +54,17 @@ def __init__(
         include_depth = "DEPTH_SENSOR" in config.SENSORS
         include_rgb = "RGB_SENSOR" in config.SENSORS
-
 
         self.include_point_goal_gps_compass = "POINTGOAL_WITH_GPS_COMPASS_SENSOR" in config.SENSORS
         # This key is a hard_coded_string. Will not work with any other value:
         # see this line : https://github.com/eundersander/habitat-lab/blob/eundersander/gala_kinematic/habitat_baselines/rl/ppo/policy.py#L206
         self.gps_compass_key = "pointgoal_with_gps_compass"
         gps_compass_sensor_shape= 4
+        self.include_ee_pos = "EE_POS_SENSOR" in config.SENSORS
+        self.ee_pos_key = "ee_pos"
+        ee_pos_shape = 3
+
 
         assert include_depth or include_rgb
 
         self._num_envs = config.NUM_ENVIRONMENTS
@@ -126,6 +131,9 @@ def __init__(
         if self.include_point_goal_gps_compass:
             observations[self.gps_compass_key] = torch.empty([self._num_envs, gps_compass_sensor_shape], dtype=torch.float32)
+        if self.include_ee_pos:
+            observations[self.ee_pos_key] = torch.empty([self._num_envs, ee_pos_shape], dtype=torch.float32)
+
         self._observations = observations
 
         # print('observations["rgb"].shape: ', observations["rgb"].shape)
@@ -164,8 +172,6 @@ def __init__(
             dtype=np.float32,
         )
         obs_dict["depth"] = depth_obs
-        # if include_gps:
-        # if include_compass:
         if self.include_point_goal_gps_compass:
             obs_dict[self.gps_compass_key] = spaces.Box(
                 low=0.0,
@@ -173,6 +179,13 @@ def __init__(
                 shape=(gps_compass_sensor_shape,),
                 dtype=np.float32,
             )
+        if self.include_ee_pos:
+            obs_dict[self.ee_pos_key] = spaces.Box(
+                low=0.0,
+                high=np.inf,  # todo: investigate depth min/max
+                shape=(ee_pos_shape,),
+                dtype=np.float32,
+            )
 
         self.observation_spaces = [obs_dict] * 1  # config.NUM_ENVIRONMENTS
         # note we only ever read element #0 of this array
@@ -231,6 +244,9 @@ def get_nonpixel_observations(self, env_states, observations):
                 observations[self.gps_compass_key] [b, 1] = robot_pos[1]
                 observations[self.gps_compass_key] [b, 2] = robot_pos[2]
                 observations[self.gps_compass_key] [b, 3] = robot_yaw
+            if self.include_ee_pos:
+                for i in range(3):
+                    observations[self.ee_pos_key][b, i] = state.ee_pos[i]
 
diff --git a/habitat_baselines/config/rearrange/gala_kinematic.yaml b/habitat_baselines/config/rearrange/gala_kinematic.yaml
index 6ac5992d2c..51d99936e6 100644
--- a/habitat_baselines/config/rearrange/gala_kinematic.yaml
+++ b/habitat_baselines/config/rearrange/gala_kinematic.yaml
@@ -11,7 +11,7 @@ NUM_PHYSICS_SUBSTEPS: 1
 SAVE_VIDEOS_INTERVAL: -1
 NUM_UPDATES: 60
 NUM_ENVIRONMENTS: 512
-SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
+# SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
 SIMULATOR:
   AGENTS: ['AGENT_0']
   AGENT_0:
diff --git a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
index 58480e4935..c99ab00ada 100644
--- a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
+++ b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
@@ -8,7 +8,7 @@ OVERLAP_PHYSICS: True
 SAVE_VIDEOS_INTERVAL: 500
 NUM_UPDATES: 61
 NUM_ENVIRONMENTS: 512
-SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR"]
+SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR", EE_POS_SENSOR]
 SIMULATOR:
   AGENTS: ['AGENT_0']
   AGENT_0:
diff --git a/habitat_baselines/rl/ppo/policy.py b/habitat_baselines/rl/ppo/policy.py
index 7c276ade61..f60f7c9ffc 100644
--- a/habitat_baselines/rl/ppo/policy.py
+++ b/habitat_baselines/rl/ppo/policy.py
@@ -202,27 +202,33 @@ def __init__(
     ):
         super().__init__()
 
-        if (
-            IntegratedPointGoalGPSAndCompassSensor.cls_uuid
-            in observation_space.spaces
-        ):
-            self._n_input_goal = observation_space.spaces[
-                IntegratedPointGoalGPSAndCompassSensor.cls_uuid
-            ].shape[0]
-        elif PointGoalSensor.cls_uuid in observation_space.spaces:
-            self._n_input_goal = observation_space.spaces[
-                PointGoalSensor.cls_uuid
-            ].shape[0]
-        elif ImageGoalSensor.cls_uuid in observation_space.spaces:
-            goal_observation_space = spaces.Dict(
-                {"rgb": observation_space.spaces[ImageGoalSensor.cls_uuid]}
-            )
-            self.goal_visual_encoder = SimpleCNN(
-                goal_observation_space, hidden_size
-            )
-            self._n_input_goal = hidden_size
-        else:
-            self._n_input_goal = 0
+        #### Manually adding sensors in there
+        self.observation_space = observation_space
+        self._n_input_goal = 0
+        if "pointgoal_with_gps_compass" in self.observation_space.spaces:
+            self._n_input_goal += 4
+        if "ee_pos" in self.observation_space.spaces:
+            self._n_input_goal += 3
+
+        # if (
+        #     IntegratedPointGoalGPSAndCompassSensor.cls_uuid
+        #     in observation_space.spaces
+        # ):
+        #     self._n_input_goal = observation_space.spaces[
+        #         IntegratedPointGoalGPSAndCompassSensor.cls_uuid
+        #     ].shape[0]
+        # elif PointGoalSensor.cls_uuid in observation_space.spaces:
+        #     self._n_input_goal = observation_space.spaces[
+        #         PointGoalSensor.cls_uuid
+        #     ].shape[0]
+        # elif ImageGoalSensor.cls_uuid in observation_space.spaces:
+        #     goal_observation_space = spaces.Dict(
+        #         {"rgb": observation_space.spaces[ImageGoalSensor.cls_uuid]}
+        #     )
+        #     self.goal_visual_encoder = SimpleCNN(
+        #         goal_observation_space, hidden_size
+        #     )
+        #     self._n_input_goal = hidden_size
 
         self._hidden_size = hidden_size
@@ -248,6 +254,25 @@ def num_recurrent_layers(self):
         return self.state_encoder.num_recurrent_layers
 
     def forward(self, observations, rnn_hidden_states, prev_actions, masks):
+        #### Manually adding sensors in there
+        x = []
+        if "pointgoal_with_gps_compass" in self.observation_space.spaces:
+            self._n_input_goal += 4
+            x += [observations["pointgoal_with_gps_compass"]]
+        if "ee_pos" in self.observation_space.spaces:
+            x += [observations["ee_pos"]]
+
+        # if IntegratedPointGoalGPSAndCompassSensor.cls_uuid in observations:
+        #     target_encoding = observations[
+        #         IntegratedPointGoalGPSAndCompassSensor.cls_uuid
+        #     ]
+        # elif PointGoalSensor.cls_uuid in observations:
+        #     target_encoding = observations[PointGoalSensor.cls_uuid]
+        # elif ImageGoalSensor.cls_uuid in observations:
+        #     image_goal = observations[ImageGoalSensor.cls_uuid]
+        #     target_encoding = self.goal_visual_encoder({"rgb": image_goal})
+
+        # x = [target_encoding]
         if IntegratedPointGoalGPSAndCompassSensor.cls_uuid in observations:
             target_encoding = observations[
                 IntegratedPointGoalGPSAndCompassSensor.cls_uuid

From bd45a66c7bffea881fa72755a7bd156b1c4cb95c Mon Sep 17 00:00:00 2001
From: Eric Undersander
Date: Wed, 30 Mar 2022 01:15:23 -0400
Subject: [PATCH 06/12] add get_dones_rewards_resets for per-env episode
 termination logic; use new bsim wait_step_physics_or_reset API

---
 habitat/core/batched_env.py | 42 ++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/habitat/core/batched_env.py b/habitat/core/batched_env.py
index bd52f305b0..8e2eb2b435 100644
--- a/habitat/core/batched_env.py
+++ b/habitat/core/batched_env.py
@@ -74,7 +74,6 @@ def __init__(
             bsim_config.sensor0.hfov = 60.0
             bsim_config.force_random_actions = True
             bsim_config.do_async_physics_step = self._config.OVERLAP_PHYSICS
-            bsim_config.max_episode_length = 100
             bsim_config.num_physics_substeps = self._config.NUM_PHYSICS_SUBSTEPS
             bsim_config.do_procedural_episode_set = True
             # bsim_config.episode_set_filepath = "../data/episode_sets/train.episode_set.json"
@@ -191,20 +190,17 @@ def get_nonpixel_observations(self, env_states, observations):
             # todo: update observations here
 
-    def get_dones_and_rewards_and_fix_actions(self, env_states, actions):
+    def get_dones_rewards_resets(self, env_states, actions):
         for (b, state) in enumerate(env_states):
-            if state.did_finish_episode_and_reset:
+            max_episode_len = 500
+            if state.did_collide or state.episode_step_idx >= max_episode_len:
                 self.dones[b] = True
-                self.rewards[b] = 100.0 if state.finished_episode_success else 0.0
-                # The previously-computed action shouldn't be used for the next step because
-                # it was computed from a stale observation from the just-ended episode.
-                for i in range(self.action_dim):
-                    actions[b * self.action_dim + i] = 0.0
-            else:
-                self.dones[b] = False
-                self.rewards[b] = -1.0 if state.did_collide else 0.0
-
-        return actions
+                # for now, if we want to reset an env, we must reset it to the same
+                # episode index (this is a temporary restriction)
+                self.resets[b] = state.episode_idx
+                self.rewards[b] = 100.0 if not state.did_collide else 0.0
+            else:
+                self.resets[b] = -1
 
     def reset(self):
         r"""Reset all the vectorized environments
@@ -216,10 +212,11 @@ def reset(self):
         self._bsim.start_render()
         env_states = self._bsim.get_environment_states()
         self.get_nonpixel_observations(env_states, self._observations)
-        self._bsim.wait_for_frame()
+        self._bsim.wait_render()
 
         self.rewards = [0.0] * self._num_envs
         self.dones = [True] * self._num_envs
+        self.resets = [-1] * self._num_envs
 
         return self._observations
@@ -236,17 +233,18 @@ def async_step(
         assert len(actions_flat_list) == self.num_envs * self.action_dim
         if self._bsim:
             if self._config.OVERLAP_PHYSICS:
-                self._bsim.wait_async_step_physics()
+                self._bsim.wait_step_physics_or_reset()
                 self._bsim.start_render()
                 env_states = self._bsim.get_environment_states()
-                # todo: decide if Python gets a copy of env_states vs direct access to C++ memory,
-                # and then decide whether to start async physics step *before* processing env_states
-                actions_flat_list = self.get_dones_and_rewards_and_fix_actions(env_states, actions_flat_list)
-                self._bsim.start_async_step_physics(actions_flat_list)
+                self.get_dones_rewards_resets(env_states, actions_flat_list)
+                self._bsim.start_step_physics_or_reset(actions_flat_list, self.resets)
             else:
-                self._bsim.set_actions(actions_flat_list)  # note possible wasted (unused) actions
-                self._bsim.auto_reset_or_step_physics()
+                # note: this path is untested
+                self._bsim.start_step_physics_or_reset(actions_flat_list, self.resets)
+                self._bsim.wait_step_physics_or_reset()
                 self._bsim.start_render()
+                env_states = self._bsim.get_environment_states()
+                self.get_dones_rewards_resets(env_states, actions_flat_list)
 
     @profiling_wrapper.RangeContext("wait_step")
     def wait_step(self) -> List[Any]:
 
         # this updates self._observations["depth"] (and rgb) tensors
         # perf todo: ensure we're getting here before rendering finishes (issue a warning otherwise)
-        self._bsim.wait_for_frame()
+        self._bsim.wait_render()
 
         # these are "one frame behind" like the observations (i.e. computed from
         # the same earlier env state).
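Note on the per-env termination logic introduced in PATCH 06: get_dones_rewards_resets folds "done", reward, and reset bookkeeping into a single pass over the batched environment states. Below is a minimal, self-contained Python sketch of that bookkeeping. EnvState is a hypothetical stand-in for the C++ environment-state object exposed by BatchedSimulator; the field names (did_collide, episode_step_idx, episode_idx), the 500-step cap, and the 0/100 rewards come from the hunk above, but the dataclass itself and the return-tuple shape are illustrative assumptions, not the repo's API.

    from dataclasses import dataclass
    from typing import List, Tuple

    MAX_EPISODE_LEN = 500  # hard-coded as max_episode_len in the patch

    @dataclass
    class EnvState:
        # Stand-in for the C++ env state; only the fields the function reads.
        did_collide: bool
        episode_step_idx: int
        episode_idx: int

    def get_dones_rewards_resets(
        env_states: List[EnvState],
    ) -> Tuple[List[bool], List[float], List[int]]:
        n = len(env_states)
        dones = [False] * n
        rewards = [0.0] * n
        # resets[b] == -1 means "do not reset env b"; any other value is the
        # episode index to reset to (the patch restricts it to the same episode).
        resets = [-1] * n
        for b, state in enumerate(env_states):
            if state.did_collide or state.episode_step_idx >= MAX_EPISODE_LEN:
                dones[b] = True
                resets[b] = state.episode_idx
                # a collision ends the episode with 0 reward; reaching the
                # step cap without colliding pays 100, as in the hunk above
                rewards[b] = 0.0 if state.did_collide else 100.0
        return dones, rewards, resets

    # env 0 collided, env 1 hit the step cap, env 2 keeps running
    states = [EnvState(True, 12, 7), EnvState(False, 500, 3), EnvState(False, 40, 1)]
    print(get_dones_rewards_resets(states))
    # ([True, True, False], [0.0, 100.0, 0.0], [7, 3, -1])

The -1 sentinel matches how reset() initializes self.resets, and async_step then hands the resets list to start_step_physics_or_reset alongside the flat action list, so stepping and resetting happen in the same simulator call.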
From 2ce8c5d872d6f28766443611b54a8748cb415dc3 Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Wed, 30 Mar 2022 10:07:21 -0700
Subject: [PATCH 07/12] reverting changes to the artifact saving path

---
 habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
index c99ab00ada..1e6daeecce 100644
--- a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
+++ b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
@@ -1,5 +1,5 @@
-TENSORBOARD_DIR: "data/tb/gala_kinematic_ddppo"
-CHECKPOINT_FOLDER: "data/ckpt/gala_kinematic_ddppo"
+TENSORBOARD_DIR: "/checkpoint/eundersander/gala_kinematic/tb/gala_kinematic_ddppo"
+CHECKPOINT_FOLDER: "/checkpoint/eundersander/gala_kinematic/ckpt/gala_kinematic_ddppo"
 VIDEO_DIR: "../videos"
 REWARD_SCALE: 0.01
 NUM_CHECKPOINTS: 0

From cf3f750a75c09366e703fad71d856d955194f4be Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Wed, 30 Mar 2022 10:14:13 -0700
Subject: [PATCH 08/12] reverting change made by mistake

---
 configs/tasks/rearrangepick_replica_cad.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/tasks/rearrangepick_replica_cad.yaml b/configs/tasks/rearrangepick_replica_cad.yaml
index 853a359d16..113fbdd204 100644
--- a/configs/tasks/rearrangepick_replica_cad.yaml
+++ b/configs/tasks/rearrangepick_replica_cad.yaml
@@ -3,7 +3,7 @@ ENVIRONMENT:
 DATASET:
   TYPE: RearrangeDataset-v0
   SPLIT: train
-  DATA_PATH: data/datasets/rearrange_pick/replica_cad/v0/rearrange_pick_replica_cad_v0/{split}/{split}_counter_L_analysis_5000_500.json.gz
+  DATA_PATH: data/datasets/rearrange_pick/replica_cad/v0/{split}/{split}_counter_L_analysis_5000_500.json.gz
   SCENES_DIR: "data/replica_cad/"
 TASK:
   TYPE: RearrangePickTask-v0

From d2396c7a1e75f157044e601cd867ffa41d4557bf Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Wed, 30 Mar 2022 10:50:35 -0700
Subject: [PATCH 09/12] removing unused code

---
 habitat/core/batched_env.py        | 26 ++------------------------
 habitat_baselines/rl/ppo/policy.py |  6 ++++--
 2 files changed, 6 insertions(+), 26 deletions(-)

diff --git a/habitat/core/batched_env.py b/habitat/core/batched_env.py
index d3107c28be..5a1275959a 100644
--- a/habitat/core/batched_env.py
+++ b/habitat/core/batched_env.py
@@ -4,18 +4,15 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
-from turtle import shape
 from typing import (
     Dict,
     Optional,
     List,
     Any,
 )
-import uuid
 
 from gym.spaces import Box
 import numpy as np
-import quaternion
 from gym import spaces
 from habitat.utils import profiling_wrapper
 from collections import OrderedDict
@@ -23,11 +20,6 @@
 import torch  # isort:skip # noqa: F401 must import torch before importing bps_pytorch
 
-from habitat.utils.geometry_utils import (
-    quaternion_rotate_vector,
-)
-from habitat.tasks.utils import cartesian_to_polar
-
 
 class BatchedEnv:
     r"""Todo
@@ -126,10 +118,6 @@ def __init__(
         else:
             observations["rgb"] = torch.rand([self._num_envs, sensor_height, sensor_width, 3], dtype=torch.float32) * 255
             observations["depth"] = torch.rand([self._num_envs, sensor_height, sensor_width, 1], dtype=torch.float32) * 255
-        # if include_gps:
-        #     observations["gps"] = torch.empty([self._num_envs, 3], dtype=torch.float32)
-        # if include_compass:
-        #     observations["compass"] = torch.empty([self._num_envs, 3], dtype=torch.float32)
         if self.include_point_goal_gps_compass:
             observations[self.gps_compass_key] = torch.empty([self._num_envs, gps_compass_sensor_shape], dtype=torch.float32)
         if self.include_ee_pos:
@@ -137,8 +125,6 @@ def __init__(
         self._observations = observations
 
-        # print('observations["rgb"].shape: ', observations["rgb"].shape)
-
         self._is_closed = False
 
         num_other_actions = 1  # doAttemptGrip/doAttemptDrop
@@ -175,14 +161,14 @@ def __init__(
         obs_dict["depth"] = depth_obs
         if self.include_point_goal_gps_compass:
             obs_dict[self.gps_compass_key] = spaces.Box(
-                low=0.0,
+                low=-np.inf,
                 high=np.inf,  # todo: investigate depth min/max
                 shape=(gps_compass_sensor_shape,),
                 dtype=np.float32,
             )
         if self.include_ee_pos:
             obs_dict[self.ee_pos_key] = spaces.Box(
-                low=0.0,
+                low=-np.inf,
                 high=np.inf,  # todo: investigate depth min/max
                 shape=(ee_pos_shape,),
                 dtype=np.float32,
             )
@@ -233,14 +219,6 @@ def get_nonpixel_observations(self, env_states, observations):
                 robot_pos = state.robot_position
                 robot_yaw = state.robot_yaw
 
-                # direction_vector = state.goal_pos - robot_pos
-                # source_rotation = quaternion.quaternion(0, 0, 0, 0) #TODO:get actual rotation
-                # direction_vector_agent = quaternion_rotate_vector(
-                #     source_rotation.inverse(), direction_vector
-                # )
-                # rho, phi = cartesian_to_polar(
-                #     -direction_vector_agent[2], direction_vector_agent[0]
-                # )
                 observations[self.gps_compass_key] [b, 0] = robot_pos[0]
                 observations[self.gps_compass_key] [b, 1] = robot_pos[1]
                 observations[self.gps_compass_key] [b, 2] = robot_pos[2]
diff --git a/habitat_baselines/rl/ppo/policy.py b/habitat_baselines/rl/ppo/policy.py
index f60f7c9ffc..ad977de6e7 100644
--- a/habitat_baselines/rl/ppo/policy.py
+++ b/habitat_baselines/rl/ppo/policy.py
@@ -202,7 +202,7 @@ def __init__(
     ):
         super().__init__()
 
-        #### Manually adding sensors in there
+        #### [gala_kinematic] Manually adding sensors in there
         self.observation_space = observation_space
         self._n_input_goal = 0
         if "pointgoal_with_gps_compass" in self.observation_space.spaces:
@@ -229,6 +229,7 @@ def __init__(
         #         goal_observation_space, hidden_size
         #     )
         #     self._n_input_goal = hidden_size
+        #### [gala_kinematic] End of manually adding sensors in there
 
         self._hidden_size = hidden_size
@@ -254,7 +255,7 @@ def num_recurrent_layers(self):
         return self.state_encoder.num_recurrent_layers
 
     def forward(self, observations, rnn_hidden_states, prev_actions, masks):
-        #### Manually adding sensors in there
+        #### [gala_kinematic] Manually adding sensors in there
         x = []
         if "pointgoal_with_gps_compass" in self.observation_space.spaces:
             self._n_input_goal += 4
@@ -273,6 +274,7 @@ def forward(self, observations, rnn_hidden_states, prev_actions, masks):
         #     target_encoding = self.goal_visual_encoder({"rgb": image_goal})
 
         # x = [target_encoding]
+        #### [gala_kinematic] End of manually adding sensors in there
         if IntegratedPointGoalGPSAndCompassSensor.cls_uuid in observations:
             target_encoding = observations[
                 IntegratedPointGoalGPSAndCompassSensor.cls_uuid

From e2fce98c2f2df44cee964c7478f13ccc34601102 Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Wed, 30 Mar 2022 11:01:35 -0700
Subject: [PATCH 10/12] fixing typo

---
 habitat_baselines/rl/ppo/policy.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/habitat_baselines/rl/ppo/policy.py b/habitat_baselines/rl/ppo/policy.py
index ad977de6e7..dd58e76d56 100644
--- a/habitat_baselines/rl/ppo/policy.py
+++ b/habitat_baselines/rl/ppo/policy.py
@@ -258,7 +258,6 @@ def forward(self, observations, rnn_hidden_states, prev_actions, masks):
         #### [gala_kinematic] Manually adding sensors in there
         x = []
         if "pointgoal_with_gps_compass" in self.observation_space.spaces:
-            self._n_input_goal += 4
             x += [observations["pointgoal_with_gps_compass"]]
         if "ee_pos" in self.observation_space.spaces:
             x += [observations["ee_pos"]]

From 64260b4e260088be2392ea518bd770a0737ab538 Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Wed, 30 Mar 2022 11:19:19 -0700
Subject: [PATCH 11/12] making policy code simpler

---
 habitat_baselines/rl/ppo/policy.py | 39 +++++++++++-------------------
 1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/habitat_baselines/rl/ppo/policy.py b/habitat_baselines/rl/ppo/policy.py
index dd58e76d56..1968d1c421 100644
--- a/habitat_baselines/rl/ppo/policy.py
+++ b/habitat_baselines/rl/ppo/policy.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
 import abc
+from re import X
 
 import torch
 from gym import spaces
@@ -204,11 +205,11 @@ def __init__(
         #### [gala_kinematic] Manually adding sensors in there
         self.observation_space = observation_space
-        self._n_input_goal = 0
+        self._n_state = 0
         if "pointgoal_with_gps_compass" in self.observation_space.spaces:
-            self._n_input_goal += 4
+            self._n_state += 4
         if "ee_pos" in self.observation_space.spaces:
-            self._n_input_goal += 3
+            self._n_state += 3
 
         # if (
         #     IntegratedPointGoalGPSAndCompassSensor.cls_uuid
@@ -237,7 +238,7 @@ def __init__(
             self.visual_encoder = SimpleCNN(observation_space, hidden_size)
 
         self.state_encoder = build_rnn_state_encoder(
-            (0 if self.is_blind else self._hidden_size) + self._n_input_goal,
+            (0 if self.is_blind else self._hidden_size) + self._n_state,
             self._hidden_size,
         )
@@ -254,42 +255,30 @@ def num_recurrent_layers(self):
         return self.state_encoder.num_recurrent_layers
 
     def forward(self, observations, rnn_hidden_states, prev_actions, masks):
         #### [gala_kinematic] Manually adding sensors in there
-        x = []
+        x = [self.visual_encoder(observations)]
         if "pointgoal_with_gps_compass" in self.observation_space.spaces:
             x += [observations["pointgoal_with_gps_compass"]]
         if "ee_pos" in self.observation_space.spaces:
             x += [observations["ee_pos"]]
-
         # if IntegratedPointGoalGPSAndCompassSensor.cls_uuid in observations:
         #     target_encoding = observations[
         #         IntegratedPointGoalGPSAndCompassSensor.cls_uuid
         #     ]
+
         # elif PointGoalSensor.cls_uuid in observations:
         #     target_encoding = observations[PointGoalSensor.cls_uuid]
         # elif ImageGoalSensor.cls_uuid in observations:
         #     image_goal = observations[ImageGoalSensor.cls_uuid]
         #     target_encoding = self.goal_visual_encoder({"rgb": image_goal})
-
-        # x = [target_encoding]
+        # else:
+        #     target_encoding = None
+        # if not self.is_blind:
+        #     perception_embed = self.visual_encoder(observations)
+        #     x = [perception_embed]
+        # if target_encoding is not None:
+        #     x += [target_encoding]
         #### [gala_kinematic] End of manually adding sensors in there
-        if IntegratedPointGoalGPSAndCompassSensor.cls_uuid in observations:
-            target_encoding = observations[
-                IntegratedPointGoalGPSAndCompassSensor.cls_uuid
-            ]
-
-        elif PointGoalSensor.cls_uuid in observations:
-            target_encoding = observations[PointGoalSensor.cls_uuid]
-        elif ImageGoalSensor.cls_uuid in observations:
-            image_goal = observations[ImageGoalSensor.cls_uuid]
-            target_encoding = self.goal_visual_encoder({"rgb": image_goal})
-        else:
-            target_encoding = None
-        if not self.is_blind:
-            perception_embed = self.visual_encoder(observations)
-            x = [perception_embed]
-        if target_encoding is not None:
-            x += [target_encoding]
 
         x_out = torch.cat(x, dim=1)
         x_out, rnn_hidden_states = self.state_encoder(

From 7c21fe5e9e5dc9e394b784edb816433c68ef7e88 Mon Sep 17 00:00:00 2001
From: vincentpierre
Date: Wed, 30 Mar 2022 15:17:11 -0700
Subject: [PATCH 12/12] adding missing quotes

---
 habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
index 1e6daeecce..295ba9aa55 100644
--- a/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
+++ b/habitat_baselines/config/rearrange/gala_kinematic_ddppo.yaml
@@ -6,9 +6,9 @@ NUM_CHECKPOINTS: 0
 BATCHED_ENV: True
 OVERLAP_PHYSICS: True
 SAVE_VIDEOS_INTERVAL: 500
-NUM_UPDATES: 61
+NUM_UPDATES: 6100
 NUM_ENVIRONMENTS: 512
-SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR", EE_POS_SENSOR]
+SENSORS: ["DEPTH_SENSOR", "RGB_SENSOR", "POINTGOAL_WITH_GPS_COMPASS_SENSOR", "EE_POS_SENSOR"]
 SIMULATOR:
   AGENTS: ['AGENT_0']
   AGENT_0: