[Bug Report] MARL Quadcopter Reset function problem #1625

JulienHansen · 2024-12-24T23:03:13Z

JulienHansen
Dec 24, 2024

Hi everyone,

I'm not sure why, but at some point during my training, the pre_physic_step function starts generating NaN values. I've checked and rechecked the code, and the values seem to appear out of nowhere. I checked for division by zero, but none of my values are close to zero. Has anyone experienced this behavior before?

I might have misunderstood how the MARL workflow is intended to be designed, as I'm new to experimenting with it. My goal is to create a multi-agent formation task for drones. Currently, my drones spawn, but they disappear as soon as the NaN values appear.

Here is my first attempt at designing the task. Many functions are still incomplete, as I’m just trying to figure out why the actions are producing NaN values. Could the issue come from the reset_index function?

Thanks for your help!

class formationEnv(DirectMARLEnv):
    cfg: formationEnvCfg

    def __init__(self, cfg: formationEnvCfg, render_mode: str | None = None, **kwargs):
        super().__init__(cfg, render_mode, **kwargs)
        print("Initialization start")

        # Initialize robots, cameras, and terrain for multi-agent setup
        self._robots = [self.scene[f"robot_{i+1}"] for i in range(len(self.cfg.possible_agents))]
        print(f"Robots initialized: {self._robots}")

        self.robot1 = self.scene["robot_1"]
        self.robot2 = self.scene["robot_2"]
        self.robot3 = self.scene["robot_3"]
        self.robot4 = self.scene["robot_4"]
        self.robot5 = self.scene["robot_5"]
        self.robot6 = self.scene["robot_6"]
        self._terrain = self.scene["terrain"]

        print("Terrain and robots set")

        num_agents = len(self.cfg.possible_agents)
        self._actions = torch.zeros(self.num_envs, num_agents, self.cfg.individual_action_space, device=self.device)
        self._thrust = torch.zeros(self.num_envs, num_agents, 3, device=self.device)
        self._moment = torch.zeros(self.num_envs, num_agents, 3, device=self.device)

        print("Actions, thrust, and moments initialized")

        # Logging for each robot
        self._episode_sums = {
            key: torch.zeros(self.num_envs, num_agents, dtype=torch.float, device=self.device)
            for key in ["lin_vel", "ang_vel", "distance_to_goal"]
        }

        print("Episode sums initialized")

        self._body_id1 = self.robot1.find_bodies("body")[0]
        self._body_id2 = self.robot2.find_bodies("body")[0]
        self._body_id3 = self.robot3.find_bodies("body")[0]
        self._body_id4 = self.robot4.find_bodies("body")[0]
        self._body_id5 = self.robot5.find_bodies("body")[0]
        self._body_id6 = self.robot6.find_bodies("body")[0]

        print("Body IDs set for all robots")

        self._robot_mass = self.robot1.root_physx_view.get_masses()[0].sum()
        self._gravity_magnitude = torch.tensor(self.sim.cfg.gravity, device=self.device).norm()
        self._robot_weight = (self._robot_mass * self._gravity_magnitude).item()

        print(f"Gravity and robot weight computed: {self._robot_weight}")

        # Debug visualization
        self.set_debug_vis(self.cfg.debug_vis)
        print("Debug visualization set")

    def _get_observations(self) -> dict[str, torch.Tensor]:
        observations = {}
        for i, robot in enumerate(self._robots):
            state = torch.zeros(60, device=self.device)  # Dummy state
            if torch.any(torch.isnan(state)):
                print(f"Warning: NaN detected in observation for robot_{i+1}. Replacing with zeros.")
                state = torch.zeros_like(state)
            observations[f"robot_{i+1}"] = state
        return observations

    def _get_states(self) -> torch.Tensor:
        states = torch.zeros(self.num_envs, self.cfg.state_space, device=self.device)
        return states

    def _get_rewards(self) -> dict[str, torch.Tensor]:
        rewards = torch.zeros(self.num_envs, len(self._robots), device=self.device)
        if torch.any(torch.isnan(rewards)):
            print("Warning: NaN detected in rewards. Replacing with zeros.")
            rewards = torch.zeros_like(rewards)
        rewards_dict = {
            f"robot_{i+1}": rewards[:, i]
            for i in range(len(self._robots))
        }
        return rewards_dict

    def _get_dones(self) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]:
        time_out = self.episode_length_buf >= self.max_episode_length - 1
        terminated = {}
        time_outs = {}

        for i, agent_name in enumerate(self.cfg.possible_agents):
            low_altitude = self._robots[i].data.root_pos_w[:, 2] < 0.1
            high_altitude = self._robots[i].data.root_pos_w[:, 2] > 100
            altitude_violation = torch.logical_or(low_altitude, high_altitude)

            x_constraint = self._robots[i].data.root_pos_w[:, 0] < -22

            done_condition = torch.logical_or(altitude_violation, x_constraint)
            terminated[agent_name] = done_condition
            time_outs[agent_name] = time_out

        return terminated, time_outs
    '''
    def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
        print("Pre-physics step")
        for i, action_key in enumerate(actions.keys()):
            action = actions[action_key]
            print(f"Action for {action_key}: {action}")

            temp_act = action.clone().clamp(-1.0, 1.0)
            self._actions[:, i, :] = temp_act
            self._thrust[:, i, 2] = (self.cfg.thrust_to_weight * self._robot_weight * (temp_act[:, 0] + 1.0) / 2.0)
            self._moment[:, i, :] = self.cfg.moment_scale * temp_act[:, 1:]

            print(f"Thrust for {action_key}: {self._thrust[:, i, 2]}")
            print(f"Moment for {action_key}: {self._moment[:, i, :]}")
    '''
    def _pre_physics_step(self, actions: dict[str, torch.tensor]) -> None:
        action = actions["robot_1"]
        temp_act = action.clone().clamp(-1.0, 1.0)
        self._actions[:, 0, :] = temp_act
        self._thrust[:, 0, 2] = (self.cfg.thrust_to_weight * self._robot_weight * (temp_act[:, 0] + 1.0) / 2.0)
        self._moment[:, 0, :] = self.cfg.moment_scale * temp_act[:, 1:]
        print("Pre-physics step")
        print("Trust:",self._thrust[:, 0, 2])
        print("Moment:",self._moment[:, 0, :])
        print("Actions",action)
        print("Pre-physics step done")

    def _apply_action(self) -> None:
        self.robot1.set_external_force_and_torque(self._thrust[:, self._body_id1, :], self._moment[:, self._body_id1, :], body_ids=self._body_id1)
        self.robot2.set_external_force_and_torque(self._thrust[:, self._body_id2, :], self._moment[:, self._body_id2, :], body_ids=self._body_id2)
        self.robot3.set_external_force_and_torque(self._thrust[:, self._body_id3, :], self._moment[:, self._body_id3, :], body_ids=self._body_id3)
        self.robot4.set_external_force_and_torque(self._thrust[:, self._body_id4, :], self._moment[:, self._body_id4, :], body_ids=self._body_id4)
        self.robot5.set_external_force_and_torque(self._thrust[:, self._body_id5, :], self._moment[:, self._body_id5, :], body_ids=self._body_id5)
        self.robot6.set_external_force_and_torque(self._thrust[:, self._body_id6, :], self._moment[:, self._body_id6, :], body_ids=self._body_id6)

    
    def _reset_idx(self, env_ids: Sequence[int] | torch.Tensor | None):
        print("Reset start")
        if env_ids is None or len(env_ids) == self.num_envs:
            env_ids = self.robot1._ALL_INDICES

        super()._reset_idx(env_ids)

        for i, agent_name in enumerate(self.cfg.possible_agents):

            joint_pos = self._robots[i].data.default_joint_pos[env_ids]
            joint_vel = self._robots[i].data.default_joint_vel[env_ids]
            default_root_state = self._robots[i].data.default_root_state[env_ids]
            default_root_state[:, :3] += self._terrain.env_origins[env_ids]
            self._robots[i].write_root_pose_to_sim(default_root_state[:, :7], env_ids)
            self._robots[i].write_root_velocity_to_sim(default_root_state[:, 7:], env_ids)
            self._robots[i].write_joint_state_to_sim(joint_pos, joint_vel, None, env_ids)
        print("Reset done")

and below the values of my actions, and i have check again but the others functions don't modify the actions values

Pre-physics step
Trust: tensor([0.0665, 0.0665], device='cuda:0')
Moment: tensor([[-0.0083, -0.0009,  0.0012],
        [-0.0083, -0.0009,  0.0012]], device='cuda:0')
Actions tensor([[-0.7469, -0.8330, -0.0936,  0.1228]], device='cuda:0')
Pre-physics step done
Pre-physics step
Trust: tensor([nan, nan], device='cuda:0')
Moment: tensor([[nan, nan, nan],
        [nan, nan, nan]], device='cuda:0')
Actions tensor([[nan, nan, nan, nan]], device='cuda:0')
Pre-physics step done

Answered by JulienHansen

Feb 3, 2025

I just found the error , the reset function was not correct and was causing Nan value to be generated, here is a corrected snippet of the corrected version :

    def _reset_idx(self, env_ids: Sequence[int] | torch.Tensor | None):
        print("Reset start")
        if env_ids is None:
            env_ids = self._robot1._ALL_INDICES

        if len(env_ids) == self.num_envs:
            self.episode_length_buf = torch.randint_like(self.episode_length_buf, high=int(self.max_episode_length))

        # Reset robots
        self._robot1.reset(env_ids)
        self._robot2.reset(env_ids)
        self._robot3.reset(env_ids)
        self._robot4.reset(env_ids)
        self._robot5.reset(env_ids)…

View full answer

RandomOakForest · 2025-01-04T00:30:44Z

RandomOakForest
Jan 4, 2025
Maintainer

Thank you for posting this. This may be related to the batch sizes available per GPU and learners assigned to them. I'm moving this into a discussion for the team to follow up. In the meantime, could you try less robots and ensure your batch sizes are of reasonable size?

3 replies

JulienHansen Jan 6, 2025
Author

I tried different batch sizes, but the issue persisted. After further testing, it seems that the problem occurs when the number of environments is greater than 1. For a single environment with 6 drones, everything runs fine. However, when I try with 5 environments or even 2, the issue appears after 1 or 2 seconds, and NaN values start to show up.

Here is the value of my actions after going through every function

Pre-physics step
Actions tensor([[-0.7469, -0.8330, -0.0936,  0.1228]], device='cuda:0')
Pre-physics step done
Apply action start
Actions in apply action tensor([-0.7469, -0.8330, -0.0936,  0.1228], device='cuda:0')
Apply action done
Apply action start
Actions in apply action tensor([-0.7469, -0.8330, -0.0936,  0.1228], device='cuda:0')
Apply action done
Get dones start
Actions in get done tensor([-0.7469, -0.8330, -0.0936,  0.1228], device='cuda:0')
Get dones done
Get rewards start
Actions in get reward tensor([-0.7469, -0.8330, -0.0936,  0.1228], device='cuda:0')
Get rewards done
Get observations start
Actions in get obs tensor([-0.7469, -0.8330, -0.0936,  0.1228], device='cuda:0')
Get observations done
  0%|                                                                                                                                               | 16/36000 [00:00<39:04, 15.35it/s]Pre-physics step
Actions tensor([[nan, nan, nan, nan]], device='cuda:0')
Pre-physics step done
Apply action start
Actions in apply action tensor([nan, nan, nan, nan], device='cuda:0')
Apply action done
Apply action start
Actions in apply action tensor([nan, nan, nan, nan], device='cuda:0')
Apply action done
Get dones start
Actions in get done tensor([nan, nan, nan, nan], device='cuda:0')
Get dones done
Get rewards start
Actions in get reward tensor([nan, nan, nan, nan], device='cuda:0')
Get rewards done
Get observations start
Actions in get obs tensor([nan, nan, nan, nan], device='cuda:0')
Get observations done

i don't get it it doesn't seems to come from one of my functions.

JulienHansen Jan 7, 2025
Author

With more testing from all the functions everything seems to work fined with only 1 environment the drone don't seems to have any kind of issues thus i was wondering when i have multiple environment if i was doing something wrong either on my tensors definitions

        num_agents = len(self.cfg.possible_agents)
        self._actions = torch.zeros(self.num_envs, num_agents, self.cfg.individual_action_space, device=self.device)
        self._thrust = torch.zeros(self.num_envs, num_agents, 3, device=self.device)
        self._moment = torch.zeros(self.num_envs, num_agents, 3, device=self.device)

Or in my pre physic logic:

    def _pre_physics_step(self, actions: dict[str, torch.Tensor]) -> None:
        print("Pre-physics step")
        for i, action_key in enumerate(actions.keys()):
            action = actions[action_key]
            print(f"Action for {action_key}: {action}")

            temp_act = action.clone().clamp(-1.0, 1.0)
            self._actions[:, i, :] = temp_act
            self._thrust[:, i, 2] = (self.cfg.thrust_to_weight * self._robot_weight * (temp_act[:, 0] + 1.0) / 2.0)
            self._moment[:, i, :] = self.cfg.moment_scale * temp_act[:, 1:]

Because i only try to extend the actual quadcopter configuration available to a MARL Set-up. I really can't see/understand why it works with 1 envs but explode otherwise

Rishi-V Jan 8, 2025

I checked over your code and your _pre_physics_step and _apply_action look correct.
I just realized though that you are not defining the _setup_scene function. Could you add the _setup_scene function (should be fairly straightforward by following the examples) and see if that resolves you issue?

Rishi-V · 2025-01-04T06:50:31Z

Rishi-V
Jan 4, 2025

I've been playing around with a custom MARL set-up with two quadrupeds and I have not encountered any NaN issues. Could you try pdb-ing or print out every line to determine where the first NaN is occurring?

3 replies

JulienHansen Jan 6, 2025
Author

after a certain iteration as shown in muy reply above the Nan occured, i was wondering what algorithm/library are you using to train your set-up ?
Thanks in advance

Rishi-V Jan 6, 2025

I've been using skrl with PPO and IPPO. Note I had to make comment out some lines in SKRL to get IPPO working (see Toni-SM/skrl#238 (comment)).

JulienHansen Jan 7, 2025
Author

thanks i will try with IPPO maybe it has something to do with MAPPO but i don't see what it would be the case

RandomOakForest · 2025-01-10T13:55:07Z

RandomOakForest
Jan 10, 2025
Maintainer

Thanks for following up, great suggestions @Rishi-V. @JulienHansen, let us know if still have this issue.

5 replies

JulienHansen Jan 14, 2025
Author

What i don't understand is that the Nan values are always generated after 16 steps

Get observations done
  0%|                                                                                                          | 16/36000 [00:00<41:09, 14.57it/s]Pre-physics step
Action for robot_1: tensor([[nan, nan, nan, nan]], device='cuda:0')
Action for robot_2: tensor([[nan, nan, nan, nan]], device='cuda:0')
Action for robot_3: tensor([[nan, nan, nan, nan]], device='cuda:0')
Action for robot_4: tensor([[nan, nan, nan, nan]], device='cuda:0')
Action for robot_5: tensor([[nan, nan, nan, nan]], device='cuda:0')
Action for robot_6: tensor([[nan, nan, nan, nan]], device='cuda:0')

Could it come from the MAPPO skrl config file ?

RandomOakForest Jan 23, 2025
Maintainer

Interesting. Could you please check that if you are resetting position, that velocity is also reset? also that previous PhsyX states don’t carry over? Thanks.

JulienHansen Jan 24, 2025
Author

I just check and i'm wondering, in MARL envs is the step function necessary ? In the documentation it is said that:

"For vectorized environments, it is recommended to only call the [reset()] method once before the first call to step i.e. after the 
environment is created. After that, the step function handles  the reset of terminated sub-environments. This is because the 
simulator does not support resetting individual sub-environments  in a vectorized environment."

And that's what i observe, my reset function is only call at the beginning of my simulation and then never again. I observe that the 16th iteration step correspond to the moment the drone collide with the ground so i think it should be the iteration where the environments are reset. Following that i have some questions:

Is the step function necessary for MARL envionment ? If yes, why does it work with 1 environment and why does the shadow_hand example in the repo not needing it ?
Could it come from my reset function ? I try to generalise the reset function that was given in the quadcopter single agent case

    def _reset_idx(self, env_ids: Sequence[int] | torch.Tensor | None):
        print("Reset start")
        if env_ids is None:
            env_ids = self._robot1._ALL_INDICES

        if len(env_ids) == self.num_envs:
            # Spread out the resets to avoid spikes in training when many environments reset at a similar time
            self.episode_length_buf = torch.randint_like(self.episode_length_buf, high=int(self.max_episode_length))

        # Reset all robots in their respective environments
        self._robot1.reset(env_ids)
        self._robot2.reset(env_ids)
        self._robot3.reset(env_ids)
        self._robot4.reset(env_ids)
        self._robot5.reset(env_ids)
        self._robot6.reset(env_ids)

        super()._reset_idx(env_ids)
        self._actions[env_ids] = 0.0

        # Reset states and write them to simulation for each robot
        print("============================================")
        print("Resetting robot 1")
        joint_pos1 = self._robot1.data.default_joint_pos[env_ids]
        joint_vel1 = self._robot1.data.default_joint_vel[env_ids]
        default_root_state1 = self._robot1.data.default_root_state[env_ids]
        default_root_state1[:, :3] += self._terrain.env_origins[env_ids]
        self._robot1.write_root_pose_to_sim(default_root_state1[:, :7], env_ids)
        self._robot1.write_root_velocity_to_sim(default_root_state1[:, 7:], env_ids)
        self._robot1.write_joint_state_to_sim(joint_pos1, joint_vel1, None, env_ids)

        print("Resetting robot 2")
        joint_pos2 = self._robot2.data.default_joint_pos[env_ids]
        joint_vel2 = self._robot2.data.default_joint_vel[env_ids]
        default_root_state2 = self._robot2.data.default_root_state[env_ids]
        default_root_state2[:, :3] += self._terrain.env_origins[env_ids]
        self._robot2.write_root_pose_to_sim(default_root_state2[:, :7], env_ids)
        self._robot2.write_root_velocity_to_sim(default_root_state2[:, 7:], env_ids)
        self._robot2.write_joint_state_to_sim(joint_pos2, joint_vel2, None, env_ids)
``
Of course i did the same for the other robot, i also tried a version more compact but for debugging simplicity i made this one. 
Thanks in advance i think that's the source of my problem and i don't really picture the differences between the step function and reset function.

JulienHansen Feb 3, 2025
Author

I just found the error , the reset function was not correct and was causing Nan value to be generated, here is a corrected snippet of the corrected version :

    def _reset_idx(self, env_ids: Sequence[int] | torch.Tensor | None):
        print("Reset start")
        if env_ids is None:
            env_ids = self._robot1._ALL_INDICES

        if len(env_ids) == self.num_envs:
            self.episode_length_buf = torch.randint_like(self.episode_length_buf, high=int(self.max_episode_length))

        # Reset robots
        self._robot1.reset(env_ids)
        self._robot2.reset(env_ids)
        self._robot3.reset(env_ids)
        self._robot4.reset(env_ids)
        self._robot5.reset(env_ids)
        self._robot6.reset(env_ids)

        super()._reset_idx(env_ids)
        self._actions[env_ids] = 0.0

        # For each robot, clone default states and add the environment origins (moved to the proper device)
        print("============================================")
        print("Resetting robot 1")
        joint_pos1 = self._robot1.data.default_joint_pos[env_ids]
        joint_vel1 = self._robot1.data.default_joint_vel[env_ids]
        default_root_state1 = self._robot1.data.default_root_state[env_ids].clone()
        default_root_state1[:, :3] += self._terrain.env_origins[env_ids].to(self.device)
        self._robot1.write_root_pose_to_sim(default_root_state1[:, :7], env_ids)
        self._robot1.write_root_velocity_to_sim(default_root_state1[:, 7:], env_ids)
        self._robot1.write_joint_state_to_sim(joint_pos1, joint_vel1, None, env_ids)

Sorry for the time taken and thank you for your help !

Answer selected by JulienHansen

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[Bug Report] MARL Quadcopter Reset function problem #1625

{{title}}

Replies: 3 comments 11 replies

{{title}}

{{title}}

{{title}}

{{title}}

{{editor}}'s edit

{{editor}}'s edit

{{title}}

{{title}}

{{title}}

{{title}}

{{title}}

{{title}}

{{title}}

{{title}}

{{title}}

Select a reply

[Bug Report] MARL Quadcopter Reset function problem #1625

JulienHansen Dec 24, 2024

Replies: 3 comments · 11 replies

RandomOakForest Jan 4, 2025 Maintainer

JulienHansen Jan 6, 2025 Author

JulienHansen Jan 7, 2025 Author

Rishi-V Jan 8, 2025

Rishi-V Jan 4, 2025

JulienHansen Jan 6, 2025 Author

Rishi-V Jan 6, 2025

JulienHansen Jan 7, 2025 Author

RandomOakForest Jan 10, 2025 Maintainer

JulienHansen Jan 14, 2025 Author

RandomOakForest Jan 23, 2025 Maintainer

JulienHansen Jan 24, 2025 Author

JulienHansen Feb 3, 2025 Author

JulienHansen
Dec 24, 2024

Replies: 3 comments 11 replies

RandomOakForest
Jan 4, 2025
Maintainer

JulienHansen Jan 6, 2025
Author

JulienHansen Jan 7, 2025
Author

Rishi-V
Jan 4, 2025

JulienHansen Jan 6, 2025
Author

JulienHansen Jan 7, 2025
Author

RandomOakForest
Jan 10, 2025
Maintainer

JulienHansen Jan 14, 2025
Author

RandomOakForest Jan 23, 2025
Maintainer

JulienHansen Jan 24, 2025
Author

JulienHansen Feb 3, 2025
Author