Skip to content

Commit

Permalink
Merge branch 'crossQ' of https://github.com/BY571/rl into crossQ
Browse files Browse the repository at this point in the history
  • Loading branch information
vmoens committed Jul 8, 2024
2 parents 68a1a9f + 35c7a98 commit c04eb3b
Showing 1 changed file with 3 additions and 11 deletions.
14 changes: 3 additions & 11 deletions torchrl/objectives/crossq.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,12 +521,15 @@ def actor_loss(
log_prob = dist.log_prob(a_reparm)

td_q = tensordict.select(*self.qvalue_network.in_keys, strict=False)
self.qvalue_network.eval()
td_q.set(self.tensor_keys.action, a_reparm)
td_q = self._vmap_qnetworkN0(
td_q,
self._cached_detached_qvalue_params,
)

min_q = td_q.get(self.tensor_keys.state_action_value).min(0)[0].squeeze(-1)
self.qvalue_network.train()

if log_prob.shape != min_q.shape:
raise RuntimeError(
Expand All @@ -550,17 +553,6 @@ def qvalue_loss(
next_tensordict.set(self.tensor_keys.action, next_action)
next_sample_log_prob = next_dist.log_prob(next_action)

# TODO: separate forward pass seems faster than the combined.
# next_state_action_value = self._vmap_qnetworkN0(
# next_tensordict.select(*self.qvalue_network.in_keys, strict=False),
# self.qvalue_network_params,
# ).get(self.tensor_keys.state_action_value)

# current_state_action_value = self._vmap_qnetworkN0(
# tensordict.select(*self.qvalue_network.in_keys, strict=False),
# self.qvalue_network_params,
# ).get(self.tensor_keys.state_action_value)

combined = torch.cat(
[
tensordict.select(*self.qvalue_network.in_keys, strict=False),
Expand Down

0 comments on commit c04eb3b

Please sign in to comment.