fixed to work with modern Pytorch
jim committed Aug 25, 2020
1 parent 7e2f170 commit ead9d92
Showing 5 changed files with 12 additions and 12 deletions.
8 changes: 4 additions & 4 deletions 1_dqn.py
@@ -93,7 +93,7 @@ def run_episode(episode, env):
     while True:
         # env.render()
         action = select_action(FloatTensor([state]))
-        next_state, reward, done, _ = env.step(action[0, 0])
+        next_state, reward, done, _ = env.step(action[0, 0].item())

         # negative reward when attempt ends
         if done:
@@ -136,14 +136,14 @@ def learn():

     batch_state = Variable(torch.cat(batch_state))
     batch_action = Variable(torch.cat(batch_action))
-    batch_reward = Variable(torch.cat(batch_reward))
+    batch_reward = Variable(torch.cat(batch_reward)).unsqueeze(-1)
     batch_next_state = Variable(torch.cat(batch_next_state))

     # current Q values are estimated by NN for all actions
     current_q_values = model(batch_state).gather(1, batch_action)
     # expected Q values are estimated from actions which gives maximum Q value
     max_next_q_values = model(batch_next_state).detach().max(1)[0]
-    expected_q_values = batch_reward + (GAMMA * max_next_q_values)
+    expected_q_values = batch_reward + (GAMMA * max_next_q_values).unsqueeze(-1)

     # loss is measured from error between current and newly expected Q values
     loss = F.smooth_l1_loss(current_q_values, expected_q_values)
@@ -161,7 +161,7 @@ def botPlay():
         frame = env.render(mode='rgb_array')
         frames.append(frame)
         action = select_action(FloatTensor([state]))
-        next_state, reward, done, _ = env.step(action[0, 0])
+        next_state, reward, done, _ = env.step(action[0, 0].item())

         state = next_state
         steps += 1
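Why the .item() change is needed: since PyTorch 0.4, indexing a tensor such as action[0, 0] returns a zero-dimensional tensor rather than a plain Python number, while Gym's env.step() for a discrete action space expects an int. A minimal sketch of the difference, assuming a recent PyTorch build (the Q-values below are made up for illustration):

import torch

q_values = torch.tensor([[0.1, 0.9]])    # stand-in for the network output, shape [1, 2]
action = q_values.max(1)[1].view(1, 1)   # greedy action as a [1, 1] LongTensor

print(type(action[0, 0]))                # <class 'torch.Tensor'> (0-dim tensor)
print(type(action[0, 0].item()))         # <class 'int'>, which env.step() accepts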
8 changes: 4 additions & 4 deletions 2_double_dqn.py
@@ -95,7 +95,7 @@ def run_episode(episode, env):
     while True:
         # env.render()
         action = select_action(FloatTensor([state]))
-        next_state, reward, done, _ = env.step(action[0, 0])
+        next_state, reward, done, _ = env.step(action[0, 0].item())

         # negative reward when attempt ends
         if done:
@@ -138,14 +138,14 @@ def learn():

     batch_state = Variable(torch.cat(batch_state))
     batch_action = Variable(torch.cat(batch_action))
-    batch_reward = Variable(torch.cat(batch_reward))
+    batch_reward = Variable(torch.cat(batch_reward)).unsqueeze(-1)
     batch_next_state = Variable(torch.cat(batch_next_state))

     # current Q values are estimated by NN for all actions
     current_q_values = model(batch_state).gather(1, batch_action)
     # expected Q values are estimated from actions which gives maximum Q value
     max_next_q_values = target(batch_next_state).detach().max(1)[0]
-    expected_q_values = batch_reward + (GAMMA * max_next_q_values)
+    expected_q_values = batch_reward + (GAMMA * max_next_q_values).unsqueeze(-1)

     # loss is measured from error between current and newly expected Q values
     loss = F.smooth_l1_loss(current_q_values, expected_q_values)
@@ -163,7 +163,7 @@ def botPlay():
         frame = env.render(mode='rgb_array')
         frames.append(frame)
         action = select_action(FloatTensor([state]))
-        next_state, reward, done, _ = env.step(action[0, 0])
+        next_state, reward, done, _ = env.step(action[0, 0].item())

         state = next_state
         steps += 1
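Why the unsqueeze(-1) changes are needed (the same fix appears in all three files): gather(1, batch_action) produces current Q-values of shape [B, 1], while .max(1)[0] produces [B]. In modern PyTorch, mixing a [B, 1] prediction with a [B] target makes smooth_l1_loss broadcast to [B, B] with only a size-mismatch warning, so the loss is silently wrong; keeping both sides at [B, 1] restores element-wise targets. A shape-only sketch with random stand-in tensors:

import torch

B, GAMMA = 4, 0.99
current_q = torch.randn(B, 1)   # model(batch_state).gather(1, batch_action) -> [B, 1]
batch_reward = torch.randn(B)   # torch.cat of rewards -> [B]
max_next_q = torch.randn(B)     # .detach().max(1)[0]  -> [B]

bad_target = batch_reward + GAMMA * max_next_q                                 # shape [B]
good_target = batch_reward.unsqueeze(-1) + (GAMMA * max_next_q).unsqueeze(-1)  # shape [B, 1]

print((current_q - bad_target).shape)    # torch.Size([4, 4]): unwanted broadcast
print((current_q - good_target).shape)   # torch.Size([4, 1]): element-wise, as intended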
8 changes: 4 additions & 4 deletions 3_dueling_dqn.py
@@ -94,7 +94,7 @@ def run_episode(episode, env):
     while True:
         # env.render()
         action = select_action(FloatTensor([state]))
-        next_state, reward, done, _ = env.step(action[0, 0])
+        next_state, reward, done, _ = env.step(action[0, 0].item())

         # negative reward when attempt ends
         if done:
@@ -137,14 +137,14 @@ def learn():

     batch_state = Variable(torch.cat(batch_state))
     batch_action = Variable(torch.cat(batch_action))
-    batch_reward = Variable(torch.cat(batch_reward))
+    batch_reward = Variable(torch.cat(batch_reward)).unsqueeze(-1)
     batch_next_state = Variable(torch.cat(batch_next_state))

     # current Q values are estimated by NN for all actions
     current_q_values = model(batch_state).gather(1, batch_action)
     # expected Q values are estimated from actions which gives maximum Q value
     max_next_q_values = model(batch_next_state).detach().max(1)[0]
-    expected_q_values = batch_reward + (GAMMA * max_next_q_values)
+    expected_q_values = batch_reward + (GAMMA * max_next_q_values).unsqueeze(-1)

     # loss is measured from error between current and newly expected Q values
     loss = F.smooth_l1_loss(current_q_values, expected_q_values)
@@ -162,7 +162,7 @@ def botPlay():
         frame = env.render(mode='rgb_array')
         frames.append(frame)
         action = select_action(FloatTensor([state]))
-        next_state, reward, done, _ = env.step(action[0, 0])
+        next_state, reward, done, _ = env.step(action[0, 0].item())

         state = next_state
         steps += 1
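A further modernization not made in this commit: the hunks above still wrap batches in torch.autograd.Variable, which has been a deprecated no-op since PyTorch 0.4, so the same lines could use plain tensors. A sketch under that assumption, with made-up transitions standing in for a replay-memory sample:

import torch

# made-up (state, action, reward, next_state) transitions in place of a replay sample
transitions = [(torch.randn(1, 4), torch.tensor([[0]]), torch.tensor([1.0]), torch.randn(1, 4))
               for _ in range(4)]
batch_state, batch_action, batch_reward, batch_next_state = zip(*transitions)

batch_state = torch.cat(batch_state)                  # [B, 4], no Variable(...) wrapper needed
batch_action = torch.cat(batch_action)                # [B, 1]
batch_reward = torch.cat(batch_reward).unsqueeze(-1)  # [B, 1], matching the shape fix above
batch_next_state = torch.cat(batch_next_state)        # [B, 4]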
Binary file added 4_policy_gradient_play.gif
Binary file added 4_policy_gradient_score.png
