-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcpu_thread.py
119 lines (102 loc) · 4.09 KB
/
cpu_thread.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os
import gym
import time
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from parameters import parameters
def preprocess_state(state):
return np.clip(state, -10, 10).astype('float32')
def process_reward(reward):
return 0.01*reward
def generate_game(env, pid, process_queue, common_dict):
observation = env.reset()
done = False
reward_list = []
action_list = []
observation_list = []
prob_list = []
while not done:
observation_list.append(observation)
process_queue.put((pid, observation))
while pid not in common_dict:
time.sleep(0.0001)
action, prob = common_dict[pid]
del common_dict[pid]
observation, reward, done, _ = env.step(np.clip(action, -1, 1))
action_list.append(action)
prob_list.append(prob)
reward_list.append(process_reward(reward))
if reward != -100:
reward_list[-1] = reward_list[-1]/(1-parameters.GAMMA)
print('Distance: {0:5.1f}'.format(np.sum(observation_list, 0)[2]), flush=True)
for i in range(len(reward_list) - 2, -1, -1):
reward_list[i] += reward_list[i + 1] * parameters.GAMMA # compute the discounted obtained reward for each step
if reward != -100:
return observation_list[:-20], reward_list[:-20], action_list[:-20], prob_list[:-20]
return observation_list, reward_list, action_list, prob_list
def play_to_gif(env, pid, process_queue, common_dict):
while 'epoch' not in common_dict:
time.sleep(0.001)
while True:
if common_dict['epoch'] % 25 == 0:
episode = common_dict['epoch']
observation = env.reset()
frames = []
done = False
while not done:
process_queue.put((pid, observation))
while pid not in common_dict:
time.sleep(0.0001)
action, prob = common_dict[pid]
del common_dict[pid]
observation, _, done, _ = env.step(np.clip(action, -1, 1))
frames.append(env.render(mode='rgb_array'))
display_frames_as_gif(frames, 'Episode {}.gif'.format(episode))
del frames
time.sleep(0.1)
def play(env, pid, process_queue, common_dict):
while True:
observation = env.reset()
done = False
while not done:
process_queue.put((pid, observation))
while pid not in common_dict:
time.sleep(0.0001)
action, prob = common_dict[pid]
del common_dict[pid]
observation, _, done, _ = env.step(np.clip(action, -1, 1))
env.render()
def display_frames_as_gif(frames, name):
"""
Displays a list of frames as a gif, with controls
"""
plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi=72)
patch = plt.imshow(frames[0])
plt.axis('off')
plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
def animate(i):
patch.set_data(frames[i])
anim = animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=33)
anim.save('gifs/' + name, writer=animation.PillowWriter(fps=40))
def cpu_thread(render, memory_queue, process_queue, common_dict, core):
import psutil
p = psutil.Process()
p.cpu_affinity([core])
import signal
signal.signal(signal.SIGINT, signal.SIG_IGN)
try:
env = gym.make("BipedalWalker-v2")
pid = os.getpid()
print('process started with pid: {} on core {}'.format(os.getpid(), core), flush=True)
if render == 0:
while True:
observation_list, reward_list, action_list, prob_list = generate_game(env, pid, process_queue, common_dict)
for i in range(len(observation_list)):
memory_queue.put((observation_list.pop(), reward_list.pop(), action_list.pop(), prob_list.pop()))
elif render == 1:
play_to_gif(env, pid, process_queue, common_dict)
elif render == 2:
play(env, pid, process_queue, common_dict)
except Exception as e:
print(e, flush=True)