# SurvialDQN.py
# -------------------------
# Project: Deep Q-Learning on Flappy Bird
# Author: Flood Sung
# Date: 2016.3.21
# -------------------------
from game.Game_env import Game
from BrainDQN_Nature import BrainDQN
from map.map_info import map
import logging
from BrainDQN_Run import BrainDQNRun
from Pos import Pos
from Accuracy import AccuracyJudges
def playSurvival():
    """Train a ``BrainDQN`` agent on repeatedly regenerated maps.

    Runs 6,000,000 outer rounds. Every round rebuilds the map through
    ``init_battle_map``, ``fillramdomplayer``, ``GeneratorPosion`` and
    ``GeneratorTool``, resets the game environment, and then lets the agent
    act for at most 2000 steps; the round ends early on the first terminal
    step. Progress is printed to stdout and logged to ``logs/myplayer.log``.
    """
    logging.basicConfig(filename='logs/myplayer.log', level=logging.INFO)
    logging.info('Started')

    # Nine selectable actions; only their count is handed to the agent.
    action_space = ['u', 'd', 'l', 'r', 'a', 'x', 's', 'w', 'e']
    agent = BrainDQN(len(action_space))

    # Random map generator and the game environment it feeds.
    world = map()
    env = Game()

    for i in range(6000000):
        # Stage counter: advances by one every 1,000,000 rounds.
        loop = i // 1000000
        print("loop:", loop)
        logging.info('loop %d,i %d', loop, i)

        # Build a fresh random map for this round.
        world.init_battle_map()
        reserved = world.fillramdomplayer(loop)

        # Generate the poison layer.
        print("Posion start")
        poisoned = world.GeneratorPosion(i)
        print("Posion end")

        # Generate the tools.
        world.GeneratorTool(reserved, poisoned, i)

        # Reset the game environment to the new map and prime the agent.
        env.binary_env_reset(world.cacheMap, world.PosionMap)
        agent.setInitState(env.binary_env)

        # Step the agent up to 2000 times on this map.
        for _step in range(2000):
            # Ask the agent for its next action.
            action = agent.getAction()
            # Apply it and collect the resulting observation and reward.
            next_observation, reward, terminal = env.binary_step(action)
            # Feed the transition back to the agent.
            agent.setPerception(next_observation, action, reward, terminal)

            if not (terminal == True):
                continue

            # Terminal step: restore the environment, then move on to the
            # next map.
            env.binary_env_reset(world.cacheMap, world.PosionMap)
            agent.setInitState(env.binary_env)
            break
def TestTrain():
    """Roll out a ``BrainDQNRun`` agent on 100 generated maps and judge it.

    For each map the environment is reset, the agent is queried until it
    reports ``done``, and every move returned before that point is
    collected. The collected moves are printed and then passed to
    ``AccuracyJudges.JudgeIfTheBestPractise`` together with the environment
    state and the poison map.
    """
    action_space = ['u', 'd', 'l', 'r', 'a', 'x', 's', 'w', 'e']
    n_actions = len(action_space)

    # Random map generator, game environment, inference agent and judge.
    genermap = map()
    TrainGame = Game()
    train = BrainDQNRun(n_actions)
    Judges = AccuracyJudges()

    for i in range(100):
        # Same stage formula as the training loop; it stays 0 for i < 100.
        loop = i // 1000000

        genermap.init_battle_map()
        # The return value is not needed here; the call is kept because the
        # poison map is read from ``genermap.PosionMap`` below.
        genermap.GeneratorPosion(i)
        genermap.fillramdomplayer(loop)

        # Reset the game environment to the new map and prime the agent.
        TrainGame.binary_env_reset(genermap.cacheMap, genermap.PosionMap)
        train.setInitState(TrainGame.binary_env)

        done = False
        moves = []
        observation = TrainGame.binary_env
        while done != True:
            move, observation, done = train.getAction(observation)
            # The move that comes back with the final ``done`` is not
            # recorded.
            if done != True:
                moves.append(move)

        print("moves:", len(moves))
        # Iterate the moves directly so the outer loop variable ``i`` is
        # not shadowed.
        for move in moves:
            print(" ", move.x, move.y)

        Judges.JudgeIfTheBestPractise(TrainGame.binary_env, moves, genermap.PosionMap)
def PlaySurvalNew():
    """Train a ``BrainDQN`` agent by sweeping over every prepared map.

    ``init_battle_map`` reports how many maps there are. For each map,
    ``calclooptimes`` gives the number of placements to run; each placement
    positions the players, generates poison and tools, resets the
    environment and then steps the agent exactly 5000 times, resetting the
    environment in place whenever a terminal step occurs.
    Progress is logged to ``logs/myplayer.log``.
    """
    logging.basicConfig(filename='logs/myplayer.log', level=logging.INFO)
    logging.info('Started')

    # Nine selectable actions; only their count is handed to the agent.
    action_space = ['u', 'd', 'l', 'r', 'a', 'x', 's', 'w', 'e']
    agent = BrainDQN(len(action_space))

    # Random map generator and the game environment it feeds.
    world = map()
    env = Game()

    map_count = world.init_battle_map()
    for i in range(map_count):
        # How many placements to run on this map.
        looptimes = world.calclooptimes(i)
        logging.info('loop %d,i %d', looptimes, i)

        for j in range(looptimes):
            # First half: flag off, index counts up from 0.
            # Second half: flag on, index is j - looptimes.
            # NOTE(review): the second-half index is negative; confirm that
            # fillplayerpositon expects that.
            if 2 * j >= looptimes:
                flag, index = True, j - looptimes
            else:
                flag, index = False, j

            # Place the players for this run.
            reserved = world.fillplayerpositon(index, i, flag)
            # Generate the poison layer.
            poisoned = world.GeneratorPosion(i)
            # Generate the tools.
            world.GeneratorTool(reserved, poisoned, i)

            # Reset the game environment to the new layout and prime the
            # agent.
            env.binary_env_reset(world.cacheMap, world.PosionMap)
            agent.setInitState(env.binary_env)

            # Step the agent 5000 times on this layout.
            for _step in range(5000):
                # Ask the agent for its next action.
                action = agent.getAction()
                # Apply it and collect the resulting observation and reward.
                next_observation, reward, terminal = env.binary_step(action)
                # Feed the transition back to the agent.
                agent.setPerception(next_observation, action, reward, terminal)

                if terminal == True:
                    # Terminal step: restart on the same layout and keep
                    # training (no early exit from the step loop).
                    env.binary_env_reset(world.cacheMap, world.PosionMap)
                    agent.setInitState(env.binary_env)
def main():
    """Run the currently selected mode, ``PlaySurvalNew``."""
    # Alternative modes, currently disabled: playSurvival(), TestTrain().
    PlaySurvalNew()
# Only start when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()