# runtests.jl
using ReinforcementLearningZoo
using Test
using ReinforcementLearningBase
using ReinforcementLearningCore
using ReinforcementLearningEnvironments
using Flux
using Statistics
using Random
using OpenSpiel
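
# End-to-end smoke tests for the experiments shipped with
# ReinforcementLearningZoo.jl.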
@testset "ReinforcementLearningZoo.jl" begin
@testset "training" begin
mktempdir() do dir
for method in (:BasicDQN, :DQN, :PrioritizedDQN, :Rainbow, :IQN, :VPG)
res = run(Experiment(
Val(:JuliaRL),
Val(method),
Val(:CartPole),
nothing;
save_dir = joinpath(dir, "CartPole", string(method)),
))
@info "stats for $method" avg_reward = mean(res.hook[1].rewards) avg_fps =
1 / mean(res.hook[2].times)
end
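            # Value-based methods on MountainCar.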
            for method in (:BasicDQN, :DQN)
                res = run(Experiment(
                    Val(:JuliaRL),
                    Val(method),
                    Val(:MountainCar),
                    nothing;
                    save_dir = joinpath(dir, "MountainCar", string(method)),
                ))
                @info "stats for $method" avg_reward = mean(res.hook[1].rewards) avg_fps =
                    1 / mean(res.hook[2].times)
            end
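            # Actor-critic and policy-gradient methods on CartPole; rewards are
            # nested, hence the flatten before averaging.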
            for method in (:A2C, :A2CGAE, :PPO)
                res = run(Experiment(
                    Val(:JuliaRL),
                    Val(method),
                    Val(:CartPole),
                    nothing;
                    save_dir = joinpath(dir, "CartPole", string(method)),
                ))
                @info "stats for $method" avg_reward =
                    mean(Iterators.flatten(res.hook[1].rewards))
            end
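            # Continuous-control methods on Pendulum.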
            for method in (:DDPG, :SAC, :TD3)
                res = run(Experiment(
                    Val(:JuliaRL),
                    Val(method),
                    Val(:Pendulum),
                    nothing;
                    save_dir = joinpath(dir, "Pendulum", string(method)),
                ))
                @info "stats for $method" avg_reward =
                    mean(Iterators.flatten(res.hook[1].rewards))
            end
        end
    end
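    # Load each pretrained policy, switch the agent to test mode, and evaluate
    # it for a single episode.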
@testset "run pretrained models" begin
for x in ("JuliaRL_BasicDQN_CartPole",)
e = Experiment(x)
e.agent.policy = load_policy(x)
Flux.testmode!(e.agent)
run(e.agent, e.env, StopAfterEpisode(1), e.hook)
@info "result of evaluating pretrained model: $x for once:" reward =
e.hook[1].rewards[end]
end
end
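    # Minimax self-play on tic-tac-toe should always end in a draw
    # (final reward 0 for both players).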
@testset "minimax" begin
e = E`JuliaRL_Minimax_OpenSpiel(tic_tac_toe)`
run(e)
@test e.hook[1].rewards[end] == e.hook[2].rewards[end] == 0.0
end
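    # Tabular CFR on Kuhn poker: the game value is -1/18 for the first player,
    # so average rewards and expected policy values are checked against ±1/18.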
@testset "TabularCFR" begin
e = E`JuliaRL_TabularCFR_OpenSpiel(kuhn_poker)`
run(e)
@test isapprox(mean(e.hook[2].rewards), -1 / 18; atol = 0.01)
@test isapprox(mean(e.hook[3].rewards), 1 / 18; atol = 0.01)
reset!(e.env)
expected_values = Dict(expected_policy_values(e.agent, e.env))
@test isapprox(expected_values[get_role(e.agent[2])], -1 / 18; atol = 0.01)
@test isapprox(expected_values[get_role(e.agent[3])], 1 / 18; atol = 0.01)
end
end