forked from smearle/control-pcgrl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevolve.py
3804 lines (3206 loc) · 136 KB
/
evolve.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import argparse
import gc
import json
import os
import pickle
import pprint
import sys
import time
from datetime import datetime
from timeit import default_timer as timer
from pathlib import Path
from pdb import set_trace as TT
from random import randint
import cv2
from typing import Tuple
import gym
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import ray
import scipy
import torch as th
import torch.nn.functional as F
from gym import envs
from numba import njit
from qdpy.phenotype import Fitness, Features
from ribs.archives import GridArchive
from ribs.archives._add_status import AddStatus
from ribs.emitters import (
GradientImprovementEmitter,
ImprovementEmitter,
OptimizingEmitter,
)
from ribs.emitters.opt import CMAEvolutionStrategy
from ribs.optimizers import Optimizer
from ribs.visualize import grid_archive_heatmap
from torch import ByteTensor, Tensor, nn
from torch.nn import Conv2d, CrossEntropyLoss, Linear
from torch.utils.tensorboard import SummaryWriter
import deap
import deap.tools
import deap.algorithms
import qdpy
from qdpy import algorithms, containers, benchmarks, plots, tools
from deap.base import Toolbox
import graphviz
import warnings
import copy
# Use for .py file
from tqdm import tqdm
import gym_pcgrl
from evo_args import get_args
from gym_pcgrl.envs.helper import get_int_prob, get_string_map
# from example_play_call import random_player
# gvgai_path = '/home/sme/GVGAI_GYM/'
# sys.path.insert(0,gvgai_path)
# from play import play
# Use for notebook
# from tqdm.notebook import tqdm
# Use print to confirm access to local pcgrl gym
# print([env.id for env in envs.registry.all() if "gym_pcgrl" in env.entry_point])
"""
/// Required Environment ///
conda create -n ribs-pt python=3.7
pip install scipy==1.2.0 # must use this version with GVGAI_GYM
conda install -c conda-forge notebook
conda install pytorch torchvision torchaudio -c pytorch
conda install tensorboard
pip install 'ribs[all]' gym~=0.17.0 Box2D~=2.3.10 tqdm
git clone https://github.com/amidos2006/gym-pcgrl.git
cd gym-pcgrl # Must run in project root folder for access to pcgrl modules
/// Instructions ///
To start TensorBoard run the following command:
$ tensorboard --logdir=runs
Then go to:
http://localhost:6006
/// Resources ///
Sam's example code:
https://github.com/smearle/gol-cmame/blob/master/gol_cmame.py
PCGRL Repo:
https://github.com/amidos2006/gym-pcgrl
Neural CA Paper:
https://arxiv.org/pdf/2009.01398.pdf
RIBS examples:
https://docs.pyribs.org/en/stable/tutorials/lunar_lander.html
"""
# Multiplier applied to the environment's maximum loss before it is used to
# offset objective values when computing the QD score (see get_qd_score).
TARGETS_PENALTY_WEIGHT = 10
def draw_net(config: object, genome: object, view: object = False, filename: object = None, node_names: object = None, show_disabled: object = True,
             prune_unused: object = False,
             node_colors: object = None, fmt: object = 'svg') -> object:
    """Render a genome of arbitrary topology as a graphviz digraph.

    Args:
        config: object exposing ``genome_config.input_keys`` and
            ``genome_config.output_keys``.
        genome: object exposing ``connections`` and ``nodes`` mappings.
        view: forwarded to ``Digraph.render``.
        filename: forwarded to ``Digraph.render``.
        node_names: optional dict mapping node keys to display names.
        show_disabled: when true, disabled connections are drawn (dotted).
        prune_unused: when true, only draw hidden nodes from which an output
            can be reached through the considered connections.
        node_colors: optional dict mapping node keys to fill colours.
        fmt: output format handed to ``graphviz.Digraph``.

    Returns:
        The ``graphviz.Digraph`` that was rendered, or ``None`` (after a
        warning) when graphviz is unavailable.
    """
    if graphviz is None:
        warnings.warn("This display is not available due to a missing optional dependency (graphviz)")
        return

    node_names = {} if node_names is None else node_names
    assert type(node_names) is dict
    node_colors = {} if node_colors is None else node_colors
    assert type(node_colors) is dict

    # Default attributes shared by every node of the graph.
    default_node_attrs = {
        'shape': 'circle',
        'fontsize': '9',
        'height': '0.2',
        'width': '0.2'}
    dot = graphviz.Digraph(format=fmt, node_attr=default_node_attrs)

    # Input nodes: grey boxes.
    inputs = set()
    for key in config.genome_config.input_keys:
        inputs.add(key)
        dot.node(
            node_names.get(key, str(key)),
            _attributes={'style': 'filled', 'shape': 'box', 'fillcolor': node_colors.get(key, 'lightgray')},
        )

    # Output nodes: blue circles.
    outputs = set()
    for key in config.genome_config.output_keys:
        outputs.add(key)
        dot.node(
            node_names.get(key, str(key)),
            _attributes={'style': 'filled', 'fillcolor': node_colors.get(key, 'lightblue')},
        )

    if not prune_unused:
        used_nodes = set(genome.nodes.keys())
    else:
        connections = set()
        for gene in genome.connections.values():
            if gene.enabled or show_disabled:
                connections.add(gene.key)
        # Walk backwards from the outputs, collecting every node that feeds
        # (directly or transitively) into one of them.
        used_nodes = copy.copy(outputs)
        frontier = copy.copy(outputs)
        while frontier:
            next_frontier = set()
            for src, dst in connections:
                if dst in frontier and src not in used_nodes:
                    next_frontier.add(src)
                    used_nodes.add(src)
            frontier = next_frontier

    # Hidden nodes (anything used that is neither an input nor an output).
    for node in used_nodes:
        if node not in inputs and node not in outputs:
            dot.node(str(node), _attributes={'style': 'filled', 'fillcolor': node_colors.get(node, 'white')})

    # Edges: colour encodes the sign of the weight, pen width its magnitude.
    for gene in genome.connections.values():
        if not (gene.enabled or show_disabled):
            continue
        src, dst = gene.key
        edge_attrs = {
            'style': 'solid' if gene.enabled else 'dotted',
            'color': 'green' if gene.weight > 0 else 'red',
            'penwidth': str(0.1 + abs(gene.weight / 5.0)),
        }
        dot.edge(node_names.get(src, str(src)), node_names.get(dst, str(dst)), _attributes=edge_attrs)

    dot.render(filename, view=view)
    return dot
def save_level_frames(level_frames, model_name):
    """Write every frame of a rendered episode to disk as a PNG.

    Frames are stored as ``frame_0000.png``, ``frame_0001.png``, ... inside
    ``<SAVE_PATH>/renders/model_<model_name>/``.

    Args:
        level_frames: iterable of image objects exposing ``save(path)``.
        model_name: identifier used to name the per-model directory.
    """
    model_dir = os.path.join(SAVE_PATH, "renders", "model_{}".format(model_name))
    # makedirs(exist_ok=True) replaces the check-then-mkdir pairs: it creates
    # any missing parent directory and is not subject to a race between the
    # existence check and the creation.
    os.makedirs(model_dir, exist_ok=True)
    for j, im in enumerate(level_frames):
        im.save(os.path.join(model_dir, "frame_{:0>4d}.png".format(j)))
def get_qd_score(archive, env, bc_names):
    """Sum the archive's objectives after offsetting each by the max loss.

    The offset is ``env.get_max_loss(ctrl_metrics=bc_names)`` multiplied by
    ``TARGETS_PENALTY_WEIGHT``.

    Args:
        archive: the archive of elites; must expose ``quality_array`` when
            ``ALGO == 'ME'`` and ``as_pandas`` otherwise.
        env: environment exposing ``get_max_loss``.
        bc_names: names of the behaviour characteristics, forwarded to
            ``get_max_loss`` as ``ctrl_metrics``.

    Returns:
        The summed, offset objective values.
    """
    offset = env.get_max_loss(ctrl_metrics=bc_names) * TARGETS_PENALTY_WEIGHT

    if ALGO != 'ME':
        objectives = archive.as_pandas(include_solutions=False)['objective']
        return (objectives + offset).sum()

    # qd_score = archive.qd_score()  # we need to specify lower *and upper* bounds for this
    # TODO: work out max diversity bonus to make this possible ?? Would this bias scores between n. latent seeds
    #   though?
    return np.nansum(archive.quality_array + offset)
def save_train_stats(objs, archive, env, bc_names, itr=None):
    """Dump training-time statistics to ``train_time_stats.json``.

    Args:
        objs: objective values, summarised with ``get_stats``.
        archive: archive handed to ``get_qd_score``.
        env: environment handed to ``get_qd_score``.
        bc_names: behaviour-characteristic names handed to ``get_qd_score``.
        itr: when given, the file is written under
            ``<SAVE_PATH>/checkpoint_<itr>``; otherwise directly in
            ``SAVE_PATH``.
    """
    train_time_stats = {
        "qd_score": get_qd_score(archive, env, bc_names),
        "objective": get_stats(objs),
    }
    if itr is not None:
        save_path = os.path.join(SAVE_PATH, "checkpoint_{}".format(itr))
    else:
        save_path = SAVE_PATH
    # Use a context manager so the file handle is flushed and closed
    # deterministically instead of being left to the garbage collector.
    with open(os.path.join(save_path, "train_time_stats.json"), "w") as stats_file:
        json.dump(train_time_stats, stats_file, indent=4)
def get_stats(stats):
    """Summarise ``stats`` with NaN-ignoring mean, std, max and min.

    Returns:
        A dict with the keys ``"mean"``, ``"std"``, ``"max"`` and ``"min"``.
    """
    reducers = (
        ("mean", np.nanmean),
        ("std", np.nanstd),
        ("max", np.nanmax),
        ("min", np.nanmin),
    )
    return {label: reduce(stats) for label, reduce in reducers}
def save_grid(csv_name="levels", d=4):
    """Render a ``d`` x ``d`` image grid of levels read from a CSV file.

    The CSV ``<SAVE_PATH>/<csv_name>.csv`` is read, the levels with the best
    ``targets`` values are selected, arranged by their two behaviour
    characteristics, rendered through the environment and saved to
    ``<SAVE_PATH>/<csv_name>_grid.png``.

    Args:
        csv_name: base name (without extension) of the CSV to read; also
            used to name the output image.
        d: number of rows/columns of the grid. Overridden to 3 when
            ``PROBLEM`` contains "zelda" and to 4 when it contains "smb".
    """
    fontsize = 32
    if "zelda" in PROBLEM:
        d = 3
        fontsize = int(fontsize * d / 4)
    elif "smb" in PROBLEM:
        d = 4
    if CMAES:
        # TODO: implement me
        return

    levels_path = os.path.join(SAVE_PATH, csv_name + ".csv")
    env_name = "{}-{}-v0".format(PROBLEM, REPRESENTATION)
    env = gym.make(env_name)
    map_width = env._prob._width

    df = pd.read_csv(levels_path, header=0, skipinitialspace=True)
    # assume 2 BCs, stored in columns 5 and 6
    bc_names = [df.columns[i] for i in range(5, 7)]

    # Look for the most valid levels: start from targets == 0 and relax the
    # threshold until there are enough levels to fill the grid (or none left).
    targets_thresh = 0.0
    og_df = df
    df = og_df[og_df['targets'] == targets_thresh]
    while len(df) < d**2 and targets_thresh > og_df['targets'].min():
        # Lower the threshold so it includes at least one more individual
        targets_thresh = og_df[og_df['targets'] < targets_thresh]['targets'].max()
        df = og_df[og_df['targets'] >= targets_thresh]

    figw, figh = 16.0, 16.0
    # A single call to subplots creates the figure; the extra plt.figure()
    # that used to precede it produced an empty figure that was never closed.
    fig, axs = plt.subplots(ncols=d, nrows=d, figsize=(figw, figh))

    df_g = df.sort_values(by=bc_names, ascending=False)
    # Split the sorted levels into d contiguous groups, one per grid row.
    df_g["row"] = np.floor(np.linspace(0, d, len(df_g), endpoint=False)).astype(int)

    for row_num in range(d):
        row = df_g[df_g["row"] == row_num]
        row = row.sort_values(by=[bc_names[1]], ascending=True)
        row["col"] = np.arange(0, len(row), dtype=int)
        # Keep (up to) d evenly spaced levels from this group.
        idx = np.floor(np.linspace(0, len(row) - 1, d)).astype(int)
        row = row[row["col"].isin(idx)]
        row = row.drop(["row", "col"], axis=1)
        grid_models = row["level"].tolist()

        for col_num in range(len(row)):
            axs[row_num, col_num].set_axis_off()
            # Parse the stringified nested list back into an integer map.
            # NOTE(review): the map is allocated as map_width x map_width,
            # i.e. this assumes square maps - confirm for non-square problems.
            level = np.zeros((map_width, map_width), dtype=int)
            for i, l_rows in enumerate(grid_models[col_num].split("], [")):
                for j, l_col in enumerate(l_rows.split(",")):
                    level[i, j] = int(
                        l_col.replace("[", "").replace("]", "").replace(" ", "")
                    )

            # Set map
            env._rep._x = env._rep._y = 0
            env._rep._map = level
            img = env.render(mode="rgb_array")
            axs[-col_num-1, -row_num-1].imshow(img, aspect="auto")

    fig.subplots_adjust(hspace=0.01, wspace=0.01)
    levels_png_path = os.path.join(SAVE_PATH, "{}_grid.png".format(csv_name))
    fig.text(0.5, 0.01, bc_names[0], ha='center', va='center', fontsize=fontsize)
    fig.text(0.01, 0.5, bc_names[1], ha='center', va='center', rotation='vertical', fontsize=fontsize)
    plt.tight_layout(rect=[0.025, 0.025, 1, 1])
    fig.savefig(levels_png_path, dpi=300)
    # Close exactly the figure created above rather than "the current one".
    plt.close(fig)
def auto_garbage_collect(pct=80.0):
    """Run the garbage collector when virtual-memory usage reaches ``pct`` percent."""
    memory_in_use = psutil.virtual_memory().percent
    if memory_in_use < pct:
        return
    gc.collect()
def tran_action(action, **kwargs):
    """Swap axes 1 and 2 of ``action``.

    Extra keyword arguments are accepted and ignored.

    Returns:
        ``(swapped_action, skip)`` where ``skip`` is always ``False``.
    """
    swapped = action.swapaxes(1, 2)
    return swapped, False
# usually, if action does not turn out to change the map, then the episode is terminated
# the skip boolean tells us whether, for some representation-specific reason, the agent has chosen not to act, but
# without ending the episode
@njit
def id_action(action, int_map=None, n_tiles=None, x=None, y=None, n_dirs=None):
    """Pass ``action`` through untouched and never request a skip.

    The argmax along the tile-type dimension is performed inside the
    representation's update function, so nothing is done here. All other
    parameters exist only to match the signature of the sibling
    pre-processing functions.
    """
    return action, False
# @njit
def wide_action(action, int_map=None, n_tiles=None, x=None, y=None, n_dirs=None):
    """Pick the single strongest proposed change to the map.

    Only cells where the channel-wise argmax of ``action`` differs from
    ``int_map`` are eligible; everywhere else the activations are pushed
    below the global minimum before the overall argmax is taken.

    Returns:
        ``(coords, skip)``: ``coords`` is the index of the winning activation
        with its three components in reversed order (last axis first), and
        ``skip`` is always ``False``.
    """
    # only consider tiles where the generator suggests something different than the existing tile
    differs = action.argmax(axis=0) != int_map
    n_candidates = np.sum(differs)
    differs = differs[np.newaxis]

    # Ineligible cells get a value strictly below every real activation.
    floor_value = action.min() - 10
    masked = np.where(differs, action, floor_value)

    winner = np.unravel_index(masked.argmax(), masked.shape)
    if n_candidates > 0:
        assert differs[0, winner[1], winner[2]] == 1

    return (winner[2], winner[1], winner[0]), False
@njit
def narrow_action(action, int_map=None, n_tiles=None, x=None, y=None, n_dirs=None):
    """Take the argmax over the first axis of ``action`` at position (y, x).

    Returns:
        ``(act, skip)`` where ``skip`` is ``True`` exactly when ``act`` is 0.
    """
    act = action[:, y, x].argmax()
    if act == 0:
        return act, True
    return act, False
@njit
def turtle_action(action, int_map=None, n_tiles=None, x=None, y=None, n_dirs=None):
    """Take the argmax over the first axis of ``action`` at position (y, x).

    Returns:
        ``(act, skip)`` where ``skip`` is ``True`` exactly when
        ``act < n_dirs``.
    """
    act = action[:, y, x].argmax()
    # moving is counted as a skip, so lack of change does not end episode
    if act < n_dirs:
        return act, True
    return act, False
@njit
def flat_to_box(action, int_map=None, n_tiles=None, x=None, y=None, n_dirs=None):
    """Reshape ``action`` to ``(n_tiles, *int_map.shape)``; never skips."""
    box_shape = (n_tiles, *int_map.shape)
    return action.reshape(box_shape), False
@njit
def flat_to_wide(action, int_map=None, n_tiles=None, x=None, y=None, n_dirs=None):
    """Split a flat ``action`` into three segments and argmax each.

    The segments have lengths ``int_map.shape[0]``, ``int_map.shape[1]`` and
    ``n_tiles`` (in that order); ``action`` must be exactly that long.

    Returns:
        ``((first_argmax, second_argmax, third_argmax), skip)`` with ``skip``
        always ``False``.
    """
    w, h = int_map.shape[0], int_map.shape[1]
    assert len(action) == w + h + n_tiles
    first = action[:w].argmax()
    second = action[w : w + h].argmax()
    third = action[w + h :].argmax()
    return (first, second, third), False
@njit
def flat_to_narrow(action, int_map=None, n_tiles=None, x=None, y=None, n_dirs=None):
    """Argmax a flat ``action``; index 0 is reported as a skip.

    Returns:
        ``(act, skip)`` where ``skip`` is ``True`` exactly when ``act`` is 0.
    """
    act = action.argmax()
    if act == 0:
        return act, True
    return act, False
@njit
def flat_to_turtle(action, int_map=None, n_tiles=None, x=None, y=None, n_dirs=None):
    """Argmax a flat ``action``; indices below ``n_dirs`` are reported as a skip.

    Returns:
        ``(act, skip)`` where ``skip`` is ``True`` exactly when
        ``act < n_dirs``.
    """
    act = action.argmax()
    if act < n_dirs:
        return act, True
    return act, False
# Lookup table of action pre-processing functions. The outer keys look like
# model-type names and the inner keys like level-representation names
# (NOTE(review): confirm against the callers, which are outside this view).
# Only the "cellular" representation has an entry under "CPPN".
preprocess_action_funcs = {
    "NCA": {
        "cellular": id_action,
        "wide": wide_action,
        "narrow": narrow_action,
        "turtle": turtle_action,
    },
    "CPPN": {
        "cellular": tran_action,
    },
    "CNN": {
        # will try to build this logic into the model
        "cellular": flat_to_box,
        "wide": flat_to_wide,
        "narrow": flat_to_narrow,
        "turtle": flat_to_turtle,
    },
}
def id_observation(obs, **kwargs):
    """Return ``obs`` unchanged; any keyword arguments are ignored."""
    return obs
def local_observation(obs, **kwargs):
    """Append a one-hot position channel to ``obs``.

    A new channel of the same spatial size as ``obs`` is created, zero
    everywhere except for a 1 at ``[y, x]``, and concatenated after the
    existing channels along axis 0.

    Previously the concatenated array was computed and then discarded, so
    the function returned ``obs`` without the position channel.

    Args:
        obs: array indexed as ``(channel, dim1, dim2)``.
        **kwargs: must provide ``x`` and ``y``, the position to mark.

    Returns:
        A new array with one more channel than ``obs``.
    """
    x = kwargs.get("x")
    y = kwargs.get("y")
    # Match the observation's dtype so concatenation does not up-cast it.
    local_obs = np.zeros((1, obs.shape[1], obs.shape[2]), dtype=obs.dtype)
    # Might be some inconsistencies in ordering of x, y?
    local_obs[0, y, x] = 1
    return np.concatenate((obs, local_obs), axis=0)
# Lookup table of observation pre-processing functions, with the same
# two-level key layout as preprocess_action_funcs. In both groups the
# "cellular" and "wide" entries leave the observation untouched, while
# "narrow" and "turtle" go through local_observation.
preprocess_observation_funcs = {
    "NCA": {
        "cellular": id_observation,
        "wide": id_observation,
        "narrow": local_observation,
        "turtle": local_observation,
    },
    "CNN": {
        "cellular": id_observation,
        "wide": id_observation,
        "narrow": local_observation,
        "turtle": local_observation,
    },
}
@njit
def archive_init_states(init_states_archive, init_states, index):
    """Store ``init_states`` at position ``index`` of ``init_states_archive`` (in place)."""
    init_states_archive[index] = init_states
# @njit
def get_init_states(init_states_archive, index):
    """Return the entry of ``init_states_archive`` stored at ``index``."""
    stored = init_states_archive[index]
    return stored
def mate_individuals(ind_0, ind_1):
    """Return the result of ``ind_0.mate(ind_1)``."""
    offspring = ind_0.mate(ind_1)
    return offspring
def mutate_individual(ind):
    """Mutate ``ind`` in place and return it wrapped in a 1-tuple."""
    ind.mutate()
    mutated = (ind,)
    return mutated
class MEOptimizer():
    """Ask/tell optimizer that breeds individuals out of a grid container.

    ``ask`` returns the batch of individuals to evaluate next and ``tell``
    feeds their objective and behaviour values back, updating the grid and
    the logbook.
    """

    def __init__(self, grid, ind_cls, batch_size, ind_cls_args, start_time=None, stats=None):
        """
        Args:
            grid: container holding the elites; must support ``len``,
                ``update``, ``size_str`` and ``items``.
            ind_cls: class of the individuals; instantiated ``batch_size``
                times with ``ind_cls_args`` to form the initial batch.
            batch_size: number of individuals per batch.
            ind_cls_args: keyword arguments for ``ind_cls``.
            start_time: reference time for the "elapsed" log column.
                Defaults to the time of construction.
            stats: optional statistics object exposing ``fields`` and
                ``compile``.
        """
        self.batch_size = batch_size
        self.grid = grid
        self.stats = stats
        self.inds = [ind_cls(**ind_cls_args) for _ in range(batch_size)]

        toolbox = Toolbox()
        toolbox.register("clone", copy.deepcopy)
        toolbox.register("mutate", mutate_individual)
        toolbox.register("mate", mate_individuals)
        toolbox.register("select", tools.sel_random)
        # No crossover; every selected individual is mutated.
        self.cxpb = 0
        self.mutpb = 1.0
        self.toolbox = toolbox

        # Always define start_time: it used to be left unset whenever the
        # caller supplied one, which made tell() fail with AttributeError.
        self.start_time = timer() if start_time is None else start_time

        self.logbook = deap.tools.Logbook()
        self.logbook.header = ["iteration", "containerSize", "evals", "nbUpdated"] + (stats.fields if stats else []) + ["elapsed"]
        self.i = 0

    def tell(self, objective_values, behavior_values):
        """Record the results of the last batch and update the grid.

        Args:
            objective_values: one objective per individual of the last batch.
            behavior_values: one sequence of feature values per individual
                of the last batch.
        """
        # Update individuals' stats with results of last batch of simulations
        for (ind, obj, bc) in zip(self.inds, objective_values, behavior_values):
            ind.fitness.setValues([obj])
            ind.features.setValues(bc)

        # Replace the current population by the offspring
        nb_updated = self.grid.update(self.inds, issue_warning=True, ignore_exceptions=False)

        # Compile stats and update logs
        record = self.stats.compile(self.grid) if self.stats else {}
        self.logbook.record(iteration=self.i, containerSize=self.grid.size_str(), evals=len(self.inds), nbUpdated=nb_updated, elapsed=timer()-self.start_time, **record)
        self.i += 1
        print(self.logbook.stream)

    def ask(self):
        """Return the next batch of individuals to evaluate.

        While the grid is empty the initial batch is returned as is.
        Otherwise ``batch_size`` individuals are selected and varied.
        """
        if len(self.grid) == 0:
            # Return the initial batch
            return self.inds
        elif len(self.grid) < self.batch_size:
            # If few elites, supplement the population with individuals from the last generation
            np.random.shuffle(self.inds)
            breedable = self.grid.items + self.inds[:-len(self.grid)]
        else:
            breedable = self.grid

        # Select the next batch individuals
        batch = [self.toolbox.select(breedable) for _ in range(self.batch_size)]

        # Vary the pool of individuals
        self.inds = deap.algorithms.varAnd(batch, self.toolbox, self.cxpb, self.mutpb)
        return self.inds
class InitStatesArchive(GridArchive):
    """Grid archive that also remembers the initial states elites were evaluated on.

    Storing (some of) those states lets an elite's behaviour be reproduced
    at evaluation time and compared against its behaviour on other seeds.
    """

    def __init__(self, bin_sizes, bin_bounds, n_init_states, map_w, map_h, **kwargs):
        super().__init__(bin_sizes, bin_bounds, **kwargs)
        # One block of n_init_states maps per archive cell.
        storage_shape = (*bin_sizes, n_init_states, map_w, map_h)
        self.init_states_archive = np.empty(shape=storage_shape)

    def set_init_states(self, init_states):
        """Set the initial states that subsequent ``add`` calls will store."""
        self.init_states = init_states

    def add(self, solution, objective_value, behavior_values, meta, index=None):
        """Add a solution and, if it was accepted, store the current initial states.

        ``meta`` is accepted but not forwarded to the parent ``add``.
        ``index`` is the archive cell to store the states in; when omitted
        it is derived from ``behavior_values``.

        Returns:
            The ``(status, value)`` pair returned by the parent ``add``.
        """
        status, dtype_improvement = super().add(
            solution, objective_value, behavior_values
        )
        if status == AddStatus.NOT_ADDED:
            return status, dtype_improvement

        # NOTE: for now we won't delete these when popping an elite for re-evaluation
        cell = self.get_index(behavior_values) if index is None else index
        archive_init_states(self.init_states_archive, self.init_states, cell)
        return status, dtype_improvement
class MEGrid(containers.Grid):
    """qdpy grid holding one item per bin, with an unbounded fitness domain."""

    def __init__(self, bin_sizes, bin_bounds):
        super().__init__(
            shape=bin_sizes,
            max_items_per_bin=1,
            features_domain=bin_bounds,
            fitness_domain=((-np.inf, np.inf),),
        )

    # pyribs compatibility
    def get_index(self, bcs):
        """Return the grid index for the feature values ``bcs``."""
        return self.index_grid(features=bcs)

    def add(self, item):
        """Clip ``item``'s features to the feature domain, then add it."""
        # We'll clip the feature values at the extremes
        # TODO: what's happening in this case using pyribs?
        clipped = [
            np.clip(value, *self.features_domain[dim])
            for dim, value in enumerate(item.features.values)
        ]
        item.features.setValues(clipped)
        return super().add(item)
class MEInitStatesArchive(MEGrid):
    """``MEGrid`` that also remembers the initial states elites were evaluated on.

    Storing (some of) those states lets an elite's behaviour be reproduced
    at evaluation time and compared against its behaviour on other seeds.
    """

    def __init__(self, bin_sizes, bin_bounds, n_init_states, map_w, map_h, **kwargs):
        super().__init__(bin_sizes, bin_bounds, **kwargs)
        # One block of n_init_states maps per grid cell.
        storage_shape = (*bin_sizes, n_init_states, map_w, map_h)
        self.init_states_archive = np.empty(shape=storage_shape)

    def set_init_states(self, init_states):
        """Set the initial states that subsequent ``add`` calls will store."""
        self.init_states = init_states

    def add(self, item):
        """Add ``item``; when the parent reports an index, store the current initial states.

        Returns:
            Whatever the parent ``add`` returned.
        """
        index = super().add(item)
        if index is None:
            return index
        cell = self.index_grid(item.features)
        archive_init_states(self.init_states_archive, self.init_states, cell)
        return index
class FlexArchive(InitStatesArchive):
    """Archive whose elites can be popped, re-evaluated and re-inserted.

    Each occupied cell tracks how many evaluations its elite has received
    (``n_evals``), so that objective and behaviour values can be maintained
    as running means across re-evaluations.
    """

    def __init__(self, *args, **kwargs):
        # Maps archive index -> number of evaluations of the elite stored there.
        self.n_evals = {}
        super().__init__(*args, **kwargs)
        self._occupied_indices = set()

    def _add_occupied_index(self, index):
        return super()._add_occupied_index(index)

    def _remove_occupied_index(self, index):
        """Forget ``index`` and rebuild the per-dimension index columns."""
        self._occupied_indices.remove(index)
        # Iterate over the collection instead of subscripting it by
        # position: this works whether it is a list or the set assigned in
        # __init__ (a set cannot be indexed).
        self._occupied_indices_cols = tuple(
            [occupied[dim] for occupied in self._occupied_indices]
            for dim in range(len(self._storage_dims))
        )

    def pop_elite(self, obj, bcs, old_bcs):
        """Remove an elite and fold a new evaluation into its running means.

        Need to call update_elite after this!

        Args:
            obj: objective value from the new evaluation.
            bcs: behaviour characteristics from the new evaluation.
            old_bcs: the elite's previous behaviour characteristics, used to
                locate it in the archive.

        Returns:
            ``(solution, mean_obj, mean_bcs, n_evals)``: a copy of the
            elite's solution, the updated mean objective, the updated mean
            behaviour characteristics, and the number of evaluations
            recorded *before* this one.
        """
        # Remove it, update it
        old_idx = self.get_index(np.array(old_bcs))
        self._remove_occupied_index(old_idx)
        n_evals = self.n_evals.pop(old_idx)

        # Running means over n_evals previous evaluations plus the new one.
        old_obj = self._objective_values[old_idx]
        mean_obj = (old_obj * n_evals + obj) / (n_evals + 1)
        mean_bcs = np.array(
            [
                (old_bc * n_evals + new_bc) / (n_evals + 1)
                for old_bc, new_bc in zip(old_bcs, bcs)
            ]
        )

        # Clear the cell; the solution is copied first because its storage
        # is overwritten right after.
        self._objective_values[old_idx] = np.nan
        self._behavior_values[old_idx] = np.nan
        self._occupied[old_idx] = False
        solution = self._solutions[old_idx].copy()
        self._solutions[old_idx] = np.nan
        self._metadata[old_idx] = np.nan

        return solution, mean_obj, mean_bcs, n_evals

    def update_elite(self, solution, mean_obj, mean_bcs, n_evals):
        """Re-insert an elite previously removed with ``pop_elite``.

        Args:
            solution: the elite's solution.
            mean_obj: its mean objective value.
            mean_bcs: its mean behaviour characteristics (these determine
                the cell it is inserted into).
            n_evals: number of evaluations recorded for it so far.
        """
        # Add it back
        self.add(solution, mean_obj, mean_bcs, None, n_evals=n_evals)

    def add(self, solution, objective_value, behavior_values, meta, n_evals=0):
        """Add a solution and update the evaluation count of its cell.

        A fresh solution (``n_evals == 0``) starts with a count of 1; a
        re-inserted one gets ``n_evals + 1``, capped at 100.

        Returns:
            The ``(status, value)`` pair returned by the parent ``add``.
        """
        index = self.get_index(behavior_values)
        status, dtype_improvement = super().add(
            solution, objective_value, behavior_values, meta, index
        )
        if status != AddStatus.NOT_ADDED:
            if n_evals == 0:
                self.n_evals[index] = 1
            else:
                self.n_evals[index] = min(n_evals + 1, 100)
        return status, dtype_improvement
def unravel_index(
    indices: th.LongTensor, shape: Tuple[int, ...]
) -> th.LongTensor:
    r"""Converts flat indices into unraveled coordinates in a target shape.

    This is a `th` implementation of `numpy.unravel_index`. Indices are
    wrapped modulo the total number of elements of ``shape``.

    Args:
        indices: A tensor of indices, (*, N).
        shape: The targeted shape, (D,).

    Returns:
        unravel coordinates, (*, N, D).
    """
    dims = th.tensor(shape)
    remaining = indices % dims.prod()  # prevent out-of-bounds indices

    coords = th.zeros(remaining.size() + dims.size(), dtype=int)
    # Peel off one coordinate per dimension, starting from the last one;
    # the columns are therefore filled in reverse and flipped at the end.
    for column, extent in enumerate(reversed(dims)):
        coords[..., column] = remaining % extent
        remaining = remaining // extent

    return coords.flip(-1)
# TODO: Use the GPU!
# if CUDA:
# m.cuda()
# m.to('cuda:0')
class ResettableNN(nn.Module):
    """Base module with a ``reset`` hook that does nothing by default."""

    def reset(self):
        """No-op; subclasses holding state override this."""
        return None
def gauss(x, mean=0, std=1):
    """Un-normalised Gaussian bump: ``exp(-(x - mean)^2 / (2 * std^2))``."""
    neg_squared_dev = -(x - mean) ** 2
    two_var = 2 * std ** 2
    return th.exp(neg_squared_dev / two_var)
class MixActiv(nn.Module):
    """Applies a different activation to each consecutive group of channels.

    The channel axis (axis 1) of the 4-D input is cut into as many
    consecutive slices as there are activations (sin, tanh, gauss, relu);
    each slice goes through its own activation and the results are
    concatenated back along axis 1.
    """

    def __init__(self):
        super().__init__()
        self.activations = (th.sin, th.tanh, gauss, th.relu)
        self.n_activs = len(self.activations)

    def forward(self, x):
        # The slice width is kept as a float; boundaries are truncated to
        # ints only when slicing.
        width = x.shape[1] / self.n_activs
        pieces = []
        start = 0
        for activ in self.activations:
            stop = start + width
            pieces.append(activ(x[:, int(start):int(stop), :, :]))
            start = stop
        return th.cat(pieces, axis=1)
class AuxNCA(ResettableNN):
    """Three-layer convolutional net with recurrent auxiliary channels.

    The last ``n_aux`` output channels are not returned; they are kept and
    concatenated to the input of the next forward pass.
    """

    def __init__(self, n_in_chans, n_actions, n_aux=3):
        super().__init__()
        hidden = 32
        self.n_hid_1 = hidden
        self.n_aux = n_aux
        self.l1 = Conv2d(n_in_chans + self.n_aux, hidden, 3, 1, 1, bias=True)
        self.l2 = Conv2d(hidden, hidden, 1, 1, 0, bias=True)
        self.l3 = Conv2d(hidden, n_actions + self.n_aux, 1, 1, 0, bias=True)
        self.layers = [self.l1, self.l2, self.l3]
        self.apply(init_weights)
        # Auxiliary activations carried over from the previous forward pass.
        self.last_aux = None
        if RENDER:
            cv2.namedWindow("Auxiliary NCA")

    def forward(self, x):
        """Run one step; returns ``(action_channels, False)``."""
        with th.no_grad():
            if self.last_aux is None:
                # First step since the last reset: start from all-zero aux channels.
                self.last_aux = th.zeros(size=(1, self.n_aux, *x.shape[-2:]))
            net_in = th.cat([x, self.last_aux], axis=1)
            out = th.nn.functional.relu(self.l1(net_in))
            out = th.nn.functional.relu(self.l2(out))
            out = th.sigmoid(self.l3(out))

            # Split the output: trailing n_aux channels are kept for the
            # next step, the rest is returned.
            self.last_aux = out[:, -self.n_aux:, :, :]
            x = out[:, :-self.n_aux, :, :]

            if RENDER:
                # Stack the aux channels vertically into one single-channel image.
                aux = self.last_aux[0].cpu().numpy()
                aux = aux / aux.max()
                im = np.expand_dims(np.vstack(aux), axis=0)
                im = im.transpose(1, 2, 0)
                cv2.imshow("Auxiliary NCA", im)
                cv2.waitKey(1)

        # axis 0 is batch
        # axis 1 is the tile-type (one-hot)
        # axis 0,1 is the x value
        # axis 0,2 is the y value
        return x, False

    def reset(self, init_aux=None):
        """Drop the stored auxiliary activations (``init_aux`` is ignored)."""
        self.last_aux = None
class DoneAuxNCA(AuxNCA):
    """``AuxNCA`` variant that also emits a boolean "done" signal."""

    def __init__(self, n_in_chans, n_actions, n_aux=3):
        # Add an extra auxiliary ("done") channel after the others
        super().__init__(n_in_chans, n_actions, n_aux=n_aux + 1)
        # Single-channel 7x7 convolution whose output is thresholded in forward().
        self.l_done = Conv2d(1, 1, 7, stride=999)

    def forward(self, x):
        """Run one step; returns ``(action_channels, done)``."""
        with th.no_grad():
            x, _ = super().forward(x)
            # retrieve local activation from done channel
            # NOTE(review): the parent's forward() has already stripped the
            # auxiliary channels from `x`, so `x[:, -1:]` is the last
            # *action* channel; the extra "done" channel added in __init__
            # lives in `self.last_aux[:, -1:]`. Confirm which one is meant.
            done_logit = self.l_done(x[:, -1:, :, :])
            done_x = th.sigmoid(done_logit).flatten() - 0.5
            done = (done_x > 0).item()
        return x, done

    def reset(self, init_aux=None):
        """Drop the stored auxiliary activations (``init_aux`` is ignored)."""
        self.last_aux = None
class GeneratorNN(ResettableNN):
    #class NCA(ResettableNN):
    """ A neural cellular automata-type NN to generate levels or wide-representation action distributions."""

    def __init__(self, n_in_chans, n_actions, **kwargs):
        """Build a 3x3 convolution followed by two 1x1 convolutions.

        Extra keyword arguments are accepted and ignored.
        """
        super().__init__()
        hidden = 32
        self.l1 = Conv2d(n_in_chans, hidden, 3, 1, 1, bias=True)
        self.l2 = Conv2d(hidden, hidden, 1, 1, 0, bias=True)
        self.l3 = Conv2d(hidden, n_actions, 1, 1, 0, bias=True)
        self.layers = [self.l1, self.l2, self.l3]
        self.apply(init_weights)

    def forward(self, x):
        """Run the network without gradients; returns ``(output, False)``."""
        with th.no_grad():
            hidden_1 = th.nn.functional.relu(self.l1(x))
            hidden_2 = th.nn.functional.relu(self.l2(hidden_1))
            x = th.sigmoid(self.l3(hidden_2))

        # axis 0 is batch
        # axis 1 is the tile-type (one-hot)
        # axis 0,1 is the x value
        # axis 0,2 is the y value
        return x, False
class MixNCA(GeneratorNN):
    """``GeneratorNN`` whose hidden activations are a mix of functions.

    The layers ``l1``/``l2``/``l3`` used by ``forward`` were never created
    when this class derived directly from ``ResettableNN``, and ``forward``
    returned nothing. It now inherits the layers from ``GeneratorNN`` (still
    a ``ResettableNN``) and returns ``(output, False)`` like its siblings.
    """

    def __init__(self, *args, **kwargs):
        """Arguments are forwarded to ``GeneratorNN`` (``n_in_chans``, ``n_actions``)."""
        super().__init__(*args, **kwargs)
        self.mix_activ = MixActiv()

    def forward(self, x):
        """Run the network without gradients; returns ``(output, False)``."""
        with th.no_grad():
            x = self.l1(x)
            x = self.mix_activ(x)
            x = self.l2(x)
            x = self.mix_activ(x)
            x = self.l3(x)
            x = th.sigmoid(x)
        return x, False
class CoordNCA(ResettableNN):
    """ A neural cellular automata-type NN to generate levels or wide-representation action distributions.

    With coordinates as additional input, like a CPPN."""

    def __init__(self, n_in_chans, n_actions):
        super().__init__()
        hidden = 28
        # Two extra input channels hold the coordinate grid.
        self.l1 = Conv2d(n_in_chans + 2, hidden, 3, 1, 1, bias=True)
        self.l2 = Conv2d(hidden, hidden, 1, 1, 0, bias=True)
        self.l3 = Conv2d(hidden, n_actions, 1, 1, 0, bias=True)
        self.layers = [self.l1, self.l2, self.l3]
        self.apply(init_weights)

    def forward(self, x):
        """Prepend normalised coordinates to ``x`` and run the network.

        Returns ``(output, False)``.
        """
        with th.no_grad():
            coords = get_coord_grid(x, normalize=True)
            net_in = th.hstack((coords, x))
            hidden_1 = th.nn.functional.relu(self.l1(net_in))
            hidden_2 = th.nn.functional.relu(self.l2(hidden_1))
            x = th.sigmoid(self.l3(hidden_2))

        # axis 0 is batch
        # axis 1 is the tile-type (one-hot)
        # axis 0,1 is the x value
        # axis 0,2 is the y value
        return x, False
from pytorch_neat.cppn import create_cppn, Leaf
import neat
from neat.genome import DefaultGenome
def get_coord_grid(x, normalize=False):
    """Build a two-channel grid of coordinates matching ``x``'s last two axes.

    Channel 0 varies along the second-to-last axis and channel 1 along the
    last axis. With ``normalize=True`` each coordinate is divided by the
    size of its axis, otherwise the raw indices are returned as floats.

    Returns:
        A tensor of shape ``(1, 2, x.shape[-2], x.shape[-1])``.
    """
    n_first, n_second = x.shape[-2], x.shape[-1]
    # Dividing by 1 in the un-normalised case still converts to float.
    first_scale, second_scale = (n_first, n_second) if normalize else (1, 1)
    first_axis = th.arange(n_first) / first_scale
    second_axis = th.arange(n_second) / second_scale
    first_grid, second_grid = th.meshgrid(first_axis, second_axis)
    return th.stack((first_grid, second_grid)).unsqueeze(0)
#class ReluCPPN(ResettableNN):
class FeedForwardCPPN(nn.Module):
    """Stack of 1x1 convolutions applied to a normalised coordinate grid.

    The content of the input is ignored; only the size of its last two axes
    is used, to build the coordinate grid the network is applied to.
    """

    def __init__(self, n_in_chans, n_actions):
        """``n_in_chans`` is accepted but unused: the input is always the 2-channel grid."""
        super().__init__()
        hidden = 64
        self.l1 = Conv2d(2, hidden, kernel_size=1)
        self.l2 = Conv2d(hidden, hidden, kernel_size=1)
        self.l3 = Conv2d(hidden, n_actions, kernel_size=1)
        self.layers = [self.l1, self.l2, self.l3]
        self.apply(init_weights)

    def forward(self, x):
        """Returns ``(output, True)``."""
        coords = get_coord_grid(x, normalize=True)
        with th.no_grad():
            hidden_1 = th.relu(self.l1(coords))
            hidden_2 = th.relu(self.l2(hidden_1))
            out = th.sigmoid(self.l3(hidden_2))
        return out, True
class GenReluCPPN(ResettableNN):
def __init__(self, n_in_chans, n_actions):
super().__init__()
n_hid = 64
self.l1 = Conv2d(2+n_in_chans, n_hid, kernel_size=1)
self.l2 = Conv2d(n_hid, n_hid, kernel_size=1)
self.l3 = Conv2d(n_hid, n_actions, kernel_size=1)
self.layers = [self.l1, self.l2, self.l3]
self.apply(init_weights)