@article{Mnih2016,
abstract = {We propose a conceptually simple and lightweight framework for deep reinforcement learning that uses asynchronous gradient descent for optimization of deep neural network controllers. We present asynchronous variants of four standard reinforcement learning algorithms and show that parallel actor-learners have a stabilizing effect on training allowing all four methods to successfully train neural network controllers. The best performing method, an asynchronous variant of actor-critic, surpasses the current state-of-the-art on the Atari domain while training for half the time on a single multi-core CPU instead of a GPU. Furthermore, we show that asynchronous actor-critic succeeds on a wide variety of continuous motor control problems as well as on a new task of navigating random 3D mazes using a visual input.},
archivePrefix = {arXiv},
arxivId = {1602.01783},
author = {Mnih, Volodymyr and Badia, Adri{\`{a}} Puigdom{\`{e}}nech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy P. and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
eprint = {1602.01783},
file = {:Users/tas/LaTex/Forschungsarbeit/Papers/1602.01783.pdf:pdf},
title = {{Asynchronous Methods for Deep Reinforcement Learning}},
url = {http://arxiv.org/abs/1602.01783},
volume = {48},
year = {2016}
}
@article{Francois-lavet2018,
archivePrefix = {arXiv},
arxivId = {1811.12560},
author = {Fran{\c{c}}ois-Lavet, Vincent and Henderson, Peter and Islam, Riashat and Bellemare, Marc G. and Pineau, Joelle},
doi = {10.1561/2200000071},
eprint = {1811.12560},
file = {:Users/tas/LaTex/Forschungsarbeit/Papers/1811.12560.pdf:pdf},
isbn = {9781680833683},
issn = {19358245},
journal = {Foundations and Trends in Machine Learning},
title = {{An Introduction to Deep Reinforcement Learning}},
url = {http://arxiv.org/abs/1811.12560},
year = {2018}
}
@article{Silver,
author = {Silver, David and Hubert, Thomas and Schrittwieser, Julian and Antonoglou, Ioannis and Lai, Matthew and Guez, Arthur and Lanctot, Marc and Sifre, Laurent and Kumaran, Dharshan and Graepel, Thore and Lillicrap, Timothy and Simonyan, Karen and Hassabis, Demis},
file = {:Users/tas/LaTex/Forschungsarbeit/Papers/alphazero{\_}preprint.pdf:pdf},
title = {{A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play}},
year = {2018}
}
@article{Schaul2016,
archivePrefix = {arXiv},
arxivId = {1511.05952},
author = {Schaul, Tom and Quan, John and Antonoglou, Ioannis and Silver, David},
eprint = {1511.05952},
file = {:Users/tas/LaTex/Forschungsarbeit/Papers/prioritized-replay.pdf:pdf},
pages = {1--23},
title = {{Prioritized Experience Replay}},
year = {2016}
}
@article{Sutton2015,
author = {Sutton, Richard S and Barto, Andrew G},
file = {:Users/tas/LaTex/Forschungsarbeit/Papers/SuttonBartoIPRLBook2ndEd.pdf:pdf},
title = {{Reinforcement Learning: An Introduction}},
year = {2015}
}
@book{Rojas1996,
address = {Berlin},
author = {Rojas, Ra{\'{u}}l},
publisher = {Springer},
title = {{Neural Networks: A Systematic Introduction}},
year = {1996}
}
@book{Patterson1997,
address = {Haar bei M{\"{u}}nchen},
author = {Patterson, Dan W.},
edition = {2},
publisher = {Prentice Hall},
title = {{Introduction to Artificial Intelligence}},
year = {1997}
}
@article{Bartz2018,
author = {Bartz, Rainer},
file = {:Users/tas/LaTex/Forschungsarbeit/Papers/Bartz{\_}CI.pdf:pdf},
title = {{Compendium Computational Intelligence}},
url = {http://www.nt-rt.fh-koeln.de/index.html},
year = {2018}
}
@article{Schwenker2001,
abstract = {In this paper, learning algorithms for radial basis function (RBF) networks are discussed. Whereas multilayer perceptrons (MLP) are typically trained with backpropagation algorithms, starting the training procedure with a random initialization of the MLP's parameters, an RBF network may be trained in many different ways. We categorize these RBF training methods into one-, two-, and three-phase learning schemes. Two-phase RBF learning is a very common learning scheme. The two layers of an RBF network are learnt separately; first the RBF layer is trained, including the adaptation of centers and scaling parameters, and then the weights of the output layer are adapted. RBF centers may be trained by clustering, vector quantization and classification tree algorithms, and the output layer by supervised learning (through gradient descent or pseudo inverse solution). Results from numerical experiments of RBF classifiers trained by two-phase learning are presented in three completely different pattern recognition applications: (a) the classification of 3D visual objects; (b) the recognition hand-written digits (2D objects); and (c) the categorization of high-resolution electrocardiograms given as a time series (1D objects) and as a set of features extracted from these time series. In these applications, it can be observed that the performance of RBF classifiers trained with two-phase learning can be improved through a third backpropagation-like training phase of the RBF network, adapting the whole set of parameters (RBF centers, scaling parameters, and output layer weights) simultaneously. This, we call three-phase learning in RBF networks. A practical advantage of two- and three-phase learning in RBF networks is the possibility to use unlabeled training data for the first training phase. Support vector (SV) learning in RBF networks is a different learning approach. SV learning can be considered, in this context of learning, as a special type of one-phase learning, where only the output layer weights of the RBF network are calculated, and the RBF centers are restricted to be a subset of the training data. Numerical experiments with several classifier schemes including k-nearest-neighbor, learning vector quantization and RBF classifiers trained through two-phase, three-phase and support vector learning are given. The performance of the RBF classifiers trained through SV learning and three-phase learning are superior to the results of two-phase learning, but SV learning often leads to complex network structures, since the number of support vectors is not a small fraction of the total number of data points.},
author = {Schwenker, Friedhelm and Kestler, Hans A. and Palm, G{\"{u}}nther},
journal = {Neural Networks},
number = {4-5},
pages = {439--458},
title = {{Three learning phases for radial-basis-function networks}},
url = {https://www.sciencedirect.com/science/article/pii/S0893608001000272},
volume = {14},
year = {2001}
}
@article{Schomaker2017,
author = {Schomaker, Lambert R. B.},
file = {:Users/tas/LaTex/Forschungsarbeit/Papers/VideoGameRL{\_}Wolfshaar.pdf:pdf},
title = {{Deep Reinforcement Learning of Video Games}},
year = {2017}
}
@article{VanHasselt2015,
abstract = {The popular Q-learning algorithm is known to overestimate action values under certain conditions. It was not previously known whether, in practice, such overestimations are common, whether they harm performance, and whether they can generally be prevented. In this paper, we answer all these questions affirmatively. In particular, we first show that the recent DQN algorithm, which combines Q-learning with a deep neural network, suffers from substantial overestimations in some games in the Atari 2600 domain. We then show that the idea behind the Double Q-learning algorithm, which was introduced in a tabular setting, can be generalized to work with large-scale function approximation. We propose a specific adaptation to the DQN algorithm and show that the resulting algorithm not only reduces the observed overestimations, as hypothesized, but that this also leads to much better performance on several games.},
archivePrefix = {arXiv},
arxivId = {1509.06461},
author = {van Hasselt, Hado and Guez, Arthur and Silver, David},
eprint = {1509.06461},
file = {:Users/tas/LaTex/Forschungsarbeit/Papers/doubledqn.pdf:pdf},
number = {2},
title = {{Deep Reinforcement Learning with Double Q-learning}},
url = {http://arxiv.org/abs/1509.06461},
year = {2015}
}
@article{Moritz2017,
abstract = {The next generation of AI applications will continuously interact with the environment and learn from these interactions. These applications impose new and demanding systems requirements, both in terms of performance and flexibility. In this paper, we consider these requirements and present Ray---a distributed system to address them. Ray implements a unified interface that can express both task-parallel and actor-based computations, supported by a single dynamic execution engine. To meet the performance requirements, Ray employs a distributed scheduler and a distributed and fault-tolerant store to manage the system's control state. In our experiments, we demonstrate scaling beyond 1.8 million tasks per second and better performance than existing specialized systems for several challenging reinforcement learning applications.},
archivePrefix = {arXiv},
arxivId = {1712.05889},
author = {Moritz, Philipp and Nishihara, Robert and Wang, Stephanie and Tumanov, Alexey and Liaw, Richard and Liang, Eric and Elibol, Melih and Yang, Zongheng and Paul, William and Jordan, Michael I. and Stoica, Ion},
eprint = {1712.05889},
file = {:Users/tas/LaTex/Forschungsarbeit/Papers/ray.pdf:pdf},
title = {{Ray: A Distributed Framework for Emerging AI Applications}},
url = {http://arxiv.org/abs/1712.05889},
year = {2017}
}