% Journal article: Long, Han, and E (2022), CSIAM Transactions on Applied Mathematics 3(2).
% Names are stored in "Last, First" form; the doi field holds the bare DOI (no resolver prefix).
@Article{CSIAM-AM-3-2,
author = {Long, Jihao and Han, Jiequn and E, Weinan},
title = {An {$L^2$} Analysis of Reinforcement Learning in High Dimensions with Kernel and Neural Network Approximation},
journal = {CSIAM Transactions on Applied Mathematics},
year = {2022},
volume = {3},
number = {2},
pages = {191--220},
abstract = {Reinforcement learning (RL) algorithms based on high-dimensional function approximation have achieved tremendous empirical success in large-scale problems with an enormous number of states. However, most analysis of such algorithms
gives rise to error bounds that involve either the number of states or the number
of features. This paper considers the situation where the function approximation is
made either using the kernel method or the two-layer neural network model, in the
context of a fitted Q-iteration algorithm with explicit regularization. We establish an $\tilde{O}(H^3|\mathcal{A}|^{\frac{1}{4}} n^{-\frac{1}{4}})$ bound for the optimal policy with $Hn$ samples, where $H$ is the length
of each episode and $|\mathcal{A}|$ is the size of action space. Our analysis hinges on analyzing
the $L^2$ error of the approximated Q-function using $n$ data points. Even though this
result still requires a finite-sized action space, the error bound is independent of the
dimensionality of the state space.},
issn = {2708-0579},
doi = {10.4208/csiam-am.SO-2021-0026},
url = {https://global-sci.com/article/82315/an-l2-analysis-of-reinforcement-learning-in-high-dimensions-with-kernel-and-neural-network-approximation}
}