@Article{JCM-39-1, author = {Agarwal, Chirag and Klobusicky, Joe and Schonfeld, Dan}, title = {Convergence of Backpropagation with Momentum for Network Architectures with Skip Connections}, journal = {Journal of Computational Mathematics}, year = {2021}, volume = {39}, number = {1}, pages = {147--158}, abstract = {

We study a class of deep neural networks whose architectures form a directed acyclic graph (DAG). For backpropagation defined by gradient descent with adaptive momentum, we show that the weights converge for a large class of nonlinear activation functions. The proof generalizes the results of Wu et al. (2008), who showed convergence for a feed-forward network with one hidden layer. To illustrate the effectiveness of DAG architectures, we describe an example of compression through an autoencoder and compare it against sequential feed-forward networks under several metrics.

}, issn = {1991-7139}, doi = {10.4208/jcm.1912-m2018-0279}, url = {https://global-sci.com/article/84222/convergence-of-backpropagation-with-momentum-for-network-architectures-with-skip-connections} }