% RepMLPNet conference paper (CVPR 2022, see doi). Typed as inproceedings with
% the proceedings title in booktitle; 1063-6919 is the proceedings ISSN.
% Funding/copyright boilerplate is kept in annote so it is not printed in
% reference lists by standard styles.
@inproceedings{7c337c20fef749178ddc88e934d322ec,
  author    = "Ding, Xiaohan and Chen, Honghao and Zhang, Xiangyu and Han, Jungong and Ding, Guiguang",
  title     = "{RepMLPNet}: Hierarchical Vision {MLP} with Re-parameterized Locality",
  booktitle = "Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition",
  year      = "2022",
  publisher = "IEEE Press",
  address   = "United States of America",
  doi       = "10.1109/CVPR52688.2022.00066",
  issn      = "1063-6919",
  language  = "English",
  keywords  = "Deep learning architectures and techniques",
  abstract  = "Compared to convolutional layers, fully-connected (FC) layers are better at modeling the long-range dependencies but worse at capturing the local patterns, hence usually less favored for image recognition. In this paper, we propose a methodology, Locality Injection, to incorporate local priors into an FC layer via merging the trained parameters of a parallel conv kernel into the FC kernel. Locality Injection can be viewed as a novel Structural Re-parameterization method since it equivalently converts the structures via transforming the parameters. Based on that, we propose a multi-layer-perceptron (MLP) block named RepMLP Block, which uses three FC layers to extract features, and a novel architecture named RepMLPNet. The hierarchical design distinguishes RepMLPNet from the other concurrently proposed vision MLPs. As it produces feature maps of different levels, it qualifies as a backbone model for downstream tasks like semantic segmentation. Our results reveal that 1) Locality Injection is a general methodology for MLP models; 2) RepMLPNet has favorable accuracy-efficiency trade-off compared to the other MLPs; 3) RepMLPNet is the first MLP that seamlessly transfer to Cityscapes semantic segmentation. The code and models are available at https://github.com/DingXiaoH/RepMLP.",
  annote    = "Funding Information: *This work is supported by the National Natural Science Foundation of China (Nos.61925107, U1936202, 62021002) and the Beijing Academy of Artificial Intelligence (BAAI). This work is done during Xiaohan Ding and Honghao Chen{\textquoteright}s internship at MEGVII Technology. {\textdagger}Corresponding author. Publisher Copyright: {\textcopyright} 2022 IEEE.",
}