% HyperDiff conference paper (ICASSP 2025 proceedings).
% The citation key is kept exactly as exported so existing citations still resolve.
% Names use the unambiguous "Last, First" form; braces in the title and venue fields
% protect words whose capitalisation must survive sentence-casing styles.
@inproceedings{ed57ef42cd234e76a60d042f0d3bd57a,
title = "{HyperDiff}: Masked Diffusion Model with High-efficient Transformer for Hyperspectral Image Cross-Scene Classification",
abstract = "Hyperspectral Image (HSI) cross-scene classification is a challenging task in remote sensing, particularly when real-time processing of Target Domain (TD) HSI is required, and data cannot be reused for training. While deep learning methods have shown promising results, the generalization ability of HSI representations remains limited, mainly due to class label imbalance. This paper introduces a dual-stage learning framework based on transfer learning to enhance classification accuracy in the TD. The framework includes a self-supervised learning stage and a supervised fine-tuning stage. The self-supervised stage focuses on learning robust representations by leveraging inherent structures within HSI data, while the fine-tuning stage uses training labels to extract semantic information. A masked diffusion model predicts masked tokens from unmasked ones, capturing both high-level structures and fine details in HSI data. An efficient spatiospectral Transformer, which removes self-attention from the decoder, is proposed to enhance the self-supervised process. This design allows mask tokens to obtain information from visible tokens without interacting with each other, reducing sequence length and computational costs. By decoding each mask token conditionally independently, only a subset of masked tokens is processed. Extensive experiments on two public HSI datasets demonstrate that the proposed method outperforms state-of-the-art techniques.",
keywords = "Cross-scene classification, Diffusion model, Hyperspectral image, Transformer",
author = "Zhang, Pei and Wang, Dong and Wu, Chanyue and Yang, Jing and Kang, Lei and Bai, Zongwen and Li, Ying and Shen, Qiang",
note = "Publisher Copyright: {\textcopyright} 2025 IEEE.; 2025 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2025 ; Conference date: 06-04-2025 Through 11-04-2025",
year = "2025",
month = mar,
day = "7",
doi = "10.1109/ICASSP49660.2025.10887647",
language = "English",
series = "{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings",
publisher = "IEEE Press",
editor = "Rao, Bhaskar D. and Trancoso, Isabel and Sharma, Gaurav and Mehta, Neelesh B.",
booktitle = "2025 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2025 - Proceedings",
address = "United States of America",
}