% Conference paper in the ECAI 2025 / PAIS 2025 proceedings
% (series: Frontiers in Artificial Intelligence and Applications).
% The day field is kept from the original export; it is not a standard
% BibTeX field, so classic styles are expected to ignore it.
@inproceedings{b0b7765effd34cf9bec1c765cfa06c2b,
  author    = {Li, Baoqiang and Zhang, Tengyu and Zuo, Zuo and Wu, Zongze},
  title     = {Cross-Modal Semantic Alignment for Efficient Unsupervised Multimodal Anomaly Detection},
  booktitle = {{ECAI} 2025 - 28th European Conference on Artificial Intelligence, including 14th Conference on Prestigious Applications of Intelligent Systems, {PAIS} 2025 - Proceedings},
  editor    = {Lynce, Ines and Murano, Nello and Vallati, Mauro and Villata, Serena and Chesani, Federico and Milano, Michela and Omicini, Andrea and Dastani, Mehdi},
  series    = {Frontiers in Artificial Intelligence and Applications},
  publisher = {IOS Press BV},
  year      = {2025},
  month     = oct,
  day       = {21},
  pages     = {1591--1598},
  doi       = {10.3233/FAIA250984},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2025 The Authors.; 28th European Conference on Artificial Intelligence, ECAI 2025, including 14th Conference on Prestigious Applications of Intelligent Systems, PAIS 2025 ; Conference date: 25-10-2025 Through 30-10-2025},
  abstract  = {Unsupervised industrial anomaly detection aims to train a model capable of identifying diverse anomalous patterns by utilizing only normal samples. Numerous investigations have confirmed the effectiveness of such paradigms for surface anomaly detection using only 2D images. To better capture structural anomalies, recent studies have investigated Multimodal Unsupervised Industrial Anomaly Detection by jointly utilizing 2D images and 3D point cloud. Existing methods either ignore the complementarity between different modalities or consume a lot of storage space to learn and store normal features, making it difficult to balance efficient multi-modal feature utilization with computational efficiency. This paper proposes a novel and efficient unsupervised multimodal anomaly detection framework to fully exploit information from dual-modality data. By modeling latent semantic consistency of normal samples across modalities, the method detects cross-modal consistency deviations during testing for anomaly localization. Simultaneously, lightweight memory banks are separately constructed for each modality, capturing intra-modal feature inconsistencies to provide a complementary anomaly identification perspective parallel to cross-modal detection. Extensive experiments demonstrate that our framework, through simultaneously considering both inter and intra-modal consistency, achieves state-of-the-art (SOTA) detection and segmentation performance on the MVTec 3D-AD dataset with lower computational costs and faster inference speed, while maintaining robust advantages in few-shot setting.},
}