% Conference paper: "Salience DETR" (CVPR 2024, IEEE Computer Society).
% The citation key is an opaque hash-like identifier (presumably generated by
% the exporting tool); it is kept unchanged so existing citations still resolve.
% Cleanup applied to the exported record:
%   - title/booktitle: acronyms are brace-protected against style recasing.
%   - author: names are given in the unambiguous "Last, First" form.
%   - abstract: line-break hyphenation artifacts from text extraction removed.
%   - language: localised non-ASCII value replaced by the identifier "english".
% The note field still carries the exporter's copyright and conference-date
% text verbatim, because no other field in this entry holds that information.
@inproceedings{34e5c8606cde45efa9f9159346fd6159,
  author    = {Hou, Xiuquan and Liu, Meiqin and Zhang, Senlin and Wei, Ping and Chen, Badong},
  title     = {Salience {DETR}: Enhancing Detection Transformer with Hierarchical Salience Filtering Refinement},
  booktitle = {Proceedings - 2024 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2024},
  series    = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
  publisher = {IEEE Computer Society},
  year      = {2024},
  pages     = {17574--17583},
  doi       = {10.1109/CVPR52733.2024.01664},
  isbn      = {9798350353006},
  language  = {english},
  keywords  = {Detection transformer, Object detection, Query refinement, Query salience, Self-attention},
  abstract  = {DETR-like methods have significantly increased detection performance in an end-to-end manner. The mainstream two-stage frameworks of them perform dense self-attention and select a fraction of queries for sparse cross-attention, which is proven effective for improving performance but also introduces a heavy computational burden and high dependence on stable query selection. This paper demonstrates that suboptimal two-stage selection strategies result in scale bias and redundancy due to the mismatch between selected queries and objects in two-stage initialization. To address these issues, we propose hierarchical salience filtering refinement, which performs transformer encoding only on filtered discriminative queries, for a better trade-off between computational efficiency and precision. The filtering process overcomes scale bias through a novel scale-independent salience supervision. To compensate for the semantic misalignment among queries, we introduce elaborate query refinement modules for stable two-stage initialization. Based on above improvements, the proposed Salience DETR achieves significant improvements of +4.0\% AP, +0.2\% AP, +4.4\% AP on three challenging task-specific detection datasets, as well as 49.2\% AP on COCO 2017 with less FLOPs. The code is available at https://github.com/xiuqhou/Salience-DETR.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2024 ; Conference date: 16-06-2024 Through 22-06-2024},
}