@comment{
  Conference paper in the IEEE SLT 2024 proceedings: fine-grained contrastive
  learning (FGCL) for Mandarin stuttering event detection.
  - Acronyms and proper nouns in the title are brace-protected so that
    sentence-casing styles keep their capitals.
  - The proceedings name is given once, in booktitle; it is deliberately not
    repeated in series, which styles print alongside booktitle.
  - language is spelled in ASCII so the entry also works with classic BibTeX.
}
@inproceedings{6f8195e18fd448e0a4c58175ac3ae91f,
  author    = {Jiang, Han and Wang, Wenyu and Zhou, Yiquan and Ding, Hongwu and Xu, Jiacheng and Zhu, Jihua},
  title     = {{FGCL}: Fine-Grained Contrastive Learning For {Mandarin} Stuttering Event Detection},
  booktitle = {Proceedings of 2024 {IEEE} Spoken Language Technology Workshop, {SLT} 2024},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2024},
  pages     = {379--384},
  doi       = {10.1109/SLT61566.2024.10832216},
  language  = {English},
  keywords  = {Mandarin stuttering event detection, contrastive learning, fine-grained, likelihood modeling},
  abstract  = {This paper presents the T031 team's approach to the StutteringSpeech Challenge in SLT2024. Mandarin Stuttering Event Detection (MSED) aims to detect instances of stuttering events in Mandarin speech. We propose a detailed acoustic analysis method to improve the accuracy of stutter detection by capturing subtle nuances that previous Stuttering Event Detection (SED) techniques have overlooked. To this end, we introduce the Fine-Grained Contrastive Learning (FGCL) framework for MSED. Specifically, we model the frame-level probabilities of stuttering events and introduce a mining algorithm to identify both easy and confusing frames. Then, we propose a stutter contrast loss to enhance the distinction between stuttered and fluent speech frames, thereby improving the discriminative capability of stuttered feature embeddings. Extensive evaluations on English and Mandarin datasets demonstrate the effectiveness of FGCL, achieving a significant increase of over 5.0\% in F1 score on Mandarin data.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE. 2024 IEEE Spoken Language Technology Workshop, SLT 2024; Conference date: 02-12-2024 Through 05-12-2024},
}