% Conference paper in the ICANN 2023 proceedings (Lecture Notes in Computer Science).
% The citation key is the exporter's opaque identifier; it is kept unchanged so that
% existing \cite commands continue to resolve.
% The DOI is stored bare (no LaTeX escaping); URL-aware styles handle the underscore.
% TODO(review): the LNCS volume number is not recorded in this entry -- add a
% "volume" field once it has been confirmed against the publisher's page.
@inproceedings{326d74dc970f49cbb7acb8eba1802a0c,
  author    = {Shang, Jiahui and Wei, Ping and Zheng, Nanning},
  title     = {Cross-Graph {Transformer} Network for Temporal Sentence Grounding},
  booktitle = {Artificial Neural Networks and Machine Learning -- {ICANN} 2023 -- 32nd International Conference on Artificial Neural Networks, Proceedings},
  editor    = {Iliadis, Lazaros and Papaleonidas, Antonios and Angelov, Plamen and Jayne, Chrisina},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2023},
  pages     = {345--356},
  doi       = {10.1007/978-3-031-44223-0_28},
  isbn      = {9783031442223},
  language  = {English},
  keywords  = {Cross-modal, Graph attention, Temporal grounding},
  abstract  = {Temporal sentence grounding aims to retrieve moments associated with the given sentences in untrimmed videos, which is a multi-modal problem and needs the adequate understanding of the sentence and video structure as well as the accurate interaction of the two modals. In this paper, we propose a cross-graph Transformer network (CGTN) model to address this problem, where the sentence is taken as a dependency tree and the video as a graph, according to their non-linear structures. Based on the graph structures, we design the self-graph attention and cross-graph attention to model the relationship between the nodes in the graph and cross the graphs. We test the proposed model on two challenging datasets. Extensive experiments demonstrate the strength of our method.},
  note      = {Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 32nd International Conference on Artificial Neural Networks, ICANN 2023 ; Conference date: 26-09-2023 Through 29-09-2023},
}