% Poster paper in the PACT 2017 proceedings (pp. 144--145).
% Acronyms in the title are brace-protected so sentence-casing styles keep them
% uppercase; author names use the unambiguous "Last, First" form.
@inproceedings{a2a5301b688a459297dcb6f7b623bdda,
  author    = {Dai, Hongwen and Lin, Zhen and Li, Chao and Zhao, Chen and Wang, Fei and Zheng, Nanning and Zhou, Huiyang},
  title     = {{POSTER}: Accelerate {GPU} Concurrent Kernel Execution by Mitigating Memory Pipeline Stalls},
  abstract  = {In this study, we demonstrate that the performance may be undermined in the state-of-the-art intra-SM sharing schemes for concurrent kernel execution (CKE) on GPUs, due to the interference among concurrent kernels. We highlight that cache partitioning techniques proposed for CPUs are not effective for GPUs. Then we propose to balance memory accesses and limit the number of inflight memory instructions issued from concurrent kernels to reduce memory pipeline stalls. Our proposed schemes significantly improve the performance of two state-of-the-art intra-SM sharing schemes, Warped-Slicer and SMK.},
  keywords  = {Concurrent kernel execution, GPUs, Memory pipeline, Memory subsystem},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 26th International Conference on Parallel Architectures and Compilation Techniques, PACT 2017 ; Conference date: 09-09-2017 Through 13-09-2017},
  year      = {2017},
  month     = oct,
  day       = {31},
  doi       = {10.1109/PACT.2017.30},
  language  = {English},
  series    = {Parallel Architectures and Compilation Techniques - Conference Proceedings, PACT},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {144--145},
  booktitle = {Proceedings - 26th International Conference on Parallel Architectures and Compilation Techniques, PACT 2017},
}