@comment{
Conference paper: TEDM-PU, in the proceedings of the 2019 IEEE International
Conference on Big Data. All author and editor names are stored in
Last, First form with no escaped braces. The proceedings title is kept only in
booktitle; it is not a publication series, so there is no series field.
}
@inproceedings{5efed8c0396a481d96c0d1eba0768657,
title = "{TEDM-PU}: A Tax Evasion Detection Method Based on Positive and Unlabeled Learning",
abstract = "Tax evasion detection plays a crucial role in reducing tax revenue loss and many efforts have been made to develop detection models based on machine learning techniques. To train an effective model to detect tax evaders, a large amount of data is required, especially sufficient labeled data. However, the expensive and time-consuming annotation process results in small amount of labeled data being available, which makes the development of detection models difficult. To address this issue, we propose a tax evasion detection method based on positive and unlabeled learning (TEDM-PU), to identify tax evasion by utilizing limited annotated tax evasion taxpayers and a large amount of unlabeled data. The TEDM-PU framework consists of three stages: a preprocessing stage extracting taxpayer features based on random forest, a pseudo labeling stage assigning pseudo labels to unlabeled samples based on PUAdapter, and a model training stage based on LightGBM method. To evaluate the effectiveness of our proposed TEDM-PU, we conduct experimental tests on real-world tax data. The results demonstrate that TEDM-PU method can detect tax evaders with higher accuracy and better interpretability than state-of-the-art methods.",
keywords = "PU learning, interpretability, tax evasion detection",
author = "Wu, Yingchao and Zheng, Qinghua and Gao, Yuda and Dong, Bo and Wei, Rongzhe and Zhang, Fa and He, Huan",
note = "Publisher Copyright: {\textcopyright} 2019 IEEE. 2019 IEEE International Conference on Big Data, Big Data 2019. Conference date: 9--12 December 2019.",
year = "2019",
month = dec,
doi = "10.1109/BigData47090.2019.9006325",
language = "english",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "1681--1686",
editor = "Baru, Chaitanya and Huan, Jun and Khan, Latifur and Hu, Xiaohua Tony and Ak, Ronay and Tian, Yuanyuan and Barga, Roger and Zaniolo, Carlo and Lee, Kisung and Ye, Yanfang Fanny",
booktitle = "Proceedings - 2019 {IEEE} International Conference on Big Data, Big Data 2019",
}