% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record for DOI 10.56553/popets-2024-0062.
% The fields reportid, cin, ddc, cid, pnm, pid and typ are not standard BibTeX
% fields; they look like repository bookkeeping from the pub.dzne.de export
% (TODO confirm) and are ignored by the standard bibliography styles.
@article{Chen:271138,
  author    = {Chen, Dingfan and Oestreich, Marie and Afonja, Tejumade and
               Kerkouche, Raouf and Becker, Matthias and Fritz, Mario},
  title     = {Towards Biologically Plausible and Private Gene
               Expression Data Generation},
  journal   = {Proceedings on Privacy Enhancing Technologies},
  volume    = {2024},
  number    = {2},
  pages     = {531--554},
  year      = {2024},
  issn      = {2299-0984},
  address   = {Warsaw, Poland},
  publisher = {De Gruyter Open},
  doi       = {10.56553/popets-2024-0062},
  url       = {https://pub.dzne.de/record/271138},
  abstract  = {Generative models trained with Differential Privacy (DP)
               are becoming increasingly prominent in the creation of
               synthetic data for downstream applications. Existing
               literature, however, primarily focuses on basic benchmarking
               datasets and tends to report promising results only for
               elementary metrics and relatively simple data distributions.
               In this paper, we initiate a systematic analysis of how DP
               generative models perform in their natural application
               scenarios, specifically focusing on real-world gene
               expression data. We conduct a comprehensive analysis of five
               representative DP generation methods, examining them from
               various angles, such as downstream utility, statistical
               properties, and biological plausibility. Our extensive
               evaluation illuminates the unique characteristics of each DP
               generation method, offering critical insights into the
               strengths and weaknesses of each approach, and uncovering
               intriguing possibilities for future developments. Perhaps
               surprisingly, our analysis reveals that most methods are
               capable of achieving seemingly reasonable downstream
               utility, according to the standard evaluation metrics
               considered in existing literature. Nevertheless, we find
               that none of the DP methods are able to accurately capture
               the biological characteristics of the real dataset. This
               observation suggests a potential over-optimistic assessment
               of current methodologies in this field and underscores a
               pressing need for future enhancements in model design.},
  reportid  = {DZNE-2024-01006},
  cin       = {AG Schultze / AG Becker},
  ddc       = {004},
  cid       = {I:(DE-2719)1013038 / I:(DE-2719)5000079},
  pnm       = {354 - Disease Prevention and Healthy Aging (POF4-354)},
  pid       = {G:(DE-HGF)POF4-354},
  typ       = {PUB:(DE-HGF)16},
}