% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record for the ehrapy framework paper (Nature Medicine, 2024).
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and pmc are not
% standard BibTeX fields; standard styles ignore them. They look like
% repository bookkeeping from the export source (see the url field) and are
% kept verbatim -- confirm against the exporting system before editing them.
@article{Heumos:272953,
  author    = {Heumos, Lukas and Ehmele, Philipp and Treis, Tim and
               Upmeier Zu Belzen, Julius and Roellin, Eljas and May, Lilly
               and Namsaraeva, Altana and Horlava, Nastassya and Shitov,
               Vladimir A. and Zhang, Xinyue and Zappia, Luke and Knoll,
               Rainer and Lang, Niklas J. and Hetzel, Leon and Virshup,
               Isaac and Sikkema, Lisa and Curion, Fabiola and Eils, Roland
               and Schiller, Herbert B. and Hilgendorff, Anne and Theis,
               Fabian J.},
  title     = {An open-source framework for end-to-end analysis of
               electronic health record data},
  journal   = {Nature Medicine},
  volume    = {30},
  number    = {11},
  pages     = {3369--3380},
  year      = {2024},
  issn      = {1078-8956},
  publisher = {Nature America Inc.},
  address   = {New York, NY},
  doi       = {10.1038/s41591-024-03214-0},
  url       = {https://pub.dzne.de/record/272953},
  pubmed    = {pmid:39266748},
  pmc       = {pmc:PMC11564094},
  reportid  = {DZNE-2024-01332},
  abstract  = {With progressive digitalization of healthcare systems
               worldwide, large-scale collection of electronic health
               records (EHRs) has become commonplace. However, an
               extensible framework for comprehensive exploratory analysis
               that accounts for data heterogeneity is missing. Here we
               introduce ehrapy, a modular open-source Python framework
               designed for exploratory analysis of heterogeneous
               epidemiology and EHR data. ehrapy incorporates a series of
               analytical steps, from data extraction and quality control
               to the generation of low-dimensional representations.
               Complemented by rich statistical modules, ehrapy facilitates
               associating patients with disease states, differential
               comparison between patient clusters, survival analysis,
               trajectory inference, causal inference and more. Leveraging
               ontologies, ehrapy further enables data sharing and training
               EHR deep learning models, paving the way for foundational
               models in biomedical research. We demonstrate ehrapy's
               features in six distinct examples. We applied ehrapy to
               stratify patients affected by unspecified pneumonia into
               finer-grained phenotypes. Furthermore, we reveal biomarkers
               for significant differences in survival among these groups.
               Additionally, we quantify medication-class effects of
               pneumonia medications on length of stay. We further
               leveraged ehrapy to analyze cardiovascular risks across
               different data modalities. We reconstructed disease state
               trajectories in patients with severe acute respiratory
               syndrome coronavirus 2 (SARS-CoV-2) based on imaging data.
               Finally, we conducted a case study to demonstrate how ehrapy
               can detect and mitigate biases in EHR data. ehrapy, thus,
               provides a framework that we envision will standardize
               analysis pipelines on EHR data and serve as a cornerstone
               for the community.},
  keywords  = {Humans / Electronic Health Records / COVID-19: epidemiology
               / SARS-CoV-2 / Pneumonia: epidemiology / Deep Learning},
  cin       = {AG Aschenbrenner},
  ddc       = {610},
  cid       = {I:(DE-2719)5000082},
  pnm       = {354 - Disease Prevention and Healthy Aging (POF4-354)},
  pid       = {G:(DE-HGF)POF4-354},
  typ       = {PUB:(DE-HGF)16},
}