% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference paper published as chapter 14 of the proceedings volume
% "Bioinformatics Research and Applications" (Lecture Notes in Computer
% Science, vol. 11490). The `doi` field identifies the chapter itself; the
% DOI of the whole volume, both ISSNs and both ISBNs are preserved verbatim
% in the `comment` field. Fields such as reportid, cin, cid, pnm, pid and typ
% are repository-specific and are ignored by standard bibliography styles.
@inproceedings{Cai:164502,
  author       = {Fiosina, Jelena and Fiosins, Maksims and Bonn, Stefan},
  editor       = {Cai, Zhipeng and Skums, Pavel and Li, Min},
  title        = {Deep Learning and Random Forest-Based Augmentation of
                  {sRNA} Expression Profiles},
  booktitle    = {Bioinformatics Research and Applications},
  series       = {Lecture Notes in Computer Science},
  volume       = {11490},
  pages        = {159--170},
  publisher    = {Springer International Publishing},
  address      = {Cham},
  organization = {International Symposium on Bioinformatics Research and
                  Applications},
  year         = {2019},
  isbn         = {978-3-030-20241-5},
  doi          = {10.1007/978-3-030-20242-2_14},
  url          = {https://pub.dzne.de/record/164502},
  reportid     = {DZNE-2022-01054},
  comment      = {Bioinformatics Research and Applications / Cai, Zhipeng
                  (Editor) ; Cham : Springer International Publishing, 2019,
                  Chapter 14 ; ISSN: 0302-9743=1611-3349 ; ISBN:
                  978-3-030-20241-5=978-3-030-20242-2 ;
                  doi:10.1007/978-3-030-20242-2},
  abstract     = {The lack of well-structured annotations in a growing amount
                  of RNA expression data complicates data interoperability and
                  reusability. Commonly used text mining methods extract
                  annotations from existing unstructured data descriptions and
                  often provide inaccurate output that requires manual
                  curation. Automatic data-based augmentation (generation of
                  annotations on the base of expression data) can considerably
                  improve the annotation quality and has not been
                  well-studied. We formulate an automatic augmentation of
                  small RNA-seq expression data as a classification problem
                  and investigate deep learning (DL) and random forest (RF)
                  approaches to solve it. We generate tissue and sex
                  annotations from small RNA-seq expression data for tissues
                  and cell lines of homo sapiens. We validate our approach on
                  4243 annotated small RNA-seq samples from the Small RNA
                  Expression Atlas (SEA) database. The average prediction
                  accuracy for tissue groups is $98\%$ (DL), for tissues -
                  $96.5\%$ (DL), and for sex - $77\%$ (DL). The “one dataset
                  out” average accuracy for tissue group prediction is
                  $83\%$ (DL) and $59\%$ (RF). On average, DL provides better
                  results as compared to RF, and considerably improves
                  classification performance for ‘unseen’ datasets.},
  cin          = {AG Heutink 1},
  cid          = {I:(DE-2719)1210002},
  pnm          = {899 - ohne Topic (POF4-899)},
  pid          = {G:(DE-HGF)POF4-899},
  typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}