% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference paper, chapter 14 of Lecture Notes in Computer Science vol. 11490
% (repository record 164502, see the url field).
% - booktitle holds only the proceedings title; the full catalogue string
%   (editor, ISSN, both ISBNs, volume DOI) is kept verbatim in the comment field.
% - reportid, cin, cid, pnm, pid and typ are non-standard repository fields;
%   standard bibliography styles ignore unknown fields.
% - Only the acronym in the title is brace-protected, so each style can apply
%   its own casing to the remaining words.
@INPROCEEDINGS{Cai:164502,
      author       = {Fiosina, Jelena and Fiosins, Maksims and Bonn, Stefan},
      editor       = {Cai, Zhipeng and Skums, Pavel and Li, Min},
      title        = {Deep Learning and Random Forest-Based Augmentation of
                      {sRNA} Expression Profiles},
      booktitle    = {Bioinformatics Research and Applications},
      series       = {Lecture Notes in Computer Science},
      volume       = {11490},
      pages        = {159--170},
      publisher    = {Springer International Publishing},
      address      = {Cham},
      organization = {International Symposium on Bioinformatics Research and
                      Applications},
      year         = {2019},
      isbn         = {978-3-030-20241-5},
      doi          = {10.1007/978-3-030-20242-2_14},
      url          = {https://pub.dzne.de/record/164502},
      reportid     = {DZNE-2022-01054},
      comment      = {Bioinformatics Research and Applications / Cai, Zhipeng
                      (Editor) ; Cham : Springer International Publishing, 2019,
                      Chapter 14 ; ISSN: 0302-9743=1611-3349 ; ISBN:
                      978-3-030-20241-5=978-3-030-20242-2 ;
                      doi:10.1007/978-3-030-20242-2},
      abstract     = {The lack of well-structured annotations in a growing amount
                      of RNA expression data complicates data interoperability and
                      reusability. Commonly used text mining methods extract
                      annotations from existing unstructured data descriptions and
                      often provide inaccurate output that requires manual
                      curation. Automatic data-based augmentation (generation of
                      annotations on the base of expression data) can considerably
                      improve the annotation quality and has not been
                      well-studied. We formulate an automatic augmentation of
                      small RNA-seq expression data as a classification problem
                      and investigate deep learning (DL) and random forest (RF)
                      approaches to solve it. We generate tissue and sex
                      annotations from small RNA-seq expression data for tissues
                      and cell lines of homo sapiens. We validate our approach on
                      4243 annotated small RNA-seq samples from the Small RNA
                      Expression Atlas (SEA) database. The average prediction
                      accuracy for tissue groups is $98\%$ (DL), for tissues -
                      $96.5\%$ (DL), and for sex - $77\%$ (DL). The “one dataset
                      out” average accuracy for tissue group prediction is
                      $83\%$ (DL) and $59\%$ (RF). On average, DL provides better
                      results as compared to RF, and considerably improves
                      classification performance for ‘unseen’ datasets.},
      cin          = {AG Heutink 1},
      cid          = {I:(DE-2719)1210002},
      pnm          = {899 - ohne Topic (POF4-899)},
      pid          = {G:(DE-HGF)POF4-899},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}