% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record for pub.dzne.de record 151616.
% The cin, ddc, cid, pnm, pid, typ and reportid fields are not standard BibTeX
% fields (presumably repository export metadata); standard styles ignore them.
@ARTICLE{Fiosina:151616,
      author       = {Fiosina, Jelena and Fiosins, Maksims and Bonn, Stefan},
      title        = {Explainable Deep Learning for Augmentation of Small {RNA}
                      Expression Profiles},
      journal      = {Journal of Computational Biology},
      volume       = {27},
      number       = {2},
      issn         = {1557-8666},
      address      = {Larchmont, NY},
      publisher    = {Liebert},
      reportid     = {DZNE-2020-01198},
      pages        = {234--247},
      year         = {2019},
      abstract     = {The lack of well-structured metadata annotations
                      complicates the reusability and interpretation of the
                      growing amount of publicly available RNA expression data.
                      The machine learning-based prediction of metadata (data
                      augmentation) can considerably improve the quality of
                      expression data annotation. In this study, we systematically
                      benchmark deep learning (DL) and random forest (RF)-based
                      metadata augmentation of tissue, age, and sex using small
                      RNA (sRNA) expression profiles. We use 4243 annotated
                      sRNA-Seq samples from the sRNA expression atlas database to
                      train and test the augmentation performance. In general, the
                      DL machine learner outperforms the RF method in almost all
                      tested cases. The average cross-validated prediction
                      accuracy of the DL algorithm for tissues is 96.5\%, for
                      sex is 77\%, and for age is 77.2\%. The average tissue
                      prediction accuracy for a completely new data set is
                      83.1\% (DL) and 80.8\% (RF). To understand which sRNAs
                      influence DL predictions, we employ backpropagation-based
                      feature importance scores using the DeepLIFT method, which
                      enable us to obtain information on biological relevance of
                      sRNAs.},
      cin          = {AG Bonn 1},
      ddc          = {570},
      cid          = {I:(DE-2719)1410003},
      pnm          = {342 - Disease Mechanisms and Model Systems (POF3-342)},
      pid          = {G:(DE-HGF)POF3-342},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:31855058},
      pmc          = {pmc:PMC7047095},
      doi          = {10.1089/cmb.2019.0320},
      url          = {https://pub.dzne.de/record/151616},
}