% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article on AAclust (Breimann and Frishman, 2024); repository record
% 272949 at pub.dzne.de. Fields such as reportid, cin, ddc, cid, pnm, pid, typ,
% pubmed and pmc are not standard BibTeX fields; standard styles ignore them.
% "pages" holds the article identifier (vbae165), not a page range.
@ARTICLE{Breimann:272949,
      author       = {Breimann, Stephan and Frishman, Dmitrij},
      title        = {{AAclust}: k-optimized clustering for selecting
                      redundancy-reduced sets of amino acid scales},
      journal      = {Bioinformatics Advances},
      volume       = {4},
      number       = {1},
      issn         = {2635-0041},
      address      = {Oxford},
      publisher    = {Oxford University Press},
      reportid     = {DZNE-2024-01329},
      pages        = {vbae165},
      year         = {2024},
      abstract     = {Amino acid scales are crucial for sequence-based protein
                      prediction tasks, yet no gold standard scale set or simple
                      scale selection methods exist. We developed AAclust, a
                      wrapper for clustering models that require a pre-defined
                      number of clusters k, such as k-means. AAclust obtains
                      redundancy-reduced scale sets by clustering and selecting
                      one representative scale per cluster, where k can either be
                      optimized by AAclust or defined by the user. The utility of
                      AAclust scale selections was assessed by applying machine
                      learning models to 24 protein benchmark datasets. We found
                      that top-performing scale sets were different for each
                      benchmark dataset and significantly outperformed scale sets
                      used in previous studies. Noteworthy is the strong
                      dependence of the model performance on the scale set size.
                      AAclust enables a systematic optimization of scale-based
                      feature engineering in machine learning applications. The
                      AAclust algorithm is part of AAanalysis, a Python-based
                      framework for interpretable sequence-based protein
                      prediction, which is documented and accessible at
                      https://aaanalysis.readthedocs.io/en/latest and
                      https://github.com/breimanntools/aaanalysis.},
      cin          = {AG Steiner},
      ddc          = {004},
      cid          = {I:(DE-2719)1110000-1},
      pnm          = {352 - Disease Mechanisms (POF4-352)},
      pid          = {G:(DE-HGF)POF4-352},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:39544628},
      pmc          = {pmc:PMC11562964},
      doi          = {10.1093/bioadv/vbae165},
      url          = {https://pub.dzne.de/record/272949},
}