% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Conference paper in the ISBI 2023 proceedings (IEEE); the url field points
% to the record at pub.dzne.de.
%  - venue / eventdate are biblatex fields describing the conference itself;
%    classic BibTeX styles do not know them and skip them.
%  - comment, reportid, cin, cid, pnm, pid and typ are not standard
%    BibTeX/biblatex fields; they are kept verbatim as repository metadata.
%  - Values are UTF-8 (see the note at the top of this file).
@inproceedings{Kofler:265364,
      author       = {Kofler, Florian and Wahle, Johannes and Ezhov, Ivan and
                      Wagner, Sophia J. and Al-Maskari, Rami and Gryska, Emilia
                      and Todorov, Mihail and Bukas, Christina and Meissen, Felix
                      and Peng, Tingying and Ertürk, Ali and Rueckert, Daniel and
                      Heckemann, Rolf and Kirschke, Jan and Zimmer, Claus and
                      Wiestler, Benedikt and Menze, Bjoern and Piraud, Marie},
      title        = {Approaching {Peak Ground Truth}},
      booktitle    = {2023 {IEEE} 20th International Symposium on Biomedical
                      Imaging ({ISBI})},
      venue        = {Cartagena, Colombia},
      eventdate    = {2023-04-18/2023-04-21},
      publisher    = {IEEE},
      pages        = {1--6},
      year         = {2023},
      month        = apr,
      isbn         = {978-1-6654-7358-3},
      doi          = {10.1109/ISBI53787.2023.10230497},
      url          = {https://pub.dzne.de/record/265364},
      abstract     = {Machine learning models are typically evaluated by
                      computing similarity with reference annotations and trained
                      by maximizing similarity with such. Especially in the
                      biomedical domain, annotations are subjective and suffer
                      from low inter- and intra-rater reliability. Since
                      annotations only reflect one interpretation of the real
                      world, this can lead to sub-optimal predictions even though
                      the model achieves high similarity scores. Here, the
                      theoretical concept of Peak Ground Truth (PGT) is
                      introduced. PGT marks the point beyond which an increase in
                      similarity with the reference annotation stops translating
                      to better Real World Model Performance (RWMP). Additionally,
                      a quantitative technique to approximate PGT by computing
                      inter- and intra-rater reliability is proposed. Finally,
                      four categories of PGT-aware strategies to evaluate and
                      improve model performance are reviewed. © 2023 IEEE.},
      comment      = {2023 IEEE 20th International Symposium on Biomedical
                      Imaging (ISBI) : [Proceedings] - IEEE, 2023. - ISBN
                      978-1-6654-7358-3 - doi:10.1109/ISBI53787.2023.10230497},
      reportid     = {DZNE-2023-00988},
      cin          = {AG Mukherjee},
      cid          = {I:(DE-2719)1013030},
      pnm          = {354 - Disease Prevention and Healthy Aging (POF4-354)},
      pid          = {G:(DE-HGF)POF4-354},
      typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}