% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article record (European Journal of Radiology, vol. 183, 2025).
% The citation key is kept exactly as exported so existing citations still
% resolve.  The fields reportid, cin, ddc, cid, pnm, pid, typ and pubmed are
% not standard BibTeX fields; standard styles ignore them.
@article{Stber:273976,
      author       = {Stüber, Anna Theresa and Heimer, Maurice M. and Ta, Johanna
                      and Fabritius, Matthias P. and Hoppe, Boj F. and Sheikh,
                      Gabriel and Brendel, Matthias and Unterrainer, Lena and
                      Jurmeister, Philip and Tufman, Amanda and Ricke, Jens and
                      Cyran, Clemens C. and Ingrisch, Michael},
      title        = {Replication study of {PD-L1} status prediction in
                      {NSCLC} using {PET/CT} radiomics},
      journal      = {European Journal of Radiology},
      volume       = {183},
      issn         = {0720-048X},
      address      = {Amsterdam [u.a.]},
      publisher    = {Elsevier Science},
      reportid     = {DZNE-2024-01425},
      pages        = {111825},
      year         = {2025},
      abstract     = {This study investigates the predictive capability of
                      radiomics in determining programmed cell death ligand 1
                      (PD-L1) expression ($\geq 1\%$) status in non-small cell lung
                      cancer (NSCLC) patients using a newly collected [18F]FDG
                      PET/CT dataset. We aimed to replicate and validate the
                      radiomics-based machine learning (ML) model proposed by Zhao
                      et al. [1] predicting PD-L1 status from PET/CT-imaging. An
                      independent cohort of 254 NSCLC patients underwent [18F]FDG
                      PET/CT imaging, with primary tumor segmentation conducted
                      using lung tissue window (LTW) and more conservative soft
                      tissue window (STW) methods. Radiomics models ('Rad-score'
                      and 'complex model') and a clinical-stage model from Zhao et
                      al. were evaluated via 10-fold cross-validation and AUC
                      analysis, alongside a benchmark-study comparing different
                      ML-model pipelines. Clinicopathological data were collected
                      from medical records. On our data, the Rad-score model
                      yielded mean AUCs of 0.593 (STW) and 0.573 (LTW), below Zhao
                      et al.'s 0.761. The complex model achieved mean AUCs of
                      0.505 (STW) and 0.519 (LTW), lower than Zhao et al.'s 0.769.
                      The clinical model showed a mean AUC of 0.555, below Zhao et
                      al.'s 0.64. All models performed significantly lower than
                      Zhao et al.'s findings. Our benchmark study on four ML
                      pipelines revealed consistently low performance across all
                      configurations. Our study failed to replicate original
                      findings, suggesting poor model performance and questioning
                      predictive value of radiomics features in classifying PD-L1
                      expression from PET/CT imaging. These results highlight
                      challenges in replicating radiomics-based ML models and
                      stress the need for rigorous validation.},
      keywords     = {Machine learning benchmark (Other) / NSCLC (Other) / PD-L1
                      (Other) / PET/CT imaging data (Other) / Radiomics (Other) /
                      Replication study (Other)},
      cin          = {AG Haass},
      ddc          = {610},
      cid          = {I:(DE-2719)1110007},
      pnm          = {352 - Disease Mechanisms (POF4-352)},
      pid          = {G:(DE-HGF)POF4-352},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:39657546},
      doi          = {10.1016/j.ejrad.2024.111825},
      url          = {https://pub.dzne.de/record/273976},
}