% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record (European Journal of Radiology, vol. 193, 2025).
% The `pages` field holds a single number that matches the DOI suffix, so it
% is presumably the article number rather than a page range.
% Fields such as reportid, cin, ddc, cid, pnm, pid, typ and pubmed are not
% standard BibTeX fields; standard styles ignore unknown fields, and they are
% kept verbatim here.
@ARTICLE{Bendella:281369,
  author    = {Bendella, Zeynep and Wichtmann, Barbara Daria and Clauberg,
               Ralf and Keil, Vera C and Lehnen, Nils C and Haase, Robert
               and Sáez, Laura C and Wiest, Isabella C and Kather, Jakob
               Nikolas and Endler, Christoph and Radbruch, Alexander and
               Paech, Daniel and Deike, Katerina},
  title     = {{ChatGPT-4} shows high agreement in {MRI} protocol
               selection compared to board-certified neuroradiologists},
  journal   = {European Journal of Radiology},
  volume    = {193},
  issn      = {0720-048X},
  address   = {Amsterdam [u.a.]},
  publisher = {Elsevier Science},
  reportid  = {DZNE-2025-01116},
  pages     = {112416},
  year      = {2025},
  abstract  = {The aim of this study was to determine whether ChatGPT-4
               can correctly suggest MRI protocols and additional MRI
               sequences based on real-world Radiology Request Forms (RRFs)
               as well as to investigate the ability of ChatGPT-4 to
               suggest time saving protocols. Retrospectively, 1,001 RRFs of
               our Department of Neuroradiology (in-house dataset), 200
               RRFs of an independent Department of General Radiology
               (independent dataset) and 300 RRFs from an external, foreign
               Department of Neuroradiology (external dataset) were
               included. Patients' age, sex, and clinical information were
               extracted from the RRFs and used to prompt ChatGPT-4 to
               choose an adequate MRI protocol from predefined
               institutional lists. Four independent raters then assessed
               its performance. Additionally, ChatGPT-4 was tasked with
               creating case-specific protocols aimed at saving time. Two
               and 7 of 1,001 protocol suggestions of ChatGPT-4 were rated
               'unacceptable' in the in-house dataset for reader 1 and 2,
               respectively. No protocol suggestions were rated
               'unacceptable' in both the independent and external dataset.
               When assessing the inter-reader agreement, Cohen's weighted
               $\kappa$ ranged from 0.88 to 0.98 (each $p < 0.001$).
               ChatGPT-4's freely composed protocols were approved in
               766/1,001 (76.5\%) and 140/300 (46.67\%) cases of the
               in-house and external dataset with mean time savings
               (standard deviation) of 3:51 (minutes:seconds) (±2:40)
               minutes and 2:59 (±3:42) minutes per adopted in-house and
               external MRI protocol. ChatGPT-4 demonstrated a very high
               agreement with board-certified (neuro-)radiologists in
               selecting MRI protocols and was able to suggest approved
               time saving protocols from the set of available sequences.},
  keywords  = {ChatGPT-4 (Other) / Large language model (LLM) (Other) /
               MRI protocol (Other) / Radiology request form (Other)},
  cin       = {AG Radbruch},
  ddc       = {610},
  cid       = {I:(DE-2719)5000075},
  pnm       = {353 - Clinical and Health Care Research (POF4-353)},
  pid       = {G:(DE-HGF)POF4-353},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:40961911},
  doi       = {10.1016/j.ejrad.2025.112416},
  url       = {https://pub.dzne.de/record/281369},
}