% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article record (see the url field for the repository record it was
% exported from). The fields reportid, cin, ddc, cid, pnm, pid, typ and pubmed
% are repository metadata; they are not standard BibTeX fields, and unknown
% fields are ignored by bibliography styles.
@article{Walger:279188,
  author    = {Walger, Lennart
               and Schmitz, Matthias H
               and Bauer, Tobias
               and Kügler, David
               and Schuch, Fabiane
               and Arendt, Christophe
               and Baumgartner, Tobias
               and Birkenheier, Johannes
               and Borger, Valeri
               and Endler, Christoph
               and Grau, Franziska
               and Immanuel, Christian
               and Kölle, Markus
               and Kupczyk, Patrick
               and Lakghomi, Asadeh
               and Mackert, Sarah
               and Neuhaus, Elisabeth
               and Nordsiek, Julia
               and Odenthal, Anna-Maria
               and Olaciregui Dague, Karmele
               and Ostermann, Laura
               and Pukropski, Jan
               and Racz, Attila
               and von der Ropp, Klaus
               and Schmeel, Frederic Carsten
               and Schrader, Felix
               and Sitter, Aileen
               and Unruh-Pinheiro, Alexander
               and Voigt, Marilia
               and Vychopen, Martin
               and von Wedel, Philip
               and von Wrede, Randi
               and Attenberger, Ulrike
               and Vatter, Hartmut
               and Philipsen, Alexandra
               and Becker, Albert
               and Reuter, Martin
               and Hattingen, Elke
               and Radbruch, Alexander
               and Surges, Rainer
               and Rüber, Theodor},
  title     = {A public benchmark for human performance in the detection
               of focal cortical dysplasia},
  journal   = {Epilepsia Open},
  volume    = {10},
  number    = {3},
  issn      = {2470-9239},
  address   = {Hoboken, NJ},
  publisher = {Wiley},
  reportid  = {DZNE-2025-00716},
  pages     = {778--786},
  year      = {2025},
  abstract  = {This study aims to report human performance in the
               detection of Focal Cortical Dysplasias (FCDs) using an
               openly available dataset. Additionally, it defines a subset
               of this data as a 'difficult' test set to establish a public
               baseline benchmark against which new methods for automated
               FCD detection can be evaluated. The performance of 28 human
               readers with varying levels of expertise in detecting FCDs
               was originally analyzed using 146 subjects (not all of which
               are openly available), we analyzed the openly available
               subset of 85 cases. Performance was measured based on the
               overlap between predicted regions of interest (ROIs) and
               ground-truth lesion masks, using the Dice-Soerensen
               coefficient (DSC). The benchmark test set was chosen to
               consist of 15 subjects most predictive for human performance
               and 13 subjects identified by at most 3 of the 28
               readers. Expert readers achieved an average detection rate of
               68\%, compared to 45\% for non-experts and 27\% for
               laypersons. Neuroradiologists detected the highest
               percentage of lesions (64\%), while psychiatrists detected
               the least (34\%). Neurosurgeons had the highest ROI
               sensitivity (0.70), and psychiatrists had the highest ROI
               precision (0.78). The benchmark test set revealed an expert
               detection rate of 49\%. Reporting human performance in FCD
               detection provides a critical baseline for assessing the
               effectiveness of automated detection methods in a clinically
               relevant context. The defined benchmark test set serves as a
               useful indicator for evaluating advancements in
               computer-aided FCD detection approaches. Focal cortical
               dysplasias (FCDs) are malformations of cortical development
               and one of the most common causes of drug-resistant focal
               epilepsy. Once found, FCDs can be neurosurgically resected,
               which leads to seizure freedom in many cases. However, FCDs
               are difficult to detect in the visual assessment of magnetic
               resonance imaging. A myriad of algorithms for automated FCD
               detection have been developed, but their true clinical value
               remains unclear since there is no benchmark dataset for
               evaluation and comparison to human performance. Here, we use
               human FCD detection performance to define a benchmark
               dataset with which new methods for automated detection can
               be evaluated.},
  keywords  = {Humans / Benchmarking / Malformations of Cortical
               Development: diagnostic imaging / Malformations of Cortical
               Development: diagnosis / Magnetic Resonance Imaging / Female
               / Male / Adult / Focal Cortical Dysplasia / artificial
               intelligence (Other) / computer-aided detection (Other) /
               human performance (Other) / reader study (Other)},
  cin       = {AG Reuter / AG Stöcker / AG Radbruch},
  ddc       = {610},
  cid       = {I:(DE-2719)1040310 / I:(DE-2719)1013026 /
               I:(DE-2719)5000075},
  pnm       = {354 - Disease Prevention and Healthy Aging (POF4-354) / 353
               - Clinical and Health Care Research (POF4-353)},
  pid       = {G:(DE-HGF)POF4-354 / G:(DE-HGF)POF4-353},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:40167314},
  doi       = {10.1002/epi4.70028},
  url       = {https://pub.dzne.de/record/279188},
}