% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
@ARTICLE{Aldenhoven:285257,
author = {Aldenhoven, Céline Madeleine and Nissen, Leon and
Heinemann, Marie and Dogdu, Cem and Hanke, Alexander and
Jonas, Stephan and Reimer, Lara Marie},
title = {{R}eal-{T}ime {E}motion {R}ecognition {P}erformance of
{M}obile {D}evices: {A} {D}etailed {A}nalysis of {C}amera
and {T}rue{D}epth {S}ensors {U}sing {A}pple's {ARK}it},
journal = {Sensors},
volume = {26},
number = {3},
issn = {1424-8220},
address = {Basel},
publisher = {MDPI},
reportid = {DZNE-2026-00199},
pages = {1060},
year = {2026},
abstract = {Facial features hold information about a person's emotions,
motor function, or genetic defects. Since most current
mobile devices are capable of real-time face detection using
cameras and depth sensors, real-time facial analysis can be
utilized in several mobile use cases. Understanding the
real-time emotion recognition capabilities of device sensors
and frameworks is vital for developing new, valid
applications. Therefore, we evaluated on-device emotion
recognition using Apple's ARKit on an iPhone 14 Pro. A
native app elicited 36 blend-shape-specific movements and 7
discrete emotions from $N = 31$ healthy adults. Per frame,
standardized ARKit blend shapes were classified using a
prototype-based cosine similarity metric; performance was
summarized as accuracy and area under the receiver operating
characteristic curve (AUC). Cosine similarity achieved an overall
accuracy of $68.3\%$, exceeding the mean of three human
raters ($58.9\%$; $+9.4$ percentage points, $\approx 16\%$
relative). Per-emotion accuracy was highest for joy, fear,
sadness, and surprise, and competitive for anger, disgust,
and contempt. AUCs were $\geq 0.84$ for all classes. The method
runs in real time on-device using only vector operations,
preserving privacy and minimizing compute. These results
indicate that a simple, interpretable cosine-similarity
classifier over ARKit blend shapes delivers
human-comparable, real-time facial emotion recognition on
commodity hardware, supporting privacy-preserving mobile
applications.},
keywords = {Humans / Emotions: physiology / Adult / Male / Female /
Mobile Applications / Smartphone / Young Adult / Facial
Expression / ARKit (Other) / emotion recognition (Other) /
face tracking (Other) / real-time (Other) / sensors (Other)},
cin = {AG Schneider},
ddc = {620},
cid = {I:(DE-2719)1011305},
pnm = {353 - Clinical and Health Care Research (POF4-353)},
pid = {G:(DE-HGF)POF4-353},
typ = {PUB:(DE-HGF)16},
pubmed = {pmid:41682575},
pmc = {pmc:PMC12899966},
doi = {10.3390/s26031060},
url = {https://pub.dzne.de/record/285257},
}
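% Note on the method described in the abstract above: classification uses a
% prototype-based cosine similarity over ARKit blend-shape coefficients. A
% minimal sketch of that decision rule, with notation ours rather than the
% paper's (\mathbf{b} is a frame's blend-shape coefficient vector, \mathbf{p}_e
% the prototype vector for emotion e):
%
%   \hat{e} = \operatorname*{arg\,max}_{e}
%             \frac{\mathbf{b} \cdot \mathbf{p}_e}
%                  {\lVert \mathbf{b} \rVert \, \lVert \mathbf{p}_e \rVert}
%
% This rule involves only dot products and norms, consistent with the
% abstract's statement that the method runs in real time on-device using only
% vector operations.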