% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Conference paper with a bilingual (English | Turkish) title.
% year/month serve classic BibTeX styles; date serves biblatex/biber.
% eventtitle, venue and eventdate are biblatex fields; classic styles ignore them.
% reportid, cin, cid, pnm, pid and typ are non-standard fields (presumably
% repository metadata -- confirm with the exporting system); styles ignore them.
@inproceedings{en:281367,
  author       = {Şen, Mehmet Umut and Bilecen, Ali and
                  Bilgin Taşdemir, Esma Fatıma and Yanıkoğlu, Berrin},
  title        = {Transcription of {Ottoman} Documents using
                  Transformer Based Models | {Osmanlıca} Dokümanların
                  Dönüştürücü Tabanlı Modeller ile Transkripsiyonu},
  booktitle    = {2025 33rd Signal Processing and Communications
                  Applications Conference ({SIU})},
  eventtitle   = {33rd Signal Processing and Communications
                  Applications Conference},
  venue        = {Sile (Istanbul)},
  eventdate    = {2025-06-25/2025-06-28},
  publisher    = {IEEE},
  pages        = {1--4},
  year         = {2025},
  month        = jun,
  date         = {2025-06-25},
  isbn         = {979-8-3315-6655-5},
  doi          = {10.1109/SIU66497.2025.11112382},
  url          = {https://pub.dzne.de/record/281367},
  abstract     = {Although access to a large number of Ottoman documents has
                  become easier today, the Arabic-Persian-based Ottoman script
                  remains a barrier for interested users in utilizing these
                  documents. To address this challenge, there is a need for
                  automatic transcription systems. While some deep
                  learning-based commercial and academic models exist for
                  Ottoman transcription, no studies have yet explored models
                  based on transformer architectures. This paper introduces an
                  Ottoman transcription system developed using TrOCR, a
                  transformer-based model. Instead of the commonly used
                  two-step approach in the literature, a model was designed to
                  perform both optical character recognition and transcription
                  into Turkish in one step. Additionally, the decoder
                  responsible for language modeling was initialized with a
                  BERT-based model trained on Turkish data, achieving results
                  comparable to the original model. During testing, this model
                  produced outputs more quickly due to improved tokenization
                  performance.},
  comment      = {2025 33rd Signal Processing and Communications Applications
                  Conference (SIU) : [Proceedings] - IEEE, 2025. - ISBN
                  979-8-3315-6655-5 - doi:10.1109/SIU66497.2025.11112382},
  reportid     = {DZNE-2025-01114},
  cin          = {AG Gokce},
  cid          = {I:(DE-2719)1013041},
  pnm          = {351 - Brain Function (POF4-351)},
  pid          = {G:(DE-HGF)POF4-351},
  typ          = {PUB:(DE-HGF)8 / PUB:(DE-HGF)7},
}