% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article on the GenoTools Python package (G3, vol. 15, no. 1, 2025).
% The fields reportid, cin, ddc, cid, pnm, pid, typ, pubmed and pmc are not
% standard BibTeX fields; standard styles ignore unknown fields, so they act
% as record metadata only.
% `pages` carries an identifier (jkae268) rather than a page range.
@ARTICLE{Vitale:275875,
  author    = {Vitale, Dan and Koretsky, Mathew J and Kuznetsov, Nicole
               and Hong, Samantha and Martin, Jessica and James, Mikayla
               and Makarious, Mary B and Leonard, Hampton and Iwaki,
               Hirotaka and Faghri, Faraz and Blauwendraat, Cornelis and
               Singleton, Andrew B and Song, Yeajin and Levine, Kristin and
               Kumar-Sreelatha, Ashwin Ashok and Fang, Zih-Hua and Nalls,
               Mike},
  title     = {{GenoTools}: an open-source {Python} package for
               efficient genotype data quality control and analysis},
  journal   = {G3: Genes, Genomes, Genetics},
  volume    = {15},
  number    = {1},
  issn      = {2160-1836},
  address   = {Pittsburgh, PA},
  publisher = {Genetics Soc. of America},
  reportid  = {DZNE-2025-00110},
  pages     = {jkae268},
  year      = {2025},
  abstract  = {GenoTools, a Python package, streamlines population
               genetics research by integrating ancestry estimation,
               quality control, and genome-wide association studies
               capabilities into efficient pipelines. By tracking samples,
               variants, and quality-specific measures throughout fully
               customizable pipelines, users can easily manage genetics
               data for large and small studies. GenoTools' 'Ancestry'
               module renders highly accurate predictions, allowing for
               high-quality ancestry-specific studies, and enables custom
               ancestry model training and serialization specified to the
               user's genotyping or sequencing platform. As the genotype
               processing engine that powers several large initiatives,
               including the NIH's Center for Alzheimer's and Related
               Dementias and the Global Parkinson's Genetics Program,
               GenoTools was used to process and analyze the UK Biobank and
               major Alzheimer's disease and Parkinson's disease datasets
               with over 400,000 genotypes from arrays and 5,000 whole
               genome sequencing samples and has led to novel discoveries
               in diverse populations. It has provided replicable ancestry
               predictions, implemented rigorous quality control, and
               conducted genetic ancestry-specific genome-wide association
               studies to identify systematic errors or biases through a
               single command. GenoTools is a customizable tool that
               enables users to efficiently analyze and scale genotyping
               and sequencing (whole genome sequencing and exome) data with
               reproducible and scalable ancestry, quality control, and
               genome-wide association studies pipelines.},
  keywords  = {Software / Humans / Genotype / Genome-Wide Association
               Study: methods / Quality Control / Alzheimer Disease:
               genetics / Parkinson Disease: genetics / Genetics,
               Population / Computational Biology: methods / Uniform
               Manifold Approximation and Projection (UMAP) (Other) /
               ancestry (Other) / genotype (Other) / principal component
               analysis (PCA) (Other) / python (Other)},
  cin       = {AG Heutink},
  ddc       = {570},
  cid       = {I:(DE-2719)1210002},
  pnm       = {354 - Disease Prevention and Healthy Aging (POF4-354)},
  pid       = {G:(DE-HGF)POF4-354},
  typ       = {PUB:(DE-HGF)16},
  pubmed    = {pmid:39566101},
  pmc       = {pmc:PMC11708233},
  doi       = {10.1093/g3journal/jkae268},
  url       = {https://pub.dzne.de/record/275875},
}