000269770 001__ 269770
000269770 005__ 20240809090116.0
000269770 0247_ $$2doi$$a10.1038/s41598-024-62724-6
000269770 0247_ $$2pmid$$apmid:38789621
000269770 0247_ $$2pmc$$apmc:PMC11126405
000269770 0247_ $$2altmetric$$aaltmetric:163793919
000269770 037__ $$aDZNE-2024-00612
000269770 041__ $$aEnglish
000269770 082__ $$a600
000269770 1001_ $$aYoung, Cameron C$$b0
000269770 245__ $$aDevelopment and validation of a reliable DNA copy-number-based machine learning algorithm (CopyClust) for breast cancer integrative cluster classification.
000269770 260__ $$a[London]$$bMacmillan Publishers Limited, part of Springer Nature$$c2024
000269770 3367_ $$2DRIVER$$aarticle
000269770 3367_ $$2DataCite$$aOutput Types/Journal article
000269770 3367_ $$0PUB:(DE-HGF)16$$2PUB:(DE-HGF)$$aJournal Article$$bjournal$$mjournal$$s1716795558_24509
000269770 3367_ $$2BibTeX$$aARTICLE
000269770 3367_ $$2ORCID$$aJOURNAL_ARTICLE
000269770 3367_ $$00$$2EndNote$$aJournal Article
000269770 520__ $$aThe Integrative Cluster subtypes (IntClusts) provide a framework for the classification of breast cancer tumors into 10 distinct groups based on copy number and gene expression, each with unique biological drivers of disease and clinical prognoses. Gene expression data is often lacking, and accurate classification of samples into IntClusts with copy number data alone is essential. Current classification methods achieve low accuracy when gene expression data are absent, warranting the development of new approaches to IntClust classification. Copy number data from 1980 breast cancer samples from METABRIC was used to train multiclass XGBoost machine learning algorithms (CopyClust). A piecewise constant fit was applied to the average copy number profile of each IntClust and unique breakpoints across the 10 profiles were identified and converted into ~ 500 genomic regions used as features for CopyClust. These models consisted of two approaches: a 10-class model with the final IntClust label predicted by a single multiclass model and a 6-class model with binary reclassification in which four pairs of IntClusts were combined for initial multiclass classification. Performance was validated on the TCGA dataset, with copy number data generated from both SNP arrays and WES platforms. CopyClust achieved 81% and 79% overall accuracy with the TCGA SNP and WES datasets, respectively, a nine-percentage point or greater improvement in overall IntClust subtype classification accuracy. CopyClust achieves a significant improvement over current methods in classification accuracy of IntClust subtypes for samples without available gene expression data and is an easily implementable algorithm for IntClust classification of breast cancer samples with copy number data.
000269770 536__ $$0G:(DE-HGF)POF4-354$$a354 - Disease Prevention and Healthy Aging (POF4-354)$$cPOF4-354$$fPOF IV$$x0
000269770 588__ $$aDataset connected to CrossRef, PubMed, Journals: pub.dzne.de
000269770 650_2 $$2MeSH$$aHumans
000269770 650_2 $$2MeSH$$aBreast Neoplasms: genetics
000269770 650_2 $$2MeSH$$aBreast Neoplasms: classification
000269770 650_2 $$2MeSH$$aMachine Learning
000269770 650_2 $$2MeSH$$aFemale
000269770 650_2 $$2MeSH$$aDNA Copy Number Variations: genetics
000269770 650_2 $$2MeSH$$aAlgorithms
000269770 650_2 $$2MeSH$$aCluster Analysis
000269770 650_2 $$2MeSH$$aGene Expression Profiling: methods
000269770 7001_ $$aEason, Katherine$$b1
000269770 7001_ $$aManzano Garcia, Raquel$$b2
000269770 7001_ $$aMoulange, Richard$$b3
000269770 7001_ $$0P:(DE-2719)2811372$$aMukherjee, Sach$$b4$$udzne
000269770 7001_ $$aChin, Suet-Feung$$b5
000269770 7001_ $$aCaldas, Carlos$$b6
000269770 7001_ $$aRueda, Oscar M$$b7
000269770 773__ $$0PERI:(DE-600)2615211-3$$a10.1038/s41598-024-62724-6$$gVol. 14, no. 1, p. 11861$$n1$$p11861$$tScientific reports$$v14$$x2045-2322$$y2024
000269770 8564_ $$uhttps://pub.dzne.de/record/269770/files/DZNE-2024-00612.pdf$$yOpenAccess
000269770 8564_ $$uhttps://pub.dzne.de/record/269770/files/DZNE-2024-00612.pdf?subformat=pdfa$$xpdfa$$yOpenAccess
000269770 909CO $$ooai:pub.dzne.de:269770$$pdnbdelivery$$pdriver$$pVDB$$popen_access$$popenaire
000269770 9101_ $$0I:(DE-588)1065079516$$6P:(DE-2719)2811372$$aDeutsches Zentrum für Neurodegenerative Erkrankungen$$b4$$kDZNE
000269770 9131_ $$0G:(DE-HGF)POF4-354$$1G:(DE-HGF)POF4-350$$2G:(DE-HGF)POF4-300$$3G:(DE-HGF)POF4$$4G:(DE-HGF)POF$$aDE-HGF$$bGesundheit$$lNeurodegenerative Diseases$$vDisease Prevention and Healthy Aging$$x0
000269770 9141_ $$y2024
000269770 915__ $$0StatID:(DE-HGF)0200$$2StatID$$aDBCoverage$$bSCOPUS$$d2023-08-24
000269770 915__ $$0LIC:(DE-HGF)CCBYNV$$2V:(DE-HGF)$$aCreative Commons Attribution CC BY (No Version)$$bDOAJ$$d2023-04-12T15:11:06Z
000269770 915__ $$0StatID:(DE-HGF)1050$$2StatID$$aDBCoverage$$bBIOSIS Previews$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)1190$$2StatID$$aDBCoverage$$bBiological Abstracts$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0600$$2StatID$$aDBCoverage$$bEbsco Academic Search$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)1040$$2StatID$$aDBCoverage$$bZoological Record$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0100$$2StatID$$aJCR$$bSCI REP-UK : 2022$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0160$$2StatID$$aDBCoverage$$bEssential Science Indicators$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0501$$2StatID$$aDBCoverage$$bDOAJ Seal$$d2023-04-12T15:11:06Z
000269770 915__ $$0StatID:(DE-HGF)0500$$2StatID$$aDBCoverage$$bDOAJ$$d2023-04-12T15:11:06Z
000269770 915__ $$0StatID:(DE-HGF)0113$$2StatID$$aWoS$$bScience Citation Index Expanded$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0700$$2StatID$$aFees$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0150$$2StatID$$aDBCoverage$$bWeb of Science Core Collection$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)9900$$2StatID$$aIF < 5$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0510$$2StatID$$aOpenAccess
000269770 915__ $$0StatID:(DE-HGF)0030$$2StatID$$aPeer Review$$bASC$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0561$$2StatID$$aArticle Processing Charges$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)1150$$2StatID$$aDBCoverage$$bCurrent Contents - Physical, Chemical and Earth Sciences$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0300$$2StatID$$aDBCoverage$$bMedline$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0320$$2StatID$$aDBCoverage$$bPubMed Central$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0199$$2StatID$$aDBCoverage$$bClarivate Analytics Master Journal List$$d2023-08-24
000269770 915__ $$0StatID:(DE-HGF)0030$$2StatID$$aPeer Review$$bDOAJ : Anonymous peer review$$d2023-04-12T15:11:06Z
000269770 9201_ $$0I:(DE-2719)1013030$$kAG Mukherjee$$lStatistics and Machine Learning$$x0
000269770 980__ $$ajournal
000269770 980__ $$aVDB
000269770 980__ $$aUNRESTRICTED
000269770 980__ $$aI:(DE-2719)1013030
000269770 9801_ $$aFullTexts