@article{585f5a3efdd343459225bd45fa718d04,
  author    = {{Siu Ting}, Karen and Torres-S{\'a}nchez, Mar{\'i}a and {San Mauro}, Diego and Wilcockson, David and Wilkinson, Mark and Pisani, Davide and O'Connell, Mary J. and Creevey, Christopher},
  title     = {Inadvertent paralog inclusion drives artifactual topologies and timetree estimates in phylogenomics},
  journal   = {Molecular Biology and Evolution},
  year      = {2019},
  month     = jun,
  day       = {1},
  volume    = {36},
  number    = {6},
  pages     = {1344--1356},
  doi       = {10.1093/molbev/msz067},
  issn      = {0737-4038},
  publisher = {Oxford University Press},
  language  = {English},
  keywords  = {Phylogenomics, Orthology, Paralogy, Lissamphibia, Timetree, Gene Duplication, Transcriptome, Phylogeny, Genetic Techniques, Amphibians/genetics, Animals},
  abstract  = {Increasingly, large phylogenomic data sets include transcriptomic data from nonmodel organisms. This not only has allowed controversial and unexplored evolutionary relationships in the tree of life to be addressed but also increases the risk of inadvertent inclusion of paralogs in the analysis. Although this may be expected to result in decreased phylogenetic support, it is not clear if it could also drive highly supported artifactual relationships. Many groups, including the hyperdiverse Lissamphibia, are especially susceptible to these issues due to ancient gene duplication events and small numbers of sequenced genomes and because transcriptomes are increasingly applied to resolve historically conflicting taxonomic hypotheses. We tested the potential impact of paralog inclusion on the topologies and timetree estimates of the Lissamphibia using published and de novo sequencing data including 18 amphibian species, from which 2,656 single-copy gene families were identified. A novel paralog filtering approach resulted in four differently curated data sets, which were used for phylogenetic reconstructions using Bayesian inference, maximum likelihood, and quartet-based supertrees. We found that paralogs drive strongly supported conflicting hypotheses within the Lissamphibia (Batrachia and Procera) and older divergence time estimates even within groups where no variation in topology was observed. All investigated methods, except Bayesian inference with the CAT-GTR model, were found to be sensitive to paralogs, but with filtering convergence to the same answer (Batrachia) was observed. This is the first large-scale study to address the impact of orthology selection using transcriptomic data and emphasizes the importance of quality over quantity particularly for understanding relationships of poorly sampled taxa.},
  note      = {Funding Information: The authors would like to thank the handling editor and three anonymous reviewers who provided insightful and constructive comments that helped to improve earlier versions of this manuscript, Sharon Huws (QUB) and Matthew Hegarty (IBERS, AU) for their support and advice in different stages of the wetlab work; and Martin Swain and the Bioinformatics Core at IBERS (AU) for their support and access to AU HPC clusters and HPC Wales. We would also like to thank Pablo Venegas, German Ch{\'a}vez (CORBIDI) for logistic support in Peru, and Julianne Diller, Moro M{\'o}dena and his family (Panguana) for their welcoming accommodation and access to their facilities. K.S.-T. and C.J.C. are indebted to Edgar Siu Ting and Amelia Salvatierra (Lima, Peru) for their constant support in the logistics and paperwork necessary for this research. Research permits were issued by SERFOR (Authorisations No. 083-2015-SERFOR-DGGSPFFS and 119-2017-SERFOR/DGGSPFFS). This work was supported by a fellowship from the Irish Research Council--Marie Sklodowska-Curie cofund program (ELEVATEPD/2014/69 to K.S.-T.); Biotechnology and Biological Sciences Research Council (BB/E/W/10964A01 and BBS/OS/GC/000011B to C.J.C.); Ministry of Economy and Competitiveness of Spain (RYC-2011-09321 and CGL2012-40082 to D.S.M; BES-2013-062723 FPI predoctoral fellowship and EEBB-I-15-09665 research stay to M.T.-S.). M.W.{\textquoteright}s fieldwork was supported by the Natural History Museum, London. All de novo sequenced data from this work are available in NCBI under the Bioproject IDs PRJNA387587 and PRJNA430346. Alignments, resulting trees, methods employed in the simulations and simulated data are available at the Open Science Framework website at https://osf.io/hv5bk/. Publisher Copyright: {\textcopyright} The Author(s) 2019.},
}