% H-Invitational Database (H-InvDB) annotation paper, PLoS Biology 2(6), 2004.
% The url field points at Murdoch research repository eprint 4906.
@article{murdoch4906,
  author    = {Imanishi, T. and Itoh, T. and Suzuki, Y. and O'Donovan, C. and
               Fukuchi, S. and Koyanagi, K. O. and Barrero, R. A. and Tamura, T. and
               Yamaguchi-Kabata, Y. and Tanino, M. and Yura, K. and Miyazaki, S. and
               Ikeo, K. and Homma, K. and Kasprzyk, A. and Nishikawa, T. and
               Hirakawa, M. and Thierry-Mieg, J. and Thierry-Mieg, D. and Ashurst, J. and
               Jia, L. and Nakao, M. and Thomas, M. A. and Mulder, N. and
               Karavidopoulou, Y. and Jin, L. and Kim, S. and Yasuda, T. and
               Lenhard, B. and Eveno, E. and Suzuki, Y. and Yamasaki, C. and
               Takeda, J.-I. and Gough, C. and Hilton, P. and Fujii, Y. and
               Sakai, H. and Tanaka, S. and Amid, Clara and Bellgard, M. and
               Bonaldo, M. and Bono, H. and Bromberg, S. K. and Brookes, A. J. and
               Bruford, E. and Carninci, P. and Chelala, C. and Couillault, C. and
               Souza, S. and Debily, M. and Devignes, M. and Dubchak, I. and
               Endo, T. and Estreicher, A. and Eyras, E. and Fukami-Kobayashi, K. and
               Gopinath, G. R. and Graudens, E. and Hahn, Y. and Han, M. and
               Han, Z.-G. and Hanada, K. and Hanaoka, H. and Harada, E. and
               Hashimoto, K. and Hinz, U. and Hirai, M. and Hishiki, T. and
               Hopkinson, I. and Imbeaud, S. and Inoko, H. and Kanapin, A. and
               Kaneko, Y. and Kasukawa, T. and Kelso, J. and Kersey, P. and
               Kikuno, R. and Kimura, K. and Korn, B. and Kuryshev, V. and
               Makalowska, I. and Makino, T. and Mano, S. and Mariage-Samson, R. and
               Mashima, J. and Matsuda, H. and Mewes, H.-W. and Minoshima, S. and
               Nagai, K. and Nagasaki, H. and Nagata, N. and Nigam, R. and
               Ogasawara, O. and Ohara, O. and Ohtsubo, M. and Okada, N. and
               Okido, T. and Oota, S. and Ota, M. and Ota, T. and
               Otsuki, T. and Piatier-Tonneau, D. and Poustka, A. and Ren, S.-X. and
               Saitou, N. and Sakai, K. and Sakamoto, S. and Sakate, R. and
               Schupp, I. and Servant, F. and Sherry, S. and Shiba, R. and
               Shimizu, N. and Shimoyama, M. and Simpson, A. J. and Soares, B. and
               Steward, C. and Suwa, M. and Suzuki, M. and Takahashi, A. and
               Tamiya, G. and Tanaka, H. and Taylor, T. and Terwilliger, J. D. and
               Unneberg, P. and Veeramachaneni, V. and Watanabe, S. and Wilming, L. and
               Yasuda, N. and Yoo, H.-S. and Stodolsky, M. and Makalowski, W. and
               Go, M. and Nakai, K. and Takagi, T. and Kanehisa, M. and
               Sakaki, Y. and Quackenbush, J. and Okazaki, Y. and Hayashizaki, Y. and
               Hide, W. and Chakraborty, R. and Nishikawa, K. and Sugawara, H. and
               Tateno, Y. and Chen, Z. and Oishi, M. and Tonellato, P. and
               Apweiler, R. and Okubo, K. and Wagner, L. and Wiemann, S. and
               Strausberg, R. L. and Isogai, T. and Auffray, C. and Nomura, N. and
               Gojobori, T. and Sugano, S.},
  title     = {Integrative annotation of 21,037 human genes validated by full-length {cDNA} clones},
  journal   = {PLoS Biology},
  year      = {2004},
  volume    = {2},
  number    = {6},
  pages     = {e162},
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pbio.0020162},
  url       = {https://researchrepository.murdoch.edu.au/id/eprint/4906/},
  note      = {This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.},
  abstract  = {The human genome sequence defines our inherent biological potential; the realization of the biology encoded therein requires knowledge of the function of each gene.
               Currently, our knowledge in this area is still limited.
               Several lines of investigation have been used to elucidate the structure and function of the genes in the human genome.
               Even so, gene prediction remains a difficult task, as the varieties of transcripts of a gene may vary to a great extent.
               We thus performed an exhaustive integrative characterization of 41,118 full-length cDNAs that capture the gene transcripts as complete functional cassettes, providing an unequivocal report of structural and functional diversity at the gene level.
               Our international collaboration has validated 21,037 human gene candidates by analysis of high-quality full-length cDNA clones through curation using unified criteria.
               This led to the identification of 5,155 new gene candidates.
               It also manifested the most reliable way to control the quality of the cDNA clones.
               We have developed a human gene database, called the H-Invitational Database (H-InvDB; http://www.h-invitational.jp/).
               It provides the following: integrative annotation of human genes, description of gene structures, details of novel alternative splicing isoforms, non-protein-coding RNAs, functional domains, subcellular localizations, metabolic pathways, predictions of protein three-dimensional structure, mapping of known single nucleotide polymorphisms (SNPs), identification of polymorphic microsatellite repeats within human genes, and comparative results with mouse full-length cDNAs.
               The H-InvDB analysis has shown that up to 4\% of the human genome sequence (National Center for Biotechnology Information build 34 assembly) may contain misassembled or missing regions.
               We found that 6.5\% of the human gene candidates (1,377 loci) did not have a good protein-coding open reading frame, of which 296 loci are strong candidates for non-protein-coding RNA genes.
               In addition, among 72,027 uniquely mapped SNPs and insertions/deletions localized within human genes, 13,215 nonsynonymous SNPs, 315 nonsense SNPs, and 452 indels occurred in coding regions.
               Together with 25 polymorphic microsatellite repeats present in coding regions, they may alter protein structure, causing phenotypic effects or resulting in disease.
               The H-InvDB platform represents a substantial contribution to resources needed for the exploration of human biology and pathology.},
}