Seq_id.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:54k
- /*
- * ===========================================================================
- * PRODUCTION $Log: Seq_id.cpp,v $
- * PRODUCTION Revision 1000.4 2004/06/01 19:34:28 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R6.81
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: Seq_id.cpp,v 1000.4 2004/06/01 19:34:28 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Author: .......
- *
- * File Description:
- * .......
- *
- * Remark:
- * This code was originally generated by application DATATOOL
- * using specifications from the ASN data definition file
- * 'seqloc.asn'.
- */
- // standard includes
- // generated includes
- #include <ncbi_pch.hpp>
- #include <corelib/ncbiutil.hpp>
- #include <objects/seq/Bioseq.hpp>
- #include <objects/seq/Seq_inst.hpp>
- #include <objects/seqloc/Seq_id.hpp>
- #include <objects/seqloc/Textseq_id.hpp>
- #include <objects/seqloc/Giimport_id.hpp>
- #include <objects/seqloc/Patent_seq_id.hpp>
- #include <objects/seqloc/PDB_seq_id.hpp>
- #include <objects/biblio/Id_pat.hpp>
- #include <objects/general/Object_id.hpp>
- #include <objects/general/Dbtag.hpp>
- #include <objects/general/Date.hpp>
- #include <objects/general/Date_std.hpp>
- #include <objects/general/Date.hpp>
- // generated classes
- BEGIN_NCBI_SCOPE
- BEGIN_objects_SCOPE // namespace ncbi::objects::
- // constructor
- CSeq_id::CSeq_id(void)
- {
- return;
- }
- // destructor
- CSeq_id::~CSeq_id(void)
- {
- return;
- }
- const CTextseq_id* CSeq_id::GetTextseq_Id(void) const
- {
- switch ( Which() ) {
- case e_Genbank:
- return &GetGenbank();
- case e_Embl:
- return &GetEmbl();
- case e_Ddbj:
- return &GetDdbj();
- case e_Pir:
- return &GetPir();
- case e_Swissprot:
- return &GetSwissprot();
- case e_Other:
- return &GetOther();
- case e_Prf:
- return &GetPrf();
- case e_Tpg:
- return &GetTpg();
- case e_Tpe:
- return &GetTpe();
- case e_Tpd:
- return &GetTpd();
- default:
- return 0;
- }
- }
- inline
- void x_Assign(CObject_id& dst, const CObject_id& src)
- {
- switch ( src.Which() ) {
- case CObject_id::e_not_set:
- dst.Reset();
- return;
- case CObject_id::e_Id:
- dst.SetId(src.GetId());
- return;
- case CObject_id::e_Str:
- dst.SetStr(src.GetStr());
- return;
- default:
- THROW1_TRACE(runtime_error, "invalid Object-id variant");
- }
- }
- inline
- void x_Assign(CGiimport_id& dst, const CGiimport_id& src)
- {
- dst.SetId(src.GetId());
- if ( src.IsSetDb() ) {
- dst.SetDb(src.GetDb());
- }
- else {
- dst.ResetDb();
- }
- if ( src.IsSetRelease() ) {
- dst.SetRelease(src.GetRelease());
- }
- else {
- dst.ResetRelease();
- }
- }
- inline
- void x_Assign(CTextseq_id& dst, const CTextseq_id& src)
- {
- if ( src.IsSetName() ) {
- dst.SetName(src.GetName());
- }
- else {
- dst.ResetName();
- }
- if ( src.IsSetAccession() ) {
- dst.SetAccession(src.GetAccession());
- }
- else {
- dst.ResetAccession();
- }
- if ( src.IsSetRelease() ) {
- dst.SetRelease(src.GetRelease());
- }
- else {
- dst.ResetRelease();
- }
- if ( src.IsSetVersion() ) {
- dst.SetVersion(src.GetVersion());
- }
- else {
- dst.ResetVersion();
- }
- }
- inline
- void x_Assign(CDbtag& dst, const CDbtag& src)
- {
- dst.SetDb(src.GetDb());
- x_Assign(dst.SetTag(), src.GetTag());
- }
- inline
- void x_Assign(CPatent_seq_id& dst, const CPatent_seq_id& src)
- {
- dst.SetSeqid(src.GetSeqid());
- dst.SetCit().Assign(src.GetCit());
- }
- inline
- void x_Assign(CDate& dst, const CDate& src)
- {
- dst.Assign(src);
- }
- inline
- void x_Assign(CPDB_seq_id& dst, const CPDB_seq_id& src)
- {
- dst.SetMol().Set(src.GetMol());
- if ( src.IsSetChain() ) {
- dst.SetChain(src.GetChain());
- }
- else {
- dst.ResetChain();
- }
- if ( src.IsSetRel() ) {
- dst.SetRel().Assign(src.GetRel());
- }
- else {
- dst.ResetRel();
- }
- }
- void CSeq_id::Assign(const CSerialObject& obj, ESerialRecursionMode how)
- {
- if ( GetTypeInfo() == obj.GetThisTypeInfo() ) {
- const CSeq_id& id = static_cast<const CSeq_id&>(obj);
- switch ( id.Which() ) {
- case e_not_set:
- Reset();
- return;
- case e_Local:
- x_Assign(SetLocal(), id.GetLocal());
- return;
- case e_Gibbsq:
- SetGibbsq(id.GetGibbsq());
- return;
- case e_Gibbmt:
- SetGibbmt(id.GetGibbmt());
- return;
- case e_Giim:
- x_Assign(SetGiim(), id.GetGiim());
- return;
- case e_Pir:
- x_Assign(SetPir(), id.GetPir());
- return;
- case e_Swissprot:
- x_Assign(SetSwissprot(), id.GetSwissprot());
- return;
- case e_Patent:
- x_Assign(SetPatent(), id.GetPatent());
- return;
- case e_Other:
- x_Assign(SetOther(), id.GetOther());
- return;
- case e_General:
- x_Assign(SetGeneral(), id.GetGeneral());
- return;
- case e_Gi:
- SetGi(id.GetGi());
- return;
- case e_Prf:
- x_Assign(SetPrf(), id.GetPrf());
- return;
- case e_Pdb:
- x_Assign(SetPdb(), id.GetPdb());
- return;
- case e_Genbank:
- x_Assign(SetGenbank(), id.GetGenbank());
- return;
- case e_Embl:
- x_Assign(SetEmbl(), id.GetEmbl());
- return;
- case e_Ddbj:
- x_Assign(SetDdbj(), id.GetDdbj());
- return;
- case e_Tpg:
- x_Assign(SetTpg(), id.GetTpg());
- return;
- case e_Tpe:
- x_Assign(SetTpe(), id.GetTpe());
- return;
- case e_Tpd:
- x_Assign(SetTpd(), id.GetTpd());
- return;
- }
- }
- CSerialObject::Assign(obj, how);
- }
- // Compare() - are SeqIds equivalent?
- CSeq_id::E_SIC CSeq_id::Compare(const CSeq_id& sid2) const
- {
- if ( Which() != sid2.Which() ) { // Only one case where this will work
- const CTextseq_id *tsip1 = GetTextseq_Id();
- if ( !tsip1 )
- return e_DIFF;
- const CTextseq_id *tsip2 = sid2.GetTextseq_Id();
- if ( !tsip2 )
- return e_DIFF;
- if ( tsip1->Match(*tsip2) ) // id Textseq_id match
- return e_YES;
- else
- return e_NO;
- }
- switch ( Which() ) { // Now we only need to know one
- case e_Local:
- return GetLocal().Match(sid2.GetLocal()) ? e_YES : e_NO;
- case e_Gibbsq:
- return GetGibbsq() == sid2.GetGibbsq() ? e_YES : e_NO;
- case e_Gibbmt:
- return GetGibbmt() == sid2.GetGibbmt() ? e_YES : e_NO;
- case e_Giim:
- return GetGiim().GetId() == sid2.GetGiim().GetId() ? e_YES : e_NO;
- case e_Pir:
- return GetPir().Match(sid2.GetPir()) ? e_YES : e_NO;
- case e_Swissprot:
- return GetSwissprot().Match(sid2.GetSwissprot()) ? e_YES : e_NO;
- case e_Patent:
- return GetPatent().Match(sid2.GetPatent()) ? e_YES : e_NO;
- case e_Other:
- return GetOther().Match(sid2.GetOther()) ? e_YES : e_NO;
- case e_General:
- return GetGeneral().Match(sid2.GetGeneral()) ? e_YES : e_NO;
- case e_Gi:
- return GetGi() == sid2.GetGi() ? e_YES : e_NO;
- case e_Prf:
- return GetPrf().Match(sid2.GetPrf()) ? e_YES : e_NO;
- case e_Pdb:
- return GetPdb().Match(sid2.GetPdb()) ? e_YES : e_NO;
- case e_Genbank:
- return GetGenbank().Match(sid2.GetGenbank()) ? e_YES : e_NO;
- case e_Embl:
- return GetEmbl().Match(sid2.GetEmbl()) ? e_YES : e_NO;
- case e_Ddbj:
- return GetDdbj().Match(sid2.GetDdbj()) ? e_YES : e_NO;
- case e_Tpg:
- return GetTpg().Match(sid2.GetTpg()) ? e_YES : e_NO;
- case e_Tpe:
- return GetTpe().Match(sid2.GetTpe()) ? e_YES : e_NO;
- case e_Tpd:
- return GetTpd().Match(sid2.GetTpd()) ? e_YES : e_NO;
- default:
- return e_error;
- }
- }
- int CSeq_id::CompareOrdered(const CSeq_id& sid2) const
- {
- int ret = Which() - sid2.Which();
- if ( ret != 0 ) {
- return ret;
- }
- const CTextseq_id *tsip1 = GetTextseq_Id();
- const CTextseq_id *tsip2 = sid2.GetTextseq_Id();
- if ( tsip1 && tsip2 ) {
- return tsip1->Compare(*tsip2);
- }
- switch ( Which() ) { // Now we only need to know one
- case e_Local:
- return GetLocal().Compare(sid2.GetLocal());
- case e_Gibbsq:
- return GetGibbsq() - sid2.GetGibbsq();
- case e_Gibbmt:
- return GetGibbmt() - sid2.GetGibbmt();
- case e_Giim:
- return GetGiim().GetId() - sid2.GetGiim().GetId();
- case e_Patent:
- return GetPatent().Compare(sid2.GetPatent());
- case e_General:
- return GetGeneral().Compare(sid2.GetGeneral());
- case e_Gi:
- return GetGi() - sid2.GetGi();
- case e_Pdb:
- return 0;
- default:
- return this == &sid2? 0: this < &sid2? -1: 1;
- }
- }
// FASTA_LONG type tags, indexed by Seq-id choice value.
// The final empty string marks the end of the list.
static const char* const s_TextId[20] = {
    "???",  // not-set   = ???
    "lcl",  // local     = lcl|integer or string
    "bbs",  // gibbsq    = bbs|integer
    "bbm",  // gibbmt    = bbm|integer
    "gim",  // giim      = gim|integer
    "gb",   // genbank   = gb|accession|locus
    "emb",  // embl      = emb|accession|locus
    "pir",  // pir       = pir|accession|name
    "sp",   // swissprot = sp|accession|name
    "pat",  // patent    = pat|country|patent number (string)|seq number (integer)
    "ref",  // other     = ref|accession|name|release - changed from oth to ref
    "gnl",  // general   = gnl|database(string)|id (string or number)
    "gi",   // gi        = gi|integer
    "dbj",  // ddbj      = dbj|accession|locus
    "prf",  // prf       = prf|accession|name
    "pdb",  // pdb       = pdb|entry name (string)|chain id (char)
    "tpg",  // tpg       = tpg|accession|name
    "tpe",  // tpe       = tpe|accession|name
    "tpd",  // tpd       = tpd|accession|name
    ""      // end-of-list placeholder
};
- CSeq_id::E_Choice CSeq_id::WhichInverseSeqId(const char* SeqIdCode)
- {
- int retval = 0;
- int dex;
- // Last item in list has null byte for first character, so
- // *s_TextId[dex] will be zero at end.
- for (dex = 0; *s_TextId[dex]; dex++) {
- if ( !NStr::CompareNocase(s_TextId[dex], SeqIdCode) ) {
- break;
- }
- }
- if ( !*s_TextId[dex] ) {
- retval = 0;
- } else {
- retval = dex;
- }
- return static_cast<CSeq_id_Base::E_Choice> (retval);
- }
- static CSeq_id::EAccessionInfo s_IdentifyNAcc(const string& acc)
- {
- _ASSERT(acc[0] == 'N');
- int n = NStr::StringToInt(acc.substr(1), 10, NStr::eCheck_Skip);
- if (n >= 20000) {
- return CSeq_id::eAcc_gb_est;
- } else { // big mess; fortunately, these are all secondary
- switch (n) {
- case 1: case 2: case 11: case 57:
- return CSeq_id::eAcc_gb_embl;
- case 3: case 4: case 6: case 7: case 10: case 14: case 15:
- case 16: case 17: case 21: case 23: case 24: case 26: case 29:
- case 30: case 31: case 32: case 33: case 34: case 36: case 38:
- case 39: case 40: case 42: case 43: case 44: case 45: case 47:
- case 49: case 50: case 51: case 55: case 56: case 59:
- return CSeq_id::eAcc_gb_ddbj;
- case 5: case 9: case 12: case 20: case 22: case 25: case 58:
- return CSeq_id::eAcc_gb_embl_ddbj;
- case 8: case 13: case 18: case 19: case 27: case 41: case 46:
- case 48: case 52: case 54: case 18624:
- return CSeq_id::eAcc_gb_other_nuc;
- case 28: case 35: case 37: case 53: case 61: case 62: case 63:
- case 65: case 66: case 67: case 68: case 69: case 78: case 79:
- case 83: case 88: case 90: case 91: case 92: case 93: case 94:
- return CSeq_id::eAcc_ddbj_other_nuc;
- case 60: case 64:
- return CSeq_id::eAcc_embl_other_nuc;
- case 70:
- return CSeq_id::eAcc_embl_ddbj;
- default: // unassigned or ambiguous
- return CSeq_id::eAcc_unknown;
- }
- }
- }
- CSeq_id::EAccessionInfo CSeq_id::IdentifyAccession(const string& acc)
- {
- SIZE_TYPE digit_pos = acc.find_first_of("0123456789");
- if (digit_pos == NPOS) {
- return eAcc_unknown;
- }
- SIZE_TYPE main_size = acc.find('.');
- if (main_size == NPOS) {
- main_size = acc.size();
- }
- string pfx = acc.substr(0, digit_pos);
- NStr::ToUpper(pfx);
- switch (pfx.size()) {
- case 0:
- if (acc.find_first_not_of("0123456789") == NPOS) { // just digits
- return eAcc_gi;
- } else if (main_size == 4 || (main_size > 4 && acc[4] == '|')) {
- return eAcc_pdb;
- } else {
- return eAcc_unknown;
- }
- case 1:
- switch (pfx[0]) {
- case 'A': return eAcc_embl_patent;
- case 'B': return eAcc_gb_gss;
- case 'C': return eAcc_ddbj_est;
- case 'D': return eAcc_ddbj_dirsub;
- case 'E': return eAcc_ddbj_patent;
- case 'F': return eAcc_embl_est;
- case 'G': return eAcc_gb_sts;
- case 'H': case 'R': case 'T': case 'W': return eAcc_gb_est;
- case 'I': return eAcc_gb_patent;
- case 'J': case 'K': case 'L': case 'M': return eAcc_gsdb_dirsub;
- case 'N': return s_IdentifyNAcc(acc);
- case 'O': case 'P': case 'Q': return eAcc_swissprot;
- case 'S': return eAcc_gb_backbone;
- case 'U': return eAcc_gb_dirsub;
- case 'V': case 'X': case 'Y': case 'Z': return eAcc_embl_dirsub;
- default: return eAcc_unreserved_nuc;
- }
- case 2:
- switch (pfx[0]) {
- case 'A':
- switch (pfx[1]) {
- case 'A': case 'I': case 'W': return eAcc_gb_est;
- case 'B': return eAcc_ddbj_dirsub;
- case 'C': return eAcc_gb_htgs;
- case 'D': return eAcc_gb_gsdb;
- case 'E': return eAcc_gb_genome;
- case 'F': case 'Y': return eAcc_gb_dirsub;
- case 'G': case 'P': return eAcc_ddbj_genome;
- case 'H': return eAcc_gb_con;
- case 'J': case 'M': return eAcc_embl_dirsub;
- case 'K': return eAcc_ddbj_htgs;
- case 'L': return eAcc_embl_genome;
- case 'N': return eAcc_embl_con;
- case 'Q': case 'Z': return eAcc_gb_gss;
- case 'R': return eAcc_gb_patent;
- case 'S': return eAcc_gb_other_nuc;
- case 'T': case 'U': case 'V': return eAcc_ddbj_est;
- case 'X': return eAcc_embl_patent;
- default: return eAcc_unreserved_nuc;
- }
- case 'B':
- switch (pfx[1]) {
- case 'A': return eAcc_ddbj_con;
- case 'B': case 'J': case 'P': case 'W':
- case 'Y': return eAcc_ddbj_est;
- case 'C': case 'T': return eAcc_gb_cdna;
- case 'D': return eAcc_ddbj_patent;
- case 'E': case 'F': case 'G': case 'I':
- case 'M': case 'Q': case 'U': return eAcc_gb_est;
- case 'H': case 'Z': return eAcc_gb_gss;
- case 'K': case 'L': return eAcc_gb_tpa_nuc;
- case 'N': return eAcc_embl_tpa_nuc;
- case 'R': return eAcc_ddbj_tpa_nuc;
- case 'S': return eAcc_ddbj_genome;
- // BS is actually chimp genomes.
- case 'V': return eAcc_gb_sts;
- case 'X': return eAcc_embl_genome;
- default: return eAcc_unreserved_nuc;
- }
- case 'C':
- switch (pfx[1]) {
- case 'A': case 'B': case 'D': case 'F':
- case 'K': case 'N': return eAcc_gb_est;
- case 'C': case 'E': case 'G': case 'L': return eAcc_gb_gss;
- case 'H': case 'M': return eAcc_gb_con;
- case 'I': case 'J': return eAcc_ddbj_est;
- // no specific assignment for CO-CP yet
- case 'O': case 'P': return eAcc_gb_other_nuc;
- case 'Q': return eAcc_embl_patent;
- case 'R': return eAcc_embl_genome;
- case 'S': case 'T': case 'U': return eAcc_embl_other_nuc;
- default: return eAcc_unreserved_nuc;
- }
- default: return eAcc_unreserved_nuc;
- }
- case 3:
- if (pfx[2] == '_') { // refseq-style
- if (pfx == "NC_") { return eAcc_refseq_chromosome; }
- else if (pfx == "NG_") { return eAcc_refseq_genomic; }
- else if (pfx == "NM_") { return eAcc_refseq_mrna; }
- else if (pfx == "NP_") { return eAcc_refseq_prot; }
- else if (pfx == "NR_") { return eAcc_refseq_ncrna; }
- else if (pfx == "NS_") { return eAcc_refseq_genome; /* ? */ }
- else if (pfx == "NT_") { return eAcc_refseq_contig; }
- else if (pfx == "NW_") { return eAcc_refseq_wgs_intermed; }
- // else if (pfx == "NZ_") { return eAcc_refseq_wgs_nuc; }
- else if (pfx == "XM_") { return eAcc_refseq_mrna_predicted; }
- else if (pfx == "XP_") { return eAcc_refseq_prot_predicted; }
- else if (pfx == "XR_") { return eAcc_refseq_ncrna_predicted; }
- else if (pfx == "ZP_") { return eAcc_refseq_wgs_prot; }
- else { return eAcc_refseq_unreserved; }
- } else { // protein
- switch (pfx[0]) {
- case 'A': return (pfx == "AAE") ? eAcc_gb_patent_prot
- : eAcc_gb_prot;
- case 'B': return eAcc_ddbj_prot;
- case 'C': return eAcc_embl_prot;
- case 'D': return eAcc_gb_tpa_prot;
- case 'E': return eAcc_gb_wgs_prot;
- case 'F': return eAcc_ddbj_tpa_prot;
- case 'G': return eAcc_ddbj_wgs_prot;
- default: return eAcc_unreserved_prot;
- }
- }
- case 4:
- switch (pfx[0]) {
- case 'A': return eAcc_gb_wgs_nuc;
- case 'B': return eAcc_ddbj_wgs_nuc;
- case 'C': return eAcc_embl_wgs_nuc;
- default: return eAcc_unknown;
- }
- case 7:
- if (NStr::StartsWith(acc, "NZ_")) {
- return eAcc_refseq_wgs_nuc;
- } else {
- return eAcc_unknown;
- }
- default:
- return eAcc_unknown;
- }
- }
- CSeq_id::EAccessionInfo CSeq_id::IdentifyAccession(void) const
- {
- EAccessionInfo type = (EAccessionInfo)Which();
- switch (type) {
- case CSeq_id::e_Pir: case CSeq_id::e_Swissprot: case CSeq_id::e_Prf:
- case CSeq_id::e_Pdb:
- return (EAccessionInfo)(type | fAcc_prot); // always just protein
-
- case CSeq_id::e_Genbank: case CSeq_id::e_Embl: case CSeq_id::e_Ddbj:
- case CSeq_id::e_Tpg: case CSeq_id::e_Tpe: case CSeq_id::e_Tpd:
- case CSeq_id::e_Other:
- {
- const CTextseq_id* tsid = GetTextseq_Id();
- if (tsid->IsSetAccession()) {
- EAccessionInfo ai = IdentifyAccession(tsid->GetAccession());
- if ((ai & eAcc_type_mask) == e_not_set) {
- // We *know* what the type should be....
- return (EAccessionInfo)((ai & eAcc_flag_mask) | type);
- } else if ((ai & eAcc_type_mask) == type) {
- return ai;
- } else { // misidentified or mislabeled; assume the former
- return type;
- }
- } else {
- return type;
- }
- }
-
- default:
- return type;
- }
- }
- static inline
- void x_GetLabel_Type(const CSeq_id& id, string* label,
- CSeq_id::TLabelFlags flags)
- {
- CSeq_id::E_Choice choice = id.Which();
- _ASSERT(choice < CSeq_id::e_MaxChoice);
- if (choice >= CSeq_id::e_MaxChoice) {
- return;
- }
- switch (choice) {
- default:
- *label += s_TextId[choice];
- break;
- case CSeq_id::e_General:
- // for general IDs, use the db-name only
- *label += "gnl";
- break;
- }
- // no extra flag interpretation currently
- }
- static inline
- void x_GetLabel_Content(const CSeq_id& id, string* label,
- CSeq_id::TLabelFlags flags)
- {
- const CTextseq_id* tsid = id.GetTextseq_Id();
- //text id
- if (tsid) {
- string str;
- if (tsid->IsSetAccession()) {
- str = tsid->GetAccession();
- } else if (tsid->IsSetName()) {
- str = tsid->GetName();
- }
- if ( !str.empty() ) {
- if ( (flags & CSeq_id::fLabel_Version) && tsid->IsSetVersion()) {
- str += "." + NStr::IntToString(tsid->GetVersion());
- }
- }
- *label += str;
- } else { //non-text id
- switch (id.Which()) {
- case CSeq_id::e_not_set:
- break;
- case CSeq_id::e_Local:
- {{
- const CObject_id& oid = id.GetLocal();
- if (oid.Which() == CObject_id::e_Id) {
- *label += NStr::IntToString(oid.GetId());
- } else if (oid.Which() == CObject_id::e_Str) {
- *label += oid.GetStr();
- }
- }}
- break;
- case CSeq_id::e_Gibbsq:
- *label += NStr::IntToString(id.GetGibbsq());
- break;
- case CSeq_id::e_Gibbmt:
- *label += NStr::IntToString(id.GetGibbmt());
- break;
- case CSeq_id::e_Giim:
- *label += NStr::IntToString(id.GetGiim().GetId());
- break;
- case CSeq_id::e_General:
- {{
- const CDbtag& dbt = id.GetGeneral();
- *label += dbt.GetDb() + '|';
- if (dbt.GetTag().Which() == CObject_id::e_Id) {
- *label += NStr::IntToString(dbt.GetTag().GetId());
- } else if (dbt.GetTag().Which()==CObject_id::e_Str) {
- *label += dbt.GetTag().GetStr();
- }
- }}
- break;
- case CSeq_id::e_Patent:
- {{
- const CId_pat& idp = id.GetPatent().GetCit();
- *label += idp.GetCountry() +
- (idp.GetId().IsNumber() ?
- idp.GetId().GetNumber() :
- idp.GetId().GetApp_number()) +
- NStr::IntToString(id.GetPatent().GetSeqid());
- }}
- break;
- case CSeq_id::e_Gi:
- *label += NStr::IntToString(id.GetGi());
- break;
- case CSeq_id::e_Pdb:
- {{
- const CPDB_seq_id& pid = id.GetPdb();
- char chain = (char)pid.GetChain();
- if (chain == '|') {
- *label += pid.GetMol().Get() + "|VB";
- } else if (islower(chain) != 0) {
- *label += pid.GetMol().Get() + "-" + (char) toupper(chain);
- } else if ( chain == ' ' ) {
- *label += pid.GetMol().Get() + "-";
- } else {
- *label += pid.GetMol().Get() + "-" + chain;
- }
- }}
- break;
- default:
- break;
- }
- }
- }
- void CSeq_id::GetLabel(string* label, ELabelType type, TLabelFlags flags) const
- {
- if ( !label ) {
- return;
- }
- switch (type) {
- case eFasta:
- *label = AsFastaString();
- break;
- case eBoth:
- x_GetLabel_Type(*this, label, flags);
- *label += "|";
- x_GetLabel_Content(*this, label, flags);
- break;
- case eType:
- x_GetLabel_Type(*this, label, flags);
- break;
- case eContent:
- x_GetLabel_Content(*this, label, flags);
- break;
- }
- }
- /*Return seqid string with optional version for text seqid type
- (default no version).*/
- string CSeq_id::GetSeqIdString(bool with_version) const
- {
- string label;
- TLabelFlags flags = 0;
- if (with_version) {
- flags |= fLabel_Version;
- }
- GetLabel(&label, eContent, flags);
- return label;
- }
- void CSeq_id::WriteAsFasta(ostream& out)
- const
- {
- E_Choice the_type = Which();
- if (the_type > e_Tpd) // New SeqId type
- the_type = e_not_set;
- out << s_TextId[the_type] << '|';
- switch (the_type) {
- case e_not_set:
- break;
- case e_Local:
- GetLocal().AsString(out);
- break;
- case e_Gibbsq:
- out << GetGibbsq();
- break;
- case e_Gibbmt:
- out << GetGibbmt();
- break;
- case e_Giim:
- out << (GetGiim().GetId());
- break;
- case e_Genbank:
- GetGenbank().AsFastaString(out);
- break;
- case e_Embl:
- GetEmbl().AsFastaString(out);
- break;
- case e_Pir:
- GetPir().AsFastaString(out);
- break;
- case e_Swissprot:
- GetSwissprot().AsFastaString(out);
- break;
- case e_Patent:
- GetPatent().AsFastaString(out);
- break;
- case e_Other:
- GetOther().AsFastaString(out);
- break;
- case e_General:
- {
- const CDbtag& dbt = GetGeneral();
- out << (dbt.GetDb()) << '|'; // no Upcase per Ostell - Karl 7/2001
- dbt.GetTag().AsString(out);
- }
- break;
- case e_Gi:
- out << GetGi();
- break;
- case e_Ddbj:
- GetDdbj().AsFastaString(out);
- break;
- case e_Prf:
- GetPrf().AsFastaString(out);
- break;
- case e_Pdb:
- GetPdb().AsFastaString(out);
- break;
- case e_Tpg:
- GetTpg().AsFastaString(out);
- break;
- case e_Tpe:
- GetTpe().AsFastaString(out);
- break;
- case e_Tpd:
- GetTpd().AsFastaString(out);
- break;
- default:
- out << "[UnknownSeqIdType]";
- break;
- }
- }
- const string CSeq_id::AsFastaString(void) const
- {
- CNcbiOstrstream str;
- WriteAsFasta(str);
- return CNcbiOstrstreamToString(str);
- }
- //
- // Local functions for producing a sequence ID 'score'
- // These functions produce scores in FastA order
- //
- static int s_ScoreNAForFasta(const CSeq_id* id)
- {
- switch (id->Which()) {
- case CSeq_id::e_not_set:
- case CSeq_id::e_Giim:
- case CSeq_id::e_Pir:
- case CSeq_id::e_Swissprot:
- case CSeq_id::e_Prf: return kMax_Int;
- case CSeq_id::e_Local: return 230;
- case CSeq_id::e_Gi: return 120;
- case CSeq_id::e_General: return 50;
- case CSeq_id::e_Patent: return 40;
- case CSeq_id::e_Gibbsq:
- case CSeq_id::e_Gibbmt:
- case CSeq_id::e_Pdb: return 30;
- case CSeq_id::e_Other: return 15;
- default: return 20; // [third party] GenBank/EMBL/DDBJ
- }
- }
- static int s_ScoreAAForFasta(const CSeq_id* id)
- {
- switch (id->Which()) {
- case CSeq_id::e_not_set:
- case CSeq_id::e_Giim: return kMax_Int;
- case CSeq_id::e_Local: return 230;
- case CSeq_id::e_Gi: return 120;
- case CSeq_id::e_General: return 90;
- case CSeq_id::e_Patent: return 80;
- case CSeq_id::e_Prf: return 70;
- case CSeq_id::e_Pdb: return 50;
- case CSeq_id::e_Gibbsq:
- case CSeq_id::e_Gibbmt: return 40;
- case CSeq_id::e_Pir: return 30;
- case CSeq_id::e_Swissprot: return 20;
- case CSeq_id::e_Other: return 15;
- default: return 60; // [third party] GenBank/EMBL/DDBJ
- }
- }
- //
- // GetStringDescr()
- // Given a bioseq, return the best possible ID description, in a number of
- // appealing formats. This function can produce FastA-formatted titles or a
- // number of sub-titles (GI only, Best Accession with or without version).
- //
- string CSeq_id::GetStringDescr(const CBioseq& bioseq, EStringFormat fmt)
- {
- bool is_na = bioseq.GetInst().GetMol() != CSeq_inst::eMol_aa;
- CRef<CSeq_id> best_id = FindBestChoice(bioseq.GetId(),
- is_na ? s_ScoreNAForFasta
- : s_ScoreAAForFasta);
- switch (fmt) {
- case eFormat_FastA:
- {{
- // FastA format
- // Here we have something like:
- // gi|###|SOME_ACCESSION|title
- bool found_gi = false;
- CNcbiOstrstream out_str;
- ITERATE (CBioseq::TId, id, bioseq.GetId()) {
- if ((*id)->IsGi()) {
- (*id)->WriteAsFasta(out_str);
- found_gi = true;
- break;
- }
- }
- if (best_id.NotEmpty() && best_id->Which() != CSeq_id::e_Gi) {
- if (found_gi) {
- out_str << '|';
- }
- best_id->WriteAsFasta(out_str);
- }
- return CNcbiOstrstreamToString(out_str);
- }}
- break;
- case eFormat_ForceGI:
- // eForceGI produces a string containing only the GI in FastA format
- // so we have:
- // gi|####
- ITERATE (CBioseq::TId, iter, bioseq.GetId()) {
- if ( (*iter)->IsGi() ) {
- CNcbiOstrstream out_str;
- (*iter)->WriteAsFasta(out_str);
- return CNcbiOstrstreamToString(out_str);
- }
- }
- break;
- case eFormat_BestWithVersion:
- // eBestWithVersion produces only the 'best' accession name, with
- // its version indicator
- if (best_id.NotEmpty()) {
- string label;
- best_id->GetLabel(&label, eDefault, fLabel_Version);
- return label;
- }
- break;
-
- case eFormat_BestWithoutVersion:
- // eBestWithoutVersion produces only the 'best' accession name,
- // without its version indicator
- if (best_id.NotEmpty()) {
- string label;
- best_id->GetLabel(&label, eDefault, 0);
- return label;
- }
- break;
- }
- // catch-all for unusual events
- return "";
- }
- CSeq_id::CSeq_id(const CDbtag& dbtag, bool set_as_general)
- {
- int version = -1;
- string acc;
- if (dbtag.GetTag().IsStr()) {
- acc = dbtag.GetTag().GetStr();
- string::size_type pos = acc.find_last_of(".");
- if (pos != string::npos) {
- version = NStr::StringToInt(acc.substr(pos + 1, acc.length() - pos));
- acc.erase(pos);
- }
- }
- switch (dbtag.GetType()) {
- case CDbtag::eDbtagType_GenBank:
- try {
- int gi = NStr::StringToInt(acc);
- SetGi(gi);
- }
- catch (...) {
- SetGenbank().SetAccession(acc);
- if (version != -1) {
- SetGenbank().SetVersion(version);
- }
- }
- break;
- case CDbtag::eDbtagType_EMBL:
- SetEmbl().SetAccession(acc);
- if (version != -1) {
- SetEmbl().SetVersion(version);
- }
- break;
- case CDbtag::eDbtagType_DDBJ:
- SetDdbj().SetAccession(acc);
- if (version != -1) {
- SetDdbj().SetVersion(version);
- }
- break;
- case CDbtag::eDbtagType_GI:
- if (dbtag.GetTag().IsStr()) {
- SetGi(NStr::StringToInt(dbtag.GetTag().GetStr()));
- } else {
- SetGi(dbtag.GetTag().GetId());
- }
- break;
- case CDbtag::eDbtagType_bad:
- default:
- // not understood as a sequence id - leave as e_not_set
- if (set_as_general) {
- SetGeneral().Assign(dbtag);
- }
- break;
- }
- }
- //SeqIdFastAConstructors
- CSeq_id::CSeq_id( const string& the_id )
- {
- // If no vertical bar, tries to interpret the string as a pure
- // accession, inferring the type from the initial letter(s).
- if (the_id.find('|') == NPOS) {
- SIZE_TYPE dot = the_id.find('.');
- string acc_in = the_id.substr(0, dot);
- EAccessionInfo info = IdentifyAccession(acc_in);
- int ver = 0;
- if (dot != NPOS) {
- ver = NStr::StringToNumeric(the_id.substr(dot + 1));
- }
- if (GetAccType(info) != e_not_set) {
- x_Init(GetAccType(info), acc_in, kEmptyStr, ver);
- }
- return;
- }
- // Create an istrstream on string the_id
- std::istrstream myin(the_id.c_str() );
- string the_type_in, acc_in, name_in, version_in, release_in;
- // Read the part of the_id up to the vertical bar ( "|" )
- NcbiGetline(myin, the_type_in, '|');
- // Remove spaces from front and back of the_type_in
- string the_type_use = NStr::TruncateSpaces(the_type_in, NStr::eTrunc_Both);
- // Determine the type from the string
- CSeq_id_Base::E_Choice the_type = WhichInverseSeqId(the_type_use.c_str());
- // Construct according to type
- if ( the_type == CSeq_id::e_Local ) {
- NcbiGetline( myin, acc_in, 0 ); // take rest
- x_Init( the_type, acc_in );
- return;
- }
- if ( !NcbiGetline( myin, acc_in, '|' ) )
- return;
- if ( the_type == CSeq_id::e_General || the_type == CSeq_id::e_Pdb ) {
- //Take the rest of the line
- NcbiGetline( myin, name_in, 0 );
- x_Init( the_type, acc_in, name_in );
- return;
- } else if ( the_type == CSeq_id::e_Gi ) {
- x_Init( the_type, acc_in );
- return;
- }
- if ( NcbiGetline(myin, name_in, '|') ) {
- if ( NcbiGetline(myin, version_in, '|') ) {
- NcbiGetline(myin, release_in, '|');
- }
- }
- string version = NStr::TruncateSpaces( version_in, NStr::eTrunc_Both );
- int ver = 0;
- if ( ! version.empty() ) {
- if ( (ver = NStr::StringToNumeric(version) ) < 0) {
- THROW1_TRACE(invalid_argument,
- "Unexpected non-numeric version: " +
- version +
- "nthe_id: " + the_id);
- }
- }
- x_Init(the_type, acc_in, name_in, ver, release_in);
- }
- // acc_in is just first string, as in text seqid, for
- // wierd cases (patents, pdb) not really an acc
- CSeq_id::CSeq_id
- (CSeq_id_Base::E_Choice the_type,
- const string& acc_in,
- const string& name_in,
- const string& version_in,
- const string& release_in )
- {
- string version = NStr::TruncateSpaces(version_in, NStr::eTrunc_Both);
- int ver = 0;
- if ( !version.empty() ) {
- if ( (ver = NStr::StringToNumeric(version)) < 0 ) {
- THROW1_TRACE(invalid_argument,
- "Unexpected non-numeric version. "
- "nthe_type = " + string(s_TextId[the_type]) +
- "nacc_in = " + acc_in +
- "nname_in = " + name_in +
- "version_in = " + version_in +
- "nrelease_in = " + release_in);
- }
- }
- x_Init(the_type, acc_in, name_in, ver, release_in);
- }
- static void s_InitThrow
- (const string& message,
- const string& type,
- const string& acc,
- const string& name,
- const string& version,
- const string& release)
- {
- THROW1_TRACE(invalid_argument,
- "CSeq_id:: " + message +
- "ntype = " + type +
- "naccession = " + acc +
- "nname = " + name +
- "nversion = " + version +
- "nrelease = " + release);
- }
- CSeq_id::CSeq_id
- (const string& the_type_in,
- const string& acc_in,
- const string& name_in,
- const string& version_in,
- const string& release_in)
- {
- string the_type_use = NStr::TruncateSpaces(the_type_in, NStr::eTrunc_Both);
- string version = NStr::TruncateSpaces(version_in, NStr::eTrunc_Both);
- int ver = 0;
- CSeq_id_Base::E_Choice the_type = WhichInverseSeqId(the_type_use.c_str());
- if ( !version.empty() ) {
- if ( (ver = NStr::StringToNumeric(version)) < 0) {
- s_InitThrow("Unexpected non-numeric version.",
- the_type_in, acc_in, name_in, version_in, release_in);
- }
- }
- x_Init(the_type, acc_in, name_in, ver, release_in);
- }
- CSeq_id::CSeq_id
- (const string& the_type_in,
- const string& acc_in,
- const string& name_in,
- int version,
- const string& release_in )
- {
- string the_type_use = NStr::TruncateSpaces(the_type_in, NStr::eTrunc_Both);
- CSeq_id_Base::E_Choice the_type = WhichInverseSeqId (the_type_use.c_str());
- x_Init(the_type, acc_in, name_in, version, release_in);
- }
- CSeq_id::CSeq_id
- ( CSeq_id_Base::E_Choice the_type,
- const string& acc_in,
- const string& name_in,
- int version,
- const string& release_in)
- {
- x_Init(the_type, acc_in, name_in, version, release_in);
- }
- // Build a Seq-id of one of the integer-valued types (local, gibbsq,
- // gibbmt, giim, gi) from a numeric identifier.
- //
- // Throws invalid_argument (via THROW1_TRACE) when the_id is zero or
- // negative, or when the_type is not one of the types handled in the
- // switch below.
- CSeq_id::CSeq_id(CSeq_id_Base::E_Choice the_type, int the_id)
- {
-     if (the_id <= 0) {
-         // Zero is rejected together with negative values, so the message
-         // must not claim that the value is negative.
-         THROW1_TRACE(invalid_argument, "Specified Seq-id value is not positive");
-     }
-     switch (the_type) {
-     case CSeq_id::e_Local:
-         SetLocal().SetId(the_id);
-         break;
-     case CSeq_id::e_Gibbsq:
-         SetGibbsq(the_id);
-         break;
-     case CSeq_id::e_Gibbmt:
-         SetGibbmt(the_id);
-         break;
-     case CSeq_id::e_Giim:
-         SetGiim().SetId(the_id);
-         break;
-     case CSeq_id::e_Gi:
-         SetGi(the_id);
-         break;
-     default:
-         THROW1_TRACE(invalid_argument, "Specified Seq-id type is not numeric seq-id");
-     }
- }
- // Karl Sirotkin 7/2001
- //
- // Fill in this Seq-id for the requested choice from textual components.
- // acc_in, name_in and release_in are stripped of surrounding whitespace
- // before use.  Their interpretation depends on the_type:
- //   * e_not_set: nothing is set.
- //   * gibbsq, gibbmt, giim, gi: the accession is converted with
- //     NStr::StringToNumeric(); a negative result is reported through
- //     s_InitThrow().
- //   * local: the accession is stored as an integer when it converts to a
- //     non-negative number and does not start with '0', else as a string.
- //   * general: the accession is the database name; the name is the tag,
- //     stored as an integer when it converts to a non-negative number and
- //     is either a single character or does not start with '0'.
- //   * text-based types: a new CTextseq_id is created from the components.
- //   * patent: accession is the country, name is the number (or the
- //     application number when prefixed with "App="), version is the seqid.
- //   * pdb: accession is the molecule id and name encodes the chain.
- // Any other type raises invalid_argument via THROW1_TRACE.
- void CSeq_id::x_Init(CSeq_id_Base::E_Choice the_type,
-                      const string&          acc_in,
-                      const string&          name_in,
-                      int                    version,
-                      const string&          release_in)
- {
-     string acc     = NStr::TruncateSpaces(acc_in,     NStr::eTrunc_Both);
-     string name    = NStr::TruncateSpaces(name_in,    NStr::eTrunc_Both);
-     string release = NStr::TruncateSpaces(release_in, NStr::eTrunc_Both);
-     switch (the_type) {
-     case CSeq_id::e_not_set:
-         // Will cause unspecified SeqId to be returned.
-         break;
-     case CSeq_id::e_Local:
-     {
-         CSeq_id::TLocal& local_id = SetLocal();
-         const int num = NStr::StringToNumeric(acc);
-         if (num < 0  ||  acc[0] == '0') {
-             // to cover case where embedded vertical bar in string, could
-             // add code here, to concat a '|' and name string, if not
-             // null/empty
-             local_id.SetStr(acc);
-         } else {
-             local_id.SetId(num);
-         }
-         break;
-     }
-     case CSeq_id::e_Gibbsq:
-     {
-         const int num = NStr::StringToNumeric(acc);
-         if (num < 0) {
-             s_InitThrow("Unexpected non-numeric accession.",
-                         string(s_TextId[the_type]), acc_in, name_in,
-                         NStr::IntToString(version), release_in);
-         } else {
-             SetGibbsq(num);
-         }
-         break;
-     }
-     case CSeq_id::e_Gibbmt:
-     {
-         const int num = NStr::StringToNumeric(acc);
-         if (num < 0) {
-             s_InitThrow("Unexpected non-numeric accession.",
-                         string(s_TextId[the_type]), acc_in, name_in,
-                         NStr::IntToString(version), release_in);
-         } else {
-             SetGibbmt(num);
-         }
-         break;
-     }
-     case CSeq_id::e_Giim:
-     {
-         // The giim variant is selected before the accession is parsed.
-         CGiimport_id& import_id = SetGiim();
-         const int num = NStr::StringToNumeric(acc);
-         if (num < 0) {
-             s_InitThrow("Unexpected non-numeric accession.",
-                         string(s_TextId[the_type]), acc_in, name_in,
-                         NStr::IntToString(version), release_in);
-         } else {
-             import_id.SetId(num);
-         }
-         break;
-     }
-     case CSeq_id::e_Genbank:
-         SetGenbank(*new CTextseq_id(acc, name, version, release));
-         break;
-     case CSeq_id::e_Embl:
-         SetEmbl(*new CTextseq_id(acc, name, version, release));
-         break;
-     case CSeq_id::e_Pir:
-         SetPir(*new CTextseq_id(acc, name, version, release, false));
-         break;
-     case CSeq_id::e_Swissprot:
-         SetSwissprot(*new CTextseq_id(acc, name, version, release, false));
-         break;
-     case CSeq_id::e_Tpg:
-         SetTpg(*new CTextseq_id(acc, name, version, release));
-         break;
-     case CSeq_id::e_Tpe:
-         SetTpe(*new CTextseq_id(acc, name, version, release));
-         break;
-     case CSeq_id::e_Tpd:
-         SetTpd(*new CTextseq_id(acc, name, version, release));
-         break;
-     case CSeq_id::e_Patent:
-     {
-         CPatent_seq_id& patent   = SetPatent();
-         CId_pat&        citation = patent.SetCit();
-         CId_pat::C_Id&  cit_id   = citation.SetId();
-         citation.SetCountry(acc);
-         const char      prefix[]   = "App=";
-         const SIZE_TYPE prefix_len = sizeof(prefix) - 1;
-         if (name.compare(0, prefix_len, prefix) == 0) {
-             // Text after the prefix is the application number.
-             cit_id.SetApp_number(name.substr(prefix_len));
-         } else {
-             cit_id.SetNumber(name);
-         }
-         patent.SetSeqid(version);
-         break;
-     }
-     case CSeq_id::e_Other:
-         // RefSeq, allow dot version
-         SetOther(*new CTextseq_id(acc, name, version, release));
-         break;
-     case CSeq_id::e_General:
-     {
-         CDbtag& db_tag = SetGeneral();
-         db_tag.SetDb(acc);
-         CObject_id& tag = db_tag.SetTag();
-         const int  tag_num    = NStr::StringToNumeric(name);
-         const bool as_integer =
-             tag_num >= 0  &&  (name.size() == 1  ||  name[0] != '0');
-         if (as_integer) {
-             tag.SetId(tag_num);
-         } else {
-             tag.SetStr(name);
-         }
-         break;
-     }
-     case CSeq_id::e_Gi:
-     {
-         const int num = NStr::StringToNumeric(acc);
-         if (num < 0) {
-             s_InitThrow("Unexpected non-numeric accession.",
-                         string(s_TextId[the_type]), acc_in, name_in,
-                         NStr::IntToString(version), release_in);
-         } else {
-             SetGi(num);
-         }
-         break;
-     }
-     case CSeq_id::e_Ddbj:
-         SetDdbj(*new CTextseq_id(acc, name, version, release));
-         break;
-     case CSeq_id::e_Prf:
-         SetPrf(*new CTextseq_id(acc, name, version, release, false));
-         break;
-     case CSeq_id::e_Pdb:
-     {
-         CPDB_seq_id& pdb = SetPdb();
-         pdb.SetMol().Set(acc);
-         if (name.empty()) {
-             // A missing chain name is stored as a space.
-             pdb.SetChain(' ');
-         } else if (name.size() == 1) {
-             pdb.SetChain(static_cast<unsigned char>(name[0]));
-         } else if (name == "VB") {
-             // "VB" is stored as the vertical-bar character.
-             pdb.SetChain('|');
-         } else if (name.size() == 2  &&  name[0] == name[1]) {
-             // A doubled character is stored as Locase() of that character.
-             pdb.SetChain(Locase(static_cast<unsigned char>(name[0])));
-         } else {
-             s_InitThrow("Unexpected PDB chain id.",
-                         string(s_TextId[the_type]), acc_in, name_in,
-                         NStr::IntToString(version), release_in);
-         }
-         break;
-     }
-     default:
-         THROW1_TRACE(invalid_argument, "Specified Seq-id type not supported");
-     }
- }
- // Compare this Seq-id with another serial object.
- // When the dynamic types of the two objects differ, an error of severity
- // Fatal is posted; otherwise the comparison is delegated to
- // CSerialObject::Equals() with the given recursion mode.
- bool CSeq_id::Equals(const CSerialObject& object, ESerialRecursionMode how) const
- {
-     if ( typeid(object) != typeid(*this) ) {
-         // The diagnostic names Equals(); it previously repeated the text
-         // of the Assign() diagnostic.
-         ERR_POST(Fatal <<
-                  "CSeq_id::Equals() -- Comparison of incompatible types: " <<
-                  typeid(*this).name() << " == " << typeid(object).name());
-     }
-     return CSerialObject::Equals(object, how);
- }
- END_objects_SCOPE // namespace ncbi::objects::
- END_NCBI_SCOPE
- /*
- * ===========================================================================
- *
- * $Log: Seq_id.cpp,v $
- * Revision 1000.4 2004/06/01 19:34:28 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R6.81
- *
- * Revision 6.81 2004/06/01 15:26:07 johnson
- * Make CompareOrdered a true model of strict weak ordering
- *
- * Revision 6.80 2004/05/28 20:09:44 johnson
- * Added Compare for seq-id type General (CDbtag)
- *
- * Revision 6.79 2004/05/21 14:41:46 dicuccio
- * Moved database tag for general IDs to content part of label
- *
- * Revision 6.78 2004/05/19 17:26:25 gorelenk
- * Added include of PCH - ncbi_pch.hpp
- *
- * Revision 6.77 2004/05/16 16:57:44 dicuccio
- * Removed insertion of db type in general seq-id labels of type content - led to
- * duplicates of type
- *
- * Revision 6.76 2004/05/14 14:34:02 dicuccio
- * Include database name in label content if the db-tag is just an integer
- *
- * Revision 6.75 2004/03/25 15:59:06 gouriano
- * Added possibility to copy and compare serial object non-recursively
- *
- * Revision 6.74 2004/03/22 16:24:19 ucko
- * CN is now specifically assigned to GenBank ESTs.
- *
- * Revision 6.73 2004/01/22 21:03:58 dicuccio
- * Separated functionality of enums in GetLabel() into discrete mode and flags
- *
- * Revision 6.72 2004/01/22 18:45:46 dicuccio
- * Added new API: CSeq_id::GetLabel(). Rewired GetSeqIdString() to feed into
- * GetLabel(). Rewired GetStringDescr() to feed into GetLabel() directly instead
- * of feeding through GetSeqIdString().
- *
- * Revision 6.71 2004/01/21 22:55:47 ucko
- * GetSeqIdString: drop the database name from general IDs for
- * compatibility with code that can't handle its presence.
- *
- * Revision 6.70 2004/01/21 18:04:20 dicuccio
- * Added ctor to create a seq-id from a given dbtag, performing conversion to
- * specific seq-id types where possible
- *
- * Revision 6.69 2004/01/20 16:59:38 ucko
- * CSeq_id::IdentifyAccession: identify IDs consisting solely of digits as GIs.
- *
- * Revision 6.68 2004/01/16 17:39:17 vasilche
- * Fixed parsing 'gnl|xxx|999' format - integer tag detection.
- *
- * Revision 6.67 2004/01/16 15:58:19 ucko
- * CM now specifically assigned to eAcc_gb_con.
- *
- * Revision 6.66 2003/12/18 18:55:59 ucko
- * CSeq_id::IdentifyAccession: CQ is EMBL patents, CR is EMBL genomes,
- * CS-CU are reserved for future EMBL nucleotide use.
- *
- * Revision 6.65 2003/12/16 16:00:16 ucko
- * CSeq_id::IdentifyAccession: CL is GenBank GSS, CM-CP are GenBank
- * nucleotides to be assigned to specific projects as needed.
- *
- * Revision 6.64 2003/11/10 15:05:42 ucko
- * +CK to eAcc_gb_est
- *
- * Revision 6.63 2003/10/31 20:16:07 ucko
- * CSeq_id::IdentifyAccession: CI and CJ are both DDBJ EST.
- *
- * Revision 6.62 2003/10/24 14:57:03 ucko
- * IdentifyAccession: CH -> eAcc_gb_con.
- * GetSeqIdString: include the database name for IDs of type general.
- *
- * Revision 6.61 2003/08/25 21:15:41 ucko
- * Tweak slightly for efficiency.
- *
- * Revision 6.60 2003/08/22 15:16:48 dondosha
- * Correction in CSeq_id constructor, to allow id strings starting with a gi id
- *
- * Revision 6.59 2003/08/11 14:37:20 ucko
- * IdentifyAccession: "CG" is GenBank GSS.
- *
- * Revision 6.58 2003/07/14 20:11:59 ucko
- * +CF (eAcc_gb_est)
- *
- * Revision 6.57 2003/07/02 13:46:14 ucko
- * +CE (eAcc_gb_gss)
- *
- * Revision 6.56 2003/06/24 16:33:48 ucko
- * CSeq_id::IdentifyAccession: always return unknown for accessions that
- * contain no digits, even if they happen to look like prefixes.
- *
- * Revision 6.55 2003/05/09 14:22:56 ucko
- * CSeq_id::x_Init: treat missing (chain) names as spaces (reported by
- * Michel Dumontier) and get rid of some unnecessary calls to c_str().
- *
- * Revision 6.54 2003/04/30 14:41:01 ucko
- * CSeq_id::IdentifyAccession: CDnnnnnn -> eAcc_gb_est
- *
- * Revision 6.53 2003/03/25 15:37:18 ucko
- * CSeq_id::IdentifyAccession("CC...") -> eAcc_gb_gss
- *
- * Revision 6.52 2003/03/11 15:55:44 kuznets
- * iterate -> ITERATE
- *
- * Revision 6.51 2003/02/06 22:23:29 vasilche
- * Added CSeq_id::Assign(), CSeq_loc::Assign().
- * Added int CSeq_id::Compare() (not safe).
- * Added caching of CSeq_loc::GetTotalRange().
- *
- * Revision 6.50 2003/02/04 15:15:12 grichenk
- * Overrode Assign() for CSeq_loc and CSeq_id
- *
- * Revision 6.49 2003/01/18 08:40:03 kimelman
- * added seqid constructor for numeric types
- *
- * Revision 6.48 2003/01/15 18:27:13 ucko
- * +AK (accidentally skipped earlier -- sigh)
- *
- * Revision 6.47 2003/01/10 15:57:30 ucko
- * Identify CB as gb_est.
- *
- * Revision 6.46 2003/01/08 16:48:03 ucko
- * +AL (accidentally skipped earlier)
- *
- * Revision 6.45 2003/01/07 19:52:15 ucko
- * Add more refseq types (NR_, NS_, NW_).
- *
- * Revision 6.44 2002/12/30 23:44:42 vakatov
- * CSeq_id::GetStringDescr() -- un-freeze "strstream" to avoid a mem.leak
- *
- * Revision 6.43 2002/12/26 16:39:25 vasilche
- * Object manager class CSeqMap rewritten.
- *
- * Revision 6.42 2002/12/03 15:55:12 dicuccio
- * Corrected processing of text id accessions with no accession set (in
- * GetSeqIdString()) - use name instead.
- *
- * Revision 6.41 2002/11/26 15:13:32 dicuccio
- * Added CSeq_id::GetStringDescr() - provides text representations of seq-ids in a
- * number of formats.
- *
- * Revision 6.40 2002/10/23 18:23:07 ucko
- * Add self-classification (using known type information).
- *
- * Revision 6.39 2002/10/22 20:19:14 jianye
- * Added GetSeqIdString()
- *
- * Revision 6.38 2002/10/18 16:03:08 ucko
- * +CA (eAcc_gb_est)
- *
- * Revision 6.37 2002/10/03 18:55:04 clausen
- * Removed extra whitespace
- *
- * Revision 6.36 2002/10/03 17:17:11 clausen
- * Added BestRank() and WorstRank()
- *
- * Revision 6.35 2002/10/03 16:03:00 ucko
- * +BZ (eAcc_gb_gss)
- *
- * Revision 6.34 2002/09/23 16:43:46 ucko
- * Change check for absence of '|' to use string::find.
- *
- * Revision 6.33 2002/09/20 19:55:29 ucko
- * +BY (eAcc_ddbj_est)
- *
- * Revision 6.32 2002/08/28 14:07:56 ucko
- * IdentifyAccession: handle BX (EMBL genome)
- *
- * Revision 6.31 2002/08/26 20:44:06 ucko
- * CSeq_id::IdentifyAccession: +BW (DDBJ EST)
- *
- * Revision 6.30 2002/08/19 16:54:30 ucko
- * IdentifyAccession: add BV (eAcc_gb_sts).
- *
- * Revision 6.29 2002/08/19 15:42:14 ucko
- * IdentifyAccession: add BU (eAcc_gb_est).
- *
- * Revision 6.28 2002/08/16 19:27:01 ucko
- * Recognize new WGS RefSeq accessions.
- *
- * Revision 6.27 2002/08/14 15:52:27 ucko
- * Add BT and XR_.
- *
- * Revision 6.26 2002/08/06 18:22:19 ucko
- * Properly handle versioned PDB accessions.
- *
- * Revision 6.25 2002/08/01 20:33:10 ucko
- * s_IdentifyAccession -> IdentifyAccession; s_ is only for module-static names.
- *
- * Revision 6.24 2002/07/30 19:42:44 ucko
- * Add s_IdentifyAccession, and use it in the string-based constructor if
- * the input isn't FASTA-format.
- * Move CVS log to end.
- *
- * Revision 6.23 2002/06/06 20:31:33 clausen
- * Moved methods using object manager to objects/util
- *
- * Revision 6.22 2002/05/22 14:03:40 grichenk
- * CSerialUserOp -- added prefix UserOp_ to Assign() and Equals()
- *
- * Revision 6.21 2002/05/06 03:39:12 vakatov
- * OM/OM1 renaming
- *
- * Revision 6.20 2002/05/03 21:28:17 ucko
- * Introduce T(Signed)SeqPos.
- *
- * Revision 6.19 2002/01/16 18:56:32 grichenk
- * Removed CRef<> argument from choice variant setter, updated sources to
- * use references instead of CRef<>s
- *
- * Revision 6.18 2002/01/10 19:00:04 clausen
- * Added GetLength
- *
- * Revision 6.17 2002/01/09 15:59:30 grichenk
- * Fixed includes
- *
- * Revision 6.16 2001/10/15 23:00:00 vakatov
- * CSeq_id::x_Init() -- get rid of unreachable "break;"
- *
- * Revision 6.15 2001/08/31 20:05:44 ucko
- * Fix ICC build.
- *
- * Revision 6.14 2001/08/31 16:02:10 clausen
- * Added new constructors for Fasta and added new id types, tpd, tpe, tpg
- *
- * Revision 6.13 2001/07/16 16:22:48 grichenk
- * Added CSerialUserOp class to create Assign() and Equals() methods for
- * user-defined classes.
- * Added SerialAssign<>() and SerialEquals<>() functions.
- *
- * Revision 6.12 2001/05/24 20:24:27 grichenk
- * Renamed seq/objmgrstub.hpp -> obgmgr/objmgr_base.hpp
- * Added Genbank, Embl and Ddbj support in CSeq_id::Compare()
- * Fixed General output by CSeq_id::WriteAsFasta()
- *
- * Revision 6.11 2001/04/17 04:14:49 vakatov
- * CSeq_id::AsFastaString() --> CSeq_id::WriteAsFasta()
- *
- * Revision 6.10 2001/01/03 16:39:05 vasilche
- * Added CAbstractObjectManager - stub for object manager.
- * CRange extracted to separate file.
- *
- * Revision 6.9 2000/12/26 17:28:55 vasilche
- * Simplified and formatted code.
- *
- * Revision 6.8 2000/12/15 19:30:31 ostell
- * Used Upcase() in AsFastaString() and changed to PNocase().Equals() style
- *
- * Revision 6.7 2000/12/08 22:19:45 ostell
- * changed MakeFastString to AsFastaString and to use ostream instead of string
- *
- * Revision 6.6 2000/12/08 20:45:14 ostell
- * added MakeFastaString()
- *
- * Revision 6.5 2000/12/04 15:09:41 vasilche
- * Added missing include.
- *
- * Revision 6.4 2000/11/30 22:08:18 ostell
- * finished Match()
- *
- * Revision 6.3 2000/11/30 16:13:12 ostell
- * added support for Textseq_id to Seq_id.Match()
- *
- * Revision 6.2 2000/11/28 12:47:41 ostell
- * fixed first switch statement to break properly
- *
- * Revision 6.1 2000/11/21 18:58:29 vasilche
- * Added Match() methods for CSeq_id, CObject_id and CDbtag.
- *
- * ===========================================================================
- */