readfeat.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:66k
源码类别:

生物技术

开发平台:

C/C++

  1. /*
  2.  * ===========================================================================
  3.  * PRODUCTION $Log: readfeat.cpp,v $
  4.  * PRODUCTION Revision 1000.5  2004/06/01 19:46:24  gouriano
  5.  * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.35
  6.  * PRODUCTION
  7.  * ===========================================================================
  8.  */
  9. /*
  10.  * ===========================================================================
  11.  *
  12.  *                            PUBLIC DOMAIN NOTICE
  13.  *               National Center for Biotechnology Information
  14.  *
  15.  *  This software/database is a "United States Government Work" under the
  16.  *  terms of the United States Copyright Act.  It was written as part of
  17.  *  the author's official duties as a United States Government employee and
  18.  *  thus cannot be copyrighted.  This software/database is freely available
  19.  *  to the public for use. The National Library of Medicine and the U.S.
  20.  *  Government have not placed any restriction on its use or reproduction.
  21.  *
  22.  *  Although all reasonable efforts have been taken to ensure the accuracy
  23.  *  and reliability of the software and data, the NLM and the U.S.
  24.  *  Government do not and cannot warrant the performance or results that
  25.  *  may be obtained by using this software or data. The NLM and the U.S.
  26.  *  Government disclaim all warranties, express or implied, including
  27.  *  warranties of performance, merchantability or fitness for any particular
  28.  *  purpose.
  29.  *
  30.  *  Please cite the author in any work or product based on this material.
  31.  *
  32.  * ===========================================================================
  33.  *
  34.  * Author:  Jonathan Kans
  35.  *
  36.  * File Description:
  37.  *   Feature table reader
  38.  *
  39.  */
  40. #include <ncbi_pch.hpp>
  41. #include <corelib/ncbistd.hpp>
  42. #include <corelib/ncbithr.hpp>
  43. #include <serial/iterator.hpp>
  44. #include <serial/objistrasn.hpp>
  45. // Objects includes
  46. #include <objects/general/Int_fuzz.hpp>
  47. #include <objects/general/Object_id.hpp>
  48. #include <objects/general/Dbtag.hpp>
  49. #include <objects/seqloc/Seq_id.hpp>
  50. #include <objects/seqloc/Seq_loc.hpp>
  51. #include <objects/seqloc/Seq_interval.hpp>
  52. #include <objects/seqloc/Seq_point.hpp>
  53. #include <objects/seq/Seq_annot.hpp>
  54. #include <objects/seq/Annotdesc.hpp>
  55. #include <objects/seq/Annot_descr.hpp>
  56. #include <objects/seqfeat/SeqFeatData.hpp>
  57. #include <objects/seqfeat/Seq_feat.hpp>
  58. #include <objects/seqfeat/BioSource.hpp>
  59. #include <objects/seqfeat/Org_ref.hpp>
  60. #include <objects/seqfeat/OrgName.hpp>
  61. #include <objects/seqfeat/SubSource.hpp>
  62. #include <objects/seqfeat/OrgMod.hpp>
  63. #include <objects/seqfeat/Gene_ref.hpp>
  64. #include <objects/seqfeat/Cdregion.hpp>
  65. #include <objects/seqfeat/Code_break.hpp>
  66. #include <objects/seqfeat/Genetic_code.hpp>
  67. #include <objects/seqfeat/Genetic_code_table.hpp>
  68. #include <objects/seqfeat/RNA_ref.hpp>
  69. #include <objects/seqfeat/Trna_ext.hpp>
  70. #include <objects/seqfeat/Imp_feat.hpp>
  71. #include <objects/seqfeat/Gb_qual.hpp>
  72. #include <objtools/readers/readfeat.hpp>
  73. #include <algorithm>
  74. BEGIN_NCBI_SCOPE
  75. BEGIN_objects_SCOPE // namespace ncbi::objects::
  76. class /* NCBI_XOBJREAD_EXPORT */ CFeature_table_reader_imp  // implementation object owned through CFeature_table_reader::sm_Implementation
  77. {
  78. public:
  79.     enum EQual {  // internal codes for feature qualifier names; string keys are listed in qual_key_to_subtype
  80.         eQual_allele,
  81.         eQual_anticodon,
  82.         eQual_bond_type,
  83.         eQual_bound_moiety,
  84.         eQual_citation,
  85.         eQual_clone,
  86.         eQual_codon_start,
  87.         eQual_cons_splice,
  88.         eQual_db_xref,
  89.         eQual_direction,
  90.         eQual_EC_number,
  91.         eQual_evidence,
  92.         eQual_exception,
  93.         eQual_frequency,
  94.         eQual_function,
  95.         eQual_gene,
  96.         eQual_gene_desc,
  97.         eQual_gene_syn,
  98.         eQual_go_component,
  99.         eQual_go_function,
  100.         eQual_go_process,
  101.         eQual_insertion_seq,
  102.         eQual_label,
  103.         eQual_locus_tag,
  104.         eQual_macronuclear,
  105.         eQual_map,
  106.         eQual_MEDLINE,  // NOTE(review): no key for this value in qual_key_to_subtype
  107.         eQual_mod_base,
  108.         eQual_muid,  // NOTE(review): no key for this value in qual_key_to_subtype
  109.         eQual_note,
  110.         eQual_number,
  111.         eQual_operon,
  112.         eQual_organism,
  113.         eQual_partial,
  114.         eQual_PCR_conditions,
  115.         eQual_phenotype,
  116.         eQual_pmid,  // NOTE(review): no key for this value in qual_key_to_subtype
  117.         eQual_product,
  118.         eQual_prot_desc,
  119.         eQual_prot_note,  // NOTE(review): no key for this value in qual_key_to_subtype
  120.         eQual_protein_id,
  121.         eQual_pseudo,
  122.         eQual_PubMed,  // NOTE(review): no key for this value in qual_key_to_subtype
  123.         eQual_region_name,  // NOTE(review): no key for this value in qual_key_to_subtype
  124.         eQual_replace,
  125.         eQual_rpt_family,
  126.         eQual_rpt_type,
  127.         eQual_rpt_unit,
  128.         eQual_site_type,  // NOTE(review): no key for this value in qual_key_to_subtype
  129.         eQual_standard_name,
  130.         eQual_transcript_id,
  131.         eQual_transl_except,
  132.         eQual_transl_table,
  133.         eQual_translation,
  134.         eQual_transposon,
  135.         eQual_usedin
  136.     };
  137.     enum EOrgRef {  // internal codes for organism-level qualifiers; string keys are listed in orgref_key_to_subtype
  138.         eOrgRef_organism,
  139.         eOrgRef_organelle,
  140.         eOrgRef_div,
  141.         eOrgRef_lineage,
  142.         eOrgRef_gcode,
  143.         eOrgRef_mgcode
  144.     };
  145.     typedef map< string, CSeqFeatData::ESubtype > TFeatReaderMap;  // feature key text -> feature subtype
  146.     typedef map< string, EQual > TQualReaderMap;  // qualifier name -> EQual code
  147.     typedef map< string, EOrgRef > TOrgRefReaderMap;  // organism qualifier name -> EOrgRef code
  148.     typedef map< string, CBioSource::EGenome > TGenomeReaderMap;  // genome/organelle name -> EGenome value
  149.     typedef map< string, CSubSource::ESubtype > TSubSrcReaderMap;  // source subtype name -> CSubSource subtype
  150.     typedef map< string, COrgMod::ESubtype > TOrgModReaderMap;  // organism modifier name -> COrgMod subtype
  151.     typedef map< string, CSeqFeatData::EBond > TBondReaderMap;  // bond name -> EBond value
  152.     typedef map< string, CSeqFeatData::ESite > TSiteReaderMap;  // site name -> ESite value
  153.     typedef map< string, int > TTrnaReaderMap;  // amino acid abbreviation -> int (trna_key_to_subtype stores character literals)
  154.     typedef vector< string > TSingleQualList;  // plain list of strings; NOTE(review): presumably qualifiers allowed without a value - confirm in the constructor
  155.     // constructor
  156.     CFeature_table_reader_imp(void);
  157.     // destructor
  158.     ~CFeature_table_reader_imp(void);
  159.     // read 5-column feature table and return Seq-annot
  160.     CRef<CSeq_annot> ReadSequinFeatureTable (CNcbiIstream& ifs,  // input stream holding the table text
  161.                                              const string& seqid,
  162.                                              const string& annotname,
  163.                                              const CFeature_table_reader::TFlags flags);
  164.     // create single feature from key
  165.     CRef<CSeq_feat> CreateSeqFeat (const string& feat,  // feature key text
  166.                                    CSeq_loc& location,
  167.                                    const CFeature_table_reader::TFlags flags);
  168.     // add single qualifier to feature
  169.     void AddFeatQual (CRef<CSeq_feat> sfp,  // feature that receives the qualifier
  170.                       const string& qual,  // qualifier name
  171.                       const string& val,  // qualifier value
  172.                       const CFeature_table_reader::TFlags flags);
  173. private:
  174.     // Prohibit copy constructor and assignment operator
  175.     CFeature_table_reader_imp(const CFeature_table_reader_imp& value);
  176.     CFeature_table_reader_imp& operator=(const CFeature_table_reader_imp& value);
  177.     bool x_ParseFeatureTableLine (const string& line, Int4* startP, Int4* stopP,  // takes one input line; remaining pointer/reference arguments are outputs by signature
  178.                                   bool* partial5P, bool* partial3P, bool* ispointP,
  179.                                   string& featP, string& qualP, string& valP, Int4 offset);
  180.     bool x_AddIntervalToFeature (CRef<CSeq_feat> sfp, CSeq_loc_mix *mix,  // interval given by seqid, start, stop and the two partial flags
  181.                                  const string& seqid, Int4 start,
  182.                                  Int4 stop, bool partial5, bool partial3);
  183.     bool x_AddQualifierToFeature (CRef<CSeq_feat> sfp,  // takes the qualifier as text; the overloads below take an already-decoded code
  184.                                   const string& qual, const string& val);
  185.     bool x_AddQualifierToGene     (CSeqFeatData& sfdata,
  186.                                    EQual qtype, const string& val);
  187.     bool x_AddQualifierToCdregion (CRef<CSeq_feat> sfp, CSeqFeatData& sfdata,
  188.                                    EQual qtype, const string& val);
  189.     bool x_AddQualifierToRna      (CSeqFeatData& sfdata,
  190.                                    EQual qtype, const string& val);
  191.     bool x_AddQualifierToImp      (CRef<CSeq_feat> sfp, CSeqFeatData& sfdata,
  192.                                    EQual qtype, const string& qual, const string& val);
  193.     bool x_AddQualifierToBioSrc   (CSeqFeatData& sfdata,  // overload selected by an EOrgRef code
  194.                                    EOrgRef rtype, const string& val);
  195.     bool x_AddQualifierToBioSrc   (CSeqFeatData& sfdata,  // overload selected by a CSubSource subtype
  196.                                    CSubSource::ESubtype stype, const string& val);
  197.     bool x_AddQualifierToBioSrc   (CSeqFeatData& sfdata,  // overload selected by a COrgMod subtype
  198.                                    COrgMod::ESubtype mtype, const string& val);
  199.     int x_ParseTrnaString (const string& val);  // NOTE(review): presumably returns a code from m_TrnaKeys - definition not visible here
  200.     TFeatReaderMap    m_FeatKeys;  // lookup containers, one per typedef above; NOTE(review): presumably filled from the static *_key_to_subtype tables by the constructor - confirm
  201.     TQualReaderMap    m_QualKeys;
  202.     TOrgRefReaderMap  m_OrgRefKeys;
  203.     TGenomeReaderMap  m_GenomeKeys;
  204.     TSubSrcReaderMap  m_SubSrcKeys;
  205.     TOrgModReaderMap  m_OrgModKeys;
  206.     TBondReaderMap    m_BondKeys;
  207.     TSiteReaderMap    m_SiteKeys;
  208.     TTrnaReaderMap    m_TrnaKeys;
  209.     TSingleQualList   m_SingleKeys;
  210. };
  211. auto_ptr<CFeature_table_reader_imp> CFeature_table_reader::sm_Implementation;  // definition of the static holder; x_InitImplementation() installs the object on demand
  212. void CFeature_table_reader::x_InitImplementation()  // creates the shared CFeature_table_reader_imp if none exists yet
  213. {
  214.     DEFINE_STATIC_FAST_MUTEX(s_Implementation_mutex);  // toolkit macro declaring the mutex used below
  215.     CFastMutexGuard   LOCK(s_Implementation_mutex);  // guard object built on that mutex; NOTE(review): presumably held until the function returns - confirm against CFastMutexGuard
  216.     if ( !sm_Implementation.get() ) {  // only construct when the holder is still empty
  217.         sm_Implementation.reset(new CFeature_table_reader_imp());  // the auto_ptr takes ownership of the new object
  218.     }
  219. }
  220. typedef struct featinit {  // row type of feat_key_to_subtype
  221.     const char *           key;  // feature key text
  222.     CSeqFeatData::ESubtype subtype;  // subtype paired with that key
  223. } FeatInit;
  224. static FeatInit feat_key_to_subtype [] = {  // feature key text paired with the CSeqFeatData subtype it selects; several keys are aliases for one subtype
  225.     { "-10_signal",         CSeqFeatData::eSubtype_10_signal          },
  226.     { "-35_signal",         CSeqFeatData::eSubtype_35_signal          },
  227.     { "3'clip",             CSeqFeatData::eSubtype_3clip              },
  228.     { "3'UTR",              CSeqFeatData::eSubtype_3UTR               },
  229.     { "5'clip",             CSeqFeatData::eSubtype_5clip              },
  230.     { "5'UTR",              CSeqFeatData::eSubtype_5UTR               },
  231.     { "attenuator",         CSeqFeatData::eSubtype_attenuator         },
  232.     { "Bond",               CSeqFeatData::eSubtype_bond               },
  233.     { "CAAT_signal",        CSeqFeatData::eSubtype_CAAT_signal        },
  234.     { "CDS",                CSeqFeatData::eSubtype_cdregion           },
  235.     { "Cit",                CSeqFeatData::eSubtype_pub                },  // same subtype as "REFERENCE"
  236.     { "Comment",            CSeqFeatData::eSubtype_comment            },
  237.     { "conflict",           CSeqFeatData::eSubtype_conflict           },
  238.     { "C_region",           CSeqFeatData::eSubtype_C_region           },
  239.     { "D-loop",             CSeqFeatData::eSubtype_D_loop             },
  240.     { "D_segment",          CSeqFeatData::eSubtype_D_segment          },
  241.     { "enhancer",           CSeqFeatData::eSubtype_enhancer           },
  242.     { "exon",               CSeqFeatData::eSubtype_exon               },
  243.     { "GC_signal",          CSeqFeatData::eSubtype_GC_signal          },
  244.     { "gene",               CSeqFeatData::eSubtype_gene               },
  245.     { "Het",                CSeqFeatData::eSubtype_het                },
  246.     { "iDNA",               CSeqFeatData::eSubtype_iDNA               },
  247.     { "intron",             CSeqFeatData::eSubtype_intron             },
  248.     { "J_segment",          CSeqFeatData::eSubtype_J_segment          },
  249.     { "LTR",                CSeqFeatData::eSubtype_LTR                },
  250.     { "mat_peptide",        CSeqFeatData::eSubtype_mat_peptide_aa     },  // the plain key selects the _aa subtype
  251.     { "mat_peptide_nt",     CSeqFeatData::eSubtype_mat_peptide        },  // the _nt key selects the subtype without suffix
  252.     { "misc_binding",       CSeqFeatData::eSubtype_misc_binding       },
  253.     { "misc_difference",    CSeqFeatData::eSubtype_misc_difference    },
  254.     { "misc_feature",       CSeqFeatData::eSubtype_misc_feature       },
  255.     { "misc_recomb",        CSeqFeatData::eSubtype_misc_recomb        },
  256.     { "misc_RNA",           CSeqFeatData::eSubtype_otherRNA           },
  257.     { "misc_signal",        CSeqFeatData::eSubtype_misc_signal        },
  258.     { "misc_structure",     CSeqFeatData::eSubtype_misc_structure     },
  259.     { "modified_base",      CSeqFeatData::eSubtype_modified_base      },
  260.     { "mRNA",               CSeqFeatData::eSubtype_mRNA               },
  261.     { "NonStdRes",          CSeqFeatData::eSubtype_non_std_residue    },
  262.     { "Num",                CSeqFeatData::eSubtype_num                },
  263.     { "N_region",           CSeqFeatData::eSubtype_N_region           },
  264.     { "old_sequence",       CSeqFeatData::eSubtype_old_sequence       },
  265.     { "operon",             CSeqFeatData::eSubtype_operon             },
  266.     { "oriT",               CSeqFeatData::eSubtype_oriT               },
  267.     { "polyA_signal",       CSeqFeatData::eSubtype_polyA_signal       },
  268.     { "polyA_site",         CSeqFeatData::eSubtype_polyA_site         },
  269.     { "precursor_RNA",      CSeqFeatData::eSubtype_preRNA             },  // alias of "pre_RNA"
  270.     { "pre_RNA",            CSeqFeatData::eSubtype_preRNA             },
  271.     { "preprotein",         CSeqFeatData::eSubtype_preprotein         },
  272.     { "primer_bind",        CSeqFeatData::eSubtype_primer_bind        },
  273.     { "prim_transcript",    CSeqFeatData::eSubtype_prim_transcript    },
  274.     { "promoter",           CSeqFeatData::eSubtype_promoter           },
  275.     { "Protein",            CSeqFeatData::eSubtype_prot               },
  276.     { "protein_bind",       CSeqFeatData::eSubtype_protein_bind       },
  277.     { "RBS",                CSeqFeatData::eSubtype_RBS                },
  278.     { "REFERENCE",          CSeqFeatData::eSubtype_pub                },  // same subtype as "Cit"
  279.     { "Region",             CSeqFeatData::eSubtype_region             },
  280.     { "repeat_region",      CSeqFeatData::eSubtype_repeat_region      },
  281.     { "repeat_unit",        CSeqFeatData::eSubtype_repeat_unit        },
  282.     { "rep_origin",         CSeqFeatData::eSubtype_rep_origin         },
  283.     { "rRNA",               CSeqFeatData::eSubtype_rRNA               },
  284.     { "Rsite",              CSeqFeatData::eSubtype_rsite              },
  285.     { "satellite",          CSeqFeatData::eSubtype_satellite          },
  286.     { "scRNA",              CSeqFeatData::eSubtype_scRNA              },
  287.     { "SecStr",             CSeqFeatData::eSubtype_psec_str           },
  288.     { "sig_peptide",        CSeqFeatData::eSubtype_sig_peptide_aa     },  // the plain key selects the _aa subtype
  289.     { "sig_peptide_nt",     CSeqFeatData::eSubtype_sig_peptide        },
  290.     { "Site",               CSeqFeatData::eSubtype_site               },
  291.     { "Site-ref",           CSeqFeatData::eSubtype_site_ref           },
  292.     { "snoRNA",             CSeqFeatData::eSubtype_snoRNA             },
  293.     { "snRNA",              CSeqFeatData::eSubtype_snRNA              },
  294.     { "source",             CSeqFeatData::eSubtype_biosrc             },  // same subtype as "Src"
  295.     { "Src",                CSeqFeatData::eSubtype_biosrc             },
  296.     { "stem_loop",          CSeqFeatData::eSubtype_stem_loop          },
  297.     { "STS",                CSeqFeatData::eSubtype_STS                },
  298.     { "S_region",           CSeqFeatData::eSubtype_S_region           },
  299.     { "TATA_signal",        CSeqFeatData::eSubtype_TATA_signal        },
  300.     { "terminator",         CSeqFeatData::eSubtype_terminator         },
  301.     { "transit_peptide",    CSeqFeatData::eSubtype_transit_peptide_aa },  // the plain key selects the _aa subtype
  302.     { "transit_peptide_nt", CSeqFeatData::eSubtype_transit_peptide    },
  303.     { "tRNA",               CSeqFeatData::eSubtype_tRNA               },
  304.     { "TxInit",             CSeqFeatData::eSubtype_txinit             },
  305.     { "unsure",             CSeqFeatData::eSubtype_unsure             },
  306.     { "User",               CSeqFeatData::eSubtype_user               },
  307.     { "variation",          CSeqFeatData::eSubtype_variation          },
  308.     { "virion",             CSeqFeatData::eSubtype_virion             },
  309.     { "V_region",           CSeqFeatData::eSubtype_V_region           },
  310.     { "V_segment",          CSeqFeatData::eSubtype_V_segment          },
  311.     { "Xref",               CSeqFeatData::eSubtype_seq                }
  312. };
  313. typedef struct qualinit {  // row type of qual_key_to_subtype
  314.     const char *                         key;  // qualifier name text
  315.     CFeature_table_reader_imp::EQual subtype;  // EQual code paired with that name
  316. } QualInit;
  317. static QualInit qual_key_to_subtype [] = {  // qualifier name text paired with its EQual code; NOTE(review): EQual also declares MEDLINE, muid, pmid, prot_note, PubMed, region_name and site_type, none of which has a row here - confirm whether that is intended
  318.     { "allele",               CFeature_table_reader_imp::eQual_allele               },
  319.     { "anticodon",            CFeature_table_reader_imp::eQual_anticodon            },
  320.     { "bond_type",            CFeature_table_reader_imp::eQual_bond_type            },
  321.     { "bound_moiety",         CFeature_table_reader_imp::eQual_bound_moiety         },
  322.     { "citation",             CFeature_table_reader_imp::eQual_citation             },
  323.     { "clone",                CFeature_table_reader_imp::eQual_clone                },
  324.     { "codon_start",          CFeature_table_reader_imp::eQual_codon_start          },
  325.     { "cons_splice",          CFeature_table_reader_imp::eQual_cons_splice          },
  326.     { "db_xref",              CFeature_table_reader_imp::eQual_db_xref              },
  327.     { "direction",            CFeature_table_reader_imp::eQual_direction            },
  328.     { "EC_number",            CFeature_table_reader_imp::eQual_EC_number            },
  329.     { "evidence",             CFeature_table_reader_imp::eQual_evidence             },
  330.     { "exception",            CFeature_table_reader_imp::eQual_exception            },
  331.     { "frequency",            CFeature_table_reader_imp::eQual_frequency            },
  332.     { "function",             CFeature_table_reader_imp::eQual_function             },
  333.     { "gene",                 CFeature_table_reader_imp::eQual_gene                 },
  334.     { "gene_desc",            CFeature_table_reader_imp::eQual_gene_desc            },
  335.     { "gene_syn",             CFeature_table_reader_imp::eQual_gene_syn             },
  336.     { "go_component",         CFeature_table_reader_imp::eQual_go_component         },
  337.     { "go_function",          CFeature_table_reader_imp::eQual_go_function          },
  338.     { "go_process",           CFeature_table_reader_imp::eQual_go_process           },
  339.     { "insertion_seq",        CFeature_table_reader_imp::eQual_insertion_seq        },
  340.     { "label",                CFeature_table_reader_imp::eQual_label                },
  341.     { "locus_tag",            CFeature_table_reader_imp::eQual_locus_tag            },
  342.     { "macronuclear",         CFeature_table_reader_imp::eQual_macronuclear         },
  343.     { "map",                  CFeature_table_reader_imp::eQual_map                  },
  344.     { "mod_base",             CFeature_table_reader_imp::eQual_mod_base             },
  345.     { "note",                 CFeature_table_reader_imp::eQual_note                 },
  346.     { "number",               CFeature_table_reader_imp::eQual_number               },
  347.     { "operon",               CFeature_table_reader_imp::eQual_operon               },
  348.     { "organism",             CFeature_table_reader_imp::eQual_organism             },
  349.     { "partial",              CFeature_table_reader_imp::eQual_partial              },
  350.     { "PCR_conditions",       CFeature_table_reader_imp::eQual_PCR_conditions       },
  351.     { "phenotype",            CFeature_table_reader_imp::eQual_phenotype            },
  352.     { "product",              CFeature_table_reader_imp::eQual_product              },
  353.     { "protein_id",           CFeature_table_reader_imp::eQual_protein_id           },
  354.     { "prot_desc",            CFeature_table_reader_imp::eQual_prot_desc            },
  355.     { "pseudo",               CFeature_table_reader_imp::eQual_pseudo               },
  356.     { "replace",              CFeature_table_reader_imp::eQual_replace              },
  357.     { "rpt_family",           CFeature_table_reader_imp::eQual_rpt_family           },
  358.     { "rpt_type",             CFeature_table_reader_imp::eQual_rpt_type             },
  359.     { "rpt_unit",             CFeature_table_reader_imp::eQual_rpt_unit             },
  360.     { "standard_name",        CFeature_table_reader_imp::eQual_standard_name        },
  361.     { "transcript_id",        CFeature_table_reader_imp::eQual_transcript_id        },
  362.     { "transl_except",        CFeature_table_reader_imp::eQual_transl_except        },
  363.     { "transl_table",         CFeature_table_reader_imp::eQual_transl_table         },
  364.     { "translation",          CFeature_table_reader_imp::eQual_translation          },
  365.     { "transposon",           CFeature_table_reader_imp::eQual_transposon           },
  366.     { "usedin",               CFeature_table_reader_imp::eQual_usedin               }
  367. };
  368. typedef struct orgrefinit {  // row type of orgref_key_to_subtype
  369.     const char *                       key;  // organism qualifier name text
  370.     CFeature_table_reader_imp::EOrgRef subtype;  // EOrgRef code paired with that name
  371. } OrgRefInit;
  372. static OrgRefInit orgref_key_to_subtype [] = {  // one row per EOrgRef enumerator; every enumerator has a key
  373.     { "div",        CFeature_table_reader_imp::eOrgRef_div       },
  374.     { "gcode",      CFeature_table_reader_imp::eOrgRef_gcode     },
  375.     { "lineage",    CFeature_table_reader_imp::eOrgRef_lineage   },
  376.     { "mgcode",     CFeature_table_reader_imp::eOrgRef_mgcode    },
  377.     { "organelle",  CFeature_table_reader_imp::eOrgRef_organelle },
  378.     { "organism",   CFeature_table_reader_imp::eOrgRef_organism  }  // "organism" is also a key in qual_key_to_subtype
  379. };
  380. typedef struct genomeinit {  // row type of genome_key_to_subtype
  381.     const char *        key;  // genome/organelle name text
  382.     CBioSource::EGenome subtype;  // EGenome value paired with that name
  383. } GenomeInit;
  384. static GenomeInit genome_key_to_subtype [] = {  // genome/organelle names paired with CBioSource::EGenome values; several values have both a bare and a prefixed key
  385.     { "unknown",                   CBioSource::eGenome_unknown          },
  386.     { "genomic",                   CBioSource::eGenome_genomic          },
  387.     { "chloroplast",               CBioSource::eGenome_chloroplast      },
  388.     { "plastid:chloroplast",       CBioSource::eGenome_chloroplast      },  // prefixed spelling of the row above
  389.     { "chromoplast",               CBioSource::eGenome_chromoplast      },
  390.     { "plastid:chromoplast",       CBioSource::eGenome_chromoplast      },  // prefixed spelling of the row above
  391.     { "kinetoplast",               CBioSource::eGenome_kinetoplast      },
  392.     { "mitochondrion:kinetoplast", CBioSource::eGenome_kinetoplast      },  // prefixed spelling of the row above
  393.     { "mitochondrion",             CBioSource::eGenome_mitochondrion    },
  394.     { "plastid",                   CBioSource::eGenome_plastid          },
  395.     { "macronuclear",              CBioSource::eGenome_macronuclear     },
  396.     { "extrachrom",                CBioSource::eGenome_extrachrom       },
  397.     { "plasmid",                   CBioSource::eGenome_plasmid          },
  398.     { "transposon",                CBioSource::eGenome_transposon       },
  399.     { "insertion_seq",             CBioSource::eGenome_insertion_seq    },
  400.     { "cyanelle",                  CBioSource::eGenome_cyanelle         },
  401.     { "plastid:cyanelle",          CBioSource::eGenome_cyanelle         },  // prefixed spelling of the row above
  402.     { "proviral",                  CBioSource::eGenome_proviral         },
  403.     { "virion",                    CBioSource::eGenome_virion           },
  404.     { "nucleomorph",               CBioSource::eGenome_nucleomorph      },
  405.     { "apicoplast",                CBioSource::eGenome_apicoplast       },
  406.     { "plastid:apicoplast",        CBioSource::eGenome_apicoplast       },  // prefixed spelling of the row above
  407.     { "plastid:leucoplast",        CBioSource::eGenome_leucoplast       },  // only the prefixed spelling is listed
  408.     { "plastid:proplastid",        CBioSource::eGenome_proplastid       },  // only the prefixed spelling is listed
  409.     { "endogenous_virus",          CBioSource::eGenome_endogenous_virus }
  410. };
  411. typedef struct subsrcinit {  // row type of subsrc_key_to_subtype
  412.     const char *         key;  // source subtype name text
  413.     CSubSource::ESubtype subtype;  // CSubSource subtype paired with that name
  414. } SubSrcInit;
  415. static SubSrcInit subsrc_key_to_subtype [] = {
  416.     { "cell_line",            CSubSource::eSubtype_cell_line             },
  417.     { "cell_type",            CSubSource::eSubtype_cell_type             },
  418.     { "chromosome",           CSubSource::eSubtype_chromosome            },
  419.     { "clone",                CSubSource::eSubtype_clone                 },
  420.     { "clone_lib",            CSubSource::eSubtype_clone_lib             },
  421.     { "country",              CSubSource::eSubtype_country               },
  422.     { "dev_stage",            CSubSource::eSubtype_dev_stage             },
  423.     { "endogenous_virus",     CSubSource::eSubtype_endogenous_virus_name },
  424.     { "environmental_sample", CSubSource::eSubtype_environmental_sample  },
  425.     { "frequency",            CSubSource::eSubtype_frequency             },
  426.     { "genotype",             CSubSource::eSubtype_genotype              },
  427.     { "germline",             CSubSource::eSubtype_germline              },
  428.     { "haplotype",            CSubSource::eSubtype_haplotype             },
  429.     { "insertion_seq",        CSubSource::eSubtype_insertion_seq_name    },
  430.     { "isolation_source",     CSubSource::eSubtype_isolation_source      },
  431.     { "lab_host",             CSubSource::eSubtype_lab_host              },
  432.     { "map",                  CSubSource::eSubtype_map                   },
  433.     { "plasmid",              CSubSource::eSubtype_plasmid_name          },
  434.     { "plastid",              CSubSource::eSubtype_plastid_name          },
  435.     { "pop_variant",          CSubSource::eSubtype_pop_variant           },
  436.     { "rearranged",           CSubSource::eSubtype_rearranged            },
  437.     { "segment",              CSubSource::eSubtype_segment               },
  438.     { "sex",                  CSubSource::eSubtype_sex                   },
  439.     { "subclone",             CSubSource::eSubtype_subclone              },
  440.     { "tissue_lib ",          CSubSource::eSubtype_tissue_lib            },
  441.     { "tissue_type",          CSubSource::eSubtype_tissue_type           },
  442.     { "transgenic",           CSubSource::eSubtype_transgenic            },
  443.     { "transposon",           CSubSource::eSubtype_transposon_name       }
  444. };
  445. typedef struct orgmodinit {  // row type of orgmod_key_to_subtype
  446.     const char *      key;  // organism modifier name text
  447.     COrgMod::ESubtype subtype;  // COrgMod subtype paired with that name
  448. } OrgModInit;
  449. static OrgModInit orgmod_key_to_subtype [] = {  // organism modifier names paired with COrgMod subtypes; each key equals the enumerator name without the eSubtype_ prefix
  450.     { "acronym",          COrgMod::eSubtype_acronym          },
  451.     { "anamorph",         COrgMod::eSubtype_anamorph         },
  452.     { "authority",        COrgMod::eSubtype_authority        },
  453.     { "biotype",          COrgMod::eSubtype_biotype          },
  454.     { "biovar",           COrgMod::eSubtype_biovar           },
  455.     { "breed",            COrgMod::eSubtype_breed            },
  456.     { "chemovar",         COrgMod::eSubtype_chemovar         },
  457.     { "common",           COrgMod::eSubtype_common           },
  458.     { "cultivar",         COrgMod::eSubtype_cultivar         },
  459.     { "dosage",           COrgMod::eSubtype_dosage           },
  460.     { "ecotype",          COrgMod::eSubtype_ecotype          },
  461.     { "forma_specialis",  COrgMod::eSubtype_forma_specialis  },
  462.     { "forma",            COrgMod::eSubtype_forma            },
  463.     { "gb_acronym",       COrgMod::eSubtype_gb_acronym       },
  464.     { "gb_anamorph",      COrgMod::eSubtype_gb_anamorph      },
  465.     { "gb_synonym",       COrgMod::eSubtype_gb_synonym       },
  466.     { "group",            COrgMod::eSubtype_group            },
  467.     { "isolate",          COrgMod::eSubtype_isolate          },
  468.     { "nat_host",         COrgMod::eSubtype_nat_host         },
  469.     { "pathovar",         COrgMod::eSubtype_pathovar         },
  470.     { "serogroup",        COrgMod::eSubtype_serogroup        },
  471.     { "serotype",         COrgMod::eSubtype_serotype         },
  472.     { "serovar",          COrgMod::eSubtype_serovar          },
  473.     { "specimen_voucher", COrgMod::eSubtype_specimen_voucher },
  474.     { "strain",           COrgMod::eSubtype_strain           },
  475.     { "sub_species",      COrgMod::eSubtype_sub_species      },
  476.     { "subgroup",         COrgMod::eSubtype_subgroup         },
  477.     { "substrain",        COrgMod::eSubtype_substrain        },
  478.     { "subtype",          COrgMod::eSubtype_subtype          },
  479.     { "synonym",          COrgMod::eSubtype_synonym          },
  480.     { "teleomorph",       COrgMod::eSubtype_teleomorph       },
  481.     { "type",             COrgMod::eSubtype_type             },
  482.     { "variety",          COrgMod::eSubtype_variety          }
  483. };
  484. typedef struct bondinit {  // row type of bond_key_to_subtype
  485.     const char *        key;  // bond name text
  486.     CSeqFeatData::EBond subtype;  // EBond value paired with that name
  487. } BondInit;
  488. static BondInit bond_key_to_subtype [] = {  // bond names paired with CSeqFeatData::EBond values
  489.     { "disulfide",         CSeqFeatData::eBond_disulfide  },
  490.     { "thiolester",        CSeqFeatData::eBond_thiolester },
  491.     { "xlink",             CSeqFeatData::eBond_xlink      },
  492.     { "thioether",         CSeqFeatData::eBond_thioether  },
  493.     { "",                  CSeqFeatData::eBond_other      }  // the empty key is the row for eBond_other
  494. };
  495. typedef struct siteinit {  // row type of site_key_to_subtype
  496.     const char *        key;  // site name text
  497.     CSeqFeatData::ESite subtype;  // ESite value paired with that name
  498. } SiteInit;
  499. static SiteInit site_key_to_subtype [] = {  // site names paired with CSeqFeatData::ESite values; multi-word keys use blanks where the enumerator uses underscores
  500.     { "active",                      CSeqFeatData::eSite_active                      },
  501.     { "binding",                     CSeqFeatData::eSite_binding                     },
  502.     { "cleavage",                    CSeqFeatData::eSite_cleavage                    },
  503.     { "inhibit",                     CSeqFeatData::eSite_inhibit                     },
  504.     { "modified",                    CSeqFeatData::eSite_modified                    },
  505.     { "glycosylation",               CSeqFeatData::eSite_glycosylation               },
  506.     { "myristoylation",              CSeqFeatData::eSite_myristoylation              },
  507.     { "mutagenized",                 CSeqFeatData::eSite_mutagenized                 },
  508.     { "metal binding",               CSeqFeatData::eSite_metal_binding               },
  509.     { "phosphorylation",             CSeqFeatData::eSite_phosphorylation             },
  510.     { "acetylation",                 CSeqFeatData::eSite_acetylation                 },
  511.     { "amidation",                   CSeqFeatData::eSite_amidation                   },
  512.     { "methylation",                 CSeqFeatData::eSite_methylation                 },
  513.     { "hydroxylation",               CSeqFeatData::eSite_hydroxylation               },
  514.     { "sulfatation",                 CSeqFeatData::eSite_sulfatation                 },
  515.     { "oxidative deamination",       CSeqFeatData::eSite_oxidative_deamination       },
  516.     { "pyrrolidone carboxylic acid", CSeqFeatData::eSite_pyrrolidone_carboxylic_acid },
  517.     { "gamma carboxyglutamic acid",  CSeqFeatData::eSite_gamma_carboxyglutamic_acid  },
  518.     { "blocked",                     CSeqFeatData::eSite_blocked                     },
  519.     { "lipid binding",               CSeqFeatData::eSite_lipid_binding               },
  520.     { "np binding",                  CSeqFeatData::eSite_np_binding                  },
  521.     { "DNA binding",                 CSeqFeatData::eSite_dna_binding                 },  // key is upper-case "DNA" while the enumerator is lower-case
  522.     { "signal peptide",              CSeqFeatData::eSite_signal_peptide              },
  523.     { "transit peptide",             CSeqFeatData::eSite_transit_peptide             },
  524.     { "transmembrane region",        CSeqFeatData::eSite_transmembrane_region        },
  525.     { "",                            CSeqFeatData::eSite_other                       }  // the empty key is the row for eSite_other
  526. };
  527. typedef struct trnainit {
  528.     const char * key;
  529.     int          subtype;
  530. } TrnaInit;
  531. static TrnaInit trna_key_to_subtype [] = {
  532.     { "Ala",   'A' },
  533.     { "Asx",   'B' },
  534.     { "Cys",   'C' },
  535.     { "Asp",   'D' },
  536.     { "Glu",   'E' },
  537.     { "Phe",   'F' },
  538.     { "Gly",   'G' },
  539.     { "His",   'H' },
  540.     { "Ile",   'I' },
  541.     { "Lys",   'K' },
  542.     { "Leu",   'L' },
  543.     { "Met",   'M' },
  544.     { "fMet",  'M' },
  545.     { "Asn",   'N' },
  546.     { "Pro",   'P' },
  547.     { "Gln",   'Q' },
  548.     { "Arg",   'R' },
  549.     { "Ser",   'S' },
  550.     { "Thr",   'T' },
  551.     { "Val",   'V' },
  552.     { "Trp",   'W' },
  553.     { "Xxx",   'X' },
  554.     { "OTHER", 'X' },
  555.     { "Tyr",   'Y' },
  556.     { "Glx",   'Z' },
  557.     { "Sec",   'U' },
  558.     { "Ter",   '*' },
  559.     { "TERM",  '*' }
  560. };
  561. typedef struct singleinit {
  562.     const char * key;
  563. } SingleInit;
  564. static SingleInit single_key_list [] = {
  565.     { "pseudo"               },
  566.     { "germline"             },
  567.     { "rearranged"           },
  568.     { "transgenic"           },
  569.     { "environmental_sample" }
  570. };
  571. // constructor
  572. CFeature_table_reader_imp::CFeature_table_reader_imp(void)
  573. {
  574.     // initialize common CFeature_table_reader_imp tables
  575.     for (int i = 0; i < sizeof (feat_key_to_subtype) / sizeof (FeatInit); i++) {
  576.         string str = string (feat_key_to_subtype [i].key);
  577.         m_FeatKeys [string (feat_key_to_subtype [i].key)] = feat_key_to_subtype [i].subtype;
  578.     }
  579.     for (int i = 0; i < sizeof (qual_key_to_subtype) / sizeof (QualInit); i++) {
  580.         string str = string (qual_key_to_subtype [i].key);
  581.         m_QualKeys [string (qual_key_to_subtype [i].key)] = qual_key_to_subtype [i].subtype;
  582.     }
  583.     for (int i = 0; i < sizeof (orgref_key_to_subtype) / sizeof (OrgRefInit); i++) {
  584.         string str = string (orgref_key_to_subtype [i].key);
  585.         m_OrgRefKeys [string (orgref_key_to_subtype [i].key)] = orgref_key_to_subtype [i].subtype;
  586.     }
  587.     for (int i = 0; i < sizeof (genome_key_to_subtype) / sizeof (GenomeInit); i++) {
  588.         string str = string (genome_key_to_subtype [i].key);
  589.         m_GenomeKeys [string (genome_key_to_subtype [i].key)] = genome_key_to_subtype [i].subtype;
  590.     }
  591.     for (int i = 0; i < sizeof (subsrc_key_to_subtype) / sizeof (SubSrcInit); i++) {
  592.         string str = string (subsrc_key_to_subtype [i].key);
  593.         m_SubSrcKeys [string (subsrc_key_to_subtype [i].key)] = subsrc_key_to_subtype [i].subtype;
  594.     }
  595.     for (int i = 0; i < sizeof (orgmod_key_to_subtype) / sizeof (OrgModInit); i++) {
  596.         string str = string (orgmod_key_to_subtype [i].key);
  597.         m_OrgModKeys [string (orgmod_key_to_subtype [i].key)] = orgmod_key_to_subtype [i].subtype;
  598.     }
  599.     for (int i = 0; i < sizeof (bond_key_to_subtype) / sizeof (BondInit); i++) {
  600.         string str = string (bond_key_to_subtype [i].key);
  601.         m_BondKeys [string (bond_key_to_subtype [i].key)] = bond_key_to_subtype [i].subtype;
  602.     }
  603.     for (int i = 0; i < sizeof (site_key_to_subtype) / sizeof (SiteInit); i++) {
  604.         string str = string (site_key_to_subtype [i].key);
  605.         m_SiteKeys [string (site_key_to_subtype [i].key)] = site_key_to_subtype [i].subtype;
  606.     }
  607.     for (int i = 0; i < sizeof (trna_key_to_subtype) / sizeof (TrnaInit); i++) {
  608.         string str = string (trna_key_to_subtype [i].key);
  609.         m_TrnaKeys [string (trna_key_to_subtype [i].key)] = trna_key_to_subtype [i].subtype;
  610.     }
  611.     for (int i = 0; i < sizeof (single_key_list) / sizeof (SingleInit); i++) {
  612.         string str = string (single_key_list [i].key);
  613.         m_SingleKeys.push_back (str);
  614.     }
  615. }
  616. // destructor
  617. CFeature_table_reader_imp::~CFeature_table_reader_imp(void)
  618. {
  619. }
  620. bool CFeature_table_reader_imp::x_ParseFeatureTableLine (const string& line, Int4* startP, Int4* stopP,
  621.                                                          bool* partial5P, bool* partial3P, bool* ispointP,
  622.                                                          string& featP, string& qualP, string& valP, Int4 offset)
  623. {
  624.     SIZE_TYPE      numtkns;
  625.     bool           badNumber = false;
  626.     bool           ispoint = false;
  627.     size_t         len;
  628.     bool           partial5 = false;
  629.     bool           partial3 = false;
  630.     Int4           startv = -1;
  631.     Int4           stopv = -1;
  632.     Int4           swp;
  633.     string         start, stop, feat, qual, val, stnd;
  634.     vector<string> tkns;
  635.     if (line.empty ()) return false;
  636.     tkns.clear ();
  637.     NStr::Tokenize (line, "t", tkns);
  638.     numtkns = tkns.size ();
  639.     if (numtkns > 0) {
  640.         start = tkns [0];
  641.     }
  642.     if (numtkns > 1) {
  643.         stop = tkns [1];
  644.     }
  645.     if (numtkns > 2) {
  646.         feat = tkns [2];
  647.     }
  648.     if (numtkns > 3) {
  649.         qual = tkns [3];
  650.     }
  651.     if (numtkns > 4) {
  652.         val = tkns [4];
  653.     }
  654.     if (numtkns > 5) {
  655.         stnd = tkns [5];
  656.     }
  657.     if (! start.empty ()) {
  658.         if (start [0] == '<') {
  659.             partial5 = true;
  660.             start.erase (0, 1);
  661.         }
  662.         len = start.length ();
  663.         if (len > 1 && start [len - 1] == '^') {
  664.           ispoint = true;
  665.           start [len - 1] = '';
  666.         }
  667.         try {
  668.             startv = NStr::StringToLong (start);
  669.         } catch (...) {
  670.             badNumber = true;
  671.         }
  672.     }
  673.     if (! stop.empty ()) {
  674.         if (stop [0] == '>') {
  675.             partial3 = true;
  676.             stop.erase (0, 1);
  677.         }
  678.         try {
  679.             stopv = NStr::StringToLong (stop);
  680.         } catch (CStringException) {
  681.             badNumber = true;
  682.         }
  683.     }
  684.     if (badNumber) {
  685.         startv = -1;
  686.         stopv = -1;
  687.     } else {
  688.         startv--;
  689.         stopv--;
  690.         if (! stnd.empty ()) {
  691.             if (stnd == "minus" || stnd == "-" || stnd == "complement") {
  692.                 if (start < stop) {
  693.                     swp = startv;
  694.                     startv = stopv;
  695.                     stopv = swp;
  696.                 }
  697.             }
  698.         }
  699.     }
  700.     *startP = startv + offset;
  701.     *stopP = stopv + offset;
  702.     *partial5P = partial5;
  703.     *partial3P = partial3;
  704.     *ispointP = ispoint;
  705.     featP = feat;
  706.     qualP = qual;
  707.     valP = val;
  708.     return true;
  709. }
  710. bool CFeature_table_reader_imp::x_AddQualifierToGene (CSeqFeatData& sfdata,
  711.                                                       EQual qtype, const string& val)
  712. {
  713.     CGene_ref& grp = sfdata.SetGene ();
  714.     switch (qtype) {
  715.         case eQual_gene:
  716.             grp.SetLocus (val);
  717.             return true;
  718.         case eQual_allele:
  719.             grp.SetAllele (val);
  720.             return true;
  721.         case eQual_gene_desc:
  722.             grp.SetDesc (val);
  723.             return true;
  724.         case eQual_gene_syn:
  725.             {
  726.                 CGene_ref::TSyn& syn = grp.SetSyn ();
  727.                 syn.push_back (val);
  728.                 return true;
  729.             }
  730.         case eQual_map:
  731.             grp.SetMaploc (val);
  732.             return true;
  733.         case eQual_locus_tag:
  734.             grp.SetLocus_tag (val);
  735.             return true;
  736.         default:
  737.             break;
  738.     }
  739.     return false;
  740. }
  741. bool CFeature_table_reader_imp::x_AddQualifierToCdregion (CRef<CSeq_feat> sfp, CSeqFeatData& sfdata,
  742.                                                           EQual qtype, const string& val)
  743. {
  744.     CCdregion& crp = sfdata.SetCdregion ();
  745.     switch (qtype) {
  746.         case eQual_codon_start:
  747.             {
  748.                 int frame = NStr::StringToInt (val);
  749.                 switch (frame) {
  750.                     case 0:
  751.                         crp.SetFrame (CCdregion::eFrame_not_set);
  752.                         break;
  753.                     case 1:
  754.                         crp.SetFrame (CCdregion::eFrame_one);
  755.                         break;
  756.                     case 2:
  757.                         crp.SetFrame (CCdregion::eFrame_two);
  758.                         break;
  759.                     case 3:
  760.                         crp.SetFrame (CCdregion::eFrame_three);
  761.                         break;
  762.                     default:
  763.                         break;
  764.                 }
  765.                 return true;
  766.             }
  767.         case eQual_EC_number:
  768.             {
  769.                 CProt_ref& prp = sfp->SetProtXref ();
  770.                 CProt_ref::TEc& ec = prp.SetEc ();
  771.                 ec.push_back (val);
  772.                 return true;
  773.             }
  774.         case eQual_function:
  775.             {
  776.                 CProt_ref& prp = sfp->SetProtXref ();
  777.                 CProt_ref::TActivity& fun = prp.SetActivity ();
  778.                 fun.push_back (val);
  779.                 return true;
  780.             }
  781.         case eQual_product:
  782.             {
  783.                 CProt_ref& prp = sfp->SetProtXref ();
  784.                 CProt_ref::TName& prod = prp.SetName ();
  785.                 prod.push_back (val);
  786.                 return true;
  787.             }
  788.         case eQual_prot_desc:
  789.             {
  790.                 CProt_ref& prp = sfp->SetProtXref ();
  791.                 prp.SetDesc (val);
  792.                 return true;
  793.             }
  794.         case eQual_prot_note:
  795.             return true;
  796.         case eQual_transl_except:
  797.             return true;
  798.         default:
  799.             break;
  800.     }
  801.     return false;
  802. }
  803. int CFeature_table_reader_imp::x_ParseTrnaString (const string& val)
  804. {
  805.     string fst, scd;
  806.     scd = val;
  807.     if (NStr::StartsWith (val, "tRNA-")) {
  808.         NStr::SplitInTwo (val, "-", fst, scd);
  809.     }
  810.     if (m_TrnaKeys.find (scd) != m_TrnaKeys.end ()) {
  811.         return m_TrnaKeys [scd];
  812.     }
  813.     return 0;
  814. }
  815. bool CFeature_table_reader_imp::x_AddQualifierToRna (CSeqFeatData& sfdata,
  816.                                                      EQual qtype, const string& val)
  817. {
  818.     CRNA_ref& rrp = sfdata.SetRna ();
  819.     CRNA_ref::EType rnatyp = rrp.GetType ();
  820.     switch (rnatyp) {
  821.         case CRNA_ref::eType_premsg:
  822.         case CRNA_ref::eType_mRNA:
  823.         case CRNA_ref::eType_rRNA:
  824.         case CRNA_ref::eType_snRNA:
  825.         case CRNA_ref::eType_scRNA:
  826.         case CRNA_ref::eType_snoRNA:
  827.         case CRNA_ref::eType_other:
  828.             switch (qtype) {
  829.                 case eQual_product:
  830.                     {
  831.                         CRNA_ref::TExt& tex = rrp.SetExt ();
  832.                         CRNA_ref::C_Ext::E_Choice exttype = tex.Which ();
  833.                         if (exttype == CRNA_ref::C_Ext::e_TRNA) return false;
  834.                         tex.SetName (val);
  835.                         return true;
  836.                     }
  837.                 default:
  838.                     break;
  839.             }
  840.             break;
  841.         case CRNA_ref::eType_tRNA:
  842.             switch (qtype) {
  843.                 case eQual_product: {
  844.                         CRNA_ref::TExt& tex = rrp.SetExt ();
  845.                         CRNA_ref::C_Ext::E_Choice exttype = tex.Which ();
  846.                         if (exttype == CRNA_ref::C_Ext::e_Name) return false;
  847.                         CTrna_ext& trx = tex.SetTRNA ();
  848.                         int aaval = x_ParseTrnaString (val);
  849.                         if (aaval > 0) {
  850.                             CTrna_ext::TAa& taa = trx.SetAa ();
  851.                             taa.SetNcbieaa (aaval);
  852.                             trx.SetAa (taa);
  853.                             tex.SetTRNA (trx);
  854.                             return true;
  855.                         }
  856.                     }
  857.                     break;
  858.                 default:
  859.                     break;
  860.             }
  861.             break;
  862.         default:
  863.             break;
  864.     }
  865.     return false;
  866. }
  867. bool CFeature_table_reader_imp::x_AddQualifierToImp (CRef<CSeq_feat> sfp, CSeqFeatData& sfdata,
  868.                                                      EQual qtype, const string& qual, const string& val)
  869. {
  870.     switch (qtype) {
  871.         case eQual_allele:
  872.         case eQual_bound_moiety:
  873.         case eQual_clone:
  874.         case eQual_cons_splice:
  875.         case eQual_direction:
  876.         case eQual_EC_number:
  877.         case eQual_frequency:
  878.         case eQual_function:
  879.         case eQual_insertion_seq:
  880.         case eQual_label:
  881.         case eQual_map:
  882.         case eQual_number:
  883.         case eQual_operon:
  884.         case eQual_organism:
  885.         case eQual_PCR_conditions:
  886.         case eQual_phenotype:
  887.         case eQual_product:
  888.         case eQual_replace:
  889.         case eQual_rpt_family:
  890.         case eQual_rpt_type:
  891.         case eQual_rpt_unit:
  892.         case eQual_standard_name:
  893.         case eQual_transposon:
  894.         case eQual_usedin:
  895.             {
  896.                 CSeq_feat::TQual& qlist = sfp->SetQual ();
  897.                 CRef<CGb_qual> gbq (new CGb_qual);
  898.                 gbq->SetQual (qual);
  899.                 gbq->SetVal (val);
  900.                 qlist.push_back (gbq);
  901.                 return true;
  902.             }
  903.         default:
  904.             break;
  905.     }
  906.     return false;
  907. }
  908. bool CFeature_table_reader_imp::x_AddQualifierToBioSrc (CSeqFeatData& sfdata,
  909.                                                         EOrgRef rtype, const string& val)
  910. {
  911.     CBioSource& bsp = sfdata.SetBiosrc ();
  912.     switch (rtype) {
  913.         case eOrgRef_organism:
  914.             {
  915.                 CBioSource::TOrg& orp = bsp.SetOrg ();
  916.                 orp.SetTaxname (val);
  917.                 return true;
  918.             }
  919.         case eOrgRef_organelle:
  920.             {
  921.                 if (m_GenomeKeys.find (val) != m_GenomeKeys.end ()) {
  922.                     CBioSource::EGenome gtype = m_GenomeKeys [val];
  923.                     bsp.SetGenome (gtype);
  924.                     return true;
  925.                 }
  926.             }
  927.         case eOrgRef_div:
  928.             {
  929.                 CBioSource::TOrg& orp = bsp.SetOrg ();
  930.                 COrg_ref::TOrgname& onp = orp.SetOrgname ();
  931.                 onp.SetDiv (val);
  932.                 return true;
  933.             }
  934.         case eOrgRef_lineage:
  935.             {
  936.                 CBioSource::TOrg& orp = bsp.SetOrg ();
  937.                 COrg_ref::TOrgname& onp = orp.SetOrgname ();
  938.                 onp.SetLineage (val);
  939.                 return true;
  940.             }
  941.         case eOrgRef_gcode:
  942.             {
  943.                 CBioSource::TOrg& orp = bsp.SetOrg ();
  944.                 COrg_ref::TOrgname& onp = orp.SetOrgname ();
  945.                 int code = NStr::StringToInt (val);
  946.                 onp.SetGcode (code);
  947.                 return true;
  948.             }
  949.         case eOrgRef_mgcode:
  950.             {
  951.                 CBioSource::TOrg& orp = bsp.SetOrg ();
  952.                 COrg_ref::TOrgname& onp = orp.SetOrgname ();
  953.                 int code = NStr::StringToInt (val);
  954.                 onp.SetMgcode (code);
  955.                 return true;
  956.             }
  957.         default:
  958.             break;
  959.     }
  960.     return false;
  961. }
  962. bool CFeature_table_reader_imp::x_AddQualifierToBioSrc (CSeqFeatData& sfdata,
  963.                                                         CSubSource::ESubtype stype, const string& val)
  964. {
  965.     CBioSource& bsp = sfdata.SetBiosrc ();
  966.     CBioSource::TSubtype& slist = bsp.SetSubtype ();
  967.     CRef<CSubSource> ssp (new CSubSource);
  968.     ssp->SetSubtype (stype);
  969.     ssp->SetName (val);
  970.     slist.push_back (ssp);
  971.     return true;
  972. }
  973. bool CFeature_table_reader_imp::x_AddQualifierToBioSrc (CSeqFeatData& sfdata,
  974.                                                         COrgMod::ESubtype  mtype, const string& val)
  975. {
  976.     CBioSource& bsp = sfdata.SetBiosrc ();
  977.     CBioSource::TOrg& orp = bsp.SetOrg ();
  978.     COrg_ref::TOrgname& onp = orp.SetOrgname ();
  979.     COrgName::TMod& mlist = onp.SetMod ();
  980.     CRef<COrgMod> omp (new COrgMod);
  981.     omp->SetSubtype (mtype);
  982.     omp->SetSubname (val);
  983.     mlist.push_back (omp);
  984.     return true;
  985. }
  986. bool CFeature_table_reader_imp::x_AddQualifierToFeature (CRef<CSeq_feat> sfp,
  987.                                                          const string& qual, const string& val)
  988. {
  989.     CSeqFeatData&          sfdata = sfp->SetData ();
  990.     CSeqFeatData::E_Choice typ = sfdata.Which ();
  991.     if (typ == CSeqFeatData::e_Biosrc) {
  992.         if (m_OrgRefKeys.find (qual) != m_OrgRefKeys.end ()) {
  993.             EOrgRef rtype = m_OrgRefKeys [qual];
  994.             if (x_AddQualifierToBioSrc (sfdata, rtype, val)) return true;
  995.         } else if (m_SubSrcKeys.find (qual) != m_SubSrcKeys.end ()) {
  996.             CSubSource::ESubtype stype = m_SubSrcKeys [qual];
  997.             if (x_AddQualifierToBioSrc (sfdata, stype, val)) return true;
  998.         } else if (m_OrgModKeys.find (qual) != m_OrgModKeys.end ()) {
  999.             COrgMod::ESubtype  mtype = m_OrgModKeys [qual];
  1000.             if (x_AddQualifierToBioSrc (sfdata, mtype, val)) return true;
  1001.         }
  1002.     } else if (m_QualKeys.find (qual) != m_QualKeys.end ()) {
  1003.         EQual qtype = m_QualKeys [qual];
  1004.         switch (typ) {
  1005.             case CSeqFeatData::e_Gene:
  1006.                 if (x_AddQualifierToGene (sfdata, qtype, val)) return true;
  1007.                 break;
  1008.             case CSeqFeatData::e_Cdregion:
  1009.                 if (x_AddQualifierToCdregion (sfp, sfdata, qtype, val)) return true;
  1010.                 break;
  1011.             case CSeqFeatData::e_Rna:
  1012.                 if (x_AddQualifierToRna (sfdata, qtype, val)) return true;
  1013.                 break;
  1014.            case CSeqFeatData::e_Imp:
  1015.                 if (x_AddQualifierToImp (sfp, sfdata, qtype, qual, val)) return true;
  1016.                 break;
  1017.             case CSeqFeatData::e_Region:
  1018.                 if (qtype == eQual_region_name) {
  1019.                     sfdata.SetRegion (val);
  1020.                     return true;
  1021.                 }
  1022.                 break;
  1023.             case CSeqFeatData::e_Bond:
  1024.                 if (qtype == eQual_bond_type) {
  1025.                     if (m_BondKeys.find (val) != m_BondKeys.end ()) {
  1026.                         CSeqFeatData::EBond btyp = m_BondKeys [val];
  1027.                         sfdata.SetBond (btyp);
  1028.                         return true;
  1029.                     }
  1030.                 }
  1031.                 break;
  1032.             case CSeqFeatData::e_Site:
  1033.                 if (qtype == eQual_site_type) {
  1034.                     if (m_SiteKeys.find (val) != m_SiteKeys.end ()) {
  1035.                         CSeqFeatData::ESite styp = m_SiteKeys [val];
  1036.                         sfdata.SetSite (styp);
  1037.                         return true;
  1038.                     }
  1039.                 }
  1040.                 break;
  1041.             default:
  1042.                 break;
  1043.         }
  1044.         switch (qtype) {
  1045.             case eQual_pseudo:
  1046.                 sfp->SetPseudo (true);
  1047.                 return true;
  1048.             case eQual_partial:
  1049.                 sfp->SetPartial (true);
  1050.                 return true;
  1051.             case eQual_exception:
  1052.                 sfp->SetExcept (true);
  1053.                 sfp->SetExcept_text (val);
  1054.                 return true;
  1055.             case eQual_evidence:
  1056.                 if (val == "experimental") {
  1057.                     sfp->SetExp_ev (CSeq_feat::eExp_ev_experimental);
  1058.                 } else if (val == "not_experimental" || val == "non_experimental" ||
  1059.                            val == "not-experimental" || val == "non-experimental") {
  1060.                     sfp->SetExp_ev (CSeq_feat::eExp_ev_not_experimental);
  1061.                 }
  1062.                 return true;
  1063.             case eQual_note:
  1064.                 {
  1065.                     if (sfp->CanGetComment ()) {
  1066.                         const CSeq_feat::TComment& comment = sfp->GetComment ();
  1067.                         CSeq_feat::TComment revised = comment + "; " + val;
  1068.                         sfp->SetComment (revised);
  1069.                     } else {
  1070.                         sfp->SetComment (val);
  1071.                     }
  1072.                     return true;
  1073.                 }
  1074.             case eQual_allele:
  1075.             case eQual_bound_moiety:
  1076.             case eQual_clone:
  1077.             case eQual_cons_splice:
  1078.             case eQual_direction:
  1079.             case eQual_EC_number:
  1080.             case eQual_frequency:
  1081.             case eQual_function:
  1082.             case eQual_insertion_seq:
  1083.             case eQual_label:
  1084.             case eQual_map:
  1085.             case eQual_number:
  1086.             case eQual_operon:
  1087.             case eQual_organism:
  1088.             case eQual_PCR_conditions:
  1089.             case eQual_phenotype:
  1090.             case eQual_product:
  1091.             case eQual_protein_id:
  1092.             case eQual_replace:
  1093.             case eQual_rpt_family:
  1094.             case eQual_rpt_type:
  1095.             case eQual_rpt_unit:
  1096.             case eQual_standard_name:
  1097.             case eQual_transcript_id:
  1098.             case eQual_transposon:
  1099.             case eQual_usedin:
  1100.                 {
  1101.                     CSeq_feat::TQual& qlist = sfp->SetQual ();
  1102.                     CRef<CGb_qual> gbq (new CGb_qual);
  1103.                     gbq->SetQual (qual);
  1104.                     gbq->SetVal (val);
  1105.                     qlist.push_back (gbq);
  1106.                     return true;
  1107.                 }
  1108.             case eQual_gene:
  1109.                 {
  1110.                     CGene_ref& grp = sfp->SetGeneXref ();
  1111.                     if (val == "-") {
  1112.                         grp.SetLocus ("");
  1113.                     } else {
  1114.                         grp.SetLocus (val);
  1115.                     }
  1116.                     return true;
  1117.                 }
  1118.             case eQual_gene_desc:
  1119.                 {
  1120.                     CGene_ref& grp = sfp->SetGeneXref ();
  1121.                     grp.SetDesc (val);
  1122.                     return true;
  1123.                 }
  1124.             case eQual_gene_syn:
  1125.                 {
  1126.                     CGene_ref& grp = sfp->SetGeneXref ();
  1127.                     CGene_ref::TSyn& syn = grp.SetSyn ();
  1128.                     syn.push_back (val);
  1129.                     return true;
  1130.                 }
  1131.             case eQual_locus_tag:
  1132.                 {
  1133.                     CGene_ref& grp = sfp->SetGeneXref ();
  1134.                     grp.SetLocus_tag (val);
  1135.                     return true;
  1136.                 }
  1137.             case eQual_db_xref:
  1138.                 {
  1139.                     string db, tag;
  1140.                     if (NStr::SplitInTwo (val, ":", db, tag)) {
  1141.                         CSeq_feat::TDbxref& dblist = sfp->SetDbxref ();
  1142.                         CRef<CDbtag> dbt (new CDbtag);
  1143.                         dbt->SetDb (db);
  1144.                         CRef<CObject_id> oid (new CObject_id);
  1145.                         oid->SetStr (tag);
  1146.                         dbt->SetTag (*oid);
  1147.                         dblist.push_back (dbt);
  1148.                         return true;
  1149.                     }
  1150.                     return true;
  1151.                 }
  1152.             default:
  1153.                 break;
  1154.         }
  1155.     }
  1156.     return false;
  1157. }
  1158. bool CFeature_table_reader_imp::x_AddIntervalToFeature (CRef<CSeq_feat> sfp, CSeq_loc_mix *mix,
  1159.                                                         const string& seqid, Int4 start, Int4 stop,
  1160.                                                         bool partial5, bool partial3)
  1161. {
  1162.     CSeq_interval::TStrand strand = eNa_strand_plus;
  1163.     if (start > stop) {
  1164.         Int4 flip = start;
  1165.         start = stop;
  1166.         stop = flip;
  1167.         strand = eNa_strand_minus;
  1168.     }
  1169.     if (start == stop) {
  1170.         // just a point
  1171.         CRef<CSeq_loc> loc(new CSeq_loc);
  1172.         CSeq_point& point = loc->SetPnt();
  1173.         point.SetPoint(start);
  1174.         point.SetStrand(strand);
  1175.         CSeq_id seq_id(seqid);
  1176.         point.SetId().Assign (seq_id);
  1177.         mix->Set().push_back(loc);
  1178.     } else {
  1179.         // interval
  1180.         CRef<CSeq_loc> loc(new CSeq_loc);
  1181.         CSeq_interval& ival = loc->SetInt();
  1182.         ival.SetFrom(start);
  1183.         ival.SetTo(stop);
  1184.         ival.SetStrand(strand);
  1185.         CSeq_id seq_id(seqid);
  1186.         ival.SetId().Assign (seq_id);
  1187.         mix->Set().push_back(loc);
  1188.     }
  1189.     if (partial5 || partial3) {
  1190.         sfp->SetPartial (true);
  1191.     }
  1192.     return true;
  1193. }
  1194. CRef<CSeq_annot> CFeature_table_reader_imp::ReadSequinFeatureTable (
  1195.     CNcbiIstream& ifs,
  1196.     const string& seqid,
  1197.     const string& annotname,
  1198.     const CFeature_table_reader::TFlags flags
  1199. )
  1200. {
  1201.     string line;
  1202.     string feat, qual, val;
  1203.     Int4 start, stop;
  1204.     bool partial5, partial3, ispoint;
  1205.     Int4 offset = 0;
  1206.     CSeqFeatData::ESubtype sbtyp = CSeqFeatData::eSubtype_bad;
  1207.     CSeqFeatData::E_Choice typ = CSeqFeatData::e_not_set;
  1208.     CRef<CSeq_annot> sap(new CSeq_annot);
  1209.     CSeq_annot::C_Data::TFtable& ftable = sap->SetData().SetFtable();
  1210.     CRef<CSeq_feat> sfp;
  1211.     CSeq_loc_mix *mix = 0;
  1212.     CT_POS_TYPE pos(0);
  1213.     if (! annotname.empty ()) {
  1214.       CAnnot_descr& descr = sap->SetDesc ();
  1215.       CRef<CAnnotdesc> annot(new CAnnotdesc);
  1216.       annot->SetName (annotname);
  1217.       descr.Set().push_back (annot);
  1218.     }
  1219.     while (ifs.good ()) {
  1220.         pos = ifs.tellg ();
  1221.         NcbiGetlineEOL (ifs, line);
  1222.         if (! line.empty ()) {
  1223.             if (line [0] == '>') {
  1224.                 // if next feature table, reposition and return current sap
  1225.                 ifs.seekg (pos);
  1226.                 return sap;
  1227.             } else if (line [0] == '[') {
  1228.                 // set offset !!!!!!!!
  1229.             } else if (x_ParseFeatureTableLine (line, &start, &stop, &partial5, &partial3,
  1230.                                                 &ispoint, feat, qual, val, offset)) {
  1231.                 // process line in feature table
  1232.                 if ((! feat.empty ()) && start >= 0 && stop >= 0) {
  1233.                     // process start - stop - feature line
  1234.                     if (m_FeatKeys.find (feat) != m_FeatKeys.end ()) {
  1235.                         sbtyp = m_FeatKeys [feat];
  1236.                         if (sbtyp != CSeqFeatData::eSubtype_bad) {
  1237.                             // populate *sfp here...
  1238.                             sfp.Reset (new CSeq_feat);
  1239.                             sfp->ResetLocation ();
  1240.                             typ = CSeqFeatData::GetTypeFromSubtype (sbtyp);
  1241.                             sfp->SetData ().Select (typ);
  1242.                             CSeqFeatData& sfdata = sfp->SetData ();
  1243.                             if (typ == CSeqFeatData::e_Rna) {
  1244.                                 CRNA_ref& rrp = sfdata.SetRna ();
  1245.                                 CRNA_ref::EType rnatyp = CRNA_ref::eType_unknown;
  1246.                                 switch (sbtyp) {
  1247.                                     case CSeqFeatData::eSubtype_preRNA :
  1248.                                         rnatyp = CRNA_ref::eType_premsg;
  1249.                                         break;
  1250.                                     case CSeqFeatData::eSubtype_mRNA :
  1251.                                         rnatyp = CRNA_ref::eType_mRNA;
  1252.                                         break;
  1253.                                     case CSeqFeatData::eSubtype_tRNA :
  1254.                                         rnatyp = CRNA_ref::eType_tRNA;
  1255.                                         break;
  1256.                                     case CSeqFeatData::eSubtype_rRNA :
  1257.                                         rnatyp = CRNA_ref::eType_rRNA;
  1258.                                         break;
  1259.                                     case CSeqFeatData::eSubtype_snRNA :
  1260.                                         rnatyp = CRNA_ref::eType_snRNA;
  1261.                                         break;
  1262.                                     case CSeqFeatData::eSubtype_scRNA :
  1263.                                         rnatyp = CRNA_ref::eType_scRNA;
  1264.                                         break;
  1265.                                     case CSeqFeatData::eSubtype_snoRNA :
  1266.                                         rnatyp = CRNA_ref::eType_snoRNA;
  1267.                                         break;
  1268.                                     case CSeqFeatData::eSubtype_otherRNA :
  1269.                                         rnatyp = CRNA_ref::eType_other;
  1270.                                         break;
  1271.                                     default :
  1272.                                         break;
  1273.                                 }
  1274.                                 rrp.SetType (rnatyp);
  1275.                             } else if (typ == CSeqFeatData::e_Imp) {
  1276.                                 CImp_feat_Base& imp = sfdata.SetImp ();
  1277.                                 imp.SetKey (feat);
  1278.                             }
  1279.                             ftable.push_back (sfp);
  1280.                             // now create location
  1281.                             CRef<CSeq_loc> location (new CSeq_loc);
  1282.                             mix = &(location->SetMix ());
  1283.                             sfp->SetLocation (*location);
  1284.                             // and add first interval
  1285.                             x_AddIntervalToFeature (sfp, mix, seqid, start, stop, partial5, partial3);
  1286.                         }
  1287.                     } else {
  1288.                         // unrecognized feature key
  1289.                         if ((flags & CFeature_table_reader::fReportBadKey) != 0) {
  1290.                             ERR_POST (Warning << "Unrecognized feature " << feat);
  1291.                         }
  1292.                         if ((flags & CFeature_table_reader::fKeepBadKey) != 0) {
  1293.                             sfp.Reset (new CSeq_feat);
  1294.                             sfp->ResetLocation ();
  1295.                             sfp->SetData ().Select (CSeqFeatData::e_Imp);
  1296.                             CSeqFeatData& sfdata = sfp->SetData ();
  1297.                             CImp_feat_Base& imp = sfdata.SetImp ();
  1298.                             imp.SetKey (feat);
  1299.                             ftable.push_back (sfp);
  1300.                             CRef<CSeq_loc> location (new CSeq_loc);
  1301.                             mix = &(location->SetMix ());
  1302.                             sfp->SetLocation (*location);
  1303.                             x_AddIntervalToFeature (sfp, mix, seqid, start, stop, partial5, partial3);
  1304.                         }
  1305.                     }
  1306.                 } else if (start >= 0 && stop >= 0 && feat.empty () && qual.empty () && val.empty ()) {
  1307.                     // process start - stop multiple interval line
  1308.                     x_AddIntervalToFeature (sfp, mix, seqid, start, stop, partial5, partial3);
  1309.                 } else if ((! qual.empty ()) && (! val.empty ())) {
  1310.                     // process qual - val qualifier line
  1311.                     if (! x_AddQualifierToFeature (sfp, qual, val)) {
  1312.                         // unrecognized qualifier key
  1313.                         if ((flags & CFeature_table_reader::fReportBadKey) != 0) {
  1314.                             ERR_POST (Warning << "Unrecognized qualifier " << qual);
  1315.                         }
  1316.                         if ((flags & CFeature_table_reader::fKeepBadKey) != 0) {
  1317.                             CSeq_feat::TQual& qlist = sfp->SetQual ();
  1318.                             CRef<CGb_qual> gbq (new CGb_qual);
  1319.                             gbq->SetQual (qual);
  1320.                             gbq->SetVal (val);
  1321.                             qlist.push_back (gbq);
  1322.                         }
  1323.                     }
  1324.                 } else if ((! qual.empty ()) && (val.empty ())) {
  1325.                     // check for the few qualifiers that do not need a value
  1326.                     if (find (m_SingleKeys.begin (), m_SingleKeys.end (), qual) != m_SingleKeys.end ()) {
  1327.                         x_AddQualifierToFeature (sfp, qual, val);
  1328.                     }
  1329.                 } else if (! feat.empty ()) {
  1330.                 
  1331.                     // unrecognized location
  1332.                     if ((flags & CFeature_table_reader::fReportBadKey) != 0) {
  1333.                         ERR_POST (Warning << "Bad location on feature " << feat <<
  1334.                                  " (start " << start << ", stop " << stop << ")");
  1335.                     }
  1336.                 }
  1337.             }
  1338.         }
  1339.     }
  1340.     return sap;
  1341. }
  1342. CRef<CSeq_feat> CFeature_table_reader_imp::CreateSeqFeat (
  1343.     const string& feat,
  1344.     CSeq_loc& location,
  1345.     const CFeature_table_reader::TFlags flags
  1346. )
  1347. {
  1348.     CRef<CSeq_feat> sfp (new CSeq_feat);
  1349.     if (! feat.empty ()) {
  1350.         if (m_FeatKeys.find (feat) != m_FeatKeys.end ()) {
  1351.             CSeqFeatData::ESubtype sbtyp = m_FeatKeys [feat];
  1352.             CSeqFeatData::E_Choice typ = CSeqFeatData::GetTypeFromSubtype (sbtyp);
  1353.             sfp->SetData ().Select (typ);
  1354.             CSeqFeatData& sfdata = sfp->SetData ();
  1355.             sfp->SetLocation (location);
  1356.             if (typ == CSeqFeatData::e_Rna) {
  1357.                 CRNA_ref& rrp = sfdata.SetRna ();
  1358.                 CRNA_ref::EType rnatyp = CRNA_ref::eType_unknown;
  1359.                 switch (sbtyp) {
  1360.                     case CSeqFeatData::eSubtype_preRNA :
  1361.                         rnatyp = CRNA_ref::eType_premsg;
  1362.                         break;
  1363.                     case CSeqFeatData::eSubtype_mRNA :
  1364.                         rnatyp = CRNA_ref::eType_mRNA;
  1365.                         break;
  1366.                     case CSeqFeatData::eSubtype_tRNA :
  1367.                         rnatyp = CRNA_ref::eType_tRNA;
  1368.                         break;
  1369.                     case CSeqFeatData::eSubtype_rRNA :
  1370.                         rnatyp = CRNA_ref::eType_rRNA;
  1371.                         break;
  1372.                     case CSeqFeatData::eSubtype_snRNA :
  1373.                         rnatyp = CRNA_ref::eType_snRNA;
  1374.                         break;
  1375.                     case CSeqFeatData::eSubtype_scRNA :
  1376.                         rnatyp = CRNA_ref::eType_scRNA;
  1377.                         break;
  1378.                     case CSeqFeatData::eSubtype_snoRNA :
  1379.                         rnatyp = CRNA_ref::eType_snoRNA;
  1380.                         break;
  1381.                     case CSeqFeatData::eSubtype_otherRNA :
  1382.                         rnatyp = CRNA_ref::eType_other;
  1383.                         break;
  1384.                     default :
  1385.                         break;
  1386.                 }
  1387.                 rrp.SetType (rnatyp);
  1388.             } else if (typ == CSeqFeatData::e_Imp) {
  1389.                 CImp_feat_Base& imp = sfdata.SetImp ();
  1390.                 imp.SetKey (feat);
  1391.             }
  1392.             sfp->SetLocation (location);
  1393.  
  1394.         } else {
  1395.             // unrecognized feature key
  1396.             if ((flags & CFeature_table_reader::fReportBadKey) != 0) {
  1397.                 ERR_POST (Warning << "Unrecognized feature " << feat);
  1398.             }
  1399.             if ((flags & CFeature_table_reader::fKeepBadKey) != 0) {
  1400.                 sfp.Reset (new CSeq_feat);
  1401.                 sfp->ResetLocation ();
  1402.                 sfp->SetData ().Select (CSeqFeatData::e_Imp);
  1403.                 CSeqFeatData& sfdata = sfp->SetData ();
  1404.                 CImp_feat_Base& imp = sfdata.SetImp ();
  1405.                 imp.SetKey (feat);
  1406.                 sfp->SetLocation (location);
  1407.             }
  1408.         }
  1409.     }
  1410.     return sfp;
  1411. }
  1412. void CFeature_table_reader_imp::AddFeatQual (
  1413.     CRef<CSeq_feat> sfp,
  1414.     const string& qual,
  1415.     const string& val,
  1416.     const CFeature_table_reader::TFlags flags
  1417. )
  1418. {
  1419.     if ((! qual.empty ()) && (! val.empty ())) {
  1420.         if (! x_AddQualifierToFeature (sfp, qual, val)) {
  1421.             // unrecognized qualifier key
  1422.             if ((flags & CFeature_table_reader::fReportBadKey) != 0) {
  1423.                 ERR_POST (Warning << "Unrecognized qualifier " << qual);
  1424.             }
  1425.             if ((flags & CFeature_table_reader::fKeepBadKey) != 0) {
  1426.                 CSeq_feat::TQual& qlist = sfp->SetQual ();
  1427.                 CRef<CGb_qual> gbq (new CGb_qual);
  1428.                 gbq->SetQual (qual);
  1429.                 gbq->SetVal (val);
  1430.                 qlist.push_back (gbq);
  1431.             }
  1432.         }
  1433.     } else if ((! qual.empty ()) && (val.empty ())) {
  1434.         // check for the few qualifiers that do not need a value
  1435.         if (find (m_SingleKeys.begin (), m_SingleKeys.end (), qual) != m_SingleKeys.end ()) {
  1436.             x_AddQualifierToFeature (sfp, qual, val);
  1437.         }
  1438.     }
  1439. }
  1440. // public access functions
  1441. CRef<CSeq_annot> CFeature_table_reader::ReadSequinFeatureTable (
  1442.     CNcbiIstream& ifs,
  1443.     const string& seqid,
  1444.     const string& annotname,
  1445.     const TFlags flags
  1446. )
  1447. {
  1448.     // just read features from 5-column table
  1449.     CRef<CSeq_annot> sap = x_GetImplementation ().ReadSequinFeatureTable (ifs, seqid, annotname, flags);
  1450.     // go through all features and demote single interval seqlocmix to seqlocint
  1451.     for (CTypeIterator<CSeq_feat> fi(*sap); fi; ++fi) {
  1452.         CSeq_feat& feat = *fi;
  1453.         CSeq_loc& location = feat.SetLocation ();
  1454.         if (location.IsMix ()) {
  1455.             CSeq_loc_mix& mx = location.SetMix ();
  1456.             switch (mx.Get ().size ()) {
  1457.                 case 0:
  1458.                     location.SetNull ();
  1459.                     break;
  1460.                 case 1:
  1461.                     feat.SetLocation (*mx.Set ().front ());
  1462.                     break;
  1463.                 default:
  1464.                     break;
  1465.             }
  1466.         }
  1467.     }
  1468.     return sap;
  1469. }
  1470. CRef<CSeq_annot> CFeature_table_reader::ReadSequinFeatureTable (
  1471.     CNcbiIstream& ifs,
  1472.     const TFlags flags
  1473. )
  1474. {
  1475.     string line, fst, scd, seqid, annotname;
  1476.     CT_POS_TYPE pos(0);
  1477.     // first look for >Feature line, extract seqid and optional annotname
  1478.     while (seqid.empty () && ifs.good ()) {
  1479.         pos = ifs.tellg ();
  1480.         NcbiGetlineEOL (ifs, line);
  1481.         if (! line.empty ()) {
  1482.             if (line [0] == '>') {
  1483.                 if (NStr::StartsWith (line, ">Feature")) {
  1484.                     NStr::SplitInTwo (line, " ", fst, scd);
  1485.                     NStr::SplitInTwo (scd, " ", seqid, annotname);
  1486.                 }
  1487.             }
  1488.         }
  1489.     }
  1490.     // then read features from 5-column table
  1491.     return ReadSequinFeatureTable (ifs, seqid, annotname, flags);
  1492. }
  1493. CRef<CSeq_feat> CFeature_table_reader::CreateSeqFeat (
  1494.     const string& feat,
  1495.     CSeq_loc& location,
  1496.     const TFlags flags
  1497. )
  1498. {
  1499.     return x_GetImplementation ().CreateSeqFeat (feat, location, flags);
  1500. }
  1501. void CFeature_table_reader::AddFeatQual (
  1502.     CRef<CSeq_feat> sfp,
  1503.     const string& qual,
  1504.     const string& val,
  1505.     const CFeature_table_reader::TFlags flags
  1506. )
  1507. {
  1508.     x_GetImplementation ().AddFeatQual (sfp, qual, val, flags);
  1509. }
  1510. END_objects_SCOPE
  1511. END_NCBI_SCOPE