split_parser.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:9k
- /*
- * ===========================================================================
- * PRODUCTION $Log: split_parser.cpp,v $
- * PRODUCTION Revision 1000.1 2004/06/01 19:41:49 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: split_parser.cpp,v 1000.1 2004/06/01 19:41:49 gouriano Exp $
- * ===========================================================================
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Author: Eugene Vasilchenko
- *
- * File Description: Methods to create object manager structures from ID2 spec
- *
- */
- #include <ncbi_pch.hpp>
- #include <objtools/data_loaders/genbank/split_parser.hpp>
- #include <objmgr/objmgr_exception.hpp>
- #include <objmgr/impl/tse_info.hpp>
- #include <objmgr/impl/tse_chunk_info.hpp>
- #include <objmgr/impl/seq_annot_info.hpp>
- #include <objects/seqsplit/seqsplit__.hpp>
- BEGIN_NCBI_SCOPE
- BEGIN_SCOPE(objects)
- void CSplitParser::Attach(CTSE_Info& tse, const CID2S_Split_Info& split)
- {
- ITERATE ( CID2S_Split_Info::TChunks, it, split.GetChunks() ) {
- CRef<CTSE_Chunk_Info> chunk = Parse(**it);
- chunk->x_TSEAttach(tse);
- }
- }
- CRef<CTSE_Chunk_Info> CSplitParser::Parse(const CID2S_Chunk_Info& info)
- {
- CRef<CTSE_Chunk_Info> ret(new CTSE_Chunk_Info(info.GetId()));
- ITERATE ( CID2S_Chunk_Info::TContent, it, info.GetContent() ) {
- const CID2S_Chunk_Content& content = **it;
- switch ( content.Which() ) {
- case CID2S_Chunk_Content::e_Seq_annot:
- x_Attach(*ret, content.GetSeq_annot());
- break;
- case CID2S_Chunk_Content::e_Seq_annot_place:
- x_Attach(*ret, content.GetSeq_annot_place());
- break;
- default:
- NCBI_THROW(CLoaderException, eOtherError,
- "Unexpected split data");
- }
- }
- return ret;
- }
- void CSplitParser::x_Attach(CTSE_Chunk_Info& chunk,
- const CID2S_Seq_annot_place_Info& place)
- {
- ITERATE ( CID2S_Seq_annot_place_Info::TBioseqs,
- it, place.GetBioseqs() ) {
- chunk.x_AddAnnotPlace(CTSE_Chunk_Info::eBioseq, *it);
- }
- ITERATE ( CID2S_Seq_annot_place_Info::TBioseq_sets,
- it, place.GetBioseq_sets() ) {
- chunk.x_AddAnnotPlace(CTSE_Chunk_Info::eBioseq_set, *it);
- }
- }
- void CSplitParser::x_Attach(CTSE_Chunk_Info& chunk,
- const CID2S_Seq_annot_Info& annot)
- {
- CAnnotName name;
- if ( annot.IsSetName() ) {
- name.SetNamed(annot.GetName());
- }
-
- TLocationSet loc;
- x_ParseLocation(loc, annot.GetSeq_loc());
- if ( annot.IsSetAlign() ) {
- SAnnotTypeSelector sel(CSeq_annot::TData::e_Align);
- chunk.x_AddAnnotType(name, sel, loc);
- }
- if ( annot.IsSetGraph() ) {
- SAnnotTypeSelector sel(CSeq_annot::TData::e_Graph);
- chunk.x_AddAnnotType(name, sel, loc);
- }
-
- ITERATE ( CID2S_Seq_annot_Info::TFeat, it, annot.GetFeat() ) {
- const CID2S_Feat_type_Info& type = **it;
- if ( type.IsSetSubtypes() ) {
- ITERATE ( CID2S_Feat_type_Info::TSubtypes, sit,
- type.GetSubtypes() ) {
- SAnnotTypeSelector sel(CSeqFeatData::ESubtype(+*sit));
- chunk.x_AddAnnotType(name, sel, loc);
- }
- }
- else {
- SAnnotTypeSelector sel(CSeqFeatData::E_Choice(type.GetType()));
- chunk.x_AddAnnotType(name, sel, loc);
- }
- }
- }
- inline
- void CSplitParser::x_AddWhole(TLocationSet& vec,
- const TLocationId& id)
- {
- vec.push_back(TLocation(id, TLocationRange::GetWhole()));
- }
- inline
- void CSplitParser::x_AddInterval(TLocationSet& vec,
- const TLocationId& id,
- TSeqPos start, TSeqPos length)
- {
- vec.push_back(TLocation(id, TLocationRange(start, start+length-1)));
- }
- void CSplitParser::x_ParseLocation(TLocationSet& vec,
- const CID2_Seq_loc& loc)
- {
- switch ( loc.Which() ) {
- case CID2_Seq_loc::e_Gi_whole:
- {
- x_AddWhole(vec, loc.GetGi_whole());
- break;
- }
-
- case CID2_Seq_loc::e_Gi_whole_range:
- {
- const CID2_Id_Range& wr = loc.GetGi_whole_range();
- for ( int gi = wr.GetStart(), end = gi+wr.GetCount(); gi < end; ++gi )
- x_AddWhole(vec, gi);
- break;
- }
- case CID2_Seq_loc::e_Interval:
- {
- const CID2_Interval& interval = loc.GetInterval();
- x_AddInterval(vec,
- interval.GetGi(),
- interval.GetStart(),
- interval.GetLength());
- break;
- }
- case CID2_Seq_loc::e_Packed_ints:
- {
- const CID2_Packed_Seq_ints& ints = loc.GetPacked_ints();
- ITERATE ( CID2_Packed_Seq_ints::TIntervals, it, ints.GetIntervals() ) {
- const CID2_Seq_range& interval = **it;
- x_AddInterval(vec,
- ints.GetGi(),
- interval.GetStart(),
- interval.GetLength());
- }
- break;
- }
- case CID2_Seq_loc::e_Loc_set:
- {
- const CID2_Seq_loc::TLoc_set& loc_set = loc.GetLoc_set();
- ITERATE ( CID2_Seq_loc::TLoc_set, it, loc_set ) {
- x_ParseLocation(vec, **it);
- }
- break;
- }
- }
- }
- void CSplitParser::Load(CTSE_Chunk_Info& chunk,
- const CID2S_Chunk& id2_chunk)
- {
- ITERATE ( CID2S_Chunk::TData, dit, id2_chunk.GetData() ) {
- const CID2S_Chunk_Data& data = **dit;
- CTSE_Chunk_Info::TPlace place;
- if ( data.GetId().IsGi() ) {
- place.first = CTSE_Chunk_Info::eBioseq;
- place.second = data.GetId().GetGi();
- }
- else {
- place.first = CTSE_Chunk_Info::eBioseq_set;
- place.second = data.GetId().GetBioseq_set();
- }
- ITERATE ( CID2S_Chunk_Data::TDescrs, it, data.GetDescrs() ) {
- NCBI_THROW(CLoaderException, eOtherError,
- "split descr is not supported");
- }
- ITERATE ( CID2S_Chunk_Data::TAnnots, it, data.GetAnnots() ) {
- CSeq_annot& annot = const_cast<CSeq_annot&>(**it);
- CRef<CSeq_annot_Info> annot_info(new CSeq_annot_Info(annot));
- chunk.x_LoadAnnot(place, annot_info);
- }
- ITERATE ( CID2S_Chunk_Data::TAssembly, it, data.GetAssembly() ) {
- NCBI_THROW(CLoaderException, eOtherError,
- "split assembly is not supported");
- }
- ITERATE ( CID2S_Chunk_Data::TSeq_map, it, data.GetSeq_map() ) {
- NCBI_THROW(CLoaderException, eOtherError,
- "split seq-map is not supported");
- }
- ITERATE ( CID2S_Chunk_Data::TSeq_data, it, data.GetSeq_data() ) {
- NCBI_THROW(CLoaderException, eOtherError,
- "split seq-data is not supported");
- }
- }
- }
- END_SCOPE(objects)
- END_NCBI_SCOPE
- /*
- * $Log: split_parser.cpp,v $
- * Revision 1000.1 2004/06/01 19:41:49 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.6
- *
- * Revision 1.6 2004/05/21 21:42:52 gorelenk
- * Added PCH ncbi_pch.hpp
- *
- * Revision 1.5 2004/03/16 15:47:29 vasilche
- * Added CBioseq_set_Handle and set of EditHandles
- *
- * Revision 1.4 2004/02/17 21:19:35 vasilche
- * Fixed 'non-const reference to temporary' warnings.
- *
- * Revision 1.3 2004/01/28 20:53:42 vasilche
- * Added CSplitParser::Attach().
- *
- * Revision 1.2 2004/01/22 20:36:43 ucko
- * Correct path to seqsplit__.hpp.
- *
- * Revision 1.1 2004/01/22 20:10:35 vasilche
- * 1. Split ID2 specs into two parts.
- * ID2 now specifies only protocol.
- * Specification of ID2 split data is moved to seqsplit ASN module.
- * For now they still reside in one resulting library as before - libid2.
- * As a result, split-specific headers are now in objects/seqsplit.
- * 2. Moved ID2 and ID1 specific code out of object manager.
- * Protocol is processed by corresponding readers.
- * ID2 split parsing is processed by ncbi_xreader library - used by all readers.
- * 3. Updated OBJMGR_LIBS correspondingly.
- *
- */