seq_annot_info.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:16k
源码类别:

生物技术

开发平台:

C/C++

  1. /*
  2.  * ===========================================================================
  3.  * PRODUCTION $Log: seq_annot_info.cpp,v $
  4.  * PRODUCTION Revision 1000.4  2004/06/01 19:23:51  gouriano
  5.  * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.16
  6.  * PRODUCTION
  7.  * ===========================================================================
  8.  */
  9. /*  $Id: seq_annot_info.cpp,v 1000.4 2004/06/01 19:23:51 gouriano Exp $
  10. * ===========================================================================
  11. *
  12. *                            PUBLIC DOMAIN NOTICE
  13. *               National Center for Biotechnology Information
  14. *
  15. *  This software/database is a "United States Government Work" under the
  16. *  terms of the United States Copyright Act.  It was written as part of
  17. *  the author's official duties as a United States Government employee and
  18. *  thus cannot be copyrighted.  This software/database is freely available
  19. *  to the public for use. The National Library of Medicine and the U.S.
  20. *  Government have not placed any restriction on its use or reproduction.
  21. *
  22. *  Although all reasonable efforts have been taken to ensure the accuracy
  23. *  and reliability of the software and data, the NLM and the U.S.
  24. *  Government do not and cannot warrant the performance or results that
  25. *  may be obtained by using this software or data. The NLM and the U.S.
  26. *  Government disclaim all warranties, express or implied, including
  27. *  warranties of performance, merchantability or fitness for any particular
  28. *  purpose.
  29. *
  30. *  Please cite the author in any work or product based on this material.
  31. *
  32. * ===========================================================================
  33. *
  34. * Author: Eugene Vasilchenko
  35. *
  36. * File Description:
  37. *   CSeq_annot_Info info -- entry for data source information about Seq-annot
  38. *
  39. */
  40. #include <ncbi_pch.hpp>
  41. #include <objmgr/impl/seq_annot_info.hpp>
  42. #include <objmgr/impl/seq_entry_info.hpp>
  43. #include <objmgr/impl/bioseq_base_info.hpp>
  44. #include <objmgr/impl/tse_info.hpp>
  45. #include <objmgr/impl/tse_chunk_info.hpp>
  46. #include <objmgr/impl/annot_object.hpp>
  47. #include <objmgr/impl/handle_range_map.hpp>
  48. #include <objmgr/impl/data_source.hpp>
  49. #include <objmgr/impl/snp_annot_info.hpp>
  50. #include <objmgr/objmgr_exception.hpp>
  51. #include <objects/seq/Seq_annot.hpp>
  52. #include <objects/seq/Annotdesc.hpp>
  53. #include <objects/seq/Annot_descr.hpp>
  54. BEGIN_NCBI_SCOPE
  55. BEGIN_SCOPE(objects)
  56. CSeq_annot_Info::CSeq_annot_Info(const CSeq_annot& annot)
  57. {
  58.     x_SetObject(annot);
  59. }
  60. CSeq_annot_Info::CSeq_annot_Info(CSeq_annot_SNP_Info& snp_annot)
  61. {
  62.     x_SetSNP_annot_Info(snp_annot);
  63. }
  64. CSeq_annot_Info::CSeq_annot_Info(const CSeq_annot_Info& info)
  65. {
  66.     x_SetObject(info);
  67. }
  68. CSeq_annot_Info::~CSeq_annot_Info(void)
  69. {
  70. }
  71. const CBioseq_Base_Info& CSeq_annot_Info::GetParentBioseq_Base_Info(void) const
  72. {
  73.     return static_cast<const CBioseq_Base_Info&>(GetBaseParent_Info());
  74. }
  75. CBioseq_Base_Info& CSeq_annot_Info::GetParentBioseq_Base_Info(void)
  76. {
  77.     return static_cast<CBioseq_Base_Info&>(GetBaseParent_Info());
  78. }
  79. const CSeq_entry_Info& CSeq_annot_Info::GetParentSeq_entry_Info(void) const
  80. {
  81.     return GetParentBioseq_Base_Info().GetParentSeq_entry_Info();
  82. }
  83. CSeq_entry_Info& CSeq_annot_Info::GetParentSeq_entry_Info(void)
  84. {
  85.     return GetParentBioseq_Base_Info().GetParentSeq_entry_Info();
  86. }
  87. void CSeq_annot_Info::x_ParentAttach(CBioseq_Base_Info& parent)
  88. {
  89.     x_BaseParentAttach(parent);
  90. }
  91. void CSeq_annot_Info::x_ParentDetach(CBioseq_Base_Info& parent)
  92. {
  93.     x_BaseParentDetach(parent);
  94. }
  95. void CSeq_annot_Info::x_DSAttachContents(CDataSource& ds)
  96. {
  97.     TParent::x_DSAttachContents(ds);
  98.     x_DSMapObject(m_Object, ds);
  99.     if ( m_SNP_Info ) {
  100.         m_SNP_Info->x_DSAttach(ds);
  101.     }
  102. }
  103. void CSeq_annot_Info::x_DSDetachContents(CDataSource& ds)
  104. {
  105.     if ( m_SNP_Info ) {
  106.         m_SNP_Info->x_DSDetach(ds);
  107.     }
  108.     ITERATE ( TDSMappedObjects, it, m_DSMappedObjects ) {
  109.         x_DSUnmapObject(*it, ds);
  110.     }
  111.     m_DSMappedObjects.clear();
  112.     TParent::x_DSDetachContents(ds);
  113. }
  114. void CSeq_annot_Info::x_DSMapObject(CConstRef<TObject> obj, CDataSource& ds)
  115. {
  116.     m_DSMappedObjects.push_back(obj);
  117.     ds.x_Map(obj, this);
  118. }
  119. void CSeq_annot_Info::x_DSUnmapObject(CConstRef<TObject> obj, CDataSource& ds)
  120. {
  121.     ds.x_Unmap(obj, this);
  122. }
  123. void CSeq_annot_Info::x_TSEAttachContents(CTSE_Info& tse)
  124. {
  125.     TParent::x_TSEAttachContents(tse);
  126.     if ( m_SNP_Info ) {
  127.         m_SNP_Info->x_TSEAttach(tse);
  128.     }
  129. }
  130. void CSeq_annot_Info::x_TSEDetachContents(CTSE_Info& tse)
  131. {
  132.     if ( m_SNP_Info ) {
  133.         m_SNP_Info->x_TSEDetach(tse);
  134.     }
  135.     x_UnmapAnnotObjects(tse);
  136.     TParent::x_TSEDetachContents(tse);
  137. }
  138. size_t
  139. CSeq_annot_Info::GetAnnotObjectIndex(const CAnnotObject_Info& info) const
  140. {
  141.     _ASSERT(&info.GetSeq_annot_Info() == this);
  142.     return m_ObjectInfos.GetIndex(info);
  143. }
  144. const CAnnotName& CSeq_annot_Info::GetName(void) const
  145. {
  146.     return m_Name.IsNamed()? m_Name: GetTSE_Info().GetName();
  147. }
  148. void CSeq_annot_Info::x_UpdateName(void)
  149. {
  150.     if ( m_Object->IsSetDesc() ) {
  151.         string name;
  152.         ITERATE( CSeq_annot::TDesc::Tdata, it, m_Object->GetDesc().Get() ) {
  153.             const CAnnotdesc& desc = **it;
  154.             if ( desc.Which() == CAnnotdesc::e_Name ) {
  155.                 name = desc.GetName();
  156.                 break;
  157.             }
  158.         }
  159.         m_Name.SetNamed(name);
  160.     }
  161.     else {
  162.         m_Name.SetUnnamed();
  163.     }
  164. }
  165. CConstRef<CSeq_annot> CSeq_annot_Info::GetCompleteSeq_annot(void) const
  166. {
  167.     return GetSeq_annotCore();
  168. }
  169. CConstRef<CSeq_annot> CSeq_annot_Info::GetSeq_annotCore(void) const
  170. {
  171.     x_UpdateObject();
  172.     return m_Object;
  173. }
  174. void CSeq_annot_Info::x_SetObject(const TObject& obj)
  175. {
  176.     _ASSERT(!m_SNP_Info && !m_Object);
  177.     m_Object.Reset(&obj);
  178.     x_UpdateName();
  179. }
  180. void CSeq_annot_Info::x_SetObject(const CSeq_annot_Info& info)
  181. {
  182.     _ASSERT(!m_SNP_Info && !m_Object);
  183.     m_Object = info.m_Object;
  184.     m_Name = info.m_Name;
  185.     if ( info.m_SNP_Info ) {
  186.         m_SNP_Info.Reset(new CSeq_annot_SNP_Info(*info.m_SNP_Info));
  187.         m_SNP_Info->x_ParentAttach(*this);
  188.         x_AttachObject(*m_SNP_Info);
  189.     }
  190. }
  191. void CSeq_annot_Info::x_SetSNP_annot_Info(CSeq_annot_SNP_Info& snp_info)
  192. {
  193.     _ASSERT(!m_SNP_Info && !m_Object && !snp_info.HasParent_Info());
  194.     x_SetObject(snp_info.GetRemainingSeq_annot());
  195.     m_SNP_Info.Reset(&snp_info);
  196.     snp_info.x_ParentAttach(*this);
  197.     _ASSERT(&snp_info.GetParentSeq_annot_Info() == this);
  198.     x_AttachObject(snp_info);
  199. }
  200. void CSeq_annot_Info::x_DoUpdateObject(void)
  201. {
  202.     NCBI_THROW(CObjMgrException, eNotImplemented,
  203.                "CSeq_annot_Info::x_DoUpdateObject: unimplemented");
  204. }
  205. void CSeq_annot_Info::UpdateAnnotIndex(void) const
  206. {
  207.     if ( x_DirtyAnnotIndex() ) {
  208.         GetTSE_Info().UpdateAnnotIndex(*this);
  209.         _ASSERT(!x_DirtyAnnotIndex());
  210.     }
  211. }
  212. void CSeq_annot_Info::x_UpdateAnnotIndexContents(CTSE_Info& tse)
  213. {
  214.     m_ObjectInfos.SetName(GetName());
  215.     const CSeq_annot::C_Data& data = m_Object->GetData();
  216.     switch ( data.Which() ) {
  217.     case CSeq_annot::C_Data::e_Ftable:
  218.         x_MapAnnotObjects(tse, data.GetFtable());
  219.         break;
  220.     case CSeq_annot::C_Data::e_Align:
  221.         x_MapAnnotObjects(tse, data.GetAlign());
  222.         break;
  223.     case CSeq_annot::C_Data::e_Graph:
  224.         x_MapAnnotObjects(tse, data.GetGraph());
  225.         break;
  226.     default:
  227.         break;
  228.     }
  229.     if ( m_SNP_Info ) {
  230.         m_SNP_Info->x_UpdateAnnotIndex(tse);
  231.     }
  232.     TParent::x_UpdateAnnotIndexContents(tse);
  233. }
  234. void
  235. CSeq_annot_Info::x_MapAnnotObjects(CTSE_Info& tse,
  236.                                    const CSeq_annot::C_Data::TFtable& objs)
  237. {
  238.     m_ObjectInfos.Reserve(objs.size(), 1.1);
  239.     CTSE_Info::TAnnotObjs& index = tse.x_SetAnnotObjs(GetName());
  240.     SAnnotObject_Key key;
  241.     SAnnotObject_Index annotRef;
  242.     vector<CHandleRangeMap> hrmaps;
  243.     ITERATE ( CSeq_annot::C_Data::TFtable, fit, objs ) {
  244.         const CSeq_feat& feat = **fit;
  245.         CAnnotObject_Info* info =
  246.             m_ObjectInfos.AddInfo(CAnnotObject_Info(feat, *this));
  247.         key.m_AnnotObject_Info = annotRef.m_AnnotObject_Info = info;
  248.         info->GetMaps(hrmaps);
  249.         annotRef.m_AnnotLocationIndex = 0;
  250.         ITERATE ( vector<CHandleRangeMap>, hrmit, hrmaps ) {
  251.             ITERATE ( CHandleRangeMap, hrit, *hrmit ) {
  252.                 key.m_Handle = hrit->first;
  253.                 const CHandleRange& hr = hrit->second;
  254.                 key.m_Range = hr.GetOverlappingRange();
  255.                 if ( hr.HasGaps() ) {
  256.                     annotRef.m_HandleRange.Reset(new CObjectFor<CHandleRange>);
  257.                     annotRef.m_HandleRange->GetData() = hr;
  258.                 }
  259.                 else {
  260.                     annotRef.m_HandleRange.Reset();
  261.                 }
  262.                 
  263.                 tse.x_MapAnnotObject(index, key, annotRef, m_ObjectInfos);
  264.             }
  265.             ++annotRef.m_AnnotLocationIndex;
  266.         }
  267.     }
  268. }
  269. void CSeq_annot_Info::x_MapAnnotObjects(CTSE_Info& tse,
  270.                                         const CSeq_annot::C_Data::TGraph& objs)
  271. {
  272.     m_ObjectInfos.Reserve(objs.size());
  273.     CTSE_Info::TAnnotObjs& index = tse.x_SetAnnotObjs(GetName());
  274.     SAnnotObject_Key key;
  275.     SAnnotObject_Index annotRef;
  276.     vector<CHandleRangeMap> hrmaps;
  277.     ITERATE ( CSeq_annot::C_Data::TGraph, git, objs ) {
  278.         const CSeq_graph& graph = **git;
  279.         CAnnotObject_Info* info =
  280.             m_ObjectInfos.AddInfo(CAnnotObject_Info(graph, *this));
  281.         key.m_AnnotObject_Info = annotRef.m_AnnotObject_Info = info;
  282.         info->GetMaps(hrmaps);
  283.         annotRef.m_AnnotLocationIndex = 0;
  284.         ITERATE ( vector<CHandleRangeMap>, hrmit, hrmaps ) {
  285.             ITERATE ( CHandleRangeMap, hrit, *hrmit ) {
  286.                 key.m_Handle = hrit->first;
  287.                 const CHandleRange& hr = hrit->second;
  288.                 key.m_Range = hr.GetOverlappingRange();
  289.                 if ( hr.HasGaps() ) {
  290.                     annotRef.m_HandleRange.Reset(new CObjectFor<CHandleRange>);
  291.                     annotRef.m_HandleRange->GetData() = hr;
  292.                 }
  293.                 else {
  294.                     annotRef.m_HandleRange.Reset();
  295.                 }
  296.                 tse.x_MapAnnotObject(index, key, annotRef, m_ObjectInfos);
  297.             }
  298.             ++annotRef.m_AnnotLocationIndex;
  299.         }
  300.     }
  301. }
  302. void CSeq_annot_Info::x_MapAnnotObjects(CTSE_Info& tse,
  303.                                         const CSeq_annot::C_Data::TAlign& objs)
  304. {
  305.     m_ObjectInfos.Reserve(objs.size());
  306.     CTSE_Info::TAnnotObjs& index = tse.x_SetAnnotObjs(GetName());
  307.     SAnnotObject_Key key;
  308.     SAnnotObject_Index annotRef;
  309.     vector<CHandleRangeMap> hrmaps;
  310.     ITERATE ( CSeq_annot::C_Data::TAlign, ait, objs ) {
  311.         const CSeq_align& align = **ait;
  312.         CAnnotObject_Info* info =
  313.             m_ObjectInfos.AddInfo(CAnnotObject_Info(align, *this));
  314.         key.m_AnnotObject_Info = annotRef.m_AnnotObject_Info = info;
  315.         info->GetMaps(hrmaps);
  316.         annotRef.m_AnnotLocationIndex = 0;
  317.         ITERATE ( vector<CHandleRangeMap>, hrmit, hrmaps ) {
  318.             ITERATE ( CHandleRangeMap, hrit, *hrmit ) {
  319.                 key.m_Handle = hrit->first;
  320.                 const CHandleRange& hr = hrit->second;
  321.                 key.m_Range = hr.GetOverlappingRange();
  322.                 if ( hr.HasGaps() ) {
  323.                     annotRef.m_HandleRange.Reset(new CObjectFor<CHandleRange>);
  324.                     annotRef.m_HandleRange->GetData() = hr;
  325.                 }
  326.                 else {
  327.                     annotRef.m_HandleRange.Reset();
  328.                 }
  329.                 tse.x_MapAnnotObject(index, key, annotRef, m_ObjectInfos);
  330.             }
  331.             ++annotRef.m_AnnotLocationIndex;
  332.         }
  333.     }
  334. }
  335. void CSeq_annot_Info::x_UnmapAnnotObjects(CTSE_Info& tse)
  336. {
  337.     if ( m_SNP_Info ) {
  338.         m_SNP_Info->x_UnmapAnnotObjects(tse);
  339.     }
  340.     tse.x_UnmapAnnotObjects(m_ObjectInfos);
  341. }
  342. void CSeq_annot_Info::x_DropAnnotObjects(CTSE_Info& tse)
  343. {
  344.     if ( m_SNP_Info ) {
  345.         m_SNP_Info->x_DropAnnotObjects(tse);
  346.     }
  347.     m_ObjectInfos.Clear();
  348. }
  349. END_SCOPE(objects)
  350. END_NCBI_SCOPE
  351. /*
  352.  * ===========================================================================
  353.  * $Log: seq_annot_info.cpp,v $
  354.  * Revision 1000.4  2004/06/01 19:23:51  gouriano
  355.  * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.16
  356.  *
  357.  * Revision 1.16  2004/05/21 21:42:13  gorelenk
  358.  * Added PCH ncbi_pch.hpp
  359.  *
  360.  * Revision 1.15  2004/03/26 19:42:04  vasilche
  361.  * Fixed premature deletion of SNP annot info object.
  362.  * Removed obsolete references to chunk info.
  363.  *
  364.  * Revision 1.14  2004/03/24 18:30:30  vasilche
  365.  * Fixed edit API.
  366.  * Every *_Info object has its own shallow copy of original object.
  367.  *
  368.  * Revision 1.13  2004/03/16 15:47:28  vasilche
  369.  * Added CBioseq_set_Handle and set of EditHandles
  370.  *
  371.  * Revision 1.12  2004/01/22 20:10:40  vasilche
  372.  * 1. Split ID2 specs into two parts.
  373.  * ID2 now specifies only protocol.
  374.  * Specification of ID2 split data is moved to seqsplit ASN module.
  375.  * For now they still reside in one resulting library as before - libid2.
  376.  * As the result split specific headers are now in objects/seqsplit.
  377.  * 2. Moved ID2 and ID1 specific code out of object manager.
  378.  * Protocol is processed by corresponding readers.
  379.  * ID2 split parsing is processed by ncbi_xreader library - used by all readers.
  380.  * 3. Updated OBJMGR_LIBS correspondingly.
  381.  *
  382.  * Revision 1.11  2003/11/26 17:56:00  vasilche
  383.  * Implemented ID2 split in ID1 cache.
  384.  * Fixed loading of splitted annotations.
  385.  *
  386.  * Revision 1.10  2003/11/19 22:18:03  grichenk
  387.  * All exceptions are now CException-derived. Catch "exception" rather
  388.  * than "runtime_error".
  389.  *
  390.  * Revision 1.9  2003/10/07 13:43:23  vasilche
  391.  * Added proper handling of named Seq-annots.
  392.  * Added feature search from named Seq-annots.
  393.  * Added configurable adaptive annotation search (default: gene, cds, mrna).
  394.  * Fixed selection of blobs for loading from GenBank.
  395.  * Added debug checks to CSeq_id_Mapper for easier finding lost CSeq_id_Handles.
  396.  * Fixed leaked split chunks annotation stubs.
  397.  * Moved some classes definitions in separate *.cpp files.
  398.  *
  399.  * Revision 1.8  2003/09/30 16:22:03  vasilche
  400.  * Updated internal object manager classes to be able to load ID2 data.
  401.  * SNP blobs are loaded as ID2 split blobs - readers convert them automatically.
  402.  * Scope caches results of requests for data to data loaders.
  403.  * Optimized CSeq_id_Handle for gis.
  404.  * Optimized bioseq lookup in scope.
  405.  * Reduced object allocations in annotation iterators.
  406.  * CScope is allowed to be destroyed before other objects using this scope are
  407.  * deleted (feature iterators, bioseq handles etc).
  408.  * Optimized lookup for matching Seq-ids in CSeq_id_Mapper.
  409.  * Added 'adaptive' option to objmgr_demo application.
  410.  *
  411.  * Revision 1.7  2003/08/27 14:29:52  vasilche
  412.  * Reduce object allocations in feature iterator.
  413.  *
  414.  * Revision 1.6  2003/07/18 19:41:46  vasilche
  415.  * Removed unused variable.
  416.  *
  417.  * Revision 1.5  2003/07/17 22:51:31  vasilche
  418.  * Fixed unused variables warnings.
  419.  *
  420.  * Revision 1.4  2003/07/17 20:07:56  vasilche
  421.  * Reduced memory usage by feature indexes.
  422.  * SNP data is loaded separately through PUBSEQ_OS.
  423.  * String compression for SNP data.
  424.  *
  425.  * Revision 1.3  2003/07/09 17:54:29  dicuccio
  426.  * Fixed uninitialized variables in CDataSource and CSeq_annot_Info
  427.  *
  428.  * Revision 1.2  2003/06/02 16:06:38  dicuccio
  429.  * Rearranged src/objects/ subtree.  This includes the following shifts:
  430.  *     - src/objects/asn2asn --> src/app/asn2asn
  431.  *     - src/objects/testmedline --> src/objects/ncbimime/test
  432.  *     - src/objects/objmgr --> src/objmgr
  433.  *     - src/objects/util --> src/objmgr/util
  434.  *     - src/objects/alnmgr --> src/objtools/alnmgr
  435.  *     - src/objects/flat --> src/objtools/flat
  436.  *     - src/objects/validator --> src/objtools/validator
  437.  *     - src/objects/cddalignview --> src/objtools/cddalignview
  438.  * In addition, libseq now includes six of the objects/seq... libs, and libmmdb
  439.  * replaces the three libmmdb? libs.
  440.  *
  441.  * Revision 1.1  2003/04/24 16:12:38  vasilche
  442.  * Object manager internal structures are splitted more straightforward.
  443.  * Removed excessive header dependencies.
  444.  *
  445.  *
  446.  * ===========================================================================
  447.  */