snp_annot_info.hpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:14k
- /*
- * ===========================================================================
- * PRODUCTION $Log: snp_annot_info.hpp,v $
- * PRODUCTION Revision 1000.2 2004/06/01 19:22:14 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.14
- * PRODUCTION
- * ===========================================================================
- */
- #ifndef SNP_ANNOT_INFO__HPP
- #define SNP_ANNOT_INFO__HPP
- /* $Id: snp_annot_info.hpp,v 1000.2 2004/06/01 19:22:14 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Author: Eugene Vasilchenko
- *
- * File Description:
- * SNP Seq-annot object information
- *
- */
- #include <corelib/ncbiobj.hpp>
- #include <corelib/ncbi_limits.hpp>
- #include <util/range.hpp>
- #include <vector>
- #include <map>
- #include <algorithm>
- #include <objects/seqloc/Seq_id.hpp>
- #include <objmgr/impl/tse_info_object.hpp>
- BEGIN_NCBI_SCOPE
- class CObjectIStream;
- class IWriter;
- class IReader;
- BEGIN_SCOPE(objects)
- class CSeq_entry;
- class CSeq_feat;
- class CSeq_annot;
- class CSeq_annot_Info;
- class CSeq_annot_SNP_Info;
- class CSeq_point;
- class CSeq_interval;
- // Fixed-size record holding one SNP feature in table form.
- //
- // The location is kept as an end position (m_ToPosition) plus a signed
- // one-byte delta back to the start (see GetFrom()).  Comment and allele
- // strings are not stored in the record, only one-byte indices for them
- // (m_CommentIndex, m_AllelesIndices).
- // NOTE(review): the indices presumably refer to the string tables held by
- // CSeq_annot_SNP_Info -- confirm against the implementation file.
- // NOTE(review): member order and widths look layout-sensitive; check how
- // records are stored before reordering anything -- confirm.
- struct NCBI_XOBJMGR_EXPORT SSNP_Info
- {
-     typedef CRange<TSeqPos> TRange;
-     // Location accessors and ordering helpers; all of them are defined
-     // inline further down in this header.
-     TSeqPos GetFrom() const;
-     TSeqPos GetTo() const;
-     bool MinusStrand() const;
-     bool operator<(const SSNP_Info& snp) const;
-     bool operator<(TSeqPos end_position) const;
-     bool NoMore(const TRange& range) const;
-     bool NotThis(const TRange& range) const;
-     // Classification produced by ParseSeq_feat().
-     // eSNP_Type_last is not a real type: it is used as the size of
-     // s_SNP_Type_Label.
-     enum ESNP_Type {
-         eSNP_Simple,
-         eSNP_Bad_WrongMemberSet,
-         eSNP_Bad_WrongTextId,
-         eSNP_Complex_HasComment,
-         eSNP_Complex_LocationIsNotPoint,
-         eSNP_Complex_LocationIsNotGi,
-         eSNP_Complex_LocationGiIsBad,
-         eSNP_Complex_LocationStrandIsBad,
-         eSNP_Complex_IdCountTooLarge,
-         eSNP_Complex_IdCountIsNotOne,
-         eSNP_Complex_AlleleLengthBad,
-         eSNP_Complex_AlleleCountTooLarge,
-         eSNP_Complex_AlleleCountIsNonStandard,
-         eSNP_Complex_WeightBadValue,
-         eSNP_Complex_WeightCountIsNotOne,
-         eSNP_Type_last
-     };
-     // Printable name for every ESNP_Type value, intended for logging.
-     static const char* const s_SNP_Type_Label[eSNP_Type_last];
-     // Parse 'feat' into this record.
-     // Only when the result is eSNP_Simple are the other members filled in
-     // and the record fit to be stored.
-     ESNP_Type ParseSeq_feat(const CSeq_feat& feat,
-                             CSeq_annot_SNP_Info& annot_info);
-     // Rebuild a Seq-feat object from the parsed record.
-     CRef<CSeq_feat>
-     CreateSeq_feat(const CSeq_annot_SNP_Info& annot_info) const;
-     // Variants taking caller-supplied objects by reference.
-     // NOTE(review): presumably they refill the passed objects instead of
-     // allocating new ones -- confirm in the implementation file.
-     void UpdateSeq_feat(CRef<CSeq_feat>& seq_feat,
-                         const CSeq_annot_SNP_Info& annot_info) const;
-     void UpdateSeq_feat(CRef<CSeq_feat>& seq_feat,
-                         CRef<CSeq_point>& seq_point,
-                         CRef<CSeq_interval>& seq_interval,
-                         const CSeq_annot_SNP_Info& annot_info) const;
-     // Implementation helpers (x_ prefix).
-     CRef<CSeq_feat> x_CreateSeq_feat() const;
-     void x_UpdateSeq_featData(CSeq_feat& feat,
-                               const CSeq_annot_SNP_Info& annot_info) const;
-     void x_UpdateSeq_feat(CSeq_feat& feat,
-                           const CSeq_annot_SNP_Info& annot_info) const;
-     void x_UpdateSeq_feat(CSeq_feat& feat,
-                           CRef<CSeq_point>& seq_point,
-                           CRef<CSeq_interval>& seq_interval,
-                           const CSeq_annot_SNP_Info& annot_info) const;
-     // Storage types of the record fields together with their limits.
-     typedef int TSNPId;
-     // Distance from start to end of the location, one signed byte.
-     typedef Int1 TPositionDelta;
-     enum {
-         kMax_PositionDelta = kMax_I1
-     };
-     // Comment reference; kNo_CommentIndex is the reserved "none" value.
-     typedef Uint1 TCommentIndex;
-     enum {
-         kNo_CommentIndex = kMax_UI1,
-         kMax_CommentIndex = kNo_CommentIndex - 1,
-         kMax_CommentLength = 65530
-     };
-     // Allele reference; kNo_AlleleIndex is the reserved "none" value.
-     typedef Uint1 TAlleleIndex;
-     enum {
-         kNo_AlleleIndex = kMax_UI1,
-         kMax_AlleleIndex = kNo_AlleleIndex - 1,
-         kMax_AlleleLength = 5
-     };
-     // Number of allele slots in one record.
-     enum {
-         kMax_AllelesCount = 4
-     };
-     typedef Uint1 TWeight;
-     enum {
-         kMax_Weight = kMax_I1
-     };
-     // Bit flags kept in m_Flags.
-     // NOTE(review): fQualReplace presumably marks the "replace" qualifier
-     // spelling mentioned in the revision 1.11 log entry -- confirm.
-     typedef Uint1 TFlags;
-     enum FFlags {
-         fMinusStrand = 1,
-         fQualReplace = 2
-     };
-     // Record data; the declaration order is kept as in the original.
-     TSeqPos         m_ToPosition;     // end position, see GetTo()
-     TSNPId          m_SNP_Id;
-     TFlags          m_Flags;          // combination of FFlags bits
-     TPositionDelta  m_PositionDelta;  // GetTo() - GetFrom()
-     TCommentIndex   m_CommentIndex;
-     TWeight         m_Weight;
-     TAlleleIndex    m_AllelesIndices[kMax_AllelesCount];
- };
- // Table of strings addressable by position.
- //
- // m_Strings keeps the strings in index order; m_Indices is a
- // string -> index map.  ClearIndices() drops only the map and leaves the
- // stored strings in place, Clear() drops both.
- // NOTE(review): GetIndex(), StoreTo() and LoadFrom() are defined outside
- // this header; the notes on them below are inferred from their signatures
- // only -- confirm against the implementation file.
- class CIndexedStrings
- {
- public:
-     // Forget the string -> index map; the stored strings are kept.
-     void ClearIndices(void)
-     {
-         m_Indices.clear();
-     }
-     // Forget both the map and the stored strings.
-     void Clear(void)
-     {
-         ClearIndices();
-         m_Strings.clear();
-     }
-     // True when no strings are stored.
-     bool IsEmpty(void) const
-     {
-         return m_Strings.empty();
-     }
-     // Number of stored strings.
-     size_t GetSize(void) const
-     {
-         return m_Strings.size();
-     }
-     // NOTE(review): presumably returns the index of 's', adding it when it
-     // is new and 'max_index' allows -- confirm.
-     size_t GetIndex(const string& s, size_t max_index);
-     // Return the string stored at 'index'.
-     // 'index' must be less than GetSize().  The precondition is checked
-     // with _ASSERT, the same kind of guard
-     // CSeq_annot_SNP_Info::GetSNP_Info() puts on its index, so a stale or
-     // reserved index is reported in builds where _ASSERT is active
-     // instead of silently reading past the end of the vector.
-     const string& GetString(size_t index) const
-     {
-         _ASSERT(index < m_Strings.size());
-         return m_Strings[index];
-     }
-     // Stream serialization of the table.
-     // NOTE(review): 'max_index' and 'max_length' presumably bound what
-     // LoadFrom() accepts -- confirm.
-     void StoreTo(CNcbiOstream& stream) const;
-     void LoadFrom(CNcbiIstream& stream, size_t max_index, size_t max_length);
- private:
-     typedef vector<string> TStrings;
-     typedef map<string, size_t> TIndices;
-     TStrings m_Strings;   // strings in index order
-     TIndices m_Indices;   // string -> index map
- };
- // Holder of the SNP features of one Seq-annot kept in table form.
- //
- // Holds the vector of SSNP_Info records (m_SNP_Set), two string tables
- // (m_Comments, m_Alleles), the gi and Seq-id the records are checked
- // against (see x_CheckGi()), and the CSeq_annot handed out by
- // GetRemainingSeq_annot().
- // NOTE(review): that CSeq_annot presumably carries the features that
- // could not be packed into the table -- confirm.
- class NCBI_XOBJMGR_EXPORT CSeq_annot_SNP_Info : public CTSE_Info_Object
- {
-     typedef CTSE_Info_Object TParent;
- public:
-     CSeq_annot_SNP_Info();
-     CSeq_annot_SNP_Info(const CSeq_annot_SNP_Info& info);
-     ~CSeq_annot_SNP_Info();
-     // Access to the enclosing info objects.
-     const CSeq_annot_Info& GetParentSeq_annot_Info() const;
-     CSeq_annot_Info& GetParentSeq_annot_Info();
-     const CSeq_entry_Info& GetParentSeq_entry_Info() const;
-     CSeq_entry_Info& GetParentSeq_entry_Info();
-     // Tree initialization.
-     void x_ParentAttach(CSeq_annot_Info& parent);
-     void x_ParentDetach(CSeq_annot_Info& parent);
-     void x_UpdateAnnotIndexContents(CTSE_Info& tse);
-     void x_UnmapAnnotObjects(CTSE_Info& tse);
-     void x_DropAnnotObjects(CTSE_Info& tse);
-     // Container-like read access to the SNP table.
-     typedef vector<SSNP_Info> TSNP_Set;
-     typedef TSNP_Set::const_iterator const_iterator;
-     typedef CRange<TSeqPos> TRange;
-     bool empty() const;
-     const_iterator begin() const;
-     const_iterator end() const;
-     // First record whose end position is not before range.GetFrom().
-     const_iterator FirstIn(const TRange& range) const;
-     int GetGi() const;
-     const CSeq_id& GetSeq_id() const;
-     // Record at 'index'; 'index' must be within the table.
-     const SSNP_Info& GetSNP_Info(size_t index) const;
-     const CSeq_annot& GetRemainingSeq_annot() const;
-     void Reset();
- protected:
-     // Translation between strings and the one-byte indices kept in
-     // SSNP_Info records.
-     SSNP_Info::TCommentIndex x_GetCommentIndex(const string& comment);
-     const string& x_GetComment(SSNP_Info::TCommentIndex index) const;
-     SSNP_Info::TAlleleIndex x_GetAlleleIndex(const string& allele);
-     const string& x_GetAllele(SSNP_Info::TAlleleIndex index) const;
-     // Gi bookkeeping and table filling.
-     bool x_CheckGi(int gi);
-     void x_SetGi(int gi);
-     void x_AddSNP(const SSNP_Info& snp_info);
-     void x_DoUpdateObject();
- private:
-     // Declared private; no definition is visible in this header.
-     // NOTE(review): presumably left undefined to forbid assignment.
-     CSeq_annot_SNP_Info& operator=(const CSeq_annot_SNP_Info&);
-     friend class CSeq_annot_Info;
-     friend class CSeq_annot_SNP_Info_Reader;
-     friend class CSNP_Seq_feat_hook;
-     friend struct SSNP_Info;
-     friend class CSeq_feat_Handle;
-     int                 m_Gi;
-     CRef<CSeq_id>       m_Seq_id;
-     TSNP_Set            m_SNP_Set;
-     CIndexedStrings     m_Comments;
-     CIndexedStrings     m_Alleles;
-     CRef<CSeq_annot>    m_Seq_annot;
- };
- /////////////////////////////////////////////////////////////////////////////
- // SSNP_Info
- /////////////////////////////////////////////////////////////////////////////
- // Start position of the SNP: the stored end position moved back by the
- // stored per-record delta.
- inline
- TSeqPos SSNP_Info::GetFrom() const
- {
-     const TSeqPos from = m_ToPosition - m_PositionDelta;
-     return from;
- }
- // End position of the SNP, exactly as stored in the record.
- inline
- TSeqPos SSNP_Info::GetTo() const
- {
-     const TSeqPos to = m_ToPosition;
-     return to;
- }
- // True when the fMinusStrand bit is set in the record's flags.
- inline
- bool SSNP_Info::MinusStrand() const
- {
-     if ( m_Flags & fMinusStrand ) {
-         return true;
-     }
-     return false;
- }
- // Order two records by their end positions only.
- inline
- bool SSNP_Info::operator<(const SSNP_Info& snp) const
- {
-     return snp.m_ToPosition > m_ToPosition;
- }
- // Compare the record's end position with a bare position.
- // This is the comparison lower_bound() relies on in
- // CSeq_annot_SNP_Info::FirstIn().
- inline
- bool SSNP_Info::operator<(TSeqPos to_position) const
- {
-     return to_position > m_ToPosition;
- }
- // True when this record ends at least kMax_PositionDelta past the open
- // end of 'range'.
- // The open end is capped at kInvalidSeqPos - kMax_PositionDelta first, so
- // adding kMax_PositionDelta back cannot wrap around.
- // NOTE(review): presumably used to stop a scan over the end-sorted table,
- // since no later record could still start inside 'range' -- confirm.
- inline
- bool SSNP_Info::NoMore(const TRange& range) const
- {
-     const TSeqPos capped_end = min(kInvalidSeqPos-kMax_PositionDelta,
-                                    range.GetToOpen());
-     return GetTo() >= capped_end + kMax_PositionDelta;
- }
- // True when the record starts at or after the open end of 'range'.
- inline
- bool SSNP_Info::NotThis(const TRange& range) const
- {
-     const TSeqPos range_end = range.GetToOpen();
-     return !(GetFrom() < range_end);
- }
- /////////////////////////////////////////////////////////////////////////////
- // CSeq_annot_SNP_Info
- /////////////////////////////////////////////////////////////////////////////
- // True when the SNP table holds no records.
- inline
- bool CSeq_annot_SNP_Info::empty() const
- {
-     const bool no_records = m_SNP_Set.empty();
-     return no_records;
- }
- // Iterator to the first record of the SNP table.
- inline
- CSeq_annot_SNP_Info::const_iterator
- CSeq_annot_SNP_Info::begin() const
- {
-     const_iterator first = m_SNP_Set.begin();
-     return first;
- }
- // Past-the-end iterator of the SNP table.
- inline
- CSeq_annot_SNP_Info::const_iterator
- CSeq_annot_SNP_Info::end() const
- {
-     const_iterator last = m_SNP_Set.end();
-     return last;
- }
- // First record whose end position is not less than range.GetFrom().
- // The search is a binary one over the table, using
- // SSNP_Info::operator<(TSeqPos).
- // NOTE(review): this requires m_SNP_Set to be ordered by end position;
- // x_AddSNP() only appends, so the ordering must come from the caller --
- // confirm.
- inline
- CSeq_annot_SNP_Info::const_iterator
- CSeq_annot_SNP_Info::FirstIn(const TRange& range) const
- {
-     const TSeqPos from = range.GetFrom();
-     return lower_bound(m_SNP_Set.begin(), m_SNP_Set.end(), from);
- }
- // The gi stored for this SNP table.
- inline
- int CSeq_annot_SNP_Info::GetGi() const
- {
-     const int gi = m_Gi;
-     return gi;
- }
- // The Seq-id object referenced by m_Seq_id.
- inline
- const CSeq_id& CSeq_annot_SNP_Info::GetSeq_id() const
- {
-     const CSeq_id& seq_id = *m_Seq_id;
-     return seq_id;
- }
- // Check 'gi' against the gi stored for this table.
- // Returns true when 'gi' equals the stored value, or when the stored
- // value is negative, in which case 'gi' is installed through x_SetGi().
- // Returns false for any other mismatch.
- // NOTE(review): a negative m_Gi presumably means "not set yet" -- confirm.
- inline
- bool CSeq_annot_SNP_Info::x_CheckGi(int gi)
- {
-     if ( gi != m_Gi ) {
-         if ( m_Gi >= 0 ) {
-             // a different gi has been fixed already
-             return false;
-         }
-         x_SetGi(gi);
-     }
-     return true;
- }
- // The Seq-annot object referenced by m_Seq_annot.
- inline
- const CSeq_annot& CSeq_annot_SNP_Info::GetRemainingSeq_annot() const
- {
-     const CSeq_annot& remaining = *m_Seq_annot;
-     return remaining;
- }
- // Translate a comment string into its one-byte table index.
- // A comment longer than kMax_CommentLength is not looked up at all and
- // yields kNo_CommentIndex.  Otherwise the result of
- // m_Comments.GetIndex(comment, kMax_CommentIndex) is returned, narrowed
- // to TCommentIndex.
- inline
- SSNP_Info::TCommentIndex
- CSeq_annot_SNP_Info::x_GetCommentIndex(const string& comment)
- {
-     if ( comment.size() > SSNP_Info::kMax_CommentLength ) {
-         return SSNP_Info::kNo_CommentIndex;
-     }
-     // same size_t -> TCommentIndex narrowing as the implicit conversion
-     return static_cast<SSNP_Info::TCommentIndex>
-         (m_Comments.GetIndex(comment, SSNP_Info::kMax_CommentIndex));
- }
- // Comment string stored under 'index' in the comment table.
- inline
- const string&
- CSeq_annot_SNP_Info::x_GetComment(SSNP_Info::TCommentIndex index) const
- {
-     const string& comment = m_Comments.GetString(index);
-     return comment;
- }
- // Allele string stored under 'index' in the allele table.
- inline
- const string&
- CSeq_annot_SNP_Info::x_GetAllele(SSNP_Info::TAlleleIndex index) const
- {
-     const string& allele = m_Alleles.GetString(index);
-     return allele;
- }
- // Append a copy of 'snp_info' at the end of the SNP table.
- inline
- void CSeq_annot_SNP_Info::x_AddSNP(const SSNP_Info& snp_info)
- {
-     m_SNP_Set.insert(m_SNP_Set.end(), snp_info);
- }
- // Record stored at position 'index' of the SNP table.
- // 'index' must be smaller than the table size; this is checked by _ASSERT.
- inline
- const SSNP_Info& CSeq_annot_SNP_Info::GetSNP_Info(size_t index) const
- {
-     _ASSERT(index < m_SNP_Set.size());
-     const SSNP_Info& record = m_SNP_Set[index];
-     return record;
- }
- END_SCOPE(objects)
- END_NCBI_SCOPE
- /*
- * ---------------------------------------------------------------------------
- * $Log: snp_annot_info.hpp,v $
- * Revision 1000.2 2004/06/01 19:22:14 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.14
- *
- * Revision 1.14 2004/05/04 18:08:47 grichenk
- * Added CSeq_feat_Handle, CSeq_align_Handle and CSeq_graph_Handle
- *
- * Revision 1.13 2004/03/24 18:30:29 vasilche
- * Fixed edit API.
- * Every *_Info object has its own shallow copy of original object.
- *
- * Revision 1.12 2004/03/16 15:47:27 vasilche
- * Added CBioseq_set_Handle and set of EditHandles
- *
- * Revision 1.11 2004/02/06 16:13:19 vasilche
- * Added parsing "replace" as a synonym of "allele" in SNP qualifiers.
- * More compact format of SNP table in cache. SNP table version increased.
- * Fixed null pointer exception when SNP features are loaded from cache.
- *
- * Revision 1.10 2004/01/28 20:54:35 vasilche
- * Fixed mapping of annotations.
- *
- * Revision 1.9 2004/01/13 16:55:31 vasilche
- * CReader, CSeqref and some more classes moved from xobjmgr to separate lib.
- * Headers moved from include/objmgr to include/objtools/data_loaders/genbank.
- *
- * Revision 1.8 2003/10/21 16:29:14 vasilche
- * Added check for errors in SNP table loaded from cache.
- *
- * Revision 1.7 2003/10/21 14:27:35 vasilche
- * Added caching of gi -> sat,satkey,version resolution.
- * SNP blobs are stored in cache in preprocessed format (platform dependent).
- * Limit number of connections to GenBank servers.
- * Added collection of ID1 loader statistics.
- *
- * Revision 1.6 2003/09/30 16:22:01 vasilche
- * Updated internal object manager classes to be able to load ID2 data.
- * SNP blobs are loaded as ID2 split blobs - readers convert them automatically.
- * Scope caches results of requests for data to data loaders.
- * Optimized CSeq_id_Handle for gis.
- * Optimized bioseq lookup in scope.
- * Reduced object allocations in annotation iterators.
- * CScope is allowed to be destroyed before other objects using this scope are
- * deleted (feature iterators, bioseq handles etc).
- * Optimized lookup for matching Seq-ids in CSeq_id_Mapper.
- * Added 'adaptive' option to objmgr_demo application.
- *
- * Revision 1.5 2003/08/27 14:28:51 vasilche
- * Reduce amount of object allocations in feature iteration.
- *
- * Revision 1.4 2003/08/15 19:34:53 vasilche
- * Added missing #include <algorithm>
- *
- * Revision 1.3 2003/08/15 19:19:15 vasilche
- * Fixed memory leak in string packing hooks.
- * Fixed processing of 'partial' flag of features.
- * Allow table packing of non-point SNP.
- * Allow table packing of SNP with long alleles.
- *
- * Revision 1.2 2003/08/14 21:26:04 kans
- * fixed inconsistent line endings that stopped Mac compiler
- *
- * Revision 1.1 2003/08/14 20:05:19 vasilche
- * Simple SNP features are stored as table internally.
- * They are recreated when needed using CFeat_CI.
- *
- * ===========================================================================
- */
- #endif // SNP_ANNOT_INFO__HPP