density_map.hpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:21k
- /*
- * ===========================================================================
- * PRODUCTION $Log: density_map.hpp,v $
- * PRODUCTION Revision 1000.0 2004/06/01 19:54:41 gouriano
- * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.1
- * PRODUCTION
- * ===========================================================================
- */
- #ifndef GUI_UTILS___DENSITY_GRAPH__HPP
- #define GUI_UTILS___DENSITY_GRAPH__HPP
- /* $Id: density_map.hpp,v 1000.0 2004/06/01 19:54:41 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Authors: Mike DiCuccio
- *
- * File Description:
- *
- */
- #include <corelib/ncbistd.hpp>
- #include <objmgr/bioseq_handle.hpp>
- #include <objmgr/feat_ci.hpp>
- #include <objmgr/align_ci.hpp>
- #include <objmgr/annot_ci.hpp>
- #include <objtools/alnmgr/alnmap.hpp>
- #include <objtools/alnmgr/alnmix.hpp>
- #include <util/range.hpp>
- #include <deque>
- /** @addtogroup GUI_UTILS
- *
- * @{
- */
- BEGIN_NCBI_SCOPE
- /// Run iterator. iterate through runs of equal values in the bins.
- template <typename CntType>
- class CDenMapRunIterator {
- public:
- typedef vector<CntType> container_type;
- typedef typename container_type::size_type position_type;
-
- CDenMapRunIterator(position_type here, const container_type& bins, TSeqPos start, TSeqPos window)
- : m_Bins(bins), m_Pos(here), m_RunLength(x_CalcRunLength()), m_Start(start), m_Window(window)
- {}
- CntType Advance();
-
- position_type GetPosition() const { return m_Pos; }
- position_type GetRunLength() const{ return m_RunLength; }
- TSeqPos GetSeqPosition() const;
- TSeqPos GetSeqRunLength() const;
-
- CntType GetValue() const { return Valid() ? m_Bins[m_Pos]: 0 ; }
-
- bool operator ==(CDenMapRunIterator<CntType>& rhs) const { return m_Pos == rhs.m_Pos; }
-
- operator bool() const { return Valid(); }
- bool Valid() const { return m_Pos >= 0 && m_Pos < m_Bins.size() && m_RunLength != 0; }
- private:
-
- position_type x_CalcRunLength() const;
-
- const container_type& m_Bins;
- position_type m_Pos;
- position_type m_RunLength;
-
- const TSeqPos m_Start;
- const TSeqPos m_Window;
-
- };
- template <typename CntType>
- TSeqPos CDenMapRunIterator<CntType>::GetSeqPosition() const
- {
- return GetPosition() * m_Window + m_Start;
- }
- template <typename CntType>
- TSeqPos CDenMapRunIterator<CntType>::GetSeqRunLength() const
- {
- return GetRunLength() * m_Window;
- }
- template <typename CntType>
- typename CDenMapRunIterator<CntType>::position_type
- CDenMapRunIterator<CntType>::x_CalcRunLength() const
- {
- if (m_Pos >= 0 && m_Pos < m_Bins.size()) {
- CntType this_value = m_Bins[m_Pos];
- position_type i;
- for (i = m_Pos + 1;
- i < m_Bins.size() && m_Bins[i] == this_value;
- ++i) {}
- return i - m_Pos;
- }
- return 0;
- }
- template <typename CntType>
- CntType
- CDenMapRunIterator<CntType>::Advance() {
- m_Pos += m_RunLength;
- m_RunLength = x_CalcRunLength();
- return GetValue();
- }
- ///
- /// class CDensityMap generates a low-resolution view of a set of features.
- /// The features to be processed are defined by the SAnnotSelector object.
- /// The return value is a vector of counts, one for each of the bins
- /// defined by the window width and spanning the range from start to stop.
- /// If start and stop are both zero, then the entire sequence is evaluated.
- ///
- template <typename CntType, typename Accum = plus<CntType> >
- class CDensityMap {
- public:
- typedef vector<CntType> container_type;
- typedef typename container_type::const_iterator const_iterator;
- typedef typename container_type::iterator iterator;
- typedef CDenMapRunIterator<CntType> runlen_iterator;
-
- CDensityMap(TSeqPos start, TSeqPos stop, TSeqPos window = 1,
- Accum ac = Accum());
- CDensityMap(const objects::CBioseq_Handle& handle, TSeqPos window = 1,
- Accum ac = Accum());
-
- void AddRange(TSeqRange range, CntType score = 1, bool expand = false);
-
- /// All features on this bioseq selected by sel in the range of this.
- CntType AddFeatures(const objects::CBioseq_Handle& handle,
- objects::SAnnotSelector sel);
-
- /// All alignments on this bioseq selected by sel in the range of this.
- CntType AddAlignments(const objects::CBioseq_Handle& handle,
- objects::SAnnotSelector sel);
- /// All alignments in a given annotation on this bioseq within the range of this.
- CntType AddAlignments(const objects::CBioseq_Handle& handle,
- const objects::CSeq_annot& seq_annot);
- void Clear() { m_Max = 0; fill(m_Bins.begin(), m_Bins.end(), 0); }
- TSeqPos GetStart() const { return m_Range.GetFrom(); }
- TSeqPos GetStop() const { return m_Range.GetTo(); }
- TSeqRange GetRange() const { return m_Range; }
- TSeqPos GetWindow() const { return m_Window; }
- size_t GetBins() const { return m_Bins.size(); }
- CntType GetMax() const { return m_Max; }
- Accum GetAccum() const { return m_AccumFunc; }
-
- /// extend our density map to cover the sequence position stop.
- /// can only be used to extend to the right, that is only Stop is affected, not Start.
- void ExtendTo(TSeqPos stop);
-
- const_iterator begin() const { return m_Bins.begin(); }
- const_iterator end() const { return m_Bins.end(); }
- iterator begin() { return m_Bins.begin(); }
- iterator end() { return m_Bins.end(); }
-
- runlen_iterator RunLenBegin() const { return RunLenIterator(0); }
- runlen_iterator RunLenIterator(typename container_type::size_type n) const
- { return runlen_iterator(n, m_Bins, m_Range.GetFrom(), m_Window); }
-
- CntType operator[](typename container_type::size_type n) { return m_Bins[n]; }
- /// OLD static method. Use AddFeatures method instead.
- /// retrieve a density map. The return value is the maximum value
- /// in the density graph.
- static TSeqPos GetDensityMap(const objects::CBioseq_Handle& handle,
- TSeqPos start, TSeqPos stop,
- TSeqPos window,
- objects::SAnnotSelector sel,
- vector<TSeqPos>& density);
- private:
- /// given the range and window size, how many bins should there be?
- size_t x_CalcNbins() { return (m_Range.GetTo() - m_Range.GetFrom() + m_Window) / m_Window; }
- /// convert from sequence coords to a bin number.
- size_t x_BinN(TSeqPos p) { return (p - m_Range.GetFrom())/m_Window; }
- /// closed range on a sequence this covers.
- TSeqRange m_Range;
- /// coordinates per bin.
- TSeqPos m_Window;
- /// maximum Count accumulated in the bins so far.
- CntType m_Max;
-
- /// Where we actually keep the accumulated counts/scores/whatever.
- container_type m_Bins;
-
- Accum m_AccumFunc;
- };
- template <typename CntType, typename Accum >
- CDensityMap<CntType, Accum>::CDensityMap(TSeqPos start,
- TSeqPos stop,
- TSeqPos window /* = 1 */,
- Accum ac /*= Accum() */)
- : m_Range(start, stop),
- m_Window(window < 1 ? 1 : window),
- m_Max(0),
- m_Bins(x_CalcNbins()),
- m_AccumFunc(ac)
- {
- }
- template <typename CntType, typename Accum >
- CDensityMap<CntType, Accum>::CDensityMap(const objects::CBioseq_Handle& handle,
- TSeqPos window /* = 1 */,
- Accum ac /*= Accum() */)
- : m_Range(0, handle.GetSeqVector().size()),
- m_Window(window < 1 ? 1 : window),
- m_Max(0),
- m_Bins(x_CalcNbins()),
- m_AccumFunc(ac)
- {
- }
- template <typename CntType, typename Accum >
- void CDensityMap<CntType, Accum>::ExtendTo(TSeqPos stop)
- {
- if (stop > GetStop()) {
- m_Range.SetTo(stop);
- m_Bins.resize(x_CalcNbins(), 0);
- }
- }
- template <typename CntType, typename Accum >
- void CDensityMap<CntType, Accum>::AddRange(TSeqRange range, CntType score, bool expand)
- {
- //_ASSERT(range.GetFrom() <= range.GetTo());
- if (range.GetFrom() > range.GetTo()) {
- range = TSeqRange(range.GetTo(), range.GetFrom());
- }
-
- if ( expand ) {
- ExtendTo( range.GetTo());
- }
-
- TSeqRange usable_range(m_Range.IntersectionWith(range));
- if (usable_range.Empty()) {
- return;
- }
-
- size_t begin_bin = x_BinN(usable_range.GetFrom());
- size_t end_bin = x_BinN(usable_range.GetTo()) + 1;
- for (size_t i = begin_bin; i < end_bin ; ++i) {
- CntType new_val = m_AccumFunc(m_Bins[i], score);
- m_Bins[i] = new_val;
- m_Max = max(m_Max, new_val);
- }
- }
- template <typename CntType, typename Accum >
- CntType CDensityMap<CntType, Accum>::AddFeatures(
- const objects::CBioseq_Handle& handle,
- objects::SAnnotSelector sel)
- {
- TSeqPos start(GetStart());
- TSeqPos stop(GetStop());
- // grab a feature iterator for our range
- objects::CFeat_CI feat_iter(handle, start, stop, sel);
- if (feat_iter.GetSize() == 0) {
- return 0;
- }
- for (; feat_iter; ++feat_iter) {
- objects::CMappedFeat feat = *feat_iter;
- TSeqRange range = feat.GetLocation().GetTotalRange();
- AddRange(range, 1, false);
- }
- return GetMax();
- }
-
- template <typename CntType, typename Accum >
- CntType CDensityMap<CntType, Accum>::AddAlignments(
- const objects::CBioseq_Handle& handle,
- objects::SAnnotSelector sel)
- {
- TSeqPos start(GetStart());
- TSeqPos stop(GetStop());
-
-
- // grab a feature iterator for our range
- objects::CAlign_CI align_iter(handle, start, stop, sel);
- for (size_t ai = 0; align_iter; ++align_iter, ++ai) {
- const objects::CSeq_align& align = *align_iter;
- if (! align.CanGetDim()) {
- _TRACE("Dimension not set. " << ai);
- continue;
- }
- int dim = align.GetDim();
- if (dim != 2) {
- _TRACE("Dimension not 2. " << ai);
- continue;
- }
- /*
- CAlnMix mix(handle.GetScope());
- mix.Add(align);
- mix.Merge(CAlnMix::fGen2EST |
- CAlnMix::fTryOtherMethodOnFail |
- CAlnMix::fGapJoin);
- CRef<CAlnMap> aln_map
- (new CAlnMap(mix.GetDenseg()));
- */
- CRef< objects::CAlnMap> aln_map;
- if (align.GetSegs().IsStd()) {
- _TRACE(ai << ": Std seg");
- CRef<objects::CSeq_align> ds_align
- = align.CreateDensegFromStdseg();
- aln_map = new objects::CAlnMap( ds_align->GetSegs().GetDenseg());
- continue;
- } else if (align.GetSegs().IsDenseg()) {
- aln_map = new objects::CAlnMap(align.GetSegs().GetDenseg());
- _TRACE(ai << ": Dense seg");
- }
-
- {{
- string all_ids = ": ";
- for (objects::CAlnMap::TDim di = 0; di < aln_map->GetNumRows();
- ++di) {
- all_ids += aln_map->GetSeqId(di).AsFastaString() + ", ";
- }
- _TRACE(all_ids);;
- }}
-
- TSeqRange range(aln_map->GetSeqStart(0), aln_map->GetSeqStop(0));
- AddRange(range, 1, false);
- }
- return GetMax();
- }
- template <typename CntType, typename Accum >
- CntType CDensityMap<CntType, Accum>::AddAlignments(
- const objects::CBioseq_Handle& handle,
- const objects::CSeq_annot& seq_annot)
- {
- TSeqPos start(GetStart());
- TSeqPos stop(GetStop());
- int ai = 0;
-
- objects::SAnnotSelector sel;
- sel.SetLimitSeqAnnot(&seq_annot)
- .SetSortOrder(objects::SAnnotSelector::eSortOrder_None);
- // grab an alignment iterator for our range
- objects::CAlign_CI align_iter(handle, start, stop, sel);
-
- objects::CAlnMix mix(handle.GetScope());
- for (int ai = 0; align_iter; ++align_iter, ++ai) {
- const objects::CSeq_align& align = *align_iter;
- // check number of dimensions
- if (! align.CanGetDim() || align.GetDim() != 2) {
- // _TRACE("Dimension not 2. " << ai);
- continue;
- }
- // conver to a AlnMap.
- CRef<objects::CAlnMap> aln_map;
- if (align.GetSegs().IsStd()) {
- // _TRACE(ai << ": Std seg" );
- continue; // IGNORE std segs for now.
- try {
- CRef<objects::CSeq_align> ds_align =
- align.CreateDensegFromStdseg();
- aln_map =
- new objects::CAlnMap(ds_align->GetSegs().GetDenseg());
- {{
- string all_ids = ": ";
- for (objects::CAlnMap::TDim di = 0;
- di < aln_map->GetNumRows(); ++di) {
- all_ids += aln_map->GetSeqId(di).AsFastaString() + ", ";
- }
- _TRACE(all_ids);
- }}
- } catch (const objects::CSeqalignException& e) {
- // _TRACE(" : FAILED! " << e.what());
- continue;
- }
- } else if (align.GetSegs().IsDenseg()) {
- mix.Add(align);//, CAlnMix::fDontUseObjMgr);
- mix.Merge();
- aln_map = new objects::CAlnMap(mix.GetDenseg());
- //aln_map = new CAlnMap(align.GetSegs().GetDenseg());
- if (aln_map->GetNumSegs() < 2)
- continue;
- _TRACE(ai << ": Dense seg");
- }
-
- // find out what row our bioseq is on.
- objects::CAlnMap::TNumrow row = 0;
- objects::CAlnMap::TNumrow anchor = 0;
- for (row = 0; row != aln_map->GetNumRows(); ++row) {
- if ( handle.IsSynonym(aln_map->GetSeqId(row)) ) {
- aln_map->SetAnchor(row);
- anchor = row;
- break;
- }
- }
- if (row == aln_map->GetNumRows()) {
- continue;
- }
- // works since dimension always 2.
- objects::CAlnMap::TNumrow other_row = 1 - row;
- /**
- // total range
- _TRACE(" (" << aln_map->GetSeqStart(row) << "," << aln_map->GetSeqStop(row) << ")");
- // IDs
- {{
- _TRACE(" Segs: " << aln_map->GetNumSegs());
- string all_ids = ": ";
- for (CAlnMap::TDim di = 0; di < aln_map->GetNumRows(); ++di) {
- all_ids += aln_map->GetSeqId(di).AsFastaString() + ", ";
- }
- _TRACE(all_ids);
- }}
- // Segments.
- _TRACE(" ");
- for (CAlnMap::TNumseg s = 0; s < aln_map->GetNumSegs(); ++s) {
- // if (aln_map->GetSegType(row, s) && CAlnMap:: )
- _TRACE(" (" << aln_map->GetStart(row, s) << "," << aln_map->GetStop(row, s) << ")");
- }
- // Chunks
- _TRACE(" Chunks: " );
- CRef<CAlnMap::CAlnChunkVec> chunks =
- aln_map->GetSeqChunks(row, CAlnMap::TSignedRange(start, stop), CAlnMap::fAlnSegsOnly);
- for (CAlnMap::TNumchunk c = 0; c < chunks->size(); ++c) {
- CConstRef<CAlnMap::CAlnChunk> chunk = (*chunks)[c];
- _TRACE("(" << chunk->GetRange().GetFrom() << "," << chunk->GetRange().GetTo() << ") ");
- }
- **/
- /*
- TSeqRange range(aln_map->GetSeqRange(row));
- TSeqRange other_range(aln_map->GetSeqRange(other_row));
- if (GetRange().IntersectingWith(range)) {
- AddRange(range, 1, false);
- }
- */
- }
- /*
- mix.Merge(CAlnMix::fTryOtherMethodOnFail );
- CRef< CAlnMap> aln_map;
- aln_map = new CAlnMap(mix.GetDenseg());
- */
- return GetMax();
- }
- template <typename CntType, typename Accum >
- TSeqPos CDensityMap<CntType, Accum>::GetDensityMap(const objects::CBioseq_Handle& handle,
- TSeqPos start, TSeqPos stop,
- TSeqPos window,
- objects::SAnnotSelector sel,
- vector<TSeqPos>& density)
- {
- // set up our bins to handle pooled feature counts
- if (stop == start && stop == 0) {
- objects::CSeqVector vec = handle.GetSeqVector();
- stop = vec.size();
- }
- // grab a feature iterator for our range
- objects::CFeat_CI feat_iter(handle, start, stop, sel);
- if (feat_iter.GetSize() == 0) {
- return 0;
- }
- size_t bins = (stop - start) / window;
- if (bins * window < stop - start) {
- ++bins;
- }
- density.resize(bins, 0);
- TSeqPos max_val = 0;
- // we keep a temporary list of mapped features - we use these
- // the screen what's in our visible range
- deque<objects::CMappedFeat> feats;
- TSeqPos bin_count = 0;
- NON_CONST_ITERATE(vector<TSeqPos>, bin_iter, density) {
- TSeqPos bin_start = start + bin_count * window;
- TSeqPos bin_stop = bin_start + window;
- // accumulate features for this bin
- if (feat_iter) {
- objects::CMappedFeat feat = *feat_iter;
- TSeqRange range = feat.GetLocation().GetTotalRange();
- while (range.GetFrom() < bin_stop) {
- feats.push_back(feat);
- ++feat_iter;
- if ( !feat_iter ) {
- break;
- }
- feat = *feat_iter;
- range = feat.GetLocation().GetTotalRange();
- }
- }
- // remove features outside this bin
- while (feats.size()) {
- objects::CMappedFeat& feat = feats.front();
- TSeqRange range = feat.GetLocation().GetTotalRange();
- if (range.GetTo() < bin_start) {
- feats.pop_front();
- } else {
- break;
- }
- }
- // store our count and go to the next bin
- *bin_iter = feats.size();
- max_val = max(*bin_iter, max_val);
- ++bin_count;
- }
- return max_val;
- }
- END_NCBI_SCOPE
- /* @} */
- /*
- * ===========================================================================
- * $Log: density_map.hpp,v $
- * Revision 1000.0 2004/06/01 19:54:41 gouriano
- * PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.1
- *
- * Revision 1.1 2004/04/30 11:52:52 dicuccio
- * Split out from gui/utils
- *
- * Revision 1.17 2004/04/16 17:11:13 dicuccio
- * Clean-up: use _TRACE(), not cout
- *
- * Revision 1.16 2004/04/16 16:55:51 ucko
- * Add several missing occurrences of objects::, needed now that we
- * (rightly) no longer pull in the whole namespace.
- *
- * Revision 1.15 2004/04/16 14:24:14 dicuccio
- * Formatting changes. Dropped using namespace objects. Added doxygen modules tag
- *
- * Revision 1.14 2004/04/07 12:39:09 dicuccio
- * Formatting changes
- *
- * Revision 1.13 2004/03/22 23:04:00 jcherry
- * Added "return"
- *
- * Revision 1.12 2004/03/22 19:18:31 gorelenk
- * Added missed typename(s).
- * Added objects namespace to parameters list of CDensityMap::AddAlignments .
- *
- * Revision 1.11 2004/03/22 18:39:29 ucko
- * Indicate that CDenMapRunIterator<>::operator== returns bool.
- *
- * Revision 1.10 2004/03/22 16:38:56 rsmith
- * add AddAlignments in various flavors. add RunLength iterators.
- *
- * Revision 1.9 2004/03/16 15:37:21 vasilche
- * Added required include
- *
- * Revision 1.8 2004/02/17 17:16:50 dicuccio
- * Drop export specifier (the class is a template now). Code clean-up and
- * reformatting.
- *
- * Revision 1.7 2004/02/10 00:19:24 ucko
- * Add missing "typename"
- *
- * Revision 1.6 2004/02/09 21:00:46 rsmith
- * Make CDensityMap a template class. Add methods AddFeatures and AddRange.
- *
- * Revision 1.5 2003/09/16 14:36:10 dicuccio
- * Minor clarification in comments
- *
- * Revision 1.4 2003/08/22 15:57:11 dicuccio
- * Removed 'USING_SCOPE(objects)'. Removed export specifier from inline structs
- *
- * Revision 1.3 2003/08/13 16:23:20 dicuccio
- * Changed return value - maximum value in the graph
- *
- * Revision 1.2 2003/08/13 13:36:18 dicuccio
- * Filled in remaining description
- *
- * Revision 1.1 2003/08/13 13:34:37 dicuccio
- * Initial revision
- *
- * ===========================================================================
- */
- #endif // GUI_UTILS___DENSITY_GRAPH__HPP