alignment_smear.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:12k
- /*
- * ===========================================================================
- * PRODUCTION $Log: alignment_smear.cpp,v $
- * PRODUCTION Revision 1000.0 2004/06/01 21:19:53 gouriano
- * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.2
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: alignment_smear.cpp,v 1000.0 2004/06/01 21:19:53 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Authors: Robert Smith
- *
- * File Description:
- *
- */
- #include <ncbi_pch.hpp>
- #include <gui/objutils/alignment_smear.hpp>
- #include <objtools/alnmgr/alnmap.hpp>
- #include <objmgr/bioseq_handle.hpp>
- #include <objmgr/annot_selector.hpp>
- #include <objects/seq/Seq_annot.hpp>
- #include <objects/seq/Annot_descr.hpp>
- #include <objects/seq/Annotdesc.hpp>
- #include <objects/general/User_object.hpp>
- #include <objects/general/User_field.hpp>
- #include <objects/general/Object_id.hpp>
- BEGIN_NCBI_SCOPE
- USING_SCOPE(objects);
- CAlignmentSmear::CAlignmentSmear(
- const objects::CBioseq_Handle& handle,
- TSeqPos start,
- TSeqPos stop,
- TSeqPos window,
- EAlignSmearStrand strand_type
- )
- : m_BioseqHandle(handle),
- m_AccumSeg(start, stop, window),
- m_AccumGap(start, stop, window),
- m_StrandType(strand_type)
- {
- }
- /// smear all the alignments in this annotation.
- void CAlignmentSmear::AddAnnot(const CSeq_annot& seq_annot)
- {
- objects::SAnnotSelector sel;
- sel.SetLimitSeqAnnot(&seq_annot)
- .SetSortOrder(SAnnotSelector::eSortOrder_None);
- AddAlignments(sel);
-
- // now if there is a chance we would put more than one annotation
- // in a smear we need to join their names together here, or something else?
- SetLabel(x_GetAnnotName(seq_annot));
- }
- #if 1
- /// Smear all the alignments matchec by this selector on my bioseq.
- void CAlignmentSmear::AddAlignments(const objects::SAnnotSelector& sel)
- {
- TSeqPos start(m_AccumSeg.GetStart());
- TSeqPos stop(m_AccumSeg.GetStop());
-
-
- // grab a alignment iterator for our range
- CAlign_CI align_iter(m_BioseqHandle, start, stop, sel);
- for (int ai = 0; align_iter; ++align_iter, ++ai) {
- const CSeq_align& align = *align_iter;
-
- // convert to an AlnMap
- CRef< CAlnMap> aln_map;
- if (align.GetSegs().IsStd()) {
- continue; // TODO: Make StdSeqs work.
- CRef<CSeq_align> ds_align = align.CreateDensegFromStdseg();
- aln_map = new CAlnMap( ds_align->GetSegs().GetDenseg());
- } else if (align.GetSegs().IsDenseg()) {
- aln_map = new CAlnMap(align.GetSegs().GetDenseg());
- }
- AddAlignment(*aln_map);
- }
- MaskGaps();
- }
- #else
- /* same thing but we group alignments by their id */
- /// Smear all the alignments matchec by this selector on my bioseq.
- void CAlignmentSmear::AddAlignments(const objects::SAnnotSelector& sel)
- {
- TSeqPos start(m_AccumSeg.GetStart());
- TSeqPos stop(m_AccumSeg.GetStop());
-
- typedef map<string, CRef<CAlnMix> > TMixes;
- TMixes mixes;
-
- // grab a alignment iterator for our range
- CAlign_CI align_iter(m_BioseqHandle, start, stop, sel);
- for (int ai = 0; align_iter; ++align_iter, ++ai) {
- const CSeq_align& align = *align_iter;
- // convert to an AlnMap
- CRef< CAlnMap> aln_map;
- if (align.GetSegs().IsStd()) {
- CRef<CSeq_align> ds_align = align.CreateDensegFromStdseg();
- aln_map = new CAlnMap( ds_align->GetSegs().GetDenseg());
- } else if (align.GetSegs().IsDenseg()) {
- aln_map = new CAlnMap(align.GetSegs().GetDenseg());
- }
- // which row of the alignment is the other sequence on?
- CAlnMap::TNumrow row = m_BioseqHandle.IsSynonym(
- aln_map->GetSeqId(0)) ? 1 : 0;
-
- // What is that sequences label?
- string align_label;
- aln_map->GetSeqId(row).GetLabel(&align_label);
- // Mix the alignments based on their label.
- TMixes::const_iterator mit = mixes.find(align_label);
- if (mit == mixes.end()) {
- CRef<CAlnMix> mix_ref(new CAlnMix(m_BioseqHandle.GetScope()));
- mixes[align_label] = mix_ref;
- }
- mixes[align_label]->Add(aln_map->GetDenseg());
- }
-
- // Now smear the mixed alignments.
- NON_CONST_ITERATE(TMixes, mit, mixes)
- {
- CAlnMix & aln_mix(*(mit->second));
- aln_mix.Merge(CAlnMix::fGen2EST |
- CAlnMix::fTryOtherMethodOnFail |
- CAlnMix::fGapJoin);
-
- // convert to a CAlnMap
- CAlnMap aln_map(aln_mix.GetDenseg());
- AddAlignment(aln_map);
- }
-
- // take out gaps in the middle of blocks.
- MaskGaps();
- }
- #endif
- void CAlignmentSmear::AddAlignment(CAlnMap& aln_map)
- {
- // check number of dimensions
- if ( aln_map.GetNumRows() != 2) {
- // cout << "alignment does not have 2 rows." << endl;
- return;
- }
- // find out what row our bioseq is on.
- CAlnMap::TNumrow bs_row = m_BioseqHandle.IsSynonym(
- aln_map.GetSeqId(0)) ? 0 : 1;
- _ASSERT( bs_row == 0 || bs_row == 1 );
- CAlnMap::TNumrow other_row = 1 - bs_row; // works since bs_row is always 0 or 1.
- // Are we only doing one strand?
- if (m_StrandType == eSmearStrand_Pos && aln_map.IsNegativeStrand(bs_row) )
- return;
- if (m_StrandType == eSmearStrand_Neg && aln_map.IsPositiveStrand(bs_row) )
- return;
-
- CAlnMap::TSignedRange range(aln_map.GetAlnStart(),
- aln_map.GetAlnStop());
- CRef<CAlnMap::CAlnChunkVec> aln_chunks
- (aln_map.GetAlnChunks(other_row, range,
- CAlnMap::fSeqOnly ));
- // | CAlnMap::fChunkSameAsSeg));
-
- // Smear all the segments.
- for (size_t i = 0; i < aln_chunks->size(); ++i) {
- CConstRef<CAlnMap::CAlnChunk> chunk((*aln_chunks)[i]);
-
- TSeqPos start = chunk->GetRange().GetFrom();
- start = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, start);
- TSeqPos stop = chunk->GetRange().GetTo();
- stop = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, stop);
-
- m_AccumSeg.AddRange(TSeqRange(start, stop), 1);
- }
-
- // Smear the gaps.
- for (size_t i = 1; i < aln_chunks->size(); ++i) {
- CConstRef<CAlnMap::CAlnChunk> prev_chunk((*aln_chunks)[i-1]);
- CConstRef<CAlnMap::CAlnChunk> chunk((*aln_chunks)[i]);
- TSeqPos start = prev_chunk->GetRange().GetTo();
- start = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, start);
- TSeqPos stop = chunk->GetRange().GetFrom();
- stop = aln_map.GetSeqPosFromSeqPos(bs_row, other_row, stop);
-
- m_AccumGap.AddRange(TSeqRange(start, stop));
- }
- }
- // erase gaps where there are segments.
- void CAlignmentSmear::MaskGaps()
- {
- TSegMap::iterator seg_it = m_AccumSeg.begin();
- TGapMap::iterator gap_it = m_AccumGap.begin();
- for (; gap_it != m_AccumGap.end(); ++gap_it, ++seg_it ) {
- if (*gap_it > 0 && *seg_it > 0) {
- *gap_it = 0;
- }
- }
-
- }
- string CAlignmentSmear::GetLabel() const
- {
- return m_Label;
- }
- void CAlignmentSmear::SetLabel(const string& label)
- {
- m_Label = label;
- }
- bool CAlignmentSmear::SeparateStrands(const objects::CSeq_annot& seq_annot)
- {
- string name = x_GetAnnotName(seq_annot);
-
- if (name == "BLASTX - swissprot" ||
- name == "BLASTN - mrna" ) {
- return true;
- }
- return false;
- }
- static const string s_BlastFieldKey("Blast Type");
- static const string s_HistFieldKey("Hist Seqalign");
- string CAlignmentSmear::x_GetAnnotName(const objects::CSeq_annot& seq_annot)
- {
- string label;
- string annot_name;
- string blast_type;
- string hist_type;
-
- if (seq_annot.IsSetDesc()) {
- ITERATE ( CSeq_annot::TDesc::Tdata, it,
- seq_annot.GetDesc().Get() ) {
- const CAnnotdesc& desc = **it;
- if (desc.Which() == CAnnotdesc::e_Name ) {
- annot_name = desc.GetName();
- }
- if (desc.Which() == CAnnotdesc::e_User) {
- const CUser_object& user_obj = desc.GetUser();
-
- if (user_obj.CanGetType() &&
- user_obj.GetType().Which() == CObject_id::e_Str ) {
-
- if (user_obj.GetType().GetStr() == s_BlastFieldKey) {
- const CUser_field& user_field = * user_obj.GetData().front();
-
- if (user_field.CanGetData() &&
- user_field.GetData().Which() == CUser_field::TData::e_Int &&
- user_field.CanGetLabel()) {
- const CObject_id& id = user_field.GetLabel();
-
- if (id.Which() == CObject_id::e_Str ) {
- blast_type = id.GetStr();
- }
- }
- }
-
- if (user_obj.GetType().GetStr() == s_HistFieldKey ) {
- const CUser_field& user_field = * user_obj.GetData().front();
- if (user_field.CanGetData() &&
- user_field.GetData().Which() == CUser_field::TData::e_Bool &&
- user_field.GetData().GetBool() &&
- user_field.CanGetLabel()) {
- const CObject_id& id = user_field.GetLabel();
- if (id.Which() == CObject_id::e_Str ) {
- hist_type = id.GetStr();
- }
- }
- }
- }
- }
- }
- if ( ! annot_name.empty()) {
- label = annot_name;
- } else if ( ! blast_type.empty() ) {
- label = blast_type;
- } else if ( ! hist_type.empty() ) {
- label = hist_type;
- }
- }
- return label;
- }
- END_NCBI_SCOPE
- /*
- * ===========================================================================
- * $Log: alignment_smear.cpp,v $
- * Revision 1000.0 2004/06/01 21:19:53 gouriano
- * PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.2
- *
- * Revision 1.2 2004/05/21 22:27:43 gorelenk
- * Added PCH ncbi_pch.hpp
- *
- * Revision 1.1 2004/04/30 11:48:15 dicuccio
- * Initial commit - split out from src/gui/utils
- *
- * Revision 1.2 2004/04/07 15:22:39 rsmith
- * new (not yet used) implementation of AddAlignments
- *
- * Revision 1.1 2004/03/22 16:40:55 rsmith
- * initial checkin
- *
- *
- * ===========================================================================
- */