sequence.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:137k
- /*
- * ===========================================================================
- * PRODUCTION $Log: sequence.cpp,v $
- * PRODUCTION Revision 1000.3 2004/06/01 19:25:30 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.82
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: sequence.cpp,v 1000.3 2004/06/01 19:25:30 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Author: Clifford Clausen
- *
- * File Description:
- * Sequence utilities requiring CScope
- */
- #include <ncbi_pch.hpp>
- #include <serial/iterator.hpp>
- #include <objmgr/object_manager.hpp>
- #include <objmgr/scope.hpp>
- #include <objmgr/seq_vector.hpp>
- #include <objmgr/seq_vector_ci.hpp>
- #include <objmgr/seqdesc_ci.hpp>
- #include <objmgr/feat_ci.hpp>
- #include <objmgr/impl/handle_range_map.hpp>
- #include <objmgr/impl/synonyms.hpp>
- #include <objects/general/Int_fuzz.hpp>
- #include <objects/seq/Bioseq.hpp>
- #include <objects/seq/Delta_ext.hpp>
- #include <objects/seq/Delta_seq.hpp>
- #include <objects/seq/MolInfo.hpp>
- #include <objects/seq/Seg_ext.hpp>
- #include <objects/seq/Seq_ext.hpp>
- #include <objects/seq/Seq_inst.hpp>
- #include <objects/seq/Seq_literal.hpp>
- #include <objects/seqloc/Packed_seqpnt.hpp>
- #include <objects/seqloc/Seq_bond.hpp>
- #include <objects/seqloc/Seq_id.hpp>
- #include <objects/seqloc/Seq_interval.hpp>
- #include <objects/seqloc/Seq_loc.hpp>
- #include <objects/seqloc/Seq_loc_equiv.hpp>
- #include <objects/seqloc/Seq_loc_mix.hpp>
- #include <objects/seqloc/Seq_point.hpp>
- #include <objects/seqset/Seq_entry.hpp>
- #include <objects/seqfeat/Org_ref.hpp>
- #include <objects/seqfeat/BioSource.hpp>
- #include <objects/seqfeat/Cdregion.hpp>
- #include <objects/seqfeat/Code_break.hpp>
- #include <objects/seqfeat/Genetic_code.hpp>
- #include <objects/seqfeat/Genetic_code_table.hpp>
- #include <objects/seqfeat/Seq_feat.hpp>
- #include <objmgr/util/sequence.hpp>
- #include <util/strsearch.hpp>
- #include <list>
- #include <algorithm>
- BEGIN_NCBI_SCOPE
- BEGIN_SCOPE(objects)
- BEGIN_SCOPE(sequence)
- const COrg_ref& GetOrg_ref(const CBioseq_Handle& handle)
- {
- {{
- CSeqdesc_CI desc(handle, CSeqdesc::e_Source);
- if (desc) {
- return desc->GetSource().GetOrg();
- }
- }}
- {{
- CSeqdesc_CI desc(handle, CSeqdesc::e_Org);
- if (desc) {
- return desc->GetOrg();
- }
- }}
- NCBI_THROW(CException, eUnknown, "No organism set");
- }
- int GetTaxId(const CBioseq_Handle& handle)
- {
- try {
- return GetOrg_ref(handle).GetTaxId();
- }
- catch (...) {
- return 0;
- }
- }
- TSeqPos GetLength(const CSeq_id& id, CScope* scope)
- {
- if ( !scope ) {
- return numeric_limits<TSeqPos>::max();
- }
- CBioseq_Handle hnd = scope->GetBioseqHandle(id);
- if ( !hnd ) {
- return numeric_limits<TSeqPos>::max();
- }
- CBioseq_Handle::TBioseqCore core = hnd.GetBioseqCore();
- return core->GetInst().IsSetLength() ? core->GetInst().GetLength() :
- numeric_limits<TSeqPos>::max();
- }
- TSeqPos GetLength(const CSeq_loc& loc, CScope* scope)
- THROWS((CNoLength))
- {
- switch (loc.Which()) {
- case CSeq_loc::e_Pnt:
- return 1;
- case CSeq_loc::e_Int:
- return loc.GetInt().GetLength();
- case CSeq_loc::e_Null:
- case CSeq_loc::e_Empty:
- return 0;
- case CSeq_loc::e_Whole:
- return GetLength(loc.GetWhole(), scope);
- case CSeq_loc::e_Packed_int:
- return loc.GetPacked_int().GetLength();
- case CSeq_loc::e_Mix:
- return GetLength(loc.GetMix(), scope);
- case CSeq_loc::e_Packed_pnt: // just a bunch of points
- return loc.GetPacked_pnt().GetPoints().size();
- case CSeq_loc::e_not_set:
- case CSeq_loc::e_Bond: //can't calculate length
- case CSeq_loc::e_Feat:
- case CSeq_loc::e_Equiv: // unless actually the same length...
- default:
- THROW0_TRACE(CNoLength());
- }
- }
- TSeqPos GetLength(const CSeq_loc_mix& mix, CScope* scope)
- THROWS((CNoLength))
- {
- TSeqPos length = 0;
- ITERATE( CSeq_loc_mix::Tdata, i, mix.Get() ) {
- TSeqPos ret = GetLength((**i), scope);
- length += ret;
- }
- return length;
- }
- bool IsValid(const CSeq_point& pt, CScope* scope)
- {
- if (static_cast<TSeqPos>(pt.GetPoint()) >=
- GetLength(pt.GetId(), scope) )
- {
- return false;
- }
- return true;
- }
- bool IsValid(const CPacked_seqpnt& pts, CScope* scope)
- {
- typedef CPacked_seqpnt::TPoints TPoints;
- TSeqPos length = GetLength(pts.GetId(), scope);
- ITERATE (TPoints, it, pts.GetPoints()) {
- if (*it >= length) {
- return false;
- }
- }
- return true;
- }
- bool IsValid(const CSeq_interval& interval, CScope* scope)
- {
- if (interval.GetFrom() > interval.GetTo() ||
- interval.GetTo() >= GetLength(interval.GetId(), scope))
- {
- return false;
- }
- return true;
- }
- bool IsSameBioseq (const CSeq_id& id1, const CSeq_id& id2, CScope* scope)
- {
- // Compare CSeq_ids directly
- if (id1.Compare(id2) == CSeq_id::e_YES) {
- return true;
- }
- // Compare handles
- if ( scope ) {
- try {
- CBioseq_Handle hnd1 = scope->GetBioseqHandle(id1);
- CBioseq_Handle hnd2 = scope->GetBioseqHandle(id2);
- return hnd1 && hnd2 && (hnd1 == hnd2);
- } catch (exception& e) {
- ERR_POST(e.what() << ": CSeq_id1: " << id1.DumpAsFasta()
- << ": CSeq_id2: " << id2.DumpAsFasta());
- return false;
- }
- }
- return false;
- }
- bool IsOneBioseq(const CSeq_loc& loc, CScope* scope)
- {
- try {
- GetId(loc, scope);
- return true;
- } catch (CNotUnique&) {
- return false;
- }
- }
- class CSeqIdFromHandleException : EXCEPTION_VIRTUAL_BASE public CException
- {
- public:
- // Enumerated list of document management errors
- enum EErrCode {
- eNoSynonyms,
- eRequestedIdNotFound
- };
- // Translate the specific error code into a string representations of
- // that error code.
- virtual const char* GetErrCodeString(void) const
- {
- switch (GetErrCode()) {
- case eNoSynonyms: return "eNoSynonyms";
- case eRequestedIdNotFound: return "eRequestedIdNotFound";
- default: return CException::GetErrCodeString();
- }
- }
- NCBI_EXCEPTION_DEFAULT(CSeqIdFromHandleException, CException);
- };
- const CSeq_id& GetId(const CBioseq_Handle& handle,
- EGetIdType type)
- {
- switch (type) {
- case eGetId_HandleDefault:
- return *handle.GetSeqId();
- case eGetId_ForceGi:
- if (handle.GetSeqId().GetPointer() && handle.GetSeqId()->IsGi()) {
- return *handle.GetSeqId();
- }
- {{
- CConstRef<CSynonymsSet> syns =
- handle.GetScope().GetSynonyms(*handle.GetSeqId());
- if ( !syns ) {
- string msg("No synonyms found for sequence ");
- handle.GetSeqId()->GetLabel(&msg);
- NCBI_THROW(CSeqIdFromHandleException, eNoSynonyms, msg);
- }
- ITERATE (CSynonymsSet, iter, *syns) {
- CSeq_id_Handle idh = CSynonymsSet::GetSeq_id_Handle(iter);
- if (idh.GetSeqId()->IsGi()) {
- return *idh.GetSeqId();
- }
- }
- }}
- break;
- case eGetId_Best:
- {{
- CConstRef<CSynonymsSet> syns =
- handle.GetScope().GetSynonyms(handle.GetSeq_id_Handle());
- if ( !syns ) {
- string msg("No synonyms found for sequence ");
- handle.GetSeqId()->GetLabel(&msg);
- NCBI_THROW(CSeqIdFromHandleException, eNoSynonyms, msg);
- }
- list< CRef<CSeq_id> > ids;
- ITERATE (CSynonymsSet, iter, *syns) {
- CSeq_id_Handle idh = CSynonymsSet::GetSeq_id_Handle(iter);
- ids.push_back
- (CRef<CSeq_id>(const_cast<CSeq_id*>(idh.GetSeqId().GetPointer())));
- }
- CConstRef<CSeq_id> best_id = FindBestChoice(ids, CSeq_id::Score);
- if (best_id) {
- return *best_id;
- }
- }}
- break;
- default:
- NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
- "Unhandled seq-id type");
- break;
- }
- NCBI_THROW(CSeqIdFromHandleException, eRequestedIdNotFound,
- "No best seq-id could be found");
- }
- const CSeq_id& GetId(const CSeq_loc& loc, CScope* scope)
- THROWS((CNotUnique))
- {
- CTypeConstIterator<CSeq_id> cur = ConstBegin(loc);
- CTypeConstIterator<CSeq_id> first = ConstBegin(loc);
- if (!first) {
- THROW0_TRACE(CNotUnique());
- }
- for (++cur; cur; ++cur) {
- if ( !IsSameBioseq(*cur, *first, scope) ) {
- THROW0_TRACE(CNotUnique());
- }
- }
- return *first;
- }
- inline
- static ENa_strand s_GetStrand(const CSeq_loc& loc)
- {
- switch (loc.Which()) {
- case CSeq_loc::e_Bond:
- {
- const CSeq_bond& bond = loc.GetBond();
- ENa_strand a_strand = bond.GetA().IsSetStrand() ?
- bond.GetA().GetStrand() : eNa_strand_unknown;
- ENa_strand b_strand = eNa_strand_unknown;
- if ( bond.IsSetB() ) {
- b_strand = bond.GetB().IsSetStrand() ?
- bond.GetB().GetStrand() : eNa_strand_unknown;
- }
- if ( a_strand == eNa_strand_unknown &&
- b_strand != eNa_strand_unknown ) {
- a_strand = b_strand;
- } else if ( a_strand != eNa_strand_unknown &&
- b_strand == eNa_strand_unknown ) {
- b_strand = a_strand;
- }
- return (a_strand != b_strand) ? eNa_strand_other : a_strand;
- }
- case CSeq_loc::e_Whole:
- return eNa_strand_both;
- case CSeq_loc::e_Int:
- return loc.GetInt().IsSetStrand() ? loc.GetInt().GetStrand() :
- eNa_strand_unknown;
- case CSeq_loc::e_Pnt:
- return loc.GetPnt().IsSetStrand() ? loc.GetPnt().GetStrand() :
- eNa_strand_unknown;
- case CSeq_loc::e_Packed_pnt:
- return loc.GetPacked_pnt().IsSetStrand() ?
- loc.GetPacked_pnt().GetStrand() : eNa_strand_unknown;
- case CSeq_loc::e_Packed_int:
- {
- ENa_strand strand = eNa_strand_unknown;
- bool strand_set = false;
- ITERATE(CPacked_seqint::Tdata, i, loc.GetPacked_int().Get()) {
- ENa_strand istrand = (*i)->IsSetStrand() ? (*i)->GetStrand() :
- eNa_strand_unknown;
- if (strand == eNa_strand_unknown && istrand == eNa_strand_plus) {
- strand = eNa_strand_plus;
- strand_set = true;
- } else if (strand == eNa_strand_plus &&
- istrand == eNa_strand_unknown) {
- istrand = eNa_strand_plus;
- strand_set = true;
- } else if (!strand_set) {
- strand = istrand;
- strand_set = true;
- } else if (istrand != strand) {
- return eNa_strand_other;
- }
- }
- return strand;
- }
- case CSeq_loc::e_Mix:
- {
- ENa_strand strand = eNa_strand_unknown;
- bool strand_set = false;
- ITERATE(CSeq_loc_mix::Tdata, it, loc.GetMix().Get()) {
- ENa_strand istrand = GetStrand(**it);
- if (strand == eNa_strand_unknown && istrand == eNa_strand_plus) {
- strand = eNa_strand_plus;
- strand_set = true;
- } else if (strand == eNa_strand_plus &&
- istrand == eNa_strand_unknown) {
- istrand = eNa_strand_plus;
- strand_set = true;
- } else if (!strand_set) {
- strand = istrand;
- strand_set = true;
- } else if (istrand != strand) {
- return eNa_strand_other;
- }
- }
- }
- default:
- return eNa_strand_unknown;
- }
- }
- ENa_strand GetStrand(const CSeq_loc& loc, CScope* scope)
- {
- if (!IsOneBioseq(loc, scope)) {
- return eNa_strand_unknown; // multiple bioseqs
- }
- ENa_strand strand = eNa_strand_unknown, cstrand;
- bool strand_set = false;
- for (CSeq_loc_CI i(loc); i; ++i) {
- switch (i.GetSeq_loc().Which()) {
- case CSeq_loc::e_Equiv:
- break;
- default:
- cstrand = s_GetStrand(i.GetSeq_loc());
- if (strand == eNa_strand_unknown && cstrand == eNa_strand_plus) {
- strand = eNa_strand_plus;
- strand_set = true;
- } else if (strand == eNa_strand_plus &&
- cstrand == eNa_strand_unknown) {
- cstrand = eNa_strand_plus;
- strand_set = true;
- } else if (!strand_set) {
- strand = cstrand;
- strand_set = true;
- } else if (cstrand != strand) {
- return eNa_strand_other;
- }
- }
- }
- return strand;
- }
- TSeqPos GetStart(const CSeq_loc& loc, CScope* scope)
- THROWS((CNotUnique))
- {
- // Throw CNotUnique if loc does not represent one CBioseq
- GetId(loc, scope);
- CSeq_loc::TRange rg = loc.GetTotalRange();
- return rg.GetFrom();
- }
- TSeqPos GetStop(const CSeq_loc& loc, CScope* scope)
- THROWS((CNotUnique))
- {
- // Throw CNotUnique if loc does not represent one CBioseq
- GetId(loc, scope);
- CSeq_loc::TRange rg = loc.GetTotalRange();
- return rg.GetTo();
- }
- /*
- *******************************************************************************
- Implementation of Compare()
- *******************************************************************************
- */
- ECompare s_Compare
- (const CSeq_loc&,
- const CSeq_loc&,
- CScope*);
- int s_RetA[5][5] = {
- { 0, 4, 2, 2, 4 },
- { 4, 1, 4, 1, 4 },
- { 2, 4, 2, 2, 4 },
- { 2, 1, 2, 3, 4 },
- { 4, 4, 4, 4, 4 }
- };
- int s_RetB[5][5] = {
- { 0, 1, 4, 1, 4 },
- { 1, 1, 4, 1, 1 },
- { 4, 4, 2, 2, 4 },
- { 1, 1, 4, 3, 4 },
- { 4, 1, 4, 4, 4 }
- };
- // Returns the complement of an ECompare value
- inline
- ECompare s_Complement(ECompare cmp)
- {
- switch ( cmp ) {
- case eContains:
- return eContained;
- case eContained:
- return eContains;
- default:
- return cmp;
- }
- }
- // Compare two Whole sequences
- inline
- ECompare s_Compare
- (const CSeq_id& id1,
- const CSeq_id& id2,
- CScope* scope)
- {
- // If both sequences from same CBioseq, they are the same
- if ( IsSameBioseq(id1, id2, scope) ) {
- return eSame;
- } else {
- return eNoOverlap;
- }
- }
- // Compare Whole sequence with a Bond
- inline
- ECompare s_Compare
- (const CSeq_id& id,
- const CSeq_bond& bond,
- CScope* scope)
- {
- unsigned int count = 0;
- // Increment count if bond point A is from same CBioseq as id
- if ( IsSameBioseq(id, bond.GetA().GetId(), scope) ) {
- ++count;
- }
- // Increment count if second point in bond is set and is from
- // same CBioseq as id.
- if (bond.IsSetB() && IsSameBioseq(id, bond.GetB().GetId(), scope)) {
- ++count;
- }
- switch ( count ) {
- case 1:
- if ( bond.IsSetB() ) {
- // One of two bond points are from same CBioseq as id --> overlap
- return eOverlap;
- } else {
- // There is only one bond point set --> id contains bond
- return eContains;
- }
- case 2:
- // Both bond points are from same CBioseq as id --> id contains bond
- return eContains;
- default:
- // id and bond do not overlap
- return eNoOverlap;
- }
- }
- // Compare Whole sequence with an interval
- inline
- ECompare s_Compare
- (const CSeq_id& id,
- const CSeq_interval& interval,
- CScope* scope)
- {
- // If interval is from same CBioseq as id and interval is
- // [0, length of id-1], then they are the same. If interval is from same
- // CBioseq as id, but interval is not [0, length of id -1] then id
- // contains seqloc.
- if ( IsSameBioseq(id, interval.GetId(), scope) ) {
- if (interval.GetFrom() == 0 &&
- interval.GetTo() == GetLength(id, scope) - 1) {
- return eSame;
- } else {
- return eContains;
- }
- } else {
- return eNoOverlap;
- }
- }
- // Compare Whole sequence with a point
- inline
- ECompare s_Compare
- (const CSeq_id& id,
- const CSeq_point& point,
- CScope* scope)
- {
- // If point from the same CBioseq as id, then id contains point
- if ( IsSameBioseq(id, point.GetId(), scope) ) {
- return eContains;
- } else {
- return eNoOverlap;
- }
- }
- // Compare Whole sequence with packed points
- inline
- ECompare s_Compare
- (const CSeq_id& id,
- const CPacked_seqpnt& packed,
- CScope* scope)
- {
- // If packed from same CBioseq as id, then id contains packed
- if ( IsSameBioseq(id, packed.GetId(), scope) ) {
- return eContains;
- } else {
- return eNoOverlap;
- }
- }
- // Compare an interval with a bond
- inline
- ECompare s_Compare
- (const CSeq_interval& interval,
- const CSeq_bond& bond,
- CScope* scope)
- {
- unsigned int count = 0;
- // Increment count if interval contains the first point of the bond
- if (IsSameBioseq(interval.GetId(), bond.GetA().GetId(), scope) &&
- interval.GetFrom() <= bond.GetA().GetPoint() &&
- interval.GetTo() >= bond.GetA().GetPoint()) {
- ++count;
- }
- // Increment count if the second point of the bond is set and the
- // interval contains the second point.
- if (bond.IsSetB() &&
- IsSameBioseq(interval.GetId(), bond.GetB().GetId(), scope) &&
- interval.GetFrom() <= bond.GetB().GetPoint() &&
- interval.GetTo() >= bond.GetB().GetPoint()) {
- ++count;
- }
- switch ( count ) {
- case 1:
- if ( bond.IsSetB() ) {
- // The 2nd point of the bond is set
- return eOverlap;
- } else {
- // The 2nd point of the bond is not set
- return eContains;
- }
- case 2:
- // Both points of the bond are in the interval
- return eContains;
- default:
- return eNoOverlap;
- }
- }
- // Compare a bond with an interval
- inline
- ECompare s_Compare
- (const CSeq_bond& bond,
- const CSeq_interval& interval,
- CScope* scope)
- {
- // Just return the complement of comparing an interval with a bond
- return s_Complement(s_Compare(interval, bond, scope));
- }
- // Compare an interval to an interval
- inline
- ECompare s_Compare
- (const CSeq_interval& intA,
- const CSeq_interval& intB,
- CScope* scope)
- {
- // If the intervals are not on the same bioseq, then there is no overlap
- if ( !IsSameBioseq(intA.GetId(), intB.GetId(), scope) ) {
- return eNoOverlap;
- }
- // Compare two intervals
- TSeqPos fromA = intA.GetFrom();
- TSeqPos toA = intA.GetTo();
- TSeqPos fromB = intB.GetFrom();
- TSeqPos toB = intB.GetTo();
- if (fromA == fromB && toA == toB) {
- // End points are the same --> the intervals are the same.
- return eSame;
- } else if (fromA <= fromB && toA >= toB) {
- // intA contains intB
- return eContains;
- } else if (fromA >= fromB && toA <= toB) {
- // intB contains intA
- return eContained;
- } else if (fromA > toB || toA < fromB) {
- // No overlap
- return eNoOverlap;
- } else {
- // The only possibility left is overlap
- return eOverlap;
- }
- }
- // Compare an interval and a point
- inline
- ECompare s_Compare
- (const CSeq_interval& interval,
- const CSeq_point& point,
- CScope* scope)
- {
- // If not from same CBioseq, then no overlap
- if ( !IsSameBioseq(interval.GetId(), point.GetId(), scope) ) {
- return eNoOverlap;
- }
- TSeqPos pnt = point.GetPoint();
- // If the interval is of length 1 and contains the point, then they are
- // identical
- if (interval.GetFrom() == pnt && interval.GetTo() == pnt ) {
- return eSame;
- }
- // If point in interval, then interval contains point
- if (interval.GetFrom() <= pnt && interval.GetTo() >= pnt ) {
- return eContains;
- }
- // Only other possibility
- return eNoOverlap;
- }
- // Compare a point and an interval
- inline
- ECompare s_Compare
- (const CSeq_point& point,
- const CSeq_interval& interval,
- CScope* scope)
- {
- // The complement of comparing an interval with a point.
- return s_Complement(s_Compare(interval, point, scope));
- }
- // Compare a point with a point
- inline
- ECompare s_Compare
- (const CSeq_point& pntA,
- const CSeq_point& pntB,
- CScope* scope)
- {
- // If points are on same bioseq and coordinates are the same, then same.
- if (IsSameBioseq(pntA.GetId(), pntB.GetId(), scope) &&
- pntA.GetPoint() == pntB.GetPoint() ) {
- return eSame;
- }
- // Otherwise they don't overlap
- return eNoOverlap;
- }
- // Compare a point with packed point
- inline
- ECompare s_Compare
- (const CSeq_point& point,
- const CPacked_seqpnt& points,
- CScope* scope)
- {
- // If on the same bioseq, and any of the packed points are the
- // same as point, then the point is contained in the packed point
- if ( IsSameBioseq(point.GetId(), points.GetId(), scope) ) {
- TSeqPos pnt = point.GetPoint();
- // This loop will only be executed if points.GetPoints().size() > 0
- ITERATE(CSeq_loc::TPoints, it, points.GetPoints()) {
- if (pnt == *it) {
- return eContained;
- }
- }
- }
- // Otherwise, no overlap
- return eNoOverlap;
- }
- // Compare a point with a bond
- inline
- ECompare s_Compare
- (const CSeq_point& point,
- const CSeq_bond& bond,
- CScope* scope)
- {
- unsigned int count = 0;
- // If the first point of the bond is on the same CBioseq as point
- // and the point coordinates are the same, increment count by 1
- if (IsSameBioseq(point.GetId(), bond.GetA().GetId(), scope) &&
- bond.GetA().GetPoint() == point.GetPoint()) {
- ++count;
- }
- // If the second point of the bond is set, the points are from the
- // same CBioseq, and the coordinates of the points are the same,
- // increment the count by 1.
- if (bond.IsSetB() &&
- IsSameBioseq(point.GetId(), bond.GetB().GetId(), scope) &&
- bond.GetB().GetPoint() == point.GetPoint()) {
- ++count;
- }
- switch ( count ) {
- case 1:
- if ( bond.IsSetB() ) {
- // The second point of the bond is set -- overlap
- return eOverlap;
- // The second point of the bond is not set -- same
- } else {
- return eSame;
- }
- case 2:
- // Unlikely case -- can happen if both points of the bond are the same
- return eSame;
- default:
- // All that's left is no overlap
- return eNoOverlap;
- }
- }
- // Compare packed point with an interval
- inline
- ECompare s_Compare
- (const CPacked_seqpnt& points,
- const CSeq_interval& interval,
- CScope* scope)
- {
- // If points and interval are from same bioseq and some points are
- // in interval, then overlap. If all points are in interval, then
- // contained. Else, no overlap.
- if ( IsSameBioseq(points.GetId(), interval.GetId(), scope) ) {
- bool got_one = false;
- bool missed_one = false;
- TSeqPos from = interval.GetFrom();
- TSeqPos to = interval.GetTo();
- // This loop will only be executed if points.GetPoints().size() > 0
- ITERATE(CSeq_loc::TPoints, it, points.GetPoints()) {
- if (from <= *it && to >= *it) {
- got_one = true;
- } else {
- missed_one = true;
- }
- if (got_one && missed_one) {
- break;
- }
- }
- if ( !got_one ) {
- return eNoOverlap;
- } else if ( missed_one ) {
- return eOverlap;
- } else {
- return eContained;
- }
- }
- return eNoOverlap;
- }
- // Compare two packed points
- inline
- ECompare s_Compare
- (const CPacked_seqpnt& pntsA,
- const CPacked_seqpnt& pntsB,
- CScope* scope)
- {
- // If CPacked_seqpnts from different CBioseqs, then no overlap
- if ( !IsSameBioseq(pntsA.GetId(), pntsB.GetId(), scope) ) {
- return eNoOverlap;
- }
- const CSeq_loc::TPoints& pointsA = pntsA.GetPoints();
- const CSeq_loc::TPoints& pointsB = pntsB.GetPoints();
- // Check for equality. Note order of points is significant
- if (pointsA.size() == pointsB.size()) {
- CSeq_loc::TPoints::const_iterator iA = pointsA.begin();
- CSeq_loc::TPoints::const_iterator iB = pointsB.begin();
- bool check_containtment = false;
- // This loop will only be executed if pointsA.size() > 0
- while (iA != pointsA.end()) {
- if (*iA != *iB) {
- check_containtment = true;
- break;
- }
- ++iA;
- ++iB;
- }
- if ( !check_containtment ) {
- return eSame;
- }
- }
- // Check for containment
- size_t hits = 0;
- // This loop will only be executed if pointsA.size() > 0
- ITERATE(CSeq_loc::TPoints, iA, pointsA) {
- ITERATE(CSeq_loc::TPoints, iB, pointsB) {
- hits += (*iA == *iB) ? 1 : 0;
- }
- }
- if (hits == pointsA.size()) {
- return eContained;
- } else if (hits == pointsB.size()) {
- return eContains;
- } else if (hits == 0) {
- return eNoOverlap;
- } else {
- return eOverlap;
- }
- }
- // Compare packed point with bond
- inline
- ECompare s_Compare
- (const CPacked_seqpnt& points,
- const CSeq_bond& bond,
- CScope* scope)
- {
- // If all set bond points (can be just 1) are in points, then contains. If
- // one, but not all bond points in points, then overlap, else, no overlap
- const CSeq_point& bondA = bond.GetA();
- ECompare cmp = eNoOverlap;
- // Check for containment of the first residue in the bond
- if ( IsSameBioseq(points.GetId(), bondA.GetId(), scope) ) {
- TSeqPos pntA = bondA.GetPoint();
- // This loop will only be executed if points.GetPoints().size() > 0
- ITERATE(CSeq_loc::TPoints, it, points.GetPoints()) {
- if (pntA == *it) {
- cmp = eContains;
- break;
- }
- }
- }
- // Check for containment of the second residue of the bond if it exists
- if ( !bond.IsSetB() ) {
- return cmp;
- }
- const CSeq_point& bondB = bond.GetB();
- if ( !IsSameBioseq(points.GetId(), bondB.GetId(), scope) ) {
- return cmp;
- }
- TSeqPos pntB = bondB.GetPoint();
- // This loop will only be executed if points.GetPoints().size() > 0
- ITERATE(CSeq_loc::TPoints, it, points.GetPoints()) {
- if (pntB == *it) {
- if (cmp == eContains) {
- return eContains;
- } else {
- return eOverlap;
- }
- }
- }
- return cmp == eContains ? eOverlap : cmp;
- }
- // Compare a bond with a bond
- inline
- ECompare s_Compare
- (const CSeq_bond& bndA,
- const CSeq_bond& bndB,
- CScope* scope)
- {
- // Performs two comparisons. First comparison is comparing the a points
- // of each bond and the b points of each bond. The second comparison
- // compares point a of bndA with point b of bndB and point b of bndA
- // with point a of bndB. The best match is used.
- ECompare cmp1, cmp2;
- int div = static_cast<int> (eSame);
- // Compare first points in each bond
- cmp1 = s_Compare(bndA.GetA(), bndB.GetA(), scope);
- // Compare second points in each bond if both exist
- if (bndA.IsSetB() && bndB.IsSetB() ) {
- cmp2 = s_Compare(bndA.GetB(), bndB.GetB(), scope);
- } else {
- cmp2 = eNoOverlap;
- }
- int count1 = (static_cast<int> (cmp1) + static_cast<int> (cmp2)) / div;
- // Compare point A of bndA with point B of bndB (if it exists)
- if ( bndB.IsSetB() ) {
- cmp1 = s_Compare(bndA.GetA(), bndB.GetB(), scope);
- } else {
- cmp1 = eNoOverlap;
- }
- // Compare point B of bndA (if it exists) with point A of bndB.
- if ( bndA.IsSetB() ) {
- cmp2 = s_Compare(bndA.GetB(), bndB.GetA(), scope);
- } else {
- cmp2 = eNoOverlap;
- }
- int count2 = (static_cast<int> (cmp1) + static_cast<int> (cmp2)) / div;
- // Determine best match
- int count = (count1 > count2) ? count1 : count2;
- // Return based upon count in the obvious way
- switch ( count ) {
- case 1:
- if (!bndA.IsSetB() && !bndB.IsSetB()) {
- return eSame;
- } else if ( !bndB.IsSetB() ) {
- return eContains;
- } else if ( !bndA.IsSetB() ) {
- return eContained;
- } else {
- return eOverlap;
- }
- case 2:
- return eSame;
- default:
- return eNoOverlap;
- }
- }
- // Compare an interval with a whole sequence
- inline
- ECompare s_Compare
- (const CSeq_interval& interval,
- const CSeq_id& id,
- CScope* scope)
- {
- // Return the complement of comparing id with interval
- return s_Complement(s_Compare(id, interval, scope));
- }
- // Compare an interval with a packed point
- inline
- ECompare s_Compare
- (const CSeq_interval& interval,
- const CPacked_seqpnt& packed,
- CScope* scope)
- {
- // Return the complement of comparing packed points and an interval
- return s_Complement(s_Compare(packed, interval, scope));
- }
- // Compare a Packed Interval with one of Whole, Interval, Point,
- // Packed Point, or Bond.
- template <class T>
- ECompare s_Compare
- (const CPacked_seqint& intervals,
- const T& slt,
- CScope* scope)
- {
- // Check that intervals is not empty
- if(intervals.Get().size() == 0) {
- return eNoOverlap;
- }
- ECompare cmp1 , cmp2;
- CSeq_loc::TIntervals::const_iterator it = intervals.Get().begin();
- cmp1 = s_Compare(**it, slt, scope);
- // This loop will be executed only if intervals.Get().size() > 1
- for (++it; it != intervals.Get().end(); ++it) {
- cmp2 = s_Compare(**it, slt, scope);
- cmp1 = static_cast<ECompare> (s_RetA[cmp1][cmp2]);
- }
- return cmp1;
- }
- // Compare one of Whole, Interval, Point, Packed Point, or Bond with a
- // Packed Interval.
- template <class T>
- ECompare s_Compare
- (const T& slt,
- const CPacked_seqint& intervals,
- CScope* scope)
- {
- // Check that intervals is not empty
- if(intervals.Get().size() == 0) {
- return eNoOverlap;
- }
- ECompare cmp1 , cmp2;
- CSeq_loc::TIntervals::const_iterator it = intervals.Get().begin();
- cmp1 = s_Compare(slt, **it, scope);
- // This loop will be executed only if intervals.Get().size() > 1
- for (++it; it != intervals.Get().end(); ++it) {
- cmp2 = s_Compare(slt, **it, scope);
- cmp1 = static_cast<ECompare> (s_RetB[cmp1][cmp2]);
- }
- return cmp1;
- }
- // Compare a CSeq_loc and a CSeq_interval. Used by s_Compare_Help below
- // when comparing list<CRef<CSeq_loc>> with list<CRef<CSeq_interval>>.
- // By wrapping the CSeq_interval in a CSeq_loc, s_Compare(const CSeq_loc&,
- // const CSeq_loc&, CScope*) can be called. This permits CPacked_seqint type
- // CSeq_locs to be treated the same as CSeq_loc_mix and CSeq_loc_equiv type
- // CSeq_locs
- inline
- ECompare s_Compare
- (const CSeq_loc& sl,
- const CSeq_interval& si,
- CScope* scope)
- {
- CSeq_loc nsl;
- nsl.SetInt(const_cast<CSeq_interval&>(si));
- return s_Compare(sl, nsl, scope);
- }
- // Compare a CSeq_interval and a CSeq_loc. Used by s_Compare_Help below
- // when comparing TLocations with TIntervals. By
- // wrapping the CSeq_interval in a CSeq_loc, s_Compare(const CSeq_loc&,
- // const CSeq_loc&, CScope*) can be called. This permits CPacked_seqint type
- // CSeq_locs to be treated the same as CSeq_loc_mix and CSeq_loc_equiv type
- // CSeq_locs
- inline
- ECompare s_Compare
- (const CSeq_interval& si,
- const CSeq_loc& sl,
- CScope* scope)
- {
- CSeq_loc nsl;
- nsl.SetInt(const_cast<CSeq_interval&>(si));
- return s_Compare(nsl, sl, scope);
- }
- // Called by s_Compare() below for <CSeq_loc, CSeq_loc>,
- // <CSeq_loc, CSeq_interval>, <CSeq_interval, CSeq_loc>, and
- // <CSeq_interval, CSeq_interval>
- template <class T1, class T2>
- ECompare s_Compare_Help
- (const list<CRef<T1> >& mec,
- const list<CRef<T2> >& youc,
- const CSeq_loc& you,
- CScope* scope)
- {
- // If either mec or youc is empty, return eNoOverlap
- if(mec.size() == 0 || youc.size() == 0) {
- return eNoOverlap;
- }
- typename list<CRef<T1> >::const_iterator mit = mec.begin();
- typename list<CRef<T2> >::const_iterator yit = youc.begin();
- ECompare cmp1, cmp2;
- // Check for equality of the lists. Note, order of the lists contents are
- // significant
- if (mec.size() == youc.size()) {
- bool check_contained = false;
- for ( ; mit != mec.end() && yit != youc.end(); ++mit, ++yit) {
- if (s_Compare(**mit, ** yit, scope) != eSame) {
- check_contained = true;
- break;
- }
- }
- if ( !check_contained ) {
- return eSame;
- }
- }
- // Check if mec contained in youc
- mit = mec.begin();
- yit = youc.begin();
- bool got_one = false;
- while (mit != mec.end() && yit != youc.end()) {
- cmp1 = s_Compare(**mit, **yit, scope);
- switch ( cmp1 ) {
- case eNoOverlap:
- ++yit;
- break;
- case eSame:
- ++mit;
- ++yit;
- got_one = true;
- break;
- case eContained:
- ++mit;
- got_one = true;
- break;
- default:
- got_one = true;
- // artificial trick -- just to get out the loop "prematurely"
- yit = youc.end();
- }
- }
- if (mit == mec.end()) {
- return eContained;
- }
- // Check if mec contains youc
- mit = mec.begin();
- yit = youc.begin();
- while (mit != mec.end() && yit != youc.end() ) {
- cmp1 = s_Compare(**yit, **mit, scope);
- switch ( cmp1 ) {
- case eNoOverlap:
- ++mit;
- break;
- case eSame:
- ++mit;
- ++yit;
- got_one = true;
- break;
- case eContained:
- ++yit;
- got_one = true;
- break;
- default:
- got_one = true;
- // artificial trick -- just to get out the loop "prematurely"
- mit = mec.end();
- }
- }
- if (yit == youc.end()) {
- return eContains;
- }
- // Check for overlap of mec and youc
- if ( got_one ) {
- return eOverlap;
- }
- mit = mec.begin();
- cmp1 = s_Compare(**mit, you, scope);
- for (++mit; mit != mec.end(); ++mit) {
- cmp2 = s_Compare(**mit, you, scope);
- cmp1 = static_cast<ECompare> (s_RetA[cmp1][cmp2]);
- }
- return cmp1;
- }
- inline
- const CSeq_loc::TLocations& s_GetLocations(const CSeq_loc& loc)
- {
- switch (loc.Which()) {
- case CSeq_loc::e_Mix: return loc.GetMix().Get();
- case CSeq_loc::e_Equiv: return loc.GetEquiv().Get();
- default: // should never happen, but the compiler doesn't know that...
- NCBI_THROW(CObjmgrUtilException, eBadLocation,
- "s_GetLocations: unsupported location type:"
- + CSeq_loc::SelectionName(loc.Which()));
- }
- }
- // Compares two Seq-locs
- ECompare s_Compare
- (const CSeq_loc& me,
- const CSeq_loc& you,
- CScope* scope)
- {
- // Handle the case where me is one of e_Mix, e_Equiv, e_Packed_int
- switch ( me.Which() ) {
- case CSeq_loc::e_Mix:
- case CSeq_loc::e_Equiv: {
- const CSeq_loc::TLocations& pmc = s_GetLocations(me);
- switch ( you.Which() ) {
- case CSeq_loc::e_Mix:
- case CSeq_loc::e_Equiv: {
- const CSeq_loc::TLocations& pyc = s_GetLocations(you);
- return s_Compare_Help(pmc, pyc, you, scope);
- }
- case CSeq_loc::e_Packed_int: {
- const CSeq_loc::TIntervals& pyc = you.GetPacked_int().Get();
- return s_Compare_Help(pmc,pyc, you, scope);
- }
- case CSeq_loc::e_Null:
- case CSeq_loc::e_Empty:
- case CSeq_loc::e_Whole:
- case CSeq_loc::e_Int:
- case CSeq_loc::e_Pnt:
- case CSeq_loc::e_Packed_pnt:
- case CSeq_loc::e_Bond:
- case CSeq_loc::e_Feat: {
- //Check that pmc is not empty
- if(pmc.size() == 0) {
- return eNoOverlap;
- }
- CSeq_loc::TLocations::const_iterator mit = pmc.begin();
- ECompare cmp1, cmp2;
- cmp1 = s_Compare(**mit, you, scope);
- // This loop will only be executed if pmc.size() > 1
- for (++mit; mit != pmc.end(); ++mit) {
- cmp2 = s_Compare(**mit, you, scope);
- cmp1 = static_cast<ECompare> (s_RetA[cmp1][cmp2]);
- }
- return cmp1;
- }
- default:
- return eNoOverlap;
- }
- }
- case CSeq_loc::e_Packed_int: {
- switch ( you.Which() ) {
- case CSeq_loc::e_Mix:
- case CSeq_loc::e_Equiv: {
- const CSeq_loc::TLocations& pyc = s_GetLocations(you);
- const CSeq_loc::TIntervals& pmc = me.GetPacked_int().Get();
- return s_Compare_Help(pmc,pyc, you, scope);
- }
- case CSeq_loc::e_Packed_int: {
- const CSeq_loc::TIntervals& mc = me.GetPacked_int().Get();
- const CSeq_loc::TIntervals& yc = you.GetPacked_int().Get();
- return s_Compare_Help(mc, yc, you, scope);
- }
- default:
- // All other cases are handled below
- break;
- }
- }
- default:
- // All other cases are handled below
- break;
- }
- // Note, at this point, me is not one of e_Mix or e_Equiv
- switch ( you.Which() ) {
- case CSeq_loc::e_Mix:
- case CSeq_loc::e_Equiv: {
- const CSeq_loc::TLocations& pyouc = s_GetLocations(you);
- // Check that pyouc is not empty
- if(pyouc.size() == 0) {
- return eNoOverlap;
- }
- CSeq_loc::TLocations::const_iterator yit = pyouc.begin();
- ECompare cmp1, cmp2;
- cmp1 = s_Compare(me, **yit, scope);
- // This loop will only be executed if pyouc.size() > 1
- for (++yit; yit != pyouc.end(); ++yit) {
- cmp2 = s_Compare(me, **yit, scope);
- cmp1 = static_cast<ECompare> (s_RetB[cmp1][cmp2]);
- }
- return cmp1;
- }
- // All other cases handled below
- default:
- break;
- }
- switch ( me.Which() ) {
- case CSeq_loc::e_Null: {
- switch ( you.Which() ) {
- case CSeq_loc::e_Null:
- return eSame;
- default:
- return eNoOverlap;
- }
- }
- case CSeq_loc::e_Empty: {
- switch ( you.Which() ) {
- case CSeq_loc::e_Empty:
- if ( IsSameBioseq(me.GetEmpty(), you.GetEmpty(), scope) ) {
- return eSame;
- } else {
- return eNoOverlap;
- }
- default:
- return eNoOverlap;
- }
- }
- case CSeq_loc::e_Packed_int: {
- // Comparison of two e_Packed_ints is handled above
- switch ( you.Which() ) {
- case CSeq_loc::e_Whole:
- return s_Compare(me.GetPacked_int(), you.GetWhole(), scope);
- case CSeq_loc::e_Int:
- return s_Compare(me.GetPacked_int(), you.GetInt(), scope);
- case CSeq_loc::e_Pnt:
- return s_Compare(me.GetPacked_int(), you.GetPnt(), scope);
- case CSeq_loc::e_Packed_pnt:
- return s_Compare(me.GetPacked_int(), you.GetPacked_pnt(), scope);
- case CSeq_loc::e_Bond:
- return s_Compare(me.GetPacked_int(), you.GetBond(), scope);
- default:
- return eNoOverlap;
- }
- }
- case CSeq_loc::e_Whole: {
- switch ( you.Which() ) {
- case CSeq_loc::e_Whole:
- return s_Compare(me.GetWhole(), you.GetWhole(), scope);
- case CSeq_loc::e_Bond:
- return s_Compare(me.GetWhole(), you.GetBond(), scope);
- case CSeq_loc::e_Int:
- return s_Compare(me.GetWhole(), you.GetInt(), scope);
- case CSeq_loc::e_Pnt:
- return s_Compare(me.GetWhole(), you.GetPnt(), scope);
- case CSeq_loc::e_Packed_pnt:
- return s_Compare(me.GetWhole(), you.GetPacked_pnt(), scope);
- case CSeq_loc::e_Packed_int:
- return s_Compare(me.GetWhole(), you.GetPacked_int(), scope);
- default:
- return eNoOverlap;
- }
- }
- case CSeq_loc::e_Int: {
- switch ( you.Which() ) {
- case CSeq_loc::e_Whole:
- return s_Compare(me.GetInt(), you.GetWhole(), scope);
- case CSeq_loc::e_Bond:
- return s_Compare(me.GetInt(), you.GetBond(), scope);
- case CSeq_loc::e_Int:
- return s_Compare(me.GetInt(), you.GetInt(), scope);
- case CSeq_loc::e_Pnt:
- return s_Compare(me.GetInt(), you.GetPnt(), scope);
- case CSeq_loc::e_Packed_pnt:
- return s_Compare(me.GetInt(), you.GetPacked_pnt(), scope);
- case CSeq_loc::e_Packed_int:
- return s_Compare(me.GetInt(), you.GetPacked_int(), scope);
- default:
- return eNoOverlap;
- }
- }
- case CSeq_loc::e_Pnt: {
- switch ( you.Which() ) {
- case CSeq_loc::e_Whole:
- return s_Complement(s_Compare(you.GetWhole(), me.GetPnt(), scope));
- case CSeq_loc::e_Int:
- return s_Compare(me.GetPnt(), you.GetInt(), scope);
- case CSeq_loc::e_Pnt:
- return s_Compare(me.GetPnt(), you.GetPnt(), scope);
- case CSeq_loc::e_Packed_pnt:
- return s_Compare(me.GetPnt(), you.GetPacked_pnt(), scope);
- case CSeq_loc::e_Bond:
- return s_Compare(me.GetPnt(), you.GetBond(), scope);
- case CSeq_loc::e_Packed_int:
- return s_Compare(me.GetPnt(), you.GetPacked_int(), scope);
- default:
- return eNoOverlap;
- }
- }
- case CSeq_loc::e_Packed_pnt: {
- const CPacked_seqpnt& packed = me.GetPacked_pnt();
- switch ( you.Which() ) {
- case CSeq_loc::e_Whole:
- return s_Complement(s_Compare(you.GetWhole(), packed, scope));
- case CSeq_loc::e_Int:
- return s_Compare(packed, you.GetInt(), scope);
- case CSeq_loc::e_Pnt:
- return s_Complement(s_Compare(you.GetPnt(), packed, scope));
- case CSeq_loc::e_Packed_pnt:
- return s_Compare(packed, you.GetPacked_pnt(),scope);
- case CSeq_loc::e_Bond:
- return s_Compare(packed, you.GetBond(), scope);
- case CSeq_loc::e_Packed_int:
- return s_Compare(me.GetPacked_pnt(), you.GetPacked_int(), scope);
- default:
- return eNoOverlap;
- }
- }
- case CSeq_loc::e_Bond: {
- const CSeq_bond& bnd = me.GetBond();
- switch ( you.Which() ) {
- case CSeq_loc::e_Whole:
- return s_Complement(s_Compare(you.GetWhole(), bnd, scope));
- case CSeq_loc::e_Bond:
- return s_Compare(bnd, you.GetBond(), scope);
- case CSeq_loc::e_Int:
- return s_Compare(bnd, you.GetInt(), scope);
- case CSeq_loc::e_Packed_pnt:
- return s_Complement(s_Compare(you.GetPacked_pnt(), bnd, scope));
- case CSeq_loc::e_Pnt:
- return s_Complement(s_Compare(you.GetPnt(), bnd, scope));
- case CSeq_loc::e_Packed_int:
- return s_Complement(s_Compare(you.GetPacked_int(), bnd, scope));
- default:
- return eNoOverlap;
- }
- }
- default:
- return eNoOverlap;
- }
- }
- ECompare Compare
- (const CSeq_loc& loc1,
- const CSeq_loc& loc2,
- CScope* scope)
- {
- return s_Compare(loc1, loc2, scope);
- }
- void ChangeSeqId(CSeq_id* id, bool best, CScope* scope)
- {
- // Return if no scope
- if (!scope) {
- return;
- }
- // Get CBioseq represented by *id
- CBioseq_Handle::TBioseqCore seq =
- scope->GetBioseqHandle(*id).GetBioseqCore();
- // Get pointer to the best/worst id of *seq
- const CSeq_id* tmp_id;
- if (!best) {
- tmp_id = FindBestChoice(seq->GetId(), CSeq_id::BestRank).GetPointer();
- } else {
- tmp_id = FindBestChoice(seq->GetId(), CSeq_id::WorstRank).GetPointer();
- }
- // Change the contents of *id to that of *tmp_id
- id->Reset();
- id->Assign(*tmp_id);
- }
- void ChangeSeqLocId(CSeq_loc* loc, bool best, CScope* scope)
- {
- if (!scope) {
- return;
- }
- for (CTypeIterator<CSeq_id> id(Begin(*loc)); id; ++id) {
- ChangeSeqId(&(*id), best, scope);
- }
- }
- bool BadSeqLocSortOrder
- (const CBioseq&, // seq,
- const CSeq_loc& loc,
- CScope* scope)
- {
- ENa_strand strand = GetStrand(loc, scope);
- if (strand == eNa_strand_unknown || strand == eNa_strand_other) {
- return false;
- }
-
- // Check that loc segments are in order
- CSeq_loc::TRange last_range;
- bool first = true;
- for (CSeq_loc_CI lit(loc); lit; ++lit) {
- if (first) {
- last_range = lit.GetRange();
- first = false;
- continue;
- }
- if (strand == eNa_strand_minus) {
- if (last_range.GetTo() < lit.GetRange().GetTo()) {
- return true;
- }
- } else {
- if (last_range.GetFrom() > lit.GetRange().GetFrom()) {
- return true;
- }
- }
- last_range = lit.GetRange();
- }
- return false;
- }
- ESeqLocCheck SeqLocCheck(const CSeq_loc& loc, CScope* scope)
- {
- ESeqLocCheck rtn = eSeqLocCheck_ok;
- ENa_strand strand = GetStrand(loc, scope);
- if (strand == eNa_strand_unknown || strand == eNa_strand_other) {
- rtn = eSeqLocCheck_warning;
- }
- CTypeConstIterator<CSeq_loc> lit(ConstBegin(loc));
- for (;lit; ++lit) {
- switch (lit->Which()) {
- case CSeq_loc::e_Int:
- if (!IsValid(lit->GetInt(), scope)) {
- rtn = eSeqLocCheck_error;
- }
- break;
- case CSeq_loc::e_Packed_int:
- {
- CTypeConstIterator<CSeq_interval> sit(ConstBegin(*lit));
- for(;sit; ++sit) {
- if (!IsValid(*sit, scope)) {
- rtn = eSeqLocCheck_error;
- break;
- }
- }
- break;
- }
- case CSeq_loc::e_Pnt:
- if (!IsValid(lit->GetPnt(), scope)) {
- rtn = eSeqLocCheck_error;
- }
- break;
- case CSeq_loc::e_Packed_pnt:
- if (!IsValid(lit->GetPacked_pnt(), scope)) {
- rtn = eSeqLocCheck_error;
- }
- break;
- default:
- break;
- }
- }
- return rtn;
- }
- CRef<CSeq_loc> SourceToProduct(const CSeq_feat& feat,
- const CSeq_loc& source_loc, TS2PFlags flags,
- CScope* scope, int* frame)
- {
- SRelLoc::TFlags rl_flags = 0;
- if (flags & fS2P_NoMerge) {
- rl_flags |= SRelLoc::fNoMerge;
- }
- SRelLoc rl(feat.GetLocation(), source_loc, scope, rl_flags);
- _ASSERT(!rl.m_Ranges.empty());
- rl.m_ParentLoc.Reset(&feat.GetProduct());
- if (feat.GetData().IsCdregion()) {
- // 3:1 ratio
- const CCdregion& cds = feat.GetData().GetCdregion();
- int base_frame = cds.GetFrame();
- if (base_frame > 0) {
- --base_frame;
- }
- if (frame) {
- *frame = 3 - (rl.m_Ranges.front()->GetFrom() + 2 - base_frame) % 3;
- }
- TSeqPos prot_length;
- try {
- prot_length = GetLength(feat.GetProduct(), scope);
- } catch (CNoLength) {
- prot_length = numeric_limits<TSeqPos>::max();
- }
- NON_CONST_ITERATE (SRelLoc::TRanges, it, rl.m_Ranges) {
- if (IsReverse((*it)->GetStrand())) {
- ERR_POST(Warning
- << "SourceToProduct:"
- " parent and child have opposite orientations");
- }
- (*it)->SetFrom(((*it)->GetFrom() - base_frame) / 3);
- (*it)->SetTo (((*it)->GetTo() - base_frame) / 3);
- if ((flags & fS2P_AllowTer) && (*it)->GetTo() == prot_length) {
- --(*it)->SetTo();
- }
- }
- } else {
- if (frame) {
- *frame = 0; // not applicable; explicitly zero
- }
- }
- return rl.Resolve(scope, rl_flags);
- }
- CRef<CSeq_loc> ProductToSource(const CSeq_feat& feat, const CSeq_loc& prod_loc,
- TP2SFlags flags, CScope* scope)
- {
- SRelLoc rl(feat.GetProduct(), prod_loc, scope);
- _ASSERT(!rl.m_Ranges.empty());
- rl.m_ParentLoc.Reset(&feat.GetLocation());
- if (feat.GetData().IsCdregion()) {
- // 3:1 ratio
- const CCdregion& cds = feat.GetData().GetCdregion();
- int base_frame = cds.GetFrame();
- if (base_frame > 0) {
- --base_frame;
- }
- TSeqPos nuc_length, prot_length;
- try {
- nuc_length = GetLength(feat.GetLocation(), scope);
- } catch (CNoLength) {
- nuc_length = numeric_limits<TSeqPos>::max();
- }
- try {
- prot_length = GetLength(feat.GetProduct(), scope);
- } catch (CNoLength) {
- prot_length = numeric_limits<TSeqPos>::max();
- }
- NON_CONST_ITERATE(SRelLoc::TRanges, it, rl.m_Ranges) {
- _ASSERT( !IsReverse((*it)->GetStrand()) );
- TSeqPos from, to;
- if ((flags & fP2S_Extend) && (*it)->GetFrom() == 0) {
- from = 0;
- } else {
- from = (*it)->GetFrom() * 3 + base_frame;
- }
- if ((flags & fP2S_Extend) && (*it)->GetTo() == prot_length - 1) {
- to = nuc_length - 1;
- } else {
- to = (*it)->GetTo() * 3 + base_frame + 2;
- }
- (*it)->SetFrom(from);
- (*it)->SetTo (to);
- }
- }
- return rl.Resolve(scope);
- }
- TSeqPos LocationOffset(const CSeq_loc& outer, const CSeq_loc& inner,
- EOffsetType how, CScope* scope)
- {
- SRelLoc rl(outer, inner, scope);
- if (rl.m_Ranges.empty()) {
- return (TSeqPos)-1;
- }
- bool want_reverse = false;
- {{
- bool outer_is_reverse = IsReverse(GetStrand(outer, scope));
- switch (how) {
- case eOffset_FromStart:
- want_reverse = false;
- break;
- case eOffset_FromEnd:
- want_reverse = true;
- break;
- case eOffset_FromLeft:
- want_reverse = outer_is_reverse;
- break;
- case eOffset_FromRight:
- want_reverse = !outer_is_reverse;
- break;
- }
- }}
- if (want_reverse) {
- return GetLength(outer, scope) - rl.m_Ranges.back()->GetTo();
- } else {
- return rl.m_Ranges.front()->GetFrom();
- }
- }
- bool TestForStrands(ENa_strand strand1, ENa_strand strand2)
- {
- // Check strands. Overlapping rules for strand:
- // - equal strands overlap
- // - "both" overlaps with any other
- // - "unknown" overlaps with any other except "minus"
- return strand1 == strand2
- || strand1 == eNa_strand_both
- || strand2 == eNa_strand_both
- || (strand1 == eNa_strand_unknown && strand2 != eNa_strand_minus)
- || (strand2 == eNa_strand_unknown && strand1 != eNa_strand_minus);
- }
- bool TestForIntervals(CSeq_loc_CI it1, CSeq_loc_CI it2, bool minus_strand)
- {
- // Check intervals one by one
- while ( it1 && it2 ) {
- if ( !TestForStrands(it1.GetStrand(), it2.GetStrand()) ||
- !it1.GetSeq_id().Equals(it2.GetSeq_id())) {
- return false;
- }
- if ( minus_strand ) {
- if (it1.GetRange().GetFrom() != it2.GetRange().GetFrom() ) {
- // The last interval from loc2 may be shorter than the
- // current interval from loc1
- if (it1.GetRange().GetFrom() > it2.GetRange().GetFrom() ||
- ++it2) {
- return false;
- }
- break;
- }
- }
- else {
- if (it1.GetRange().GetTo() != it2.GetRange().GetTo() ) {
- // The last interval from loc2 may be shorter than the
- // current interval from loc1
- if (it1.GetRange().GetTo() < it2.GetRange().GetTo() ||
- ++it2) {
- return false;
- }
- break;
- }
- }
- // Go to the next interval start
- if ( !(++it2) ) {
- break;
- }
- if ( !(++it1) ) {
- return false; // loc1 has not enough intervals
- }
- if ( minus_strand ) {
- if (it1.GetRange().GetTo() != it2.GetRange().GetTo()) {
- return false;
- }
- }
- else {
- if (it1.GetRange().GetFrom() != it2.GetRange().GetFrom()) {
- return false;
- }
- }
- }
- return true;
- }
- int x_TestForOverlap_MultiSeq(const CSeq_loc& loc1,
- const CSeq_loc& loc2,
- EOverlapType type)
- {
- // Special case of TestForOverlap() - multi-sequences locations
- switch (type) {
- case eOverlap_Simple:
- {
- // Find all intersecting intervals
- int diff = 0;
- for (CSeq_loc_CI li1(loc1); li1; ++li1) {
- for (CSeq_loc_CI li2(loc2); li2; ++li2) {
- if ( !li1.GetSeq_id().Equals(li2.GetSeq_id()) ) {
- continue;
- }
- CRange<TSeqPos> rg1 = li1.GetRange();
- CRange<TSeqPos> rg2 = li2.GetRange();
- if ( rg1.GetTo() >= rg2.GetFrom() &&
- rg1.GetFrom() <= rg2.GetTo() ) {
- diff += abs(int(rg2.GetFrom() - rg1.GetFrom())) +
- abs(int(rg1.GetTo() - rg2.GetTo()));
- }
- }
- }
- return diff ? diff : -1;
- }
- case eOverlap_Contained:
- {
- // loc2 is contained in loc1
- CHandleRangeMap rm1, rm2;
- rm1.AddLocation(loc1);
- rm2.AddLocation(loc2);
- int diff = 0;
- CHandleRangeMap::const_iterator it2 = rm2.begin();
- for ( ; it2 != rm2.end(); ++it2) {
- CHandleRangeMap::const_iterator it1 =
- rm1.GetMap().find(it2->first);
- if (it1 == rm1.end()) {
- // loc2 has region on a sequence not present in loc1
- diff = -1;
- break;
- }
- CRange<TSeqPos> rg1 = it1->second.GetOverlappingRange();
- CRange<TSeqPos> rg2 = it2->second.GetOverlappingRange();
- if ( rg1.GetFrom() <= rg2.GetFrom() &&
- rg1.GetTo() >= rg2.GetTo() ) {
- diff += (rg2.GetFrom() - rg1.GetFrom()) +
- (rg1.GetTo() - rg2.GetTo());
- }
- else {
- // Found an interval on loc2 not contained in loc1
- diff = -1;
- break;
- }
- }
- return diff;
- }
- case eOverlap_Contains:
- {
- // loc2 is contained in loc1
- CHandleRangeMap rm1, rm2;
- rm1.AddLocation(loc1);
- rm2.AddLocation(loc2);
- int diff = 0;
- CHandleRangeMap::const_iterator it1 = rm1.begin();
- for ( ; it1 != rm1.end(); ++it1) {
- CHandleRangeMap::const_iterator it2 =
- rm2.GetMap().find(it1->first);
- if (it2 == rm2.end()) {
- // loc1 has region on a sequence not present in loc2
- diff = -1;
- break;
- }
- CRange<TSeqPos> rg1 = it1->second.GetOverlappingRange();
- CRange<TSeqPos> rg2 = it2->second.GetOverlappingRange();
- if ( rg2.GetFrom() <= rg1.GetFrom() &&
- rg2.GetTo() >= rg1.GetTo() ) {
- diff += (rg1.GetFrom() - rg2.GetFrom()) +
- (rg2.GetTo() - rg1.GetTo());
- }
- else {
- // Found an interval on loc1 not contained in loc2
- diff = -1;
- break;
- }
- }
- return diff;
- }
- case eOverlap_Subset:
- case eOverlap_CheckIntervals:
- case eOverlap_Interval:
- {
- // For this types the function should not be called
- NCBI_THROW(CObjmgrUtilException, eNotImplemented,
- "TestForOverlap() -- error processing multi-ID seq-loc");
- }
- }
- return -1;
- }
- int TestForOverlap(const CSeq_loc& loc1,
- const CSeq_loc& loc2,
- EOverlapType type,
- TSeqPos circular_len)
- {
- CRange<TSeqPos> rg1, rg2;
- bool multi_seq = false;
- try {
- rg1 = loc1.GetTotalRange();
- rg2 = loc2.GetTotalRange();
- }
- catch (exception&) {
- // Can not use total range for multi-sequence locations
- if (type == eOverlap_Simple ||
- type == eOverlap_Contained ||
- type == eOverlap_Contains) {
- // Can not process circular multi-id locations
- if (circular_len != 0 && circular_len != kInvalidSeqPos) {
- throw;
- }
- return x_TestForOverlap_MultiSeq(loc1, loc2, type);
- }
- multi_seq = true;
- }
- ENa_strand strand1 = GetStrand(loc1);
- ENa_strand strand2 = GetStrand(loc2);
- if ( !TestForStrands(strand1, strand2) )
- return -1;
- switch (type) {
- case eOverlap_Simple:
- {
- if (circular_len != kInvalidSeqPos) {
- TSeqPos from1 = loc1.GetStart(circular_len);
- TSeqPos from2 = loc2.GetStart(circular_len);
- TSeqPos to1 = loc1.GetEnd(circular_len);
- TSeqPos to2 = loc2.GetEnd(circular_len);
- if (from1 > to1) {
- if (from2 > to2) {
- // Both locations are circular and must intersect at 0
- return abs(int(from2 - from1)) +
- abs(int(to1 - to2));
- }
- else {
- // Only the first location is circular, rg2 may be used
- // for the second one.
- int loc_len = rg2.GetLength() + loc1.GetCircularLength(circular_len);
- if (from1 < rg2.GetFrom() || to1 > rg2.GetTo()) {
- // loc2 is completely in loc1
- return loc_len - 2*rg2.GetLength();
- }
- if (from1 < rg2.GetTo()) {
- return loc_len - (rg2.GetTo() - from1);
- }
- if (rg2.GetFrom() < to1) {
- return loc_len - (to1 - rg2.GetFrom());
- }
- return -1;
- }
- }
- else if (from2 > to2) {
- // Only the second location is circular
- int loc_len = rg1.GetLength() + loc2.GetCircularLength(circular_len);
- if (from2 < rg1.GetFrom() || to2 > rg1.GetTo()) {
- // loc2 is completely in loc1
- return loc_len - 2*rg1.GetLength();
- }
- if (from2 < rg1.GetTo()) {
- return loc_len - (rg1.GetTo() - from2);
- }
- if (rg1.GetFrom() < to2) {
- return loc_len - (to2 - rg1.GetFrom());
- }
- return -1;
- }
- // Locations are not circular, proceed to normal calculations
- }
- if ( rg1.GetTo() >= rg2.GetFrom() &&
- rg1.GetFrom() <= rg2.GetTo() ) {
- return abs(int(rg2.GetFrom() - rg1.GetFrom())) +
- abs(int(rg1.GetTo() - rg2.GetTo()));
- }
- return -1;
- }
- case eOverlap_Contained:
- {
- if (circular_len != kInvalidSeqPos) {
- TSeqPos from1 = loc1.GetStart(circular_len);
- TSeqPos from2 = loc2.GetStart(circular_len);
- TSeqPos to1 = loc1.GetEnd(circular_len);
- TSeqPos to2 = loc2.GetEnd(circular_len);
- if (from1 > to1) {
- if (from2 > to2) {
- return (from1 <= from2 && to1 >= to2) ?
- (from2 - from1) - (to1 - to2) : -1;
- }
- else {
- if (rg2.GetFrom() >= from1 || rg2.GetTo() <= to1) {
- return loc1.GetCircularLength(circular_len) -
- rg2.GetLength();
- }
- return -1;
- }
- }
- else if (from2 > to2) {
- // Non-circular location can not contain a circular one
- return -1;
- }
- }
- if ( rg1.GetFrom() <= rg2.GetFrom() &&
- rg1.GetTo() >= rg2.GetTo() ) {
- return (rg2.GetFrom() - rg1.GetFrom()) +
- (rg1.GetTo() - rg2.GetTo());
- }
- return -1;
- }
- case eOverlap_Contains:
- {
- if (circular_len != kInvalidSeqPos) {
- TSeqPos from1 = loc1.GetStart(circular_len);
- TSeqPos from2 = loc2.GetStart(circular_len);
- TSeqPos to1 = loc1.GetEnd(circular_len);
- TSeqPos to2 = loc2.GetEnd(circular_len);
- if (from1 > to1) {
- if (from2 > to2) {
- return (from2 <= from1 && to2 >= to1) ?
- (from1 - from2) - (to2 - to1) : -1;
- }
- else {
- // Non-circular location can not contain a circular one
- return -1;
- }
- }
- else if (from2 > to2) {
- if (rg1.GetFrom() >= from2 || rg1.GetTo() <= to2) {
- return loc2.GetCircularLength(circular_len) -
- rg1.GetLength();
- }
- return -1;
- }
- }
- if ( rg2.GetFrom() <= rg1.GetFrom() &&
- rg2.GetTo() >= rg1.GetTo() ) {
- return (rg1.GetFrom() - rg2.GetFrom()) +
- (rg2.GetTo() - rg1.GetTo());
- }
- return -1;
- }
- case eOverlap_Subset:
- {
- // loc1 should contain loc2
- if ( Compare(loc1, loc2) != eContains ) {
- return -1;
- }
- return GetLength(loc1) - GetLength(loc2);
- }
- case eOverlap_CheckIntervals:
- {
- if ( !multi_seq &&
- (rg1.GetFrom() > rg2.GetTo() || rg1.GetTo() < rg2.GetTo()) ) {
- return -1;
- }
- // Check intervals' boundaries
- CSeq_loc_CI it1(loc1);
- CSeq_loc_CI it2(loc2);
- if (!it1 || !it2) {
- break;
- }
- // check case when strand is minus
- if (it2.GetStrand() == eNa_strand_minus) {
- // The first interval should be treated as the last one
- // for minuns strands.
- TSeqPos loc2end = it2.GetRange().GetTo();
- TSeqPos loc2start = it2.GetRange().GetFrom();
- // Find the first interval in loc1 intersecting with loc2
- for ( ; it1 && it1.GetRange().GetTo() >= loc2start; ++it1) {
- if (it1.GetRange().GetTo() >= loc2end &&
- TestForIntervals(it1, it2, true)) {
- return GetLength(loc1) - GetLength(loc2);
- }
- }
- }
- else {
- TSeqPos loc2start = it2.GetRange().GetFrom();
- TSeqPos loc2end = it2.GetRange().GetTo();
- // Find the first interval in loc1 intersecting with loc2
- for ( ; it1 /*&& it1.GetRange().GetFrom() <= loc2end*/; ++it1) {
- if (it1.GetSeq_id().Equals(it2.GetSeq_id()) &&
- it1.GetRange().GetFrom() <= loc2start &&
- TestForIntervals(it1, it2, false)) {
- return GetLength(loc1) - GetLength(loc2);
- }
- }
- }
- return -1;
- }
- case eOverlap_Interval:
- {
- return (Compare(loc1, loc2) == eNoOverlap) ? -1
- : abs(int(rg2.GetFrom() - rg1.GetFrom())) +
- abs(int(rg1.GetTo() - rg2.GetTo()));
- }
- }
- return -1;
- }
- CConstRef<CSeq_feat> x_GetBestOverlappingFeat(const CSeq_loc& loc,
- CSeqFeatData::E_Choice feat_type,
- CSeqFeatData::ESubtype feat_subtype,
- EOverlapType overlap_type,
- CScope& scope)
- {
- bool revert_locations = false;
- SAnnotSelector::EOverlapType annot_overlap_type;
- switch (overlap_type) {
- case eOverlap_Simple:
- case eOverlap_Contained:
- case eOverlap_Contains:
- // Require total range overlap
- annot_overlap_type = SAnnotSelector::eOverlap_TotalRange;
- break;
- case eOverlap_Subset: