wbplg_aligner.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:19k
- /*
- * ===========================================================================
- * PRODUCTION $Log: wbplg_aligner.cpp,v $
- * PRODUCTION Revision 1000.4 2004/06/01 20:54:41 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.54
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: wbplg_aligner.cpp,v 1000.4 2004/06/01 20:54:41 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Authors: Yuri Kapustin
- *
- * File Description:
- * CAlgoPlugin_NeedlemanWunsch -- wraps global alignment algorithms
- */
- #include <ncbi_pch.hpp>
- #include "wbplg_aligner.hpp"
- //#include "dlg_messagebox.hpp"
- #include <gui/dialogs/progress/progress_dlg.hpp>
- #include <algo/align/mm_aligner.hpp>
- #include <algo/align/nw_aligner.hpp>
- #include <algo/align/nw_formatter.hpp>
- #include <corelib/ncbitime.hpp>
- #include <gui/core/doc_manager.hpp>
- #include <gui/core/idocument.hpp>
- #include <gui/core/plugin_utils.hpp>
- #include <gui/core/selection_buffer.hpp>
- #include <gui/core/version.hpp>
- #include <gui/plugin/PluginArgSet.hpp>
- #include <gui/plugin/PluginCommandSet.hpp>
- #include <gui/plugin/PluginInfo.hpp>
- #include <gui/plugin/PluginMessage.hpp>
- #include <gui/plugin/PluginRequest.hpp>
- #include <gui/plugin/PluginValue.hpp>
- #include <gui/plugin/PluginValueConstraint.hpp>
- #include <gui/utils/message_box.hpp>
- #include <objects/general/Date.hpp>
- #include <objects/seq/Annot_descr.hpp>
- #include <objects/seq/Annotdesc.hpp>
- #include <objects/seqalign/Dense_seg.hpp>
- #include <objects/seqalign/Seq_align.hpp>
- #include <objmgr/seq_vector.hpp>
- #include <objmgr/util/feature.hpp>
- #include <objmgr/util/sequence.hpp>
- #include <serial/iterator.hpp>
- #include <util/tables/raw_scoremat.h>
- #include <stdio.h>
- //////////
- BEGIN_NCBI_SCOPE
- USING_SCOPE(objects);
- // GetInfo()
- // static interface to retrieve plugin registration information
- void CAlgoPlugin_NeedlemanWunsch::GetInfo(CPluginInfo& info)
- {
- info.Reset();
- // version info macro
- info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
- string(__DATE__) + " " + string(__TIME__),
- "CAlgoPlugin_NeedlemanWunsch",
- "Alignments/Global (Needleman-Wunsch) Alignment",
- "Align sequences using the Needleman-Wunsch "
- "alignment algorithm",
- "");
- // command info
- CPluginCommandSet& cmds = info.SetCommands();
- CPluginCommand& args = cmds.AddAlgoCommand(eAlgoCommand_run);
- args.AddArgument("seqs", "Sequences to align",
- CSeq_loc::GetTypeInfo(),
- CPluginArg::TData::e_Array);
- args.SetConstraint("seqs",
- *CPluginValueConstraint::CreateSeqSameMol());
- args.AddDefaultArgument("Wm", "Match cost (nucl only)",
- CPluginArg::eInteger,
- NStr::IntToString(CNWAligner::GetDefaultWm()));
- args.AddDefaultArgument("Wms", "Mismatch cost (nucl only)",
- CPluginArg::eInteger,
- NStr::IntToString(CNWAligner::GetDefaultWms()));
- args.AddDefaultArgument("Wg", "Cost to open gap",
- CPluginArg::eInteger,
- NStr::IntToString(CNWAligner::GetDefaultWg()));
- args.AddDefaultArgument("Ws", "Cost to extend gap",
- CPluginArg::eInteger,
- NStr::IntToString(CNWAligner::GetDefaultWs()));
- args.AddDefaultArgument("esf1", "Free ends, 1st sequence",
- CPluginArg::eString, "none");
- args.SetConstraint("esf1", (*CPluginValueConstraint::CreateSet(),
- "none", "left", "right", "both"));
- args.AddDefaultArgument("esf2", "Free ends, 2nd sequence",
- CPluginArg::eString, "none");
- args.SetConstraint("esf2", (*CPluginValueConstraint::CreateSet(),
- "none", "left", "right", "both"));
- }
- //
- // callback to check the plugin's execution status
- //
- static bool progress_callback (CNWAligner::SProgressInfo* pInfo)
- {
- Fl::check();
- if ( !pInfo ) {
- return false;
- }
- IReporter* reporter = reinterpret_cast<IReporter*>(pInfo->m_data);
- if( !reporter ) {
- return false;
- }
- if(false/*reporter->IsCancelled()*/) {
- return true;
- } else {
- char buf[128];
- float pct_done = (100.0f / pInfo->m_iter_total) * pInfo->m_iter_done;
- sprintf( buf, "%2.0lf %% completed", pct_done);
- reporter->SetMessage(buf);
- reporter->SetPctCompleted((int)pct_done);
- return false;
- }
- }
- void CAlgoPlugin_NeedlemanWunsch::RunCommand(CPluginMessage& msg)
- {
- const CPluginCommand& cmd = msg.GetRequest().GetCommand();
- CPluginReply& reply = msg.SetReply();
- reply.SetStatus(eMessageStatus_failed);
- // check to see that we were passed sequences to begin with
- if ( !CPluginUtils::IsValid(cmd["seqs"]) ) {
- reply.SetStatus(eMessageStatus_failed);
- return;
- }
- // make sure we have exactly two sequences
- // FIXME: change to create a multi-pairwise alignment
- plugin_args::TLocList locs;
- GetArgValue(cmd["seqs"], locs);
- if (locs.size() != 2) {
- reply.SetStatus(eMessageStatus_ignored);
- return;
- }
- // make sure that the sequences are of a known type; fetch the sequences
- CRef<CScope> new_scope;
- vector<string> seqs;
- vector<string> seq_labels;
- typedef vector< CConstRef<CSeq_id> > TIds;
- TIds seq_ids;
- const SNCBIPackedScoreMatrix* scoremat = 0;
- ITERATE (plugin_args::TLocList, loc_iter, locs) {
- const CSeq_loc& loc = *loc_iter->second;
- const IDocument& doc = *loc_iter->first;
- CScope& scope = doc.GetScope();
- if ( !new_scope ) {
- new_scope.Reset(&scope);
- }
- if ( !sequence::IsOneBioseq(loc, &scope) ) {
- string msg = CPluginUtils::GetLabel(loc, &doc.GetScope());
- LOG_POST(Info << "CAlgoPlugin_NeedlemanWunsch: "
- "location on multiple bioseqs ignored: " << msg);
- continue;
- }
- CBioseq_Handle handle =
- scope.GetBioseqHandle(sequence::GetId(loc, &scope));
- CSeqVector vec =
- handle.GetSequenceView(loc,
- CBioseq_Handle::eViewConstructed,
- CBioseq_Handle::eCoding_Iupac);
- // save our sequence
- seqs.push_back(string());
- vec.GetSeqData(0, vec.size(), seqs.back());
- NStr::ToUpper(seqs.back());
- // save a label for this sequence
- seq_labels.push_back(CPluginUtils::GetLabel(loc, &doc.GetScope()));
- // save the gi for this sequence
- seq_ids.push_back(CConstRef<CSeq_id>(handle.GetSeqId()));
- scoremat = vec.IsNucleotide() ? 0: &NCBISM_Blosum62;
- }
- const size_t nw_limit = 200*1024*1024;
- vector<string>::const_iterator iter_seqs = seqs.begin();
- double dim_square = (iter_seqs++)->length();
- dim_square *= iter_seqs->length();
- bool use_myers_miller = dim_square > nw_limit;
- //
- // main algorithm
- //
- string output;
- CNWAligner::TScore score = 0;
- try {
- CProgressDlg dlg_prg;
- dlg_prg.SetTitle("Calculation status");
- dlg_prg.SetMessage("Calculation in progress...nplease wait");
- dlg_prg.Show();
- const char* seq1 = seqs[0].c_str(), * seq2 = seqs[1].c_str();
- size_t dim1 = seqs[0].size(), dim2 = seqs[1].size();
- auto_ptr<CNWAligner> aligner
- ( use_myers_miller ?
- new CMMAligner (seq1, dim1, seq2, dim2, scoremat) :
- new CNWAligner (seq1, dim1, seq2, dim2, scoremat) );
- if(use_myers_miller) {
- LOG_POST( Info << "CAlgoPlugin_NeedlemanWunsch: Using Myers-Miller method");
- }
- aligner->SetWm (cmd["Wm" ].AsInteger());
- aligner->SetWms(cmd["Wms"].AsInteger());
- aligner->SetWg (cmd["Wg" ].AsInteger());
- aligner->SetWs (cmd["Ws" ].AsInteger());
- // end-space free alignment setup
- const string esf1 = cmd["esf1"].AsString();
- bool left1 = (esf1 == "left") || (esf1 == "both");
- bool right1 = (esf1 == "right") || (esf1 == "both");
- const string esf2 = cmd["esf2"].AsString();
- bool left2 = (esf2 == "left") || (esf2 == "both");
- bool right2 = (esf2 == "right") || (esf2 == "both");
- aligner->SetEndSpaceFree(left1, right1, left2, right2);
- aligner->SetProgressCallback(progress_callback, &dlg_prg);
- score = aligner->Run();
- dlg_prg.Hide();
- // create a seq-align structure for our alignment
- CRef<CSeq_align> align(new CSeq_align());
- CNWFormatter formatter (*aligner);
- formatter.AsSeqAlign(align);
- // we need to set the IDs correctly
- TIds::iterator seq_id_iter = seq_ids.begin();
- NON_CONST_ITERATE(CDense_seg::TIds, iter,
- align->SetSegs().SetDenseg().SetIds()) {
- (*iter)->Assign(**seq_id_iter++);
- }
- // pack the alignment in a Seq-annot and label it appropriately
- CRef<CSeq_annot> annot(new CSeq_annot());
- annot->SetData().SetAlign().push_back(align);
- // prepare a title
- string str;
- ITERATE (vector<string>, iter, seq_labels) {
- if ( !str.empty() ) {
- str += " + ";
- }
- str += *iter;
- }
- str = "Global alignment of " + str;
- annot->AddTitle(str);
- CTime time;
- time.GetLocalTime();
- str = "This alignment was produced on ";
- str += time.AsString();
- str += " using the Needleman-Wunsch alignment algorithm";
- annot->AddComment(str);
- CRef<CAnnotdesc> desc(new CAnnotdesc());
- desc->SetCreate_date().SetStr(time.AsString());
- annot->SetDesc().Set().push_back(desc);
- //
- // pass back to the system. We may use the same scope and just attach,
- // if that is appropriate
- //
- CConstRef<IDocument> doc_ref;
- ITERATE (plugin_args::TLocList, iter, locs) {
- if ( !doc_ref ) {
- doc_ref.Reset(iter->first);
- } else if (iter->first != doc_ref) {
- doc_ref.Reset();
- break;
- }
- }
- if ( !doc_ref ) {
- //
- // query and targets come from different documents
- // create a new one to handle the results
- //
- CRef<CScope> new_scope(new CScope(CDocManager::GetObjectManager()));
- ITERATE (plugin_args::TLocList, iter, locs) {
- new_scope->AddScope(iter->first->GetScope());
- }
- doc_ref.Reset(CDocManager::CreateDocument(*new_scope, *annot));
- } else {
- reply.AddAction(CPluginReplyAction::e_Add_to_document);
- }
- reply.AddObject(*doc_ref, *annot);
- reply.SetStatus(eMessageStatus_success);
- }
- catch (CException& e) {
- NcbiMessageBox("Global alignment failed:n" + e.GetMsg());
- }
- catch(exception& e) {
- NcbiMessageBox(string("Global alignment failed:n") + e.what());
- }
- }
- END_NCBI_SCOPE
- /*
- * ===========================================================================
- * $Log: wbplg_aligner.cpp,v $
- * Revision 1000.4 2004/06/01 20:54:41 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.54
- *
- * Revision 1.54 2004/05/25 17:11:53 dicuccio
- * Deprecated old message box dialog in favor of standard progress dialog
- *
- * Revision 1.53 2004/05/21 22:27:46 gorelenk
- * Added PCH ncbi_pch.hpp
- *
- * Revision 1.52 2004/05/20 12:35:49 dicuccio
- * Removed dead code
- *
- * Revision 1.51 2004/05/17 15:11:44 kapustin
- * Initial revision
- *
- * Revision 1.50 2004/04/07 12:58:39 dicuccio
- * Removed dependence on gi for seq-id. Changed default view to multiple
- * alignment view. Cleaned up handling of view request / results processing
- *
- * Revision 1.49 2004/03/05 17:34:17 dicuccio
- * Use sequence::GetId() to retrieve GI-based ID
- *
- * Revision 1.48 2004/01/27 18:40:29 dicuccio
- * Code clean-up. Renamed plugin classes to follow standard pattern
- *
- * Revision 1.47 2004/01/07 15:50:35 dicuccio
- * Adjusted for API change in CPluginUtils::GetLabel(). Standardized exception
- * reporting in algorithms.
- *
- * Revision 1.46 2003/12/22 19:26:29 dicuccio
- * Code reformatting. Don't post a plugin message directly - use
- * CPluginUtils::CallPlugin()
- *
- * Revision 1.45 2003/12/09 15:46:16 dicuccio
- * Minor formatting change
- *
- * Revision 1.44 2003/11/24 15:45:24 dicuccio
- * Renamed CVersion to CPluginVersion
- *
- * Revision 1.43 2003/11/04 17:49:22 dicuccio
- * Changed calling parameters for plugins - pass CPluginMessage instead of paired
- * CPluginCommand/CPluginReply
- *
- * Revision 1.42 2003/10/27 17:47:04 dicuccio
- * Removed dead #includes
- *
- * Revision 1.41 2003/10/07 13:46:59 dicuccio
- * Renamed CPluginURL* to CPluginValue*
- *
- * Revision 1.40 2003/09/30 19:50:57 kapustin
- * Adjust for standard score matrix interface
- *
- * Revision 1.39 2003/09/04 14:05:23 dicuccio
- * Use IDocument instead of CDocument
- *
- * Revision 1.38 2003/09/03 14:46:52 rsmith
- * change namespace name from args to plugin_args to avoid clashes with variable names.
- *
- * Revision 1.37 2003/09/02 22:47:50 kapustin
- * Adjust for algo/align changes
- *
- * Revision 1.36 2003/09/02 17:08:06 rsmith
- * remove namespace name and variable name clash.
- *
- * Revision 1.35 2003/08/21 18:44:01 vasilche
- * Use CSeqVector::IsNucleotide() method instead of GetSequenceType().
- *
- * Revision 1.34 2003/08/21 12:03:07 dicuccio
- * Make use of new typedef in plugin_utils.hpp for argument values.
- *
- * Revision 1.33 2003/08/05 17:07:16 dicuccio
- * Changed calling semantics for the message queue - pass by reference, not
- * CConstRef<>
- *
- * Revision 1.32 2003/07/31 17:02:26 dicuccio
- * Changed plugin message queue class name to be application agnostic
- *
- * Revision 1.31 2003/07/23 19:14:09 dicuccio
- * Moved logic for validating plugin arguments into CPluginUtils.
- *
- * Revision 1.30 2003/07/22 15:32:15 dicuccio
- * Changed to make use of new API in plugin_utils.hpp - GetArgValue()
- *
- * Revision 1.29 2003/07/14 11:10:18 shomrat
- * Plugin messaging system related changes
- *
- * Revision 1.28 2003/06/26 15:33:40 dicuccio
- * Moved GetURLValue() from PluginURL.hpp to plugin_utils.hpp. Fixed
- * compilation errors relating to missing #includes
- *
- * Revision 1.27 2003/06/25 17:02:56 dicuccio
- * Split CPluginHandle into a handle (pointer-to-implementation) and
- * implementation file. Lots of #include file clean-ups.
- *
- * Revision 1.26 2003/06/20 14:52:35 dicuccio
- * Revised plugin registration - moved GetInfo() into the plugin handler
- *
- * Revision 1.25 2003/06/17 16:41:43 dicuccio
- * Fix #includes after algo/ rearrangement
- *
- * Revision 1.24 2003/06/09 19:25:58 dicuccio
- * Added <stdio.h> for sprintf()
- *
- * Revision 1.23 2003/06/02 16:06:20 dicuccio
- * Rearranged src/objects/ subtree. This includes the following shifts:
- * - src/objects/asn2asn --> arc/app/asn2asn
- * - src/objects/testmedline --> src/objects/ncbimime/test
- * - src/objects/objmgr --> src/objmgr
- * - src/objects/util --> src/objmgr/util
- * - src/objects/alnmgr --> src/objtools/alnmgr
- * - src/objects/flat --> src/objtools/flat
- * - src/objects/validator --> src/objtools/validator
- * - src/objects/cddalignview --> src/objtools/cddalignview
- * In addition, libseq now includes six of the objects/seq... libs, and libmmdb
- * replaces the three libmmdb? libs.
- *
- * Revision 1.22 2003/05/30 20:23:43 kapustin
- * Fix sequence index typo
- *
- * Revision 1.21 2003/05/30 20:05:43 kapustin
- * Support arbitrarily sized sequences
- *
- * Revision 1.20 2003/05/19 13:38:32 dicuccio
- * Moved gui/core/plugin/ --> gui/plugin/. Merged core libraries
- * into libgui_core
- *
- * Revision 1.19 2003/05/12 16:08:40 dicuccio
- * Updated to use new plugin action args
- *
- * Revision 1.18 2003/05/08 18:27:56 kapustin
- * Allow specification of plugin's arguments
- *
- * Revision 1.17 2003/04/30 14:09:42 dicuccio
- * Updated Needleman-Wunsch plugin - produce a Seq-annot as a document;
- * launch a default graphical view (cross alignment viewer)
- *
- * Revision 1.16 2003/04/24 16:37:30 dicuccio
- * Updated to reflect changes in plugin API
- *
- * Revision 1.15 2003/04/22 16:29:00 kapustin
- * Fix memory limit typo
- *
- * Revision 1.14 2003/04/22 16:18:36 kapustin
- * Support aminoacid sequences. Set memory limit.
- *
- * Revision 1.13 2003/04/03 01:08:24 ucko
- * Adjust for new FormatAsText interface.
- *
- * Revision 1.12 2003/03/12 21:12:47 kapustin
- * Use text buffer provided by the aligner to store text messages
- *
- * Revision 1.11 2003/03/11 15:23:29 kuznets
- * iterate -> ITERATE
- *
- * Revision 1.10 2003/03/05 21:23:11 kapustin
- * Reflect new CNWAligner::FormatAsText() call
- *
- * Revision 1.9 2003/02/26 14:31:46 dicuccio
- * General clean-up. Fixed passing of arguments - alignment should work now.
- *
- * Revision 1.8 2003/02/25 14:44:34 dicuccio
- * Changed accessors to match API changes in plugin arguments
- *
- * Revision 1.7 2003/02/24 13:03:14 dicuccio
- * Renamed classes in plugin spec:
- * CArgSeg --> CPluginArgSet
- * CArgument --> CPluginArg
- * CPluginArgs --> CPluginCommand
- * CPluginCommands --> CPluginCommandSet
- *
- * Revision 1.6 2003/02/21 17:13:54 dicuccio
- * Changed enums in CDlgMessageBox - added leading 'e' to avoid
- * impossible-to-remove conflict with Windows code.
- *
- * Revision 1.5 2003/02/20 19:49:53 dicuccio
- * Created new plugin architecture, based on ASN.1 spec. Moved GBENCH framework
- * over to use new plugin architecture.
- *
- * Revision 1.4 2003/02/05 17:23:21 ucko
- * When going from CNcbiOstrstream to char* via CNcbiOstrstreamToString,
- * explicitly use an intermediate string variable to avoid confusing some
- * compilers (GCC 2.9x at least).
- *
- * Revision 1.3 2003/02/04 22:58:33 kapustin
- * Split plugin activation into x_Verify() and x_Run(). Add progress callback
- *
- * Revision 1.2 2003/01/29 19:39:54 kapustin
- * Increase output line width
- *
- * Revision 1.1 2003/01/29 19:10:37 kapustin
- * Initial revision
- *
- * ===========================================================================
- */