build_tree.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:11k
- /*
- * ===========================================================================
- * PRODUCTION $Log: build_tree.cpp,v $
- * PRODUCTION Revision 1000.1 2004/06/01 20:57:01 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.12
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: build_tree.cpp,v 1000.1 2004/06/01 20:57:01 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Authors: Josh Cherry
- *
- * File Description: gbench plugin for phylogenetic tree building
- *
- */
- #include <ncbi_pch.hpp>
- #include "build_tree.hpp"
- #include <gui/core/idocument.hpp>
- #include <gui/core/plugin_utils.hpp>
- #include <gui/plugin/PluginCommandSet.hpp>
- #include <gui/plugin/PluginValue.hpp>
- #include <gui/plugin/PluginInfo.hpp>
- #include <gui/plugin/PluginReply.hpp>
- #include <gui/plugin/PluginRequest.hpp>
- #include <gui/plugin/PluginValueConstraint.hpp>
- #include <gui/core/version.hpp>
- #include <gui/core/doc_manager.hpp>
- #include <gui/objutils/utils.hpp>
- #include <gui/utils/message_box.hpp>
- #include <objmgr/scope.hpp>
- #include <objmgr/util/sequence.hpp>
- #include <objtools/alnmgr/alnmix.hpp>
- #include <algo/phy_tree/dist_methods.hpp>
- #include <algo/phy_tree/phy_tree_serial.hpp>
- #include <objects/seq/Seq_descr.hpp>
- #include <objects/seq/Seqdesc.hpp>
- #include <objects/seqfeat/Org_ref.hpp>
- #include <objects/seqfeat/BioSource.hpp>
- #include <objtools/alnmgr/alnmix.hpp>
- #include <objtools/alnmgr/alnvec.hpp>
- BEGIN_NCBI_SCOPE
- USING_SCOPE(objects);
- static const string sc_JukesCantor("Jukes-Cantor (DNA)");
- static const string sc_Kimura("Kimura (protein)");
- static const string sc_Poisson("Poisson (protein)");
- static const string sc_FastMe("Fast Minimum Evolution");
- static const string sc_Nj("Neighbor Joining");
- static const string sc_SeqId("Sequence ID");
- static const string sc_TaxName("Taxonomic Name (if available)");
- static const string sc_SeqTitle("Sequence Title (if available)");
- // standard info boilerplate
- void CAlgoPlugin_BuildTree::GetInfo(CPluginInfo& info)
- {
- info.Reset();
- // version info macro
- info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
- string(__DATE__) + " " + string(__TIME__),
- "CAlgoPlugin_BuildTree", "Phylogenetic trees/Build tree",
- "Build tree from alignment using a distance method",
- "");
- // command info
- CPluginCommandSet& cmds = info.SetCommands();
- CPluginCommand& args = cmds.AddAlgoCommand(eAlgoCommand_run);
- args.AddArgument("aligns", "Input alignments",
- CSeq_align::GetTypeInfo(),
- CPluginArg::TData::e_Array);
- args.AddDefaultArgument("dist_calc", "Distance Method",
- CPluginArg::eString, sc_Kimura);
- args.SetConstraint("dist_calc", (*CPluginValueConstraint::CreateSet(),
- sc_Kimura, sc_Poisson, sc_JukesCantor));
- args.AddDefaultArgument("tree_method", "Tree Construct Method",
- CPluginArg::eString, sc_FastMe);
- args.SetConstraint("tree_method", (*CPluginValueConstraint::CreateSet(),
- sc_FastMe, sc_Nj));
- args.AddDefaultArgument("label_type", "Labels for Leaf Nodes",
- CPluginArg::eString, sc_SeqId);
- args.SetConstraint("label_type", (*CPluginValueConstraint::CreateSet(),
- sc_SeqId, sc_TaxName, sc_SeqTitle));
- }
- void CAlgoPlugin_BuildTree::RunCommand(CPluginMessage& msg)
- {
- const CPluginCommand& args = msg.GetRequest().GetCommand();
- CPluginReply& reply = msg.SetReply();
- reply.SetStatus(eMessageStatus_failed);
- _TRACE("CAlgoPlugin_BuildTree::Run()");
- // retrieve our alignments
- plugin_args::TAlignList aligns;
- GetArgValue(args["aligns"], aligns);
- // merge them
- CConstRef<IDocument> doc_ref;
- ITERATE (plugin_args::TAlignList, iter, aligns) {
- if (doc_ref.GetPointer() && iter->first != doc_ref) {
- NcbiMessageBox("Failed to merge alignments:n"
- "All alignments must be in the same document");
- return;
- }
- doc_ref = iter->first;
- }
- CAlnMix mix(doc_ref->GetScope());
- CAlnMix::TAddFlags add_flags = 0;
- CAlnMix::TMergeFlags merge_flags =
- CAlnMix::fGen2EST |
- CAlnMix::fTryOtherMethodOnFail |
- CAlnMix::fGapJoin;
- try {
- ITERATE (plugin_args::TAlignList, iter, aligns) {
- mix.Add(*iter->second, add_flags);
- }
- mix.Merge(merge_flags);
- }
- catch (CException& e) {
- NcbiMessageBox("Failed to merge alignments:n" +
- e.GetMsg());
- return;
- }
-
- CAlnVec alnvec(mix.GetSeqAlign().GetSegs().GetDenseg(),
- doc_ref->GetScope());
- alnvec.SetGapChar('-');
- const string& dist_calc = args["dist_calc"].AsString();
- const string& tree_method = args["tree_method"].AsString();
- // Come up with some labels for the terminal nodes
- vector<string> labels(alnvec.GetNumRows());
- for (int i = 0; i < alnvec.GetNumRows(); ++i) {
- if (args["label_type"].AsString() == sc_TaxName) {
- if (alnvec.GetBioseqHandle(i).IsSetDescr()) {
- ITERATE (list<CRef<CSeqdesc> >, iter,
- alnvec.GetBioseqHandle(i).GetDescr().Get()) {
- if ((*iter)->IsSource()) {
- if ((*iter)->GetSource().GetOrg().IsSetTaxname()) {
- labels[i] = (*iter)->GetSource().GetOrg()
- .GetTaxname();
- break;
- }
- }
- }
- }
- }
- if (args["label_type"].AsString() == sc_SeqTitle) {
- if (alnvec.GetBioseqHandle(i).IsSetDescr()) {
- ITERATE (list<CRef<CSeqdesc> >, iter,
- alnvec.GetBioseqHandle(i).GetDescr().Get()) {
- if ((*iter)->IsTitle()) {
- labels[i] = (*iter)->GetTitle();
- break;
- }
- }
- }
- }
- // if sc_SeqId, or if above found no appropriate data
- if (labels[i].empty()) {
- const CSeq_id& best_id =
- sequence::GetId(alnvec.GetBioseqHandle(i),
- sequence::eGetId_Best);
- best_id.GetLabel(&labels[i]);
- }
- }
- // Calculate pairwise distances
- CDistMethods::TMatrix pmat;
- CDistMethods::Divergence(alnvec, pmat);
- CDistMethods::TMatrix dmat;
- if (dist_calc == sc_Kimura) {
- CDistMethods::KimuraDist(pmat, dmat);
- } else if (dist_calc == sc_JukesCantor) {
- CDistMethods::JukesCantorDist(pmat, dmat);
- } else if (dist_calc == sc_Poisson) {
- CDistMethods::PoissonDist(pmat, dmat);
- } else {
- throw runtime_error(string("Invalid distance calculation type: ")
- + dist_calc);
- }
- // Build a tree based on distances
- auto_ptr<TPhyTreeNode> tree;
- if (tree_method == sc_FastMe) {
- tree.reset(CDistMethods::FastMeTree(dmat, labels));
- } else if (tree_method == sc_Nj) {
- tree.reset(CDistMethods::NjTree(dmat, labels));
- } else {
- throw runtime_error(string("Invalid tree reconstruction algorithm: ")
- + tree_method);
- }
- CRef<CPhyTreeSerial> stree(new CPhyTreeSerial(*tree.get()));
- // Set Seq-ids, in order. This relies on the tree-building
- // algorithm to set the node ids of the leaf nodes
- // to {0, 1, 2...} in the appropriate order.
- CPhyTreeSerial::TIds& ids = stree->SetIds();
- ids.reserve(alnvec.GetNumRows());
- for (int i = 0; i < alnvec.GetNumRows(); ++i) {
- CRef<CSeq_id> id(new CSeq_id);
- id->Assign(sequence::GetId(alnvec.GetBioseqHandle(i),
- sequence::eGetId_Best));
- ids.push_back(id);
- }
- reply.AddObject(*doc_ref, *stree);
- reply.AddAction(CPluginReplyAction::e_Add_to_document);
- reply.SetStatus(eMessageStatus_success);
- CRef<CSelectionBuffer> buf(new CSelectionBuffer());
- buf->AddSelection(doc_ref, *stree);
- CPluginUtils::CallPlugin("CPhyloTreeView",
- eViewCommand_new_view, *buf);
- /**
- CRef<CScope> scope(new CScope(CDocManager::GetObjectManager()));
- scope->AddDefaults();
- IDocument* new_doc = CDocManager::CreateDocument(*scope, *stree);
- CDocManager::UpdateAllViews();
- // launch tree viewer plugin
- CRef<CSelectionBuffer> buf(new CSelectionBuffer());
- buf->AddSelection(new_doc, *stree);
- CPluginUtils::CallPlugin("CPhyloTreeView", eViewCommand_new_view, *buf);
- reply.AddObject(*new_doc);
- reply.SetStatus(eMessageStatus_success);
- **/
- }
- END_NCBI_SCOPE
- /*
- * ===========================================================================
- * $Log: build_tree.cpp,v $
- * Revision 1000.1 2004/06/01 20:57:01 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.12
- *
- * Revision 1.12 2004/05/21 22:27:48 gorelenk
- * Added PCH ncbi_pch.hpp
- *
- * Revision 1.11 2004/05/20 12:38:30 dicuccio
- * Added missing includes previously acquired through gui/objutils/utils.hpp (seq_vector.hpp, alignment manager
- *
- * Revision 1.10 2004/05/15 03:17:04 ucko
- * Add missing #includes (formerly indirect?)
- *
- * Revision 1.9 2004/05/03 13:05:42 dicuccio
- * gui/utils --> gui/objutils where needed
- *
- * Revision 1.8 2004/04/07 13:00:55 dicuccio
- * Fixed handling of documents. Make sure not to create a new document for the
- * phylogenetic tree - just add it to the current document
- *
- * Revision 1.7 2004/04/05 19:57:56 jcherry
- * More options for leaf labels
- *
- * Revision 1.6 2004/03/17 17:58:27 jcherry
- * Seq-ids for fastme too
- *
- * Revision 1.5 2004/03/17 16:54:14 jcherry
- * Record Seq-ids when doing neighbor-joining
- *
- * Revision 1.4 2004/03/11 17:41:27 dicuccio
- * Cleaned up text description of parameters. Changed merge flags to be
- * consistent with other parts of toolkit. Use sequence::GetId() instead of
- * CSeq_id::GetStringDescr()
- *
- * Revision 1.3 2004/02/19 17:41:34 jcherry
- * Automatically launch tree viewer plugin
- *
- * Revision 1.2 2004/02/17 21:31:41 jcherry
- * Call UpdateAllViews() to update document menu
- *
- * ===========================================================================
- */