find_orfs.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:14k
- /*
- * ===========================================================================
- * PRODUCTION $Log: find_orfs.cpp,v $
- * PRODUCTION Revision 1000.5 2004/06/01 20:54:59 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.26
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: find_orfs.cpp,v 1000.5 2004/06/01 20:54:59 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Authors: Josh Cherry
- *
- * File Description: simple gbench plugin for finding ORFs
- *
- */
- #include <ncbi_pch.hpp>
- #include "find_orfs.hpp"
- #include <algo/sequence/make_cdr_prods.hpp>
- #include <algo/sequence/orf.hpp>
- #include <gui/core/plugin_utils.hpp>
- #include <gui/core/version.hpp>
- #include <gui/dialogs/col/multi_col_dlg.hpp>
- #include <gui/plugin/PluginCommandSet.hpp>
- #include <gui/plugin/PluginInfo.hpp>
- #include <gui/plugin/PluginReply.hpp>
- #include <gui/plugin/PluginRequest.hpp>
- #include <gui/plugin/PluginValueConstraint.hpp>
- #include <gui/objutils/utils.hpp>
- #include <objects/seqfeat/Genetic_code.hpp>
- #include <objects/seqfeat/Genetic_code_table.hpp>
- #include <objmgr/util/sequence.hpp>
- BEGIN_NCBI_SCOPE
- USING_SCOPE(objects);
- CAlgoPlugin_FindOrfs::~CAlgoPlugin_FindOrfs()
- {
- }
- // standard plugin announce bopilerplate
- void CAlgoPlugin_FindOrfs::GetInfo(CPluginInfo& info)
- {
- info.Reset();
-
- // version info macro
- info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
- string(__DATE__) + " " + string(__TIME__),
- "CAlgoPlugin_FindOrfs", "Search/Find Open Reading Frames",
- "Find open reading frames in a DNA sequence",
- "");
- // command info
- CPluginCommandSet& cmds = info.SetCommands();
- CPluginCommand& args = cmds.AddAlgoCommand(eAlgoCommand_run);
- args.AddArgument("locs", "Locations to evaluate",
- CSeq_loc::GetTypeInfo(),
- CPluginArg::TData::e_Array);
- args.SetConstraint("locs",
- (*CPluginValueConstraint::CreateSeqMol(),
- CSeq_inst::eMol_na,
- CSeq_inst::eMol_dna,
- CSeq_inst::eMol_rna));
- args.AddDefaultArgument("min_length_codons",
- "Minimum number of sense codons",
- CPluginArg::eInteger, "100");
- // genetic code argument
- const CGenetic_code_table& code_table = CGen_code_table::GetCodeTable();
- const CGenetic_code_table::Tdata& codes = code_table.Get();
- args.AddDefaultArgument("genetic_code", "Genetic code",
- CPluginArg::eString, codes.front()->GetName());
- CPluginValueConstraint *code_list = CPluginValueConstraint::CreateSet();
- ITERATE (CGenetic_code_table::Tdata, code, codes) {
- code_list->SetSet().push_back((*code)->GetName());
- }
- args.SetConstraint("genetic_code", *code_list);
-
- }
- void CAlgoPlugin_FindOrfs::RunCommand(CPluginMessage& msg)
- {
- const CPluginCommand& args = msg.GetRequest().GetCommand();
- CPluginReply& reply = msg.SetReply();
- _TRACE("CAlgoPlugin_FindOrfs::RunCommand()");
-
- if ( !m_Dialog.get() ) {
- m_Dialog.reset(new CMultiColDlg());
- m_Dialog->SetWindowSize(500, 450);
- m_Dialog->SetTitle("Open Reading Frames");
-
- m_Dialog->SetColumn(0, "Sequence", FL_ALIGN_LEFT, 0.5f);
- m_Dialog->SetColumn(1, "Location", FL_ALIGN_LEFT, 0.5f);
- m_Dialog->SetColumn(2, "Strand", FL_ALIGN_CENTER, 0.25f);
- m_Dialog->SetColumn(3, "From", FL_ALIGN_CENTER, 0.5f);
- m_Dialog->SetColumn(4, "To", FL_ALIGN_CENTER, 0.5f);
- m_Dialog->SetColumn(5, "Sense Codons", FL_ALIGN_CENTER, 0.5f);
- }
- // clear any previous contents
- m_Dialog->SetRows(0);
- int row = 0;
- plugin_args::TLocList locs;
- GetArgValue(args["locs"], locs);
- int min_length_codons = args["min_length_codons"].AsInteger();
- string genetic_code_name = args["genetic_code"].AsString();
- ITERATE (plugin_args::TLocList, iter, locs) {
- const CSeq_loc& loc = *iter->second;
- const IDocument& doc = *iter->first;
- // find the best ID for this bioseq
- try {
- CBioseq_Handle handle = doc.GetScope().GetBioseqHandle(loc);
- // get sequence vector
- CSeqVector vec =
- handle.GetSequenceView(loc,
- CBioseq_Handle::eViewConstructed,
- CBioseq_Handle::eCoding_Ncbi);
- string& id_str = m_Dialog->SetCell(row, 0);
- string& loc_str = m_Dialog->SetCell(row, 1);
- const CSeq_id& best_id =
- sequence::GetId(handle, sequence::eGetId_Best);
- id_str.erase();
- best_id.GetLabel(&id_str);
- loc_str = CPluginUtils::GetLabel(loc, &doc.GetScope());
- // place to store orfs
- vector< CRef<CSeq_loc> > orfs;
- // find some ORFs
- COrf::FindOrfs(vec, orfs,
- min_length_codons * 3,
- x_DecodeGeneticCode(genetic_code_name));
- // translate our locs to our parent location
- NON_CONST_ITERATE (vector< CRef<CSeq_loc> >, iter, orfs) {
- (**iter).SetId(sequence::GetId(loc));
- *iter = CSeqUtils::RemapChildToParent(loc, **iter);
- }
- // make an annot
- CRef<CSeq_id> this_id
- (const_cast<CSeq_id*>(&sequence::GetId(loc)));
- CRef<CSeq_annot> annot =
- COrf::MakeCDSAnnot(orfs,
- x_DecodeGeneticCode(genetic_code_name));
- // add description to annot
- annot->AddName("Open reading frames");
- string comment =
- string("Open reading frames containing at least ") +
- NStr::IntToString(min_length_codons) +
- " sense codons using " + genetic_code_name +
- " genetic code";
- annot->AddComment(comment);
- // make protein sequences
- CRef<CBioseq_set> product_set =
- CMakeCdrProds::MakeCdrProds(annot, handle);
- reply.AddObject(doc, *product_set);
- reply.AddObject(doc, *annot);
- /**
- CRef<CSeq_entry> new_entry(new CSeq_entry);
- new_entry->SetSet(*product_set);
- doc.GetScope().AddTopLevelSeqEntry(*new_entry);
- **/
- // attach annot to doc
- //const_cast<IDocument&>(doc).AttachAnnot(*annot);
- // in order to build dialog efficiently,
- // pre-allocate one line for each ORF
- m_Dialog->SetRows(row + orfs.size());
- ITERATE (vector< CRef<CSeq_loc> >, loc_iter, orfs) {
- const CSeq_loc& orf = **loc_iter;
- //
- // add ORFs to dialog
- //
- ENa_strand strand = sequence::GetStrand(orf);
- if (strand == eNa_strand_minus) {
- m_Dialog->SetCell(row, 2) = "-";
- } else {
- m_Dialog->SetCell(row, 2) = "+";
- }
- m_Dialog->SetCell(row, 3)
- = NStr::IntToString(orf.GetTotalRange().GetFrom() + 1);
- m_Dialog->SetCell(row, 4)
- = NStr::IntToString(orf.GetTotalRange().GetTo() + 1);
- // ORF may or may not include a stop codon.
- // If it does, this must be subtracted
- // in computing the number of sense codons.
- int sense_codon_count = sequence::GetLength(orf);
- sense_codon_count /= 3;
- sense_codon_count -= 1;
- if ((strand == eNa_strand_plus && orf.IsPartialRight()) ||
- (strand == eNa_strand_minus && orf.IsPartialLeft())) {
- ++sense_codon_count;
- }
- m_Dialog->SetCell(row, 5)
- = NStr::IntToString(sense_codon_count);
- ++row;
- }
- }
- catch (CException& e) {
- LOG_POST(Error << "error processing location in ORF finder: "
- << e.what());
- string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
- LOG_POST(Error << "Error processing location " << str);
- }
- catch (exception& e) {
- LOG_POST(Error << "error processing location in ORF finder: "
- << e.what());
- string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
- LOG_POST(Error << "Error processing location " << str);
- }
- #ifndef _DEBUG
- catch (...) {
- string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
- LOG_POST(Error << "Error processing location " << str);
- }
- #endif
- }
- // update all views
- //CDocManager::UpdateAllViews();
- //
- // prepare our dialog box
- //
- m_Dialog->SetLabel(string("ORFs ") + NStr::IntToString(min_length_codons)
- + " codons or longer"
- + " using " + genetic_code_name + " genetic code");
- m_Dialog->Show();
- reply.AddAction(CPluginReplyAction::e_Add_to_document);
- reply.SetStatus(eMessageStatus_success);
- }
- // figure out the id of the genetic code the user wants
- int CAlgoPlugin_FindOrfs::x_DecodeGeneticCode(const string& s)
- {
- const CGenetic_code_table& code_table = CGen_code_table::GetCodeTable();
- const CGenetic_code_table::Tdata& codes = code_table.Get();
- ITERATE (CGenetic_code_table::Tdata, code, codes) {
- if ((*code)->GetName() == s) {
- return (*code)->GetId();
- }
- }
- // if we got here, nothing matched
- NCBI_THROW(CException, eUnknown,
- "CAlgoPlugin_FindOrfs: no genetic code matched " + s);
- }
- END_NCBI_SCOPE
- /*
- * ===========================================================================
- * $Log: find_orfs.cpp,v $
- * Revision 1000.5 2004/06/01 20:54:59 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.26
- *
- * Revision 1.26 2004/05/21 22:27:46 gorelenk
- * Added PCH ncbi_pch.hpp
- *
- * Revision 1.25 2004/05/03 13:05:42 dicuccio
- * gui/utils --> gui/objutils where needed
- *
- * Revision 1.24 2004/03/05 17:35:07 dicuccio
- * Use CGenetic_code_table typedefs to ease syntax. Use sequence::GetId() instead
- * of CSeq_id::GetStringDescr()
- *
- * Revision 1.23 2004/01/27 18:37:41 dicuccio
- * Code clean-up. Use standard names for plugins. Removed unnecessary #includes
- *
- * Revision 1.22 2004/01/07 15:50:36 dicuccio
- * Adjusted for API change in CPluginUtils::GetLabel(). Standardized exception
- * reporting in algorithms.
- *
- * Revision 1.21 2003/11/24 15:45:26 dicuccio
- * Renamed CVersion to CPluginVersion
- *
- * Revision 1.20 2003/11/18 17:48:36 dicuccio
- * Added standard processing of return values
- *
- * Revision 1.19 2003/11/10 16:51:06 jcherry
- * Added generation of protein sequences for orfs
- *
- * Revision 1.18 2003/11/06 20:12:12 dicuccio
- * Cleaned up handling of USING_SCOPE - removed from all headers
- *
- * Revision 1.17 2003/11/04 17:49:22 dicuccio
- * Changed calling parameters for plugins - pass CPluginMessage instead of paired
- * CPluginCommand/CPluginReply
- *
- * Revision 1.16 2003/10/27 17:46:48 dicuccio
- * Removed dead #includes
- *
- * Revision 1.15 2003/10/15 21:51:11 jcherry
- * Don't set ids with MakeCDSAnnot; it doesn't work, and it would be
- * redundant anyway.
- *
- * Revision 1.14 2003/10/15 13:40:26 dicuccio
- * Make sure to set the 'id' for the seq-locs before calling RemapChildToParent()
- *
- * Revision 1.13 2003/10/14 16:24:37 dicuccio
- * Correctly remap new feature locations through the parent location to the master
- * sequence
- *
- * Revision 1.12 2003/10/07 13:47:00 dicuccio
- * Renamed CPluginURL* to CPluginValue*
- *
- * Revision 1.11 2003/09/30 13:40:49 dicuccio
- * Minor code clean-up: use container typedefs from ASN.1 generated classes
- *
- * Revision 1.10 2003/09/25 17:21:35 jcherry
- * Added name to annot
- *
- * Revision 1.9 2003/09/04 19:27:53 jcherry
- * Made an ORF include the stop codon, and marked certain ORFs as
- * partial. Put ability to construct a feature table into COrf.
- *
- * Revision 1.8 2003/09/04 14:05:24 dicuccio
- * Use IDocument instead of CDocument
- *
- * Revision 1.7 2003/09/03 14:46:53 rsmith
- * change namespace name from args to plugin_args to avoid clashes with variable names.
- *
- * Revision 1.6 2003/08/21 12:03:07 dicuccio
- * Make use of new typedef in plugin_utils.hpp for argument values.
- *
- * Revision 1.5 2003/08/19 20:47:52 jcherry
- * Use SetSet().pushback() rather than comma operator for adding
- * constraints in loop (less bizarre-looking)
- *
- * Revision 1.4 2003/08/19 18:36:59 jcherry
- * Allowed user to specify genetic code
- *
- * Revision 1.3 2003/08/18 19:24:15 jcherry
- * Moved orf and seq_match to algo/sequence
- *
- * Revision 1.2 2003/08/18 18:01:58 jcherry
- * Changed COrf::FindOrfs to produce a vector of CRef<CSeq_loc>.
- * Added version of FindOrfs that takes a CSeqVector.
- *
- * Revision 1.1 2003/08/14 17:59:22 jcherry
- * Initial version
- *
- * ===========================================================================
- */