pattern2.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:13k
- /*
- * ===========================================================================
- * PRODUCTION $Log: pattern2.cpp,v $
- * PRODUCTION Revision 1000.1 2004/06/01 20:55:26 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.9
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: pattern2.cpp,v 1000.1 2004/06/01 20:55:26 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Authors: Josh Cherry
- *
- * File Description: gbench plugin for searching against patterns from file
- *
- */
- #include <ncbi_pch.hpp>
- #include "pattern2.hpp"
- #include <algo/sequence/find_pattern.hpp>
- #include <corelib/ncbiapp.hpp>
- #include <corelib/ncbireg.hpp>
- #include <gui/core/plugin_utils.hpp>
- #include <gui/utils/system_path.hpp>
- #include <gui/core/version.hpp>
- #include <gui/dialogs/col/multi_col_dlg.hpp>
- #include <gui/plugin/PluginCommandSet.hpp>
- #include <gui/plugin/PluginInfo.hpp>
- #include <gui/plugin/PluginRequest.hpp>
- #include <gui/plugin/PluginValueConstraint.hpp>
- #include <gui/objutils/utils.hpp>
- #include <objects/seqloc/Seq_interval.hpp>
- #include <objmgr/seq_vector.hpp>
- #include <objmgr/util/sequence.hpp>
- BEGIN_NCBI_SCOPE
- USING_SCOPE(objects);
- CAlgoPlugin_NamedPatterns::~CAlgoPlugin_NamedPatterns()
- {
- }
- // standard plugin announce boilerplate
- void CAlgoPlugin_NamedPatterns::GetInfo(CPluginInfo& info)
- {
- info.Reset();
-
- // version info macro
- info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
- string(__DATE__) + " " + string(__TIME__),
- "CAlgoPlugin_NamedPatterns",
- "Search/Named Pattern Sets",
- "Search a sequence against patterns from a file", "");
- // command info
- CPluginCommandSet& cmds = info.SetCommands();
- CPluginCommand& args = cmds.AddAlgoCommand(eAlgoCommand_run);
- args.AddArgument("locs", "Locations to evaluate",
- CSeq_loc::GetTypeInfo(),
- CPluginArg::TData::e_Array);
- args.AddDefaultArgument("patterns", "Pattern set", CPluginArg::eString,
- "Load from file specified below");
- args.AddOptionalArgument("fname", "File containing patterns",
- CPluginArg::eFile);
- }
- void CAlgoPlugin_NamedPatterns::FinalizeArgs(CPluginMessage& msg)
- {
- CPluginCommand& args = msg.SetRequest().SetCommand();
- x_UpdateFileList();
- CPluginValueConstraint *constraint = CPluginValueConstraint::CreateSet();
- constraint->SetSet().push_back("Load from file specified below");
- typedef map<string, string> TMss;
- ITERATE (TMss, it, m_FileList) {
- constraint->SetSet().push_back(it->first);
- }
- args.SetConstraint("patterns", *constraint);
- }
- void CAlgoPlugin_NamedPatterns::RunCommand(CPluginMessage& msg)
- {
- const CPluginCommand& args = msg.GetRequest().GetCommand();
- CPluginReply& reply = msg.SetReply();
- _TRACE("CAlgoPlugin_NamedPatterns::RunCommand()");
-
- if ( !m_Dialog.get() ) {
- m_Dialog.reset(new CMultiColDlg());
- m_Dialog->SetWindowSize(1000, 350);
- m_Dialog->SetTitle("Pattern Search Results");
- m_Dialog->SetColumn(0, "Sequence", FL_ALIGN_LEFT, 1.0f);
- m_Dialog->SetColumn(1, "Location", FL_ALIGN_LEFT, 1.0f);
- m_Dialog->SetColumn(2, "Position", FL_ALIGN_CENTER, 1.0f);
- m_Dialog->SetColumn(3, "Pattern ID", FL_ALIGN_LEFT, 3.0f);
- m_Dialog->SetColumn(4, "Pattern Description", FL_ALIGN_LEFT, 4.0f);
- m_Dialog->SetColumn(5, "Pattern", FL_ALIGN_LEFT, 3.0f);
- m_Dialog->SetColumn(6, "Matched Sequence", FL_ALIGN_LEFT, 2.0f);
- }
- m_Dialog->SetRows(0); // to clear any previous contents
-
- vector<TSeqPos> starts;
- vector<TSeqPos> ends;
- string fname;
- // determine which file to load
- if (args["patterns"].AsString() == "Load from file specified below") {
- fname = args["fname"].AsFile();
- } else {
- fname = m_FileList[args["patterns"].AsString()];
- }
- CNcbiRegistry patterns;
- try {
- CNcbiIfstream is(fname.c_str());
- patterns.Read(is);
- }
- catch (exception& e) {
- LOG_POST(Error << "Couldn't load registry from file " <<
- fname << ": " << e.what());
- reply.SetStatus(eMessageStatus_failed);
- return;
- }
- catch (...) {
- LOG_POST(Error << "Couldn't load registry from file " <<
- fname);
- reply.SetStatus(eMessageStatus_failed);
- return;
- }
- //
- // first, evaluate whole sequences
- //
- int row = 0;
- plugin_args::TLocList locs;
- GetArgValue(args["locs"], locs);
- unsigned int patterns_searched;
- ITERATE (plugin_args::TLocList, iter, locs) {
- const CSeq_loc& loc = *iter->second;
- const IDocument& doc = *iter->first;
- // find the best ID for this bioseq
- try {
- CBioseq_Handle handle = doc.GetScope().GetBioseqHandle(loc);
- CSeqVector vec =
- handle.GetSequenceView(loc,
- CBioseq_Handle::eViewConstructed,
- CBioseq_Handle::eCoding_Iupac);
-
- string seq;
- vec.GetSeqData( (TSeqPos) 0, vec.size(), seq );
- string& id_str = m_Dialog->SetCell(row, 0);
- string& loc_str = m_Dialog->SetCell(row, 1);
- const CSeq_id& best_id =
- sequence::GetId(handle, sequence::eGetId_Best);
- id_str.erase();
- best_id.GetLabel(&id_str);
- loc_str = CPluginUtils::GetLabel(loc, &doc.GetScope());
- // a new feature table
- CRef<CSeq_annot> annot(new CSeq_annot());
- // iterate over patterns
- list<string> pat_ids;
- patterns.EnumerateSections(&pat_ids);
- patterns_searched = 0;
- ITERATE (list<string>, pat_id, pat_ids) {
- if (*pat_id == "-") {
- continue;
- }
- patterns_searched++;
- string pattern = patterns.Get(*pat_id, "pattern");
- string desc = patterns.Get(*pat_id, "description");
- CFindPattern::Find(seq, pattern, starts, ends);
- for(unsigned int k = 0; k < starts.size(); k++) {
- string& pos_str = m_Dialog->SetCell(row, 2);
- // 1-based indexing for dialog
- pos_str = NStr::IntToString(starts[k] + 1) + " - "
- + NStr::IntToString(ends[k] + 1);
- m_Dialog->SetCell(row, 3) = *pat_id;
- m_Dialog->SetCell(row, 4) = desc;
- m_Dialog->SetCell(row, 5) = pattern;
- m_Dialog->SetCell(row, 6) =
- seq.substr(starts[k], ends[k] - starts[k] + 1);
- ++row;
- }
- //
- // add features to annot
- //
- for( unsigned int k = 0; k < starts.size(); k++) {
- // create feature
- CRef<CSeq_feat> feat(new CSeq_feat());
- // set correct location
- CSeq_loc& floc = feat->SetLocation();
- floc.SetInt().SetId().Assign(sequence::GetId(loc));
- floc.SetInt().SetFrom(starts[k]);
- floc.SetInt().SetTo (ends[k]);
- feat->SetLocation
- (*CSeqUtils::RemapChildToParent(loc, floc));
- // set feature data
- feat->SetData().SetRegion() = "Pattern match:: "
- + desc + "; " + *pat_id;
-
- // save in annot
- annot->SetData().SetFtable().push_back(feat);
- }
- }
- // add description to annot
- annot->SetName("Pattern matches");
- reply.AddObject(doc, *annot);
- }
- catch (CException& e) {
- string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
- LOG_POST(Error << "Error processing location " << str
- << ": " << e.what());
- }
- #ifndef _DEBUG
- catch (...) {
- string str = CPluginUtils::GetLabel(loc, &doc.GetScope());
- LOG_POST(Error << "Error processing location " << str);
- }
- #endif
- }
- //
- // prepare our dialog box
- //
- m_Dialog->SetLabel(string("A search against ")
- + NStr::IntToString(patterns_searched)
- + " patterns produced:");
- m_Dialog->Show();
- reply.SetStatus(eMessageStatus_success);
- reply.AddAction(CPluginReplyAction::e_Add_to_document);
- }
- void CAlgoPlugin_NamedPatterns::x_UpdateFileList(void)
- {
- string dir;
- m_FileList.clear();
- CNcbiApplication* app = CNcbiApplication::Instance();
- _ASSERT(app);
- CNcbiRegistry& registry = app->GetConfig();
-
- if ( (dir = registry.Get("Patterns", "PatternPath")).empty() ) {
- registry.Set("Patterns", "PatternPath", "<std>, <home>",
- CNcbiRegistry::ePersistent, " default external_path");
- }
- dir = registry.Get("Patterns", "PatternPath");
-
- list<string> paths;
- NStr::Split(dir, ", tnr", paths);
- ITERATE (list<string>, iter, paths) {
- string dir_name;
- if (*iter == "<std>" || *iter == "<home>") {
- dir_name = CSystemPath::ResolvePath(*iter, "etc/patterns");
- } else {
- dir_name = CSystemPath::ResolvePath(*iter, "");
- }
- if ( dir_name.empty() ) {
- continue;
- }
-
- CDir dir(dir_name);
- if ( !dir.Exists() ) {
- continue;
- }
- CDir::TEntries entries = dir.GetEntries("*.ini");
- ITERATE (CDir::TEntries, entry_iter, entries) {
- if ( !(*entry_iter)->IsFile() ) {
- continue;
- }
-
- string full_path = (*entry_iter)->GetPath();
- CNcbiIfstream reg_stream(full_path.c_str());
- CNcbiRegistry my_reg(reg_stream);
- list<string> sections;
- string name = my_reg.Get("-", "name");
- if (!name.empty()) {
- m_FileList[name] = full_path;
- }
- }
- }
- }
- END_NCBI_SCOPE
- /*
- * ===========================================================================
- * $Log: pattern2.cpp,v $
- * Revision 1000.1 2004/06/01 20:55:26 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.9
- *
- * Revision 1.9 2004/05/21 22:27:47 gorelenk
- * Added PCH ncbi_pch.hpp
- *
- * Revision 1.8 2004/05/03 13:05:42 dicuccio
- * gui/utils --> gui/objutils where needed
- *
- * Revision 1.7 2004/03/05 17:35:37 dicuccio
- * Use sequence::GetId() instead of CSeq_id::GetStringDescr()
- *
- * Revision 1.6 2004/02/17 20:35:25 rsmith
- * moved core/settings.[ch]pp and core/system_path.[ch]pp to config and utils, respectively.
- *
- * Revision 1.5 2004/02/04 15:27:48 jcherry
- * patterns->etc/patterns
- *
- * Revision 1.4 2004/01/27 18:37:56 dicuccio
- * Code clean-up. Use standard names for plugins. Removed unnecessary #includes
- *
- * Revision 1.3 2004/01/21 23:44:45 jcherry
- * Present a menu that includes sets of patterns from all *.ini files
- * in certain directories (uses new FinalizeArgs() API)
- *
- * Revision 1.2 2004/01/07 15:50:37 dicuccio
- * Adjusted for API change in CPluginUtils::GetLabel(). Standardized exception
- * reporting in algorithms.
- *
- * Revision 1.1 2003/12/16 22:22:16 jcherry
- * Initial version
- *
- * ===========================================================================
- */