cpg_islands.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:10k
- /*
- * ===========================================================================
- * PRODUCTION $Log: cpg_islands.cpp,v $
- * PRODUCTION Revision 1000.5 2004/06/01 20:54:52 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.20
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: cpg_islands.cpp,v 1000.5 2004/06/01 20:54:52 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Author: Philip Johnson
- *
- * File Description: cpg_island -- workbench algorithm plugin for
- * finding CpG islands
- *
- */
- #include <ncbi_pch.hpp>
- #include "cpg_islands.hpp"
- #include <algo/sequence/cpg.hpp>
- #include <gui/core/doc_manager.hpp>
- #include <gui/core/idocument.hpp>
- #include <gui/core/plugin_utils.hpp>
- #include <gui/core/version.hpp>
- #include <gui/plugin/PluginCommandSet.hpp>
- #include <gui/plugin/PluginInfo.hpp>
- #include <gui/plugin/PluginRequest.hpp>
- #include <gui/plugin/PluginValueConstraint.hpp>
- #include <gui/objutils/utils.hpp>
- #include <objects/seq/Seq_annot.hpp>
- #include <objects/seqfeat/Seq_feat.hpp>
- #include <objects/seqloc/Seq_interval.hpp>
- #include <serial/iterator.hpp>
- #include <objmgr/util/sequence.hpp>
- #include <objmgr/seq_vector.hpp>
- BEGIN_NCBI_SCOPE
- USING_SCOPE(objects);
- CAlgoPlugin_CpGIslands::~CAlgoPlugin_CpGIslands()
- {
- }
- // standard info boilerplate
- void CAlgoPlugin_CpGIslands::GetInfo(CPluginInfo& info)
- {
- info.Reset();
- // version info macro
- info.SetInfo(CPluginVersion::eMajor, CPluginVersion::eMinor, 0,
- string(__DATE__) + " " + string(__TIME__),
- "CAlgoPlugin_CpGIslands", "Composition/CpG islands",
- "Scans for regions of unusually high CpG density.",
- "");
- // command info
- CPluginCommandSet& cmds = info.SetCommands();
- CPluginCommand& args = cmds.AddAlgoCommand(eAlgoCommand_run);
- args.AddDefaultArgument("windowsize", "Size of sliding window",
- CPluginArg::eInteger, "200");
- args.AddDefaultArgument("minlen", "Minimum length of island",
- CPluginArg::eInteger, "500");
- args.AddDefaultArgument("gc", "Minimum %G+%C",
- CPluginArg::eDouble, "0.5");
- args.AddDefaultArgument("cpg", "Minimum observed/expected CpG ratio",
- CPluginArg::eDouble, "0.6");
- args.AddOptionalArgument("merge", "Merge adjacent islands within XXX",
- CPluginArg::eInteger);
- args.AddArgument("locs", "Locations to scan", CSeq_loc::GetTypeInfo(),
- CPluginArg::TData::e_Array);
- args.SetConstraint("locs",
- (*CPluginValueConstraint::CreateSeqMol(),
- CSeq_inst::eMol_na,
- CSeq_inst::eMol_dna,
- CSeq_inst::eMol_rna));
- }
- //-----------------------------------------------------------------------------
- // PRE :
- // POST:
- void CAlgoPlugin_CpGIslands::RunCommand(CPluginMessage& msg)
- {
- const CPluginCommand& args = msg.GetRequest().GetCommand();
- CPluginReply& reply = msg.SetReply();
- if ( !m_Dialog.get() ) {
- m_Dialog.reset(new CMultiColDlg());
- m_Dialog->SetTitle("CpG Islands");
- m_Dialog->SetColumn(0, "Sequence", FL_ALIGN_LEFT, 0.25f);
- m_Dialog->SetColumn(1, "Location", FL_ALIGN_LEFT, 0.50f);
- m_Dialog->SetColumn(2, "%GC", FL_ALIGN_CENTER, 0.25f);
- }
- size_t sum_isles = 0;
- size_t row = 0;
- plugin_args::TLocList locs;
- GetArgValue(args["locs"], locs);
- ITERATE (plugin_args::TLocList, iter, locs) {
- const CSeq_loc& loc = *iter->second;
- const IDocument& doc = *iter->first;
- //first get bases
- CBioseq_Handle handle = doc.GetScope().GetBioseqHandle(loc);
- CSeqVector sv = handle
- .GetSequenceView(loc, CBioseq_Handle::eViewConstructed,
- CBioseq_Handle::eCoding_Iupac);
- string data;
- sv.GetSeqData(0, sv.size(), data);
- //find islands
- CCpGIslands isles(data.data(), data.size(),
- args["windowsize"].AsInteger(),
- args["minlen"].AsInteger(),
- args["gc"].AsDouble(),
- args["cpg"].AsDouble());
- //report islands
- if (!isles.GetIsles().empty()) {
- sum_isles += isles.GetIsles().size();
- string& id_str = m_Dialog->SetCell(row, 0);
- string& loc_str = m_Dialog->SetCell(row, 1);
- const CSeq_id& best_id =
- sequence::GetId(handle, sequence::eGetId_Best);
- id_str.erase();
- best_id.GetLabel(&id_str);
- loc_str = CPluginUtils::GetLabel(loc, &doc.GetScope());
- CRef<CSeq_annot> sa(new CSeq_annot);
- sa->AddName("CpG islands");
- sa->AddTitle(string("CpG islands on ") + loc_str);
- CNcbiOstrstream oss;
- oss << "windowsize: " << args["windowsize"].AsInteger()
- << ", minlen: " << args["minlen"].AsInteger()
- << ", min %gc: " << args["gc"].AsDouble()
- << ", min observed/expected CpG: "
- << args["cpg"].AsDouble();
- sa->AddComment(CNcbiOstrstreamToString(oss));
- CSeq_annot::C_Data::TFtable &feats = sa->SetData().SetFtable();
- ITERATE (CCpGIslands::TIsles, i, isles.GetIsles()) {
- CRef<CSeq_feat> feat(new CSeq_feat);
- CSeq_interval &seqInt = feat->SetLocation().SetInt();
- seqInt.SetFrom(i->m_Start);
- seqInt.SetTo(i->m_Stop);
- seqInt.SetId().Assign(sequence::GetId(loc));
- CRef<CSeq_loc> new_loc =
- CSeqUtils::RemapChildToParent(loc, feat->GetLocation());
- feat->SetLocation(*new_loc);
- size_t size = (i->m_Stop - i->m_Start);
- size_t pct_gc = (i->m_C + i->m_G) * 100 / size;
- string& gc_str = m_Dialog->SetCell(row, 2);
- gc_str = NStr::IntToString(pct_gc);
- string& label = feat->SetData().SetRegion();
- label = "CpG island: " + gc_str;
- label += "% GC, ";
- label += NStr::IntToString(size);
- label += " bases";
- feats.push_back(feat);
- ++row;
- }
- reply.AddObject(doc, *sa);
- }
- }
- if (reply.GetRaw().size() != 0) {
- reply.AddAction(CPluginReplyAction::e_Add_to_document);
- }
- string label("Found ");
- label += NStr::IntToString(sum_isles);
- label += " isles on ";
- label += NStr::IntToString(locs.size());
- label += " locations:";
- m_Dialog->SetLabel(label);
- m_Dialog->SetRows(row);
- m_Dialog->Show();
- reply.SetStatus(eMessageStatus_success);
- }
- END_NCBI_SCOPE
- /*
- * ---------------------------------------------------------------------------
- * $Log: cpg_islands.cpp,v $
- * Revision 1000.5 2004/06/01 20:54:52 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.20
- *
- * Revision 1.20 2004/05/21 22:27:46 gorelenk
- * Added PCH ncbi_pch.hpp
- *
- * Revision 1.19 2004/05/03 13:05:42 dicuccio
- * gui/utils --> gui/objutils where needed
- *
- * Revision 1.18 2004/03/05 17:35:37 dicuccio
- * Use sequence::GetId() instead of CSeq_id::GetStringDescr()
- *
- * Revision 1.17 2004/01/27 18:37:36 dicuccio
- * Code clean-up. Use standard names for plugins. Removed unnecessary #includes
- *
- * Revision 1.16 2004/01/07 15:50:36 dicuccio
- * Adjusted for API change in CPluginUtils::GetLabel(). Standardized exception
- * reporting in algorithms.
- *
- * Revision 1.15 2003/12/12 20:07:08 johnson
- * accommodate MSVC 7 refactoring
- *
- * Revision 1.14 2003/11/24 15:45:25 dicuccio
- * Renamed CVersion to CPluginVersion
- *
- * Revision 1.13 2003/11/18 17:48:36 dicuccio
- * Added standard processing of return values
- *
- * Revision 1.12 2003/11/06 20:12:12 dicuccio
- * Cleaned up handling of USING_SCOPE - removed from all headers
- *
- * Revision 1.11 2003/11/04 17:49:22 dicuccio
- * Changed calling parameters for plugins - pass CPluginMessage instead of paired
- * CPluginCommand/CPluginReply
- *
- * Revision 1.10 2003/10/14 12:52:26 dicuccio
- * Fixed remapping of relative locations
- *
- * Revision 1.9 2003/10/07 13:47:00 dicuccio
- * Renamed CPluginURL* to CPluginValue*
- *
- * Revision 1.8 2003/09/04 14:05:24 dicuccio
- * Use IDocument instead of CDocument
- *
- * Revision 1.7 2003/09/03 14:46:53 rsmith
- * change namespace name from args to plugin_args to avoid clashes with variable names.
- *
- * Revision 1.6 2003/08/21 12:02:31 dicuccio
- * Added dialog box to display results. Changed formatting of feature label
- *
- * Revision 1.5 2003/07/22 15:32:16 dicuccio
- * Changed to make use of new API in plugin_utils.hpp - GetArgValue()
- *
- * Revision 1.4 2003/07/21 19:32:53 dicuccio
- * Added constraints based on molecule type
- *
- * Revision 1.3 2003/07/19 13:53:35 ucko
- * Use CNcbiOstrstream rather than ostringstream, which GCC 2.9x lacks.
- *
- * Revision 1.2 2003/07/17 16:22:08 johnson
- * bug fix for msvc
- *
- * Revision 1.1 2003/07/16 17:19:40 johnson
- * Initial revision
- *
- * ===========================================================================
- */