entrez_db.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:10k
- /*
- * ===========================================================================
- * PRODUCTION $Log: entrez_db.cpp,v $
- * PRODUCTION Revision 1000.0 2004/06/01 21:26:30 gouriano
- * PRODUCTION PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.1
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: entrez_db.cpp,v 1000.0 2004/06/01 21:26:30 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Authors: Mike DiCuccio
- *
- * File Description:
- * CDataPlugin_EntrezSearch - load sequence information from Genbank.
- */
- #include <ncbi_pch.hpp>
- #include "entrez_db.hpp"
- #include <objects/entrez2/Entrez2_id_list.hpp>
- #include <objects/entrez2/Entrez2_info.hpp>
- #include <objects/entrez2/Entrez2_eval_boolean.hpp>
- #include <objects/entrez2/Entrez2_boolean_reply.hpp>
- #include <objects/entrez2/Entrez2_boolean_exp.hpp>
- #include <objects/entrez2/Entrez2_boolean_element.hpp>
- #include <objects/entrez2/Entrez2_docsum.hpp>
- #include <objects/entrez2/Entrez2_docsum_list.hpp>
- #include <objects/entrez2/Entrez2_docsum_data.hpp>
- #include <objects/entrez2/Entrez2_db_id.hpp>
- #include <objects/entrez2/Entrez2_limits.hpp>
- #include <gui/core/version.hpp>
- BEGIN_NCBI_SCOPE
- USING_SCOPE(objects);
- //
- // db=nucleotide or db=protein
- //
- class CEntrezDB_NucProt : public CEntrezDBHandler
- {
- public:
- CEntrezDB_NucProt(const string& db, const string& visible_db)
- : m_DB(db),
- m_VisibleDB(visible_db) {}
- string GetDbName() const
- {
- return m_DB;
- }
- string GetVisibleDbName() const
- {
- return m_VisibleDB;
- }
- void GetHeaders(vector<SHeaderInfo>& headers) const
- {
- headers.push_back(SHeaderInfo("ID", 1.0));
- headers.push_back(SHeaderInfo("Description", 4.0));
- }
- void Format(const CEntrez2_docsum& ds,
- vector<string>& cols) const
- {
- cols.push_back(ds.GetValue("Caption"));
- cols.push_back(ds.GetValue("Title"));
- }
- // retrieve seq-ids for a given set of docsums
- TIds GetSeqIds(const CEntrez2_docsum_list& ds_list,
- TSeqIdFlags flags = fDefaults)
- {
- TIds ids;
- ITERATE (CEntrez2_docsum_list::TList, iter, ds_list.GetList()) {
- const CEntrez2_docsum& ds = **iter;
- CRef<CSeq_id> id(new CSeq_id());
- id->SetGi(ds.GetUid());
- ids.push_back(id);
- }
- return ids;
- }
- TIds GetSeqIds(const CEntrez2_docsum& ds,
- TSeqIdFlags flags = fDefaults)
- {
- TIds ids;
- CRef<CSeq_id> id(new CSeq_id());
- id->SetGi(ds.GetUid());
- ids.push_back(id);
- return ids;
- }
- private:
- string m_DB;
- string m_VisibleDB;
- };
- class CEntrezDB_Genome : public CEntrezDBHandler
- {
- public:
- string GetDbName() const
- {
- return "genome";
- }
- string GetVisibleDbName() const
- {
- return "Genome";
- }
- void GetHeaders(vector<SHeaderInfo>& headers) const
- {
- headers.push_back(SHeaderInfo("ID", 1.0));
- headers.push_back(SHeaderInfo("Description", 4.0));
- }
- void Format(const CEntrez2_docsum& ds,
- vector<string>& cols) const
- {
- cols.push_back(ds.GetValue("Caption"));
- cols.push_back(ds.GetValue("Title"));
- }
- // retrieve seq-ids for a given set of docsums
- TIds GetSeqIds(const CEntrez2_docsum_list& ds_list,
- TSeqIdFlags flags = fDefaults)
- {
- TIds ids;
- ITERATE (CEntrez2_docsum_list::TList, iter, ds_list.GetList()) {
- string str = (*iter)->GetValue("Extra");
- CRef<CSeq_id> id(new CSeq_id(str));
- if (id->Which() != CSeq_id::e_not_set) {
- ids.push_back(id);
- }
- }
- return ids;
- }
- TIds GetSeqIds(const CEntrez2_docsum& ds,
- TSeqIdFlags flags = fDefaults)
- {
- TIds ids;
- string str = ds.GetValue("Extra");
- CRef<CSeq_id> id(new CSeq_id(str));
- if (id->Which() != CSeq_id::e_not_set) {
- ids.push_back(id);
- }
- return ids;
- }
- };
- class CEntrezDB_Gene : public CEntrezDBHandler
- {
- public:
- string GetDbName() const
- {
- return "gene";
- }
- string GetVisibleDbName() const
- {
- return "Entrez Gene";
- }
- void GetHeaders(vector<SHeaderInfo>& headers) const
- {
- headers.push_back(SHeaderInfo("Symbol", 1.0));
- headers.push_back(SHeaderInfo("Description", 3.0));
- headers.push_back(SHeaderInfo("Organism", 3.0));
- }
- void Format(const CEntrez2_docsum& ds,
- vector<string>& cols) const
- {
- cols.push_back(ds.GetValue("Name"));
- cols.push_back(ds.GetValue("Description"));
- cols.push_back(ds.GetValue("Orgname"));
- }
- // retrieve seq-ids for a given set of docsums
- TIds GetSeqIds(const CEntrez2_docsum_list& ds_list,
- TSeqIdFlags flags = fDefaults)
- {
- TIds ids;
- return ids;
- }
- TIds GetSeqIds(const CEntrez2_docsum& ds,
- TSeqIdFlags flags = fDefaults)
- {
- TIds ids;
- return ids;
- }
- };
- class CEntrezDB_HomoloGene : public CEntrezDBHandler
- {
- public:
- string GetDbName() const
- {
- return "homologene";
- }
- string GetVisibleDbName() const
- {
- return "HomoloGene";
- }
- void GetHeaders(vector<SHeaderInfo>& headers) const
- {
- headers.push_back(SHeaderInfo("ID", 1.0));
- headers.push_back(SHeaderInfo("Symbol", 3.0));
- headers.push_back(SHeaderInfo("Name", 3.0));
- headers.push_back(SHeaderInfo("Organism", 3.0));
- }
- void Format(const CEntrez2_docsum& ds,
- vector<string>& cols) const
- {
- cols.push_back(ds.GetValue("GeneID"));
- cols.push_back(ds.GetValue("Symbol"));
- cols.push_back(ds.GetValue("Name"));
- cols.push_back(ds.GetValue("OrgName"));
- }
- // retrieve seq-ids for a given set of docsums
- TIds GetSeqIds(const CEntrez2_docsum_list& ds_list,
- TSeqIdFlags flags = fDefaults)
- {
- TIds ids;
- return ids;
- }
- TIds GetSeqIds(const CEntrez2_docsum& ds,
- TSeqIdFlags flags = fDefaults)
- {
- TIds ids;
- return ids;
- }
- };
- CRef<CEntrez2_docsum_list>
- CEntrezDBHandler::Query(const string& terms,
- size_t& total_uids,
- size_t start, size_t count)
- {
- try {
- CEntrez2Client& client = x_GetClient();
- CEntrez2_eval_boolean req;
- CEntrez2_boolean_exp& exp = req.SetQuery();
- // set the database we're querying
- exp.SetDb().Set(GetDbName());
- // set the query
- CRef<CEntrez2_boolean_element> elem(new CEntrez2_boolean_element());
- elem->SetStr(terms);
- exp.SetExp().push_back(elem);
- // get UIDs first, then get the docsums
- // set some limits - if num > 0, we assume it's correct
- if (start > 0) {
- exp.SetLimits().SetOffset_UIDs(start);
- }
- if (count > 0) {
- exp.SetLimits().SetMax_UIDs(count);
- }
- req.SetReturn_UIDs(true);
- CRef<CEntrez2_boolean_reply> query_res =
- x_GetClient().AskEval_boolean(req);
- if (query_res) {
- total_uids = query_res->GetCount();
- return client.AskGet_docsum(query_res->GetUids());
- }
- }
- catch (...) {
- }
- return CRef<CEntrez2_docsum_list>();
- }
- CEntrez2Client& CEntrezDBHandler::x_GetClient()
- {
- if ( !m_Client ) {
- m_Client.Reset(new CEntrez2Client());
- string tool = "Genome Workbench v.";
- tool += NStr::IntToString(CPluginVersion::eMajor);
- tool += ".";
- tool += NStr::IntToString(CPluginVersion::eMinor);
- m_Client->SetDefaultRequest().SetTool(tool);
- }
- return *m_Client;
- }
- CEntrezDBManager::CEntrezDBManager()
- {
- RegisterHandler(*new CEntrezDB_NucProt("nucleotide", "Nucleotide"));
- RegisterHandler(*new CEntrezDB_NucProt("protein", "Protein"));
- RegisterHandler(*new CEntrezDB_Gene());
- RegisterHandler(*new CEntrezDB_Genome());
- /*RegisterHandler(*new CEntrezDB_HomoloGene());*/
- }
- void CEntrezDBManager::RegisterHandler(CEntrezDBHandler& handler)
- {
- m_Handlers[handler.GetVisibleDbName()] = CRef<CEntrezDBHandler>(&handler);
- }
- IEntrezDBHandler& CEntrezDBManager::GetHandler(const string& db) const
- {
- THandlers::const_iterator iter = m_Handlers.find(db);
- if (iter != m_Handlers.end()) {
- CRef<CEntrezDBHandler> ref = iter->second;
- return *ref;
- }
- throw runtime_error(string("database not found: ") + db);
- }
- void CEntrezDBManager::GetDatabases(list<SDbEntry>& entries)
- {
- ITERATE (THandlers, iter, m_Handlers) {
- SDbEntry entry;
- entry.db_name = iter->second->GetDbName();
- entry.visible_db_name = iter->second->GetVisibleDbName();
- entries.push_back(entry);
- }
- }
- END_NCBI_SCOPE
- /*
- * ===========================================================================
- * $Log: entrez_db.cpp,v $
- * Revision 1000.0 2004/06/01 21:26:30 gouriano
- * PRODUCTION: IMPORTED [GCC34_MSVC7] Dev-tree R1.1
- *
- * Revision 1.1 2004/05/25 17:15:38 dicuccio
- * Initial revision - moved over from old GenBank search
- *
- * ===========================================================================
- */