seqvec_bench.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:11k
- /*
- * ===========================================================================
- * PRODUCTION $Log: seqvec_bench.cpp,v $
- * PRODUCTION Revision 1000.1 2004/06/01 19:42:08 gouriano
- * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.2
- * PRODUCTION
- * ===========================================================================
- */
- /* $Id: seqvec_bench.cpp,v 1000.1 2004/06/01 19:42:08 gouriano Exp $
- * ===========================================================================
- *
- * PUBLIC DOMAIN NOTICE
- * National Center for Biotechnology Information
- *
- * This software/database is a "United States Government Work" under the
- * terms of the United States Copyright Act. It was written as part of
- * the author's official duties as a United States Government employee and
- * thus cannot be copyrighted. This software/database is freely available
- * to the public for use. The National Library of Medicine and the U.S.
- * Government have not placed any restriction on its use or reproduction.
- *
- * Although all reasonable efforts have been taken to ensure the accuracy
- * and reliability of the software and data, the NLM and the U.S.
- * Government do not and cannot warrant the performance or results that
- * may be obtained by using this software or data. The NLM and the U.S.
- * Government disclaim all warranties, express or implied, including
- * warranties of performance, merchantability or fitness for any particular
- * purpose.
- *
- * Please cite the author in any work or product based on this material.
- *
- * ===========================================================================
- *
- * Author: Mike DiCuccio
- *
- * File Description:
- * Performance tests for various iterators
- */
- #include <ncbi_pch.hpp>
- #include <corelib/ncbistd.hpp>
- #include <corelib/ncbiapp.hpp>
- #include <corelib/ncbienv.hpp>
- #include <corelib/ncbiargs.hpp>
- #include <corelib/ncbitime.hpp>
- #include <connect/ncbi_core_cxx.hpp>
- // Object Manager includes
- #include <objmgr/bioseq_handle.hpp>
- #include <objmgr/seq_vector.hpp>
- #include <objmgr/scope.hpp>
- #include <objmgr/object_manager.hpp>
- #include <objtools/data_loaders/genbank/gbloader.hpp>
- #include <serial/serial.hpp>
- #include <serial/objistrasn.hpp>
- #include <util/md5.hpp>
- #include <vector>
- using namespace ncbi;
- using namespace objects;
- /////////////////////////////////////////////////////////////////////////////
- //
- // Demo application
- //
- class CSeqVecBench : public CNcbiApplication
- {
- public:
- virtual void Init(void);
- virtual int Run (void);
- void start(const char* name);
- void start_iter(void);
- void end_iter(void);
- void end(void);
- void end_all(void);
- CStopWatch sw;
- vector<string> names;
- vector<double> times;
- bool get_best;
- unsigned test_count;
- unsigned counter;
- static int result_mask;
- };
- void CSeqVecBench::Init(void)
- {
- // Prepare command line descriptions
- auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
- // GI to fetch
- arg_desc->AddOptionalKey("id", "Accession",
- "sequence id to load and test",
- CArgDescriptions::eString);
- arg_desc->AddDefaultKey("iters", "Iterations",
- "number of iterations to run",
- CArgDescriptions::eInteger, "5");
- arg_desc->AddFlag("average", "collect average time instead of minumum");
- arg_desc->AddFlag("iupac", "test Iupac coding");
- arg_desc->AddFlag("minus", "test minus strand of sequence");
- // Pass argument descriptions to the application
- //
- SetupArgDescriptions(arg_desc.release());
- }
- void CSeqVecBench::start(const char* n)
- {
- names.push_back(n);
- times.push_back(0);
- cout << "Running test: " << names.back() << "..." << endl;
- test_count = 0;
- }
- inline
- void CSeqVecBench::start_iter(void)
- {
- counter = 0;
- sw.Start();
- }
- inline
- void CSeqVecBench::end_iter(void)
- {
- double time = sw.Elapsed();
- if ( get_best ) {
- if ( test_count == 0 || time < times.back() ) {
- times.back() = time;
- test_count = 1;
- }
- }
- else {
- times.back() += time;
- test_count += 1;
- }
- }
- void CSeqVecBench::end(void)
- {
- times.back() /= test_count;
- cout << setw(40) << names.back() << " : " <<
- counter << " bases in " << times.back() << " secs" << endl;
- }
- void CSeqVecBench::end_all(void)
- {
- cout << endl << "Normalized to "<<names.front()<<":" << endl;
- for ( size_t i = 1; i < names.size(); ++i ) {
- cout << setw(40) << names[i] << " : " << (times[i] / times[0]) << endl;
- }
- }
- int CSeqVecBench::result_mask = 0;
- int CSeqVecBench::Run(void)
- {
- CArgs args = GetArgs();
- CSeq_id id(args["id"].AsString());
- if (id.Which() == CSeq_id::e_not_set) {
- LOG_POST(Fatal << "seq id " << args["id"].AsString()
- << " not recognized");
- }
- // Setup application registry, error log, and MT-lock for CONNECT library
- CONNECT_Init(&GetConfig());
- CRef<CObjectManager> object_manager(new CObjectManager());
- object_manager->RegisterDataLoader(*new CGBDataLoader(),
- CObjectManager::eDefault);
- CScope scope(*object_manager);
- scope.AddDefaults();
- CBioseq_Handle handle = scope.GetBioseqHandle(id);
- if ( !handle ) {
- LOG_POST(Fatal << "failed to retrieve sequence "
- << args["id"].AsString());
- }
- // number of iterations we perform
- int iters = args["iters"].AsInteger();
- CBioseq_Handle::EVectorCoding coding = args["iupac"]?
- CBioseq_Handle::eCoding_Iupac: CBioseq_Handle::eCoding_Ncbi;
- CBioseq_Handle::EVectorStrand strand = args["minus"]?
- CBioseq_Handle::eStrand_Minus: CBioseq_Handle::eStrand_Plus;
- get_best = !args["average"];
- //
- // we ignore the first iteration, as additional fetching may occur here
- //
- {{
- cout << "forcing retrieval of whole sequence..." << endl;
- sw.Start();
- string sequence;
- CSeqVector vec = handle.GetSeqVector(coding, strand);
- vec.GetSeqData(0, vec.size(), sequence);
- cout << " sequence is " << sequence.length() << " bases" << endl;
- CMD5 sum;
- sum.Update(sequence.data(), sequence.size());
- cout << " md5 sum is: " << sum.GetHexSum() << endl;
- double load_time = sw.Elapsed();
- cout << " loaded in " << load_time << " seconds" << endl;
- }}
- unsigned char mask = 0;
- int i;
- //
- // test 4: bulk retrieval
- //
- start("only string::const_iterator");
- {{
- CSeqVector vec = handle.GetSeqVector(coding, strand);
- string str;
- vec.GetSeqData(0, vec.size(), str);
- const int mul1 = 1;
- const int mul2 = 3;
- for (i = iters*mul1; i; --i) {
- start_iter();
- for ( int j = 0; j < mul2; ++j ) {
- ITERATE(string, iter, str) {
- mask ^= *iter;
- ++counter;
- }
- }
- end_iter();
- }
- counter /= mul2;
- times.back() /= mul2;
- }}
- end();
- //
- // test 1: iterate CSeqVector using operator[]
- //
- start("CSeqVector::operator[]");
- for (i = 0; i < iters; ++i) {
- start_iter();
- CSeqVector vec = handle.GetSeqVector(coding, strand);
- for (size_t j = 0; j < vec.size(); ++j, ++counter) {
- mask ^= vec[j];
- }
- end_iter();
- }
- end();
- //
- // test 1a: iterate CSeqVector using operator[]
- //
- start("double CSeqVector::operator[]");
- for (i = 0; i < iters; ++i) {
- start_iter();
- CSeqVector vec = handle.GetSeqVector(coding, strand);
- for (size_t j = 0; j < vec.size(); ++j, ++counter) {
- mask ^= vec[j];
- mask ^= vec[j];
- }
- end_iter();
- }
- end();
- //
- // test 2: bulk retrieval
- //
- start("GetSeqData() + string::operator[]");
- for (i = 0; i < iters; ++i) {
- start_iter();
- CSeqVector vec = handle.GetSeqVector(coding, strand);
- string str;
- vec.GetSeqData(0, vec.size(), str);
- for (size_t j = 0; j < str.size(); ++j, ++counter) {
- mask ^= str[j];
- }
- end_iter();
- }
- end();
- //
- // test 3: bulk retrieval
- //
- start("GetSeqData() + string::const_iterator");
- for (i = 0; i < iters; ++i) {
- start_iter();
- CSeqVector vec = handle.GetSeqVector(coding, strand);
- string str;
- vec.GetSeqData(0, vec.size(), str);
- ITERATE(string, iter, str) {
- mask ^= *iter;
- ++counter;
- }
- end_iter();
- }
- end();
- //
- // test 4: bulk retrieval
- //
- start("only string::const_iterator");
- {{
- CSeqVector vec = handle.GetSeqVector(coding, strand);
- string str;
- vec.GetSeqData(0, vec.size(), str);
- const int mul1 = 1;
- const int mul2 = 3;
- for (i = iters*mul1; i; --i) {
- start_iter();
- for ( int j = 0; j < mul2; ++j ) {
- ITERATE(string, iter, str) {
- mask ^= *iter;
- ++counter;
- }
- }
- end_iter();
- }
- counter /= mul2;
- times.back() /= mul2;
- }}
- end();
- //
- // test 5: CSeqVector_CI
- //
- start("CSeqVector_CI");
- for (i = 0; i < iters; ++i) {
- start_iter();
- CSeqVector vec = handle.GetSeqVector(coding, strand);
- for ( CSeqVector_CI iter(vec); iter; ++iter, ++counter ) {
- mask ^= *iter;
- }
- end_iter();
- }
- end();
- //
- // test 6: ITERATE
- //
- start("ITERATE");
- for (i = 0; i < iters; ++i) {
- start_iter();
- CSeqVector vec = handle.GetSeqVector(coding, strand);
- ITERATE ( CSeqVector, iter, vec ) {
- mask ^= *iter;
- ++counter;
- }
- end_iter();
- }
- end();
- end_all();
- result_mask ^= mask;
-
- return 0;
- }
- /////////////////////////////////////////////////////////////////////////////
- // MAIN
- int main(int argc, const char* argv[])
- {
- return CSeqVecBench().AppMain(argc, argv, 0, eDS_Default, 0);
- }
- /*
- * ===========================================================================
- * $Log: seqvec_bench.cpp,v $
- * Revision 1000.1 2004/06/01 19:42:08 gouriano
- * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.2
- *
- * Revision 1.2 2004/05/21 21:42:52 gorelenk
- * Added PCH ncbi_pch.hpp
- *
- * Revision 1.1 2003/12/16 17:51:16 kuznets
- * Code reorganization
- *
- * Revision 1.2 2003/10/22 17:57:45 vasilche
- * Some code cleaning.
- * Added '-average' option.
- *
- * Revision 1.1 2003/08/29 13:34:48 vasilche
- * Rewrote CSeqVector/CSeqVector_CI code to allow better inlining.
- * CSeqVector::operator[] made significantly faster.
- * Added possibility to have platform dependent byte unpacking functions.
- *
- * Revision 1.1 2003/08/09 13:13:06 dicuccio
- * Initial revision
- *
- * ===========================================================================
- */