test_pin.cpp
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:40k
源码类别:

生物技术

开发平台:

C/C++

  1. /*
  2.  * ===========================================================================
  3.  * PRODUCTION $Log: test_pin.cpp,v $
  4.  * PRODUCTION Revision 1000.1  2004/06/01 19:47:01  gouriano
  5.  * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.19
  6.  * PRODUCTION
  7.  * ===========================================================================
  8.  */
  9. #include <ncbi_pch.hpp>
  10. #include <objtools/readers/seqdb/seqdb.hpp>
  11. #include <serial/serial.hpp>
  12. #include <serial/objostr.hpp>
  13. #include <iostream>
  14. #include <string>
  15. #include <corelib/ncbimtx.hpp>
  16. #include <objmgr/util/sequence.hpp>
  17. #include <objects/seqloc/Seq_id.hpp>
  18. #include <sys/types.h>
  19. #include <sys/wait.h>
  20. #include <assert.h>
  21. USING_NCBI_SCOPE;
  22. #include <sys/time.h>
  23. #include <unistd.h>
  24. inline double dbl_time(void)
  25. {
  26.     struct timeval tv;
  27.     gettimeofday(& tv, 0);
  28.     
  29.     return tv.tv_sec + double(tv.tv_usec) / 1000000.0;
  30. }
  31. struct charbox {
  32.     char xyz[10234];
  33. };
  34. //#include "thr_test.cpp"
  35. int hang10()
  36. {
  37.     if (long(& dbl_time) == 1102000L) {
  38.         cout << "inconceivable!" << endl;
  39.     }
  40.     return 0;
  41. }
  42. class CAccelThread {
  43. public:
  44.     CAccelThread(CSeqDB & db)
  45.         : _db(db)
  46.     {
  47.     }
  48.     
  49.     void RunInline(void)
  50.     {
  51. //         _pid = fork();
  52.         
  53. //         if (! _pid) {
  54.             _Iterate();
  55.             exit(0);
  56. //         }
  57.     }
  58.     
  59.     void Run(void)
  60.     {
  61.         _pid = fork();
  62.         
  63.         if (! _pid) {
  64.             _Iterate();
  65.             exit(0);
  66.         }
  67.     }
  68.     
  69.     void Wait(void)
  70.     {
  71.         if (_pid) {
  72.             waitpid(_pid, 0, 0);
  73.             _pid = 0;
  74.         }
  75.     }
  76.     
  77. private:
  78.     void _Iterate(void)
  79.     {
  80.         Uint4 oidx = 0;
  81.         
  82.         try {
  83.             Uint4 oid = 0;
  84.             const char * buf = 0;
  85.         
  86.             Uint4 hashesque = 0;
  87.         
  88.             Uint4 part = _db.GetNumSeqs() / 40;
  89.             Uint4 parts = part;
  90.         
  91.             cout << "ITER:" << flush;
  92.         
  93.             while(_db.CheckOrFindOID(oid)) {
  94.                 if (oid > parts) {
  95.                     parts += part;
  96.                     cout << "+" << flush;
  97.                 }
  98.                 
  99.                 /*Uint4 oid_len =*/ _db.GetSequence(oid, & buf);
  100.                 
  101. //                 for(Uint4 ii = 0; ii < oid_len; ii += 511) {
  102. //                     hashesque += buf[ii];
  103. //                 }
  104.             
  105.                 _db.RetSequence(& buf);
  106.             
  107.                 oid ++;
  108.                 oidx = oid;
  109.                 
  110.                 if (oid > 2763324) {
  111.                     x_PossiblyStopMaybe();
  112.                 }
  113.             }
  114.             _hash = hashesque;
  115.             cout << "!" << endl;
  116.         }
  117.         catch(...) {
  118.             cout << "***" << oidx << "-" << _db.GetNumSeqs() << endl;
  119.         }
  120.     }
  121.     
  122.     void x_PossiblyStopMaybe(void)
  123.     {
  124.         //cout << " It is not widely known that I once leveled Rome. " << endl;
  125.     }
  126.     
  127.     CSeqDB & _db;
  128.     Uint4    _hash;
  129.     Uint4    _pid;
  130. };
  131. int test1(int argc, char ** argv)
  132. {
  133.     string dbpath = "/net/fridge/vol/export/blast/db/blast";
  134.     
  135.     list<string> args;
  136.     
  137.     while(argc > 1) {
  138.         args.push_front(string(argv[--argc]));
  139.     }
  140.     
  141.     bool use_mm        = true;
  142.     bool deletions     = true;
  143.     Int8 num_display   = -1;
  144.     Int4 num_itera     = 1;
  145.     bool look_seq      = false;
  146.     bool show_bioseq   = false;
  147.     bool show_fasta    = false;
  148.     bool get_bioseq    = false;
  149.     bool show_progress = true;
  150.     bool approx        = true;
  151.     bool accel_thread  = false;
  152.     bool accel_inline  = false;
  153.     
  154.     string dbname("nr");
  155.     char seqtype = kSeqTypeProt;
  156.     
  157.     bool failed      = false;
  158.     
  159.     while(! args.empty()) {
  160.         string desc;
  161.         
  162.         string s = args.front();
  163.         args.pop_front();
  164.         
  165.         if (s == "-accel1") {
  166.             s = "-accel2";
  167.             accel_thread = true;
  168.         } else desc += " [-accel1]";
  169.         
  170.         if (s == "-accel3") {
  171.             s = "-accel2";
  172.             accel_thread = true;
  173.             accel_inline = true;
  174.         } else desc += " [-accel1]";
  175.         
  176.         if (s == "-accel2") {
  177.             CSeqDB nt("nr", 'p');
  178.             
  179.             CAccelThread accel(nt);
  180.             
  181.             vector<Uint4> V;
  182.             
  183.             Uint4 numseq = nt.GetNumSeqs();
  184.             
  185.             for(Uint4 i = 0; i < numseq; i+=2) {
  186.                 // Pick a random location in the array; push the
  187.                 // value at that location onto the end.
  188.                 
  189.                 // Put i into the old location for that value.
  190.                 
  191.                 V.push_back(0);
  192.                 
  193.                 Uint4 loc = (rand() + (rand() << 16)) % V.size();
  194.                 
  195.                 V[i/2] = V[loc];
  196.                 V[loc] = i/2;
  197.             }
  198.             
  199.             double t_s(dbl_time());
  200.             
  201.             if (accel_thread) {
  202.                 if (accel_inline) {
  203.                     accel.RunInline();
  204.                 } else {
  205.                     accel.Run();
  206.                 }
  207.             }
  208.             
  209.             Uint4 part = V.size() / 40;
  210.             Uint4 parts = part;
  211.             
  212.             cout << "iter:" << flush;
  213.             
  214.             for(Uint4 i = 0; i < V.size(); i++) {
  215.                 if (i > parts) {
  216.                     parts += part;
  217.                     cout << "-" << flush;
  218.                 }
  219.                 
  220.                 Uint4 oid = V[i];
  221.                 
  222.                 const char * buf = 0;
  223.                 nt.GetSequence(oid, & buf);
  224.                 nt.RetSequence(& buf);
  225.             }
  226.             
  227.             double t_e(dbl_time());
  228.             
  229.             cout << "#" << endl;
  230.             cout << "time elapsed: " << (t_e - t_s) << endl;
  231.             
  232.             
  233.             double w_s(dbl_time());
  234.             
  235.             accel.Wait();
  236.             
  237.             double w_e(dbl_time());
  238.             
  239.             cout << "time elapsed: " << (w_e - w_s) << endl;
  240.             
  241.             return 0;
  242.         } else desc += " [-accel2]";
  243.         
  244.         if (s == "-fromcpp") {
  245.             const char * ge = getenv("BLASTDB");
  246.             string pe("BLASTDB=/home/bealer/code/ut/internal/blast/unit_test/data:" + string(ge ? ge : ""));
  247.             putenv((char*)pe.c_str());
  248.             
  249.             CSeqDB local1("seqp", 'p');
  250.             CSeqDB local2("seqp", 'p', 0, 0, false);
  251.             
  252.             Int4 num1 = local1.GetNumSeqs();
  253.             Int4 num2 = local1.GetNumSeqs();
  254.         
  255.             assert(num1 >= 1);
  256.             assert(num1 == num2);
  257.             
  258.             cout << "Test worked." << endl;
  259.             return 0;
  260.         } else desc += " [-fromcpp]";
  261.         
  262.         if (s == "-alphabeta") {
  263.             
  264.             // Note: this test is NOT EXPECTED to work, unless the
  265.             // user has databases named "alpha" and "beta" somewhere
  266.             // in their path.
  267.             
  268.             CSeqDB ab("alpha beta", 'p');
  269.             
  270.             for(Uint4 i = 0; i < ab.GetNumSeqs(); i++) {
  271.                 cout << "-----seq " << i << " length " << ab.GetSeqLength(i) << " ------------" << endl;
  272.             }
  273.             
  274.             return 0;
  275.         } else desc += " [-alphabeta]";
  276.         
  277.         if (s == "-here") {
  278.             CSeqDB nr("tenth", 'p');
  279.             
  280.             for(int i = 0; i<100; i++) {
  281.                 CRef<CBioseq> bs = nr.GetBioseq(i);
  282.                 
  283.                 cout << "-----seq " << i << "------------" << endl;
  284.                 
  285.                 auto_ptr<CObjectOStream>
  286.                     outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  287.                 
  288.                 *outpstr << *bs;
  289.             }
  290.             
  291.             return 0;
  292.         } else desc += " [-here]";
  293.         
  294.         if (s == "-dyn") {
  295.             CSeqDB db("nr", 'p');
  296.             
  297.             cout << "Num oids: " << db.GetNumSeqs() << endl;
  298.             
  299.             char * buf1 = 0;
  300.             
  301.             Int4 len = db.GetAmbigSeqAlloc(10,
  302.                                            & buf1,
  303.                                            kSeqDBNuclBlastNA8,
  304.                                            eNew);
  305.             
  306.             cout << "Length (10): " << len << endl;
  307.             
  308.             delete[] buf1;
  309.             
  310.             return 0;
  311.         } else desc += " [-dyn]";
  312.         
  313.         if (s == "-limit") {
  314.             {
  315.                 CSeqDB db("/home/bealer/seqdb/tenth", 'p', 10, 20, true);
  316.                 
  317.                 cout << "Num oids: " << db.GetNumSeqs() << endl;
  318.                 
  319.                 CSeqDBIter i = db.Begin();
  320.                 
  321.                 while(i) {
  322.                     CRef<CBioseq> bs = db.GetBioseq(i.GetOID());
  323.                     
  324.                     cout << "-----seq "
  325.                          << i.GetOID() << " length "
  326.                          << i.GetLength() << "-------" << endl;
  327.                     
  328.                     ++i;
  329.                 }
  330.             }
  331.             {
  332.                 CSeqDB db("swissprot", 'p', 135, 175, true);
  333.                 
  334.                 cout << "Num oids: " << db.GetNumSeqs() << endl;
  335.                 
  336.                 CSeqDBIter i = db.Begin();
  337.                 
  338.                 while(i) {
  339.                     CRef<CBioseq> bs = db.GetBioseq(i.GetOID());
  340.                     
  341.                     cout << "-----seq "
  342.                          << i.GetOID() << " length "
  343.                          << i.GetLength() << "-------" << endl;
  344.                     
  345.                     ++i;
  346.                 }
  347.             }
  348.             
  349.             return 0;
  350.         } else desc += " [-local]";
  351.         
  352.         if (s == "-local") {
  353.             CSeqDB nr("/home/bealer/seqdb/tenth", 'p');
  354.             
  355.             for(int i = 0; i<100; i++) {
  356.                 CRef<CBioseq> bs = nr.GetBioseq(i);
  357.                 
  358.                 cout << "-----seq " << i << "------------" << endl;
  359.                 
  360.                 auto_ptr<CObjectOStream>
  361.                     outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  362.                 
  363.                 *outpstr << *bs;
  364.             }
  365.             
  366.             return 0;
  367.         } else desc += " [-local]";
  368.         
  369.         if (s == "-seqids") {
  370.             CSeqDB nr(/*dbpath,*/ "nr", 'p');
  371.             
  372.             for(int i = 0; i<100; i++) {
  373.                 list< CRef<CSeq_id> > seqids =
  374.                     nr.GetSeqIDs(i);
  375.                 
  376.                 cout << "-----seq " << i << "------------" << endl;
  377.                 
  378.                 for(list< CRef<CSeq_id> >::iterator j = seqids.begin();
  379.                     j != seqids.end();
  380.                     j++) {
  381.                     
  382.                     cout << "SEQID----*:" << endl;
  383.                     
  384.                     auto_ptr<CObjectOStream>
  385.                         outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  386.                     
  387.                     CRef<CSeq_id> ident = *j;
  388.                     
  389.                     *outpstr << *ident;
  390.                 }
  391.             }
  392.             
  393.             return 0;
  394.         } else desc += " [-seqids]";
  395.         
  396.         if (s == "-memtest") {
  397.             CSeqDB nt("nt", 'n', 0, 0, false);
  398.             
  399.             Uint4 oid = 0;
  400.             
  401.             for(int i = 0; i<100; i++) {
  402.                 const char * buf(0);
  403.                 
  404.                 if (! nt.CheckOrFindOID(oid))
  405.                     break;
  406.                 
  407.                 cout << "-----------------" << endl;
  408.                 
  409.                 if (1) {
  410.                     int length = nt.GetSequence(oid, & buf);
  411.                     
  412.                     cout << "NT OID = " << oid << ", length is " << length << endl;
  413.                     
  414.                     int y = (length > 16) ? 16 : length;
  415.                     
  416.                     for(int x = 0; x < y; x++) {
  417.                         if ((x & 3) == 0)
  418.                             cout << " ";
  419.                         
  420.                         if (unsigned(buf[x]) < 0x10)
  421.                             cout << "0";
  422.                         
  423.                         cout << hex << (unsigned(buf[x]) & 0xFF) << " ";
  424.                     }
  425.                     
  426.                     cout << dec << "n";
  427.                     nt.RetSequence(& buf);
  428.                 }
  429.                 
  430.                 if (1) {
  431.                     int length = nt.GetAmbigSeq(oid, & buf, kSeqDBNuclNcbiNA8);
  432.                     
  433.                     int y = (length > 16) ? 16 : length;
  434.                 
  435.                     for(int x = 0; x < y; x++) {
  436.                         if ((x & 3) == 0)
  437.                             cout << " ";
  438.                     
  439.                         if (unsigned(buf[x]) < 0x10)
  440.                             cout << "0";
  441.                     
  442.                         cout << hex << (unsigned(buf[x]) & 0xFF) << " ";
  443.                     }
  444.                     
  445.                     cout << dec << "n";
  446.                     
  447.                     nt.RetSequence(& buf);
  448.                 }
  449.                 
  450.                 if (1) {
  451.                     int length = nt.GetAmbigSeq(oid, & buf, kSeqDBNuclBlastNA8);
  452.                 
  453.                     int y = (length > 16) ? 16 : length;
  454.                 
  455.                     for(int x = 0; x < y; x++) {
  456.                         if ((x & 3) == 0)
  457.                             cout << " ";
  458.                     
  459.                         if (unsigned(buf[x]) < 0x10)
  460.                             cout << "0";
  461.                     
  462.                         cout << hex << (unsigned(buf[x]) & 0xFF) << " ";
  463.                     }
  464.                     
  465.                     cout << dec << "nn";
  466.                     
  467.                     nt.RetSequence(& buf);
  468.                 }
  469.                 
  470.                 oid++;
  471.             }
  472.             
  473.             return 0;
  474.         } else desc += " [-memtest]";
  475.         
  476.         if (s == "-getambig") {
  477.             CSeqDB nt(/*dbpath,*/ "nt", 'n', 0, 0, false);
  478.             
  479.             Uint4 oid = 0;
  480.             
  481.             for(int i = 0; i<100; i++) {
  482.                 const char * buf(0);
  483.                 
  484.                 if (! nt.CheckOrFindOID(oid))
  485.                     break;
  486.                 
  487.                 cout << "-----------------" << endl;
  488.                 
  489.                 {
  490.                     int length = nt.GetSequence(oid, & buf);
  491.                     
  492.                     cout << "NT OID = " << oid << ", length is " << length << endl;
  493.                     
  494.                     int y = (length > 16) ? 16 : length;
  495.                     
  496.                     for(int x = 0; x < y; x++) {
  497.                         if ((x & 3) == 0)
  498.                             cout << " ";
  499.                         
  500.                         if (unsigned(buf[x]) < 0x10)
  501.                             cout << "0";
  502.                         
  503.                         cout << hex << (unsigned(buf[x]) & 0xFF) << " ";
  504.                     }
  505.                     
  506.                     cout << dec << "n";
  507.                     nt.RetSequence(& buf);
  508.                 }
  509.                 
  510.                 {
  511.                     int length = nt.GetAmbigSeq(oid, & buf, kSeqDBNuclNcbiNA8);
  512.                 
  513.                     int y = (length > 16) ? 16 : length;
  514.                 
  515.                     for(int x = 0; x < y; x++) {
  516.                         if ((x & 3) == 0)
  517.                             cout << " ";
  518.                     
  519.                         if (unsigned(buf[x]) < 0x10)
  520.                             cout << "0";
  521.                     
  522.                         cout << hex << (unsigned(buf[x]) & 0xFF) << " ";
  523.                     }
  524.                     
  525.                     cout << dec << "n";
  526.                     
  527.                     nt.RetSequence(& buf);
  528.                 }
  529.                 
  530.                 {
  531.                     int length = nt.GetAmbigSeq(oid, & buf, kSeqDBNuclBlastNA8);
  532.                 
  533.                     int y = (length > 16) ? 16 : length;
  534.                 
  535.                     for(int x = 0; x < y; x++) {
  536.                         if ((x & 3) == 0)
  537.                             cout << " ";
  538.                     
  539.                         if (unsigned(buf[x]) < 0x10)
  540.                             cout << "0";
  541.                     
  542.                         cout << hex << (unsigned(buf[x]) & 0xFF) << " ";
  543.                     }
  544.                     
  545.                     cout << dec << "nn";
  546.                     
  547.                     nt.RetSequence(& buf);
  548.                 }
  549.                 
  550.                 oid++;
  551.             }
  552.             
  553.             return 0;
  554.         } else desc += " [-getambig]";
  555.         
  556.         if (s == "-iter2") {
  557.             {
  558.                 CSeqDB phil(/*dbpath,*/ "swissprot pataa", 'p');
  559.             
  560.                 {
  561.                     CSeqDBIter skywalk = phil.Begin();
  562.                     for(int i = 0; i<20; i++) {
  563.                         cout << "### Seq [" << skywalk.GetOID() << "] length = " << skywalk.GetLength() << endl;
  564.                         ++skywalk;
  565.                     }
  566.                 }
  567.             
  568.                 {
  569.                     CSeqDBIter skywalk = phil.Begin();
  570.                     for(int i = 0; i<20; i++) {
  571.                         cout << "### Seq [" << skywalk.GetOID() << "] length = " << skywalk.GetLength() << endl;
  572.                         CRef<CBioseq> bioseq = phil.GetBioseq(skywalk.GetOID());
  573.                         auto_ptr<CObjectOStream> outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  574.                         *outpstr << *bioseq;
  575.                         ++skywalk;
  576.                     }
  577.                 }
  578.             }
  579.             {
  580.                 CSeqDB phil(/*dbpath,*/ "pataa swissprot", 'p');
  581.             
  582.                 {
  583.                     CSeqDBIter skywalk = phil.Begin();
  584.                     for(int i = 0; i<20; i++) {
  585.                         cout << "### Seq [" << skywalk.GetOID() << "] length = " << skywalk.GetLength() << endl;
  586.                         ++skywalk;
  587.                     }
  588.                 }
  589.             
  590.                 {
  591.                     CSeqDBIter skywalk = phil.Begin();
  592.                     for(int i = 0; i<20; i++) {
  593.                         cout << "### Seq [" << skywalk.GetOID() << "] length = " << skywalk.GetLength() << endl;
  594.                         CRef<CBioseq> bioseq = phil.GetBioseq(skywalk.GetOID());
  595.                         auto_ptr<CObjectOStream> outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  596.                         *outpstr << *bioseq;
  597.                         ++skywalk;
  598.                     }
  599.                 }
  600.             }
  601.             
  602.             return 0;
  603.         } else desc += " [-iter2]";
  604.         
  605.         if (s == "-iter") {
  606.             CSeqDB phil(/*dbpath,*/ "swissprot pdb", 'p');
  607.             
  608.             {
  609.                 CSeqDBIter skywalk = phil.Begin();
  610.                 for(int i = 0; i<200; i++) {
  611.                     cout << "### Seq [" << skywalk.GetOID() << "] length = " << skywalk.GetLength() << endl;
  612.                     ++skywalk;
  613.                 }
  614.             }
  615.             
  616.             if (0) {
  617.                 for(int i = 0; i<200; i++) {
  618.                     cout << "n### Seq [" << i << "] length = " << phil.GetSeqLength(i) << "n" << endl;
  619.                     CRef<CBioseq> bioseq = phil.GetBioseq(i);
  620.                     auto_ptr<CObjectOStream> outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  621.                     *outpstr << *bioseq;
  622.                 }
  623.             }
  624.             
  625.             {
  626.                 CSeqDBIter skywalk = phil.Begin();
  627.                 for(int i = 0; i<200; i++) {
  628.                     cout << "### Seq [" << skywalk.GetOID() << "] length = " << skywalk.GetLength() << endl;
  629.                     CRef<CBioseq> bioseq = phil.GetBioseq(skywalk.GetOID());
  630.                     auto_ptr<CObjectOStream> outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  631.                     *outpstr << *bioseq;
  632.                     ++skywalk;
  633.                 }
  634.             }
  635.             
  636.             if (0) {
  637.                 for(int i = 0; i<200; i++) {
  638.                     cout << "n### Seq [" << i << "] length = " << phil.GetSeqLength(i) << "n" << endl;
  639.                 }
  640.             }
  641.             
  642.             return 0;
  643.         } else desc += " [-iter]";
  644.         
  645.         if (s == "-iterpdb") {
  646.             CSeqDB phil(/*dbpath,*/ "pdb", 'p');
  647.             
  648.             {
  649.                 CSeqDBIter skywalk = phil.Begin();
  650.                 for(int i = 0; i<200; i++) {
  651.                     cout << "### Seq [" << skywalk.GetOID() << "] length = " << skywalk.GetLength() << endl;
  652.                     ++skywalk;
  653.                 }
  654.             }
  655.             
  656.             if (0) {
  657.                 for(int i = 0; i<100; i++) {
  658.                     cout << "n### Seq [" << i << "] length = " << phil.GetSeqLength(i) << "n" << endl;
  659.                     CRef<CBioseq> bioseq = phil.GetBioseq(i);
  660.                     auto_ptr<CObjectOStream> outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  661.                     *outpstr << *bioseq;
  662.                 }
  663.             }
  664.             
  665.             return 0;
  666.         } else desc += " [-iterpdb]";
  667.         
  668.         if (s == "-itersp") {
  669.             CSeqDB phil(/*dbpath,*/ "swissprot", 'p');
  670.             
  671.             {
  672.                 CSeqDBIter skywalk = phil.Begin();
  673.                 for(int i = 0; i<200; i++) {
  674.                     cout << "### Seq [" << skywalk.GetOID() << "] length = " << skywalk.GetLength() << endl;
  675.                     ++skywalk;
  676.                 }
  677.             }
  678.             
  679.             if (0) {
  680.                 for(int i = 0; i<100; i++) {
  681.                     cout << "n### Seq [" << i << "] length = " << phil.GetSeqLength(i) << "n" << endl;
  682.                     CRef<CBioseq> bioseq = phil.GetBioseq(i);
  683.                     auto_ptr<CObjectOStream> outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  684.                     *outpstr << *bioseq;
  685.                 }
  686.             }
  687.             
  688.             return 0;
  689.         } else desc += " [-itersp]";
  690.         
  691.         if (s == "-spcount") {
  692.             CSeqDB phil(/*dbpath,*/ "swissprot", 'p');
  693.             
  694.             double besht = 100.0;
  695.             double woist = 0.0;
  696.             double totul = 0.0;
  697.             
  698.             for(int i = 0; i<10; i++) {
  699.                 CSeqDBIter skywalk = phil.Begin();
  700.                 
  701.                 double spt1 = dbl_time();
  702.                 Uint8 mylen = 0;
  703.                 
  704.                 while(skywalk) {
  705.                     int this_oid = 0;
  706.                     //int this_len = 0;
  707.                     
  708.                     //mylen += (this_len = phil.GetSeqLength( this_oid = skywalk.GetOID() ));
  709.                     this_oid = skywalk.GetOID();
  710.                     ++ skywalk;
  711.                     mylen ++;
  712.                     
  713.                     //cout << this_oid << " is length " << this_len << endl;
  714.                 }
  715.                 
  716.                 double spt2  = dbl_time();
  717.                 double rezzy = spt2 - spt1;
  718.                 
  719.                 cout << "mylen " << mylen
  720.                      << " spt2 - spt1 = " << rezzy << endl;
  721.                 
  722.                 if (rezzy > woist)
  723.                     woist = rezzy;
  724.                 if (rezzy < besht)
  725.                     besht = rezzy;
  726.                 
  727.                 totul += rezzy;
  728.             }
  729.             
  730.             totul -= (besht + woist);
  731.             
  732.             cout << "Average = " << (totul/8.0) << endl;
  733.             
  734.             return 0;
  735.         } else desc += " [-spcount]";
  736.         
  737.         if (s == "-swiss") {
  738.             CSeqDB phil(/*dbpath,*/ "swissprot", 'p');
  739.              
  740.             {
  741.                 Uint8 tlen = 0;
  742.                 Uint4 numb = 0;
  743.                 
  744.                 CSeqDBIter skywalk = phil.Begin();
  745.                 
  746.                 {
  747.                     int i = 276;
  748.                     cout << "this_oid = " << i << " length = " << 0 << endl;
  749.                     cout << "n### Seq [" << i << "] length = " << phil.GetSeqLength(i) << "n" << endl;
  750.                     CRef<CBioseq> bioseq = phil.GetBioseq(i);
  751.                     
  752.                     auto_ptr<CObjectOStream> outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  753.                     *outpstr << *bioseq;
  754.                 }
  755.                 while(skywalk) {
  756.                     int this_oid = 0;
  757.                     int this_len = 0;
  758.                     
  759.                     numb ++;
  760.                     tlen += (this_len = phil.GetSeqLength( this_oid = skywalk.GetOID() ));
  761.                     ++ skywalk;
  762.                     
  763.                     cout << this_oid << endl;
  764.                     
  765.                     if (numb > 145680) {
  766.                         int i = this_oid;
  767.                         cout << "this_oid = " << this_oid << " length = " << this_len << endl;
  768.                         cout << "n### Seq [" << i << "] length = " << phil.GetSeqLength(i) << "n" << endl;
  769.                         CRef<CBioseq> bioseq = phil.GetBioseq(i);
  770.                         auto_ptr<CObjectOStream> outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  771.                         *outpstr << *bioseq;
  772.                     }
  773.                 }
  774.                 
  775.                 cout << "n### Total swissprot length [" << tlen << "] numb = " << numb << "n" << endl;
  776.             }
  777.              
  778.             return 0;
  779.         } else desc += " [-swiss]";
  780.         
  781.         
  782.         if (s == "-lib") {
  783.             CSeqDB phil(/*dbpath,*/ "nt nt month est", 'n');
  784.             phil.GetSeqLength(123);
  785.             phil.GetSeqLengthApprox(123);
  786.             phil.GetHdr(123);
  787.             phil.GetBioseq(123);
  788.             
  789.             const char * buffer = 0;
  790.             phil.GetSequence(123, & buffer);
  791.             phil.RetSequence(& buffer);
  792.             cout << "nSeq type:    " << phil.GetSeqType();
  793.             cout << "nTitle:       " << phil.GetTitle();
  794.             cout << "nDate:        " << phil.GetDate();
  795.             cout << "nNumSeqs:     " << phil.GetNumSeqs();
  796.             cout << "nTotalLength: " << phil.GetTotalLength();
  797.             cout << "nMax Length:  " << phil.GetMaxLength() << endl;
  798.             cout << endl;
  799.             
  800.             return 0;
  801.         } else desc += " [-lib]";
  802.         
  803.         if (s == "-summary") {
  804.             CSeqDB phil(/*dbpath,*/ "month", 'n');
  805.             cout << "dbpath: " << dbpath            << endl;
  806.             cout << "title:  " << phil.GetTitle()   << endl;
  807.             cout << "nseqs:  " << phil.GetNumSeqs() << endl;
  808.             cout << "tleng:  " << phil.GetTotalLength() << endl;
  809.             return 0;
  810.         } else desc += " [-summary]";
  811.         
  812. //         if (s == "-alias") {
  813. //             string dbname = "pdb";
  814.             
  815. //             if (! args.empty()) {
  816. //                 dbname = args.front();
  817. //                 args.pop_front(); 
  818. //             }
  819. //             string ending = "pal";
  820. //             if (! args.empty()) {
  821. //                 ending = args.front();
  822. //                 args.pop_front(); 
  823. //             }
  824.             
  825. //             ncbi::CSeqDBAliasNode phil(/*dbpath,*/ dbname, ending[0]);
  826.             
  827. //             vector<string> vols;
  828.             
  829. //             phil.GetVolumeNames(vols);
  830.             
  831. //             for(Uint4 i = 0; i<vols.size(); i++) {
  832. //                 cout << "[" << i << "] "
  833. //                      << vols[i] << endl;
  834. //             }
  835.             
  836. //             return 0;
  837. //         } else desc += " [-alias]";
  838.         
  839.         if (s == "-len3") {
  840.             string dbname1("nr");
  841.             string dbname2("pataa");
  842.             string dbname3(dbname1 + " " + dbname2);
  843.             
  844.             CSeqDB dbi1(/*dbpath,*/ dbname1, kSeqTypeProt, 0, 0, use_mm);
  845.             CSeqDB dbi2(/*dbpath,*/ dbname2, kSeqTypeProt, 0, 0, use_mm);
  846.             CSeqDB dbi3(/*dbpath,*/ dbname3, kSeqTypeProt, 0, 0, use_mm);
  847.             
  848.             cout << "1 " << dbi1.GetTotalLength() << endl;
  849.             cout << "2 " << dbi2.GetTotalLength() << endl;
  850.             cout << "3 " << dbi3.GetTotalLength() << endl;
  851.             cout << "---------------" << endl;
  852.             
  853.             Uint4 len1 = dbi1.GetNumSeqs();
  854.             Uint4 len2 = dbi2.GetNumSeqs();
  855.             Uint4 len3 = dbi3.GetNumSeqs();
  856.             
  857.             cout << "1 " << len1 << endl;
  858.             cout << "2 " << len2 << endl;
  859.             cout << "3 " << len3 << endl;
  860.             
  861.             Uint8 len_tot = 0;
  862.             
  863.             len_tot = 0;
  864.             for(Uint4 i = 0; i<len1; i++) {
  865.                 len_tot += dbi1.GetSeqLength(i);
  866.             }
  867.             cout << "total1 " << len_tot << endl;
  868.             
  869.             len_tot = 0;
  870.             for(Uint4 i = 0; i<len2; i++) {
  871.                 len_tot += dbi2.GetSeqLength(i);
  872.             }
  873.             cout << "total2 " << len_tot << endl;
  874.             
  875.             Uint8 x = len1 - 5;
  876.             
  877.             len_tot = 0;
  878.             for(Uint4 i = 0; i<len3; i++) {
  879.                 if (i > x)
  880.                     hang10();
  881.                 len_tot += dbi3.GetSeqLength(i);
  882.             }
  883.             cout << "total3 " << len_tot << endl;
  884.             
  885.             return 0;
  886.         } else desc += " [-len3]";
  887.         
  888.         if ((s == "-nt3") || (s == "-nt3a")) {
  889.             approx = false;
  890.             
  891.             if (s == "-nt3a") {
  892.                 approx = true;
  893.             }
  894.             
  895.             string dbname1 = "month";
  896.             string dbname2 = "est";
  897.             
  898.             string dbname3(dbname1 + " " + dbname2);
  899.             
  900.             CSeqDB dbi1(/*dbpath,*/ dbname1, kSeqTypeNucl, 0, 0, use_mm);
  901.             CSeqDB dbi2(/*dbpath,*/ dbname2, kSeqTypeNucl, 0, 0, use_mm);
  902.             CSeqDB dbi3(/*dbpath,*/ dbname3, kSeqTypeNucl, 0, 0, use_mm);
  903.             
  904.             cout << "1 " << dbi1.GetTotalLength() << endl;
  905.             cout << "2 " << dbi2.GetTotalLength() << endl;
  906.             cout << "3 " << dbi3.GetTotalLength() << endl;
  907.             cout << "---------------" << endl;
  908.             
  909.             Uint4 len1 = dbi1.GetNumSeqs();
  910.             Uint4 len2 = dbi2.GetNumSeqs();
  911.             Uint4 len3 = dbi3.GetNumSeqs();
  912.             len1 /= 10;
  913.             len2 /= 10;
  914.             len3 = len1 + len2;
  915.             
  916.             cout << "1 " << len1 << endl;
  917.             cout << "2 " << len2 << endl;
  918.             cout << "3 " << len3 << endl;
  919.             
  920.             Uint8 len_tot = 0;
  921.             
  922.             len_tot = 0;
  923.             
  924.             if (approx) {
  925.                 for(Uint4 i = 0; i<len1; i++) {
  926.                     len_tot += dbi1.GetSeqLengthApprox(i);
  927.                 }
  928.             } else {
  929.                 for(Uint4 i = 0; i<len1; i++) {
  930.                     len_tot += dbi1.GetSeqLength(i);
  931.                 }
  932.             }
  933.             cout << "total1 " << len_tot << endl;
  934.             
  935.             len_tot = 0;
  936.             if (approx) {
  937.                 for(Uint4 i = 0; i<len2; i++) {
  938.                     len_tot += dbi2.GetSeqLengthApprox(i);
  939.                 }
  940.             } else {
  941.                 for(Uint4 i = 0; i<len2; i++) {
  942.                     len_tot += dbi2.GetSeqLength(i);
  943.                 }
  944.             }
  945.             cout << "total2 " << len_tot << endl;
  946.             
  947.             len_tot = 0;
  948.             if (approx) {
  949.                 for(Uint4 i = 0; i<len3; i++) {
  950.                     len_tot += dbi3.GetSeqLengthApprox(i);
  951.                 }
  952.             } else {
  953.                 for(Uint4 i = 0; i<len3; i++) {
  954.                     len_tot += dbi3.GetSeqLength(i);
  955.                 }
  956.             }
  957.             cout << "total3 " << len_tot << endl;
  958.             
  959.             return 0;
  960.         } else desc += " [-nt3 | -nt3a]";
  961.         
  962.         if (s == "-mm") {
  963.             use_mm = true;
  964.             continue;
  965.         }
  966.         
  967.         if (s == "-file") {
  968.             use_mm = false;
  969.             continue;
  970.         } else desc += " [-mm | -file]";
  971.         
  972.         if (s == "-db2") {
  973.             dbname = "nr pataa";
  974.             continue;
  975.         } else desc += " [-db2]";
  976.         
  977.         if (s == "-aliasn") {
  978.             dbname = "month htgs";
  979.             seqtype = kSeqTypeNucl;
  980.             continue;
  981.         } else desc += " [-aliasn]";
  982.         
  983.         if (s == "-approx") {
  984.             approx = true;
  985.             continue;
  986.         } else desc += " [-approx]";
  987.         
  988. //         if (s == "-ambig") {
  989. //             dbname = "/home/bealer/seqdb/dbs/ambig";
  990. //             continue;
  991. //         } else desc += " [-ambig]";
  992.         
  993. //         if (s == "-kevinx") {
  994. //             dbname = "/home/bealer/seqdb/dbs/kevinx";
  995. //             continue;
  996. //         } else desc += " [-kevinx]";
  997.         
  998.         if (s == "-no-del") {
  999.             deletions = false;
  1000.             continue;
  1001.         } else desc += " [-no-del]";
  1002.         
  1003.         if (s == "-look") {
  1004.             look_seq = true;
  1005.             continue;
  1006.         } else desc += " [-look]";
  1007.         
  1008.         if (s == "-get-bio") {
  1009.             get_bioseq = true;
  1010.             continue;
  1011.         } else desc += " [-get-bio]";
  1012.         
  1013.         if (s == "-no-progress") {
  1014.             show_progress = false;
  1015.             continue;
  1016.         } else desc += " [-no-progress]";
  1017.         
  1018.         if (s == "-show-bio") {
  1019.             show_bioseq = true;
  1020.             get_bioseq  = true;
  1021.             continue;
  1022.         } else desc += " [-show-bio]";
  1023.         
  1024.         if (s == "-show-fasta") {
  1025.             show_fasta = true;
  1026.             get_bioseq = true;
  1027.             continue;
  1028.         } else desc += " [-show-fasta]";
  1029.         
  1030.         if (s == "-num") {
  1031.             if (args.empty()) {
  1032.                 cerr << "Error: -num requires an argument." << endl;
  1033.                 failed = true;
  1034.             }
  1035.             
  1036.             string s2 = *args.begin();
  1037.             args.pop_front();
  1038.             
  1039.             int s2_num = atoi(s2.c_str());
  1040.             
  1041.             if (s2_num > 0) {
  1042.                 num_display = s2_num;
  1043.             }
  1044.             
  1045.             continue;
  1046.         } else desc += " [-num <seqs to get]";
  1047.         
  1048.         if (s == "-loop") {
  1049.             if (args.empty()) {
  1050.                 cerr << "Error: -loop requires an argument." << endl;
  1051.                 failed = true;
  1052.             }
  1053.             
  1054.             string s2 = *args.begin();
  1055.             args.pop_front();
  1056.             
  1057.             int s2_num = atoi(s2.c_str());
  1058.             
  1059.             if (s2_num > 0) {
  1060.                 num_itera = s2_num;
  1061.             }
  1062.             
  1063.             continue;
  1064.         } else desc += " [-loop <iterations>]";
  1065.         
  1066.         if (s == "-") {
  1067.             cout << "Usage:n"
  1068.                  << argv[0]
  1069.                  << desc
  1070.                  << endl;
  1071.             
  1072.             return 0;
  1073.         }
  1074.         
  1075.         cerr << "Unknown option: " << s << endl;
  1076.         failed = true;
  1077.     }
  1078.     
  1079.     if (failed) {
  1080.         
  1081.     }
  1082.     
  1083.     if (failed)
  1084.         return 1;
  1085.     
  1086.     cout << "Using [" << (use_mm ? "mm" : "file") << "] mode." << endl;
  1087.     
  1088.     if (num_display != -1) {
  1089.         cout << "Displaying [" << (num_display) << "]." << endl;
  1090.     }
  1091.     
  1092.     if (num_itera != 1) {
  1093.         cout << "Iterating [" << (num_itera) << "] times." << endl;
  1094.     }
  1095.     
  1096.     if (! deletions) {
  1097.         cout << "Omitting deletions." << endl;
  1098.     }
  1099.     
  1100.     cout << "------- starting -------" << endl;
  1101.     
  1102.     //thr_test();
  1103.     //return 0;
  1104.     
  1105.     
  1106.     for(int k = 0; k<num_itera; k++) {
  1107.         try {
  1108.             double dstart = dbl_time();
  1109.             
  1110.             CSeqDB dbi(dbname, seqtype, 0, 0, use_mm);
  1111.             
  1112.             if (show_progress)
  1113.                 cout << "at line " << __LINE__ << endl;
  1114.             
  1115.             Int8 nseqs  = (Int8) dbi.GetNumSeqs();
  1116.             Uint8 tleng = dbi.GetTotalLength();
  1117.             
  1118.             if ((num_display <= 0) || (num_display > nseqs)) {
  1119.                 num_display = nseqs;
  1120.             }
  1121.             
  1122.             if (show_progress)
  1123.                 cout << "at line " << __LINE__ << endl;
  1124.             double dend = dbl_time();
  1125.             
  1126.             if (show_progress) {
  1127.             cout << "NR seq count: " << nseqs   << endl;
  1128.             cout << "Total length: " << tleng   << endl;
  1129.             cout << "Compute time: " << (dend - dstart) << endl;
  1130.             }
  1131.             
  1132.             double cstart = dbl_time();
  1133.         
  1134.             Uint8 cleng = 0;
  1135.             
  1136.             Uint4 report_at = 0;
  1137.             
  1138.             Uint8 sampling = 0;
  1139.             Uint8 numsamp  = 0;
  1140.             
  1141.             if (show_progress)
  1142.                 cout << "at line " << __LINE__ << endl;
  1143.             
  1144.             
  1145.             // These will get the sequences - these pointers will never be
  1146.             // nulled out after the loop, but the CSeqDB destructor will
  1147.             // reclaim the storage -- unless we are in mmap() mode, in
  1148.             // which case all of the memory get and set operations are
  1149.             // effectively noops.
  1150.         
  1151.             const char * buffer1[10];
  1152.             {
  1153.                 for(int i = 0; i<10; i++) {
  1154.                     buffer1[i] = 0;
  1155.                 }
  1156.             }
  1157.             
  1158.             Uint8 qsum = 0;
  1159.             
  1160.             for(Uint4 i = 0; i < num_display; i++) {
  1161.                 Int4 thislength = 0;
  1162.                 
  1163.                 cleng += (thislength = (approx ? dbi.GetSeqLengthApprox(i) : dbi.GetSeqLength(i)));
  1164.                 
  1165.                 if (get_bioseq) {
  1166.                     CRef<CBioseq> bioseq = dbi.GetBioseq(i);
  1167.                     
  1168.                     if (show_bioseq || show_fasta) {
  1169.                         if (show_bioseq) {
  1170.                             auto_ptr<CObjectOStream> outpstr(CObjectOStream::Open(eSerial_AsnText, cout));
  1171.                     
  1172.                             cout << "--- Seq #" << i << "---" << endl;
  1173.                             *outpstr << *bioseq;
  1174.                         }
  1175.                     
  1176.                         if (show_bioseq && show_fasta) {
  1177.                             cout << "--- Fasta ---" << endl;
  1178.                         }
  1179.                     
  1180.                         if (show_fasta) {
  1181.                             CFastaOstream fost(cout);
  1182.                             fost.SetWidth(80);
  1183.                             fost.Write(*bioseq);
  1184.                         }
  1185.                     
  1186.                         if (show_bioseq && show_fasta) {
  1187.                             cout << "--- Seq done ---" << endl;
  1188.                         }
  1189.                     }
  1190.                 }
  1191.                 
  1192.                 int ii = i % 10;
  1193.                 
  1194.                 if (deletions && buffer1[ii]) {
  1195.                     dbi.RetSequence(& buffer1[ii]);
  1196.                 }
  1197.                 
  1198.                 Int4 seqlen = dbi.GetSequence(i, & buffer1[ii]);
  1199.                 const char * bufdata = buffer1[ii];
  1200.                 
  1201.                 if (look_seq) {
  1202.                     int qstride = 100;
  1203.                     for(int q = 0; q < seqlen; q += qstride) {
  1204.                         qsum += Int8(bufdata[q]) & 0xFF;
  1205.                     }
  1206.                 }
  1207.                 
  1208.                 if (show_progress) {
  1209.                     if (i >= report_at) {
  1210.                         double t = dbl_time() - cstart;
  1211.                         double s_per_t = i / (t ? t : 0.00001);
  1212.                 
  1213.                         cout << "t[" << t << "] s/t[" << s_per_t << "] REPORTING: i=" << i
  1214.                              << ", accumulated length = " << cleng
  1215.                              << ", (this length = " << thislength << "), qsum=" << qsum << "n";
  1216.                     
  1217.                         report_at = Uint4(i * 1.5);
  1218.                     
  1219.                         if (report_at > num_display) {
  1220.                             report_at = Uint4(num_display - 1);
  1221.                         }
  1222.                         
  1223.                         sampling += thislength;
  1224.                         numsamp ++;
  1225.                     }
  1226.                 }
  1227.             }
  1228.             
  1229.             double cend = dbl_time();
  1230.             
  1231.             if (show_progress) {
  1232.                 cout << "nNR seq count:  " << nseqs   << "n";
  1233.                 cout << "Total clength: "   << cleng   << "n";
  1234.                 cout << "Sampling est:  "   << ((sampling / double(numsamp)) * nseqs) << "n";
  1235.                 cout << "Compute ctime: "   << (cend - cstart) << endl;
  1236.             }
  1237.         } 
  1238.         catch(string ee) {
  1239.             cout << "Caught me an " << ee << endl;
  1240.         }
  1241.         catch(std::exception ex) {
  1242.             cout << "Or maybe " << ex.what() << endl;
  1243.         }
  1244.     }
  1245.     
  1246.     return 0;
  1247. }
  1248. int main(int argc, char ** argv)
  1249. {
  1250.     int rc = 0;
  1251.     
  1252.     try {
  1253.         cout << "--one--" << endl;
  1254.         rc = test1(argc, argv);
  1255.         cout << "--two--" << endl;
  1256.     }
  1257.     catch(exception e) {
  1258.         cout << "--three--" << endl;
  1259.         cout << "Caught an exception: {" << e.what() << "}" << endl;
  1260.         rc = 1;
  1261.         cout << "--four--" << endl;
  1262.     }
  1263.     catch(...) {
  1264.         cout << "--five--" << endl;
  1265.     }
  1266.     
  1267.     return rc;
  1268. }