clustalw.py
上传用户:yhdzpy8989
上传日期:2007-06-13
资源大小:13604k
文件大小:8k
源码类别:

生物技术

开发平台:

C/C++

  1. #!/usr/bin/python
  2. # $Id: clustalw.py,v 1000.0 2003/10/31 21:37:53 gouriano Exp $
  3. #
  4. # Author:  Josh Cherry
  5. #
  6. # align sequences using clustalw (from gbench)
  7. import sys
  8. import cgi
  9. import os
  10. import tempfile
  11. # take a list of Seq-locs encoded as CGI strings,
  12. # and write out a multi-sequence fasta file
  13. def FastaFromCGI(locs, fname):
  14.    s = ""
  15.    for qs in locs:
  16.       loc = cgi.parse_qs(qs)
  17.       s += ">" + loc['short_title'][0] + "  " + loc['title'][0] + "n"
  18.       seq = loc['seq'][0]
  19.       for pos in range(0, len(seq), 60):
  20.          s += seq[pos:pos+60]
  21.          s += 'n'
  22.    f = open(fname, "w")
  23.    f.write(s)
  24.    f.close()
  25. input = sys.stdin.read()
  26. args = cgi.parse_qs(input)
  27. action = args['action'][0]
  28. # if a get info call ...
  29. if action == 'info':
  30.    print '''
  31.    PluginInfo ::= {
  32.       ver-major 0,
  33.       ver-minor 0,
  34.       ver-revision 0,
  35.       ver-build-date "",
  36.       class-name "clustalw.py",
  37.       menu-item "Alignments/Multiple alignment using clustalw",
  38.       tooltip "Multiple alignment using clustalw",
  39.       commands algo {
  40.         {
  41.           command 3,
  42.           args {
  43.             {
  44.               name "locs",
  45.               desc "Locations to evaluate",
  46.               data array {
  47.                 object {
  48.                   docaddr "(nil)",
  49.                   objaddr "(nil)",
  50.                   subtype "Seq-loc"
  51.                 }
  52.               }
  53.             },
  54.             {
  55.               name "output",
  56.               desc "Output format",
  57.               default TRUE,
  58.               data single string "clustal",
  59.               constraint {
  60.                 set {
  61.                   "clustal",
  62.                   "phylip",
  63.                   "gcg",
  64.                   "gde",
  65.                   "pir",
  66.                   "nexus"
  67.                 }
  68.               }
  69.             },
  70.             {
  71.               name "alignment",
  72.               desc "Alignment",
  73.               default TRUE,
  74.               data single string "full",
  75.               constraint {
  76.                 set {
  77.                   "full",
  78.                   "fast"
  79.                 }
  80.               }
  81.             },
  82.             {
  83.               name "outorder",
  84.               desc "Output order",
  85.               default TRUE,
  86.               data single string "aligned",
  87.               constraint {
  88.                 set {
  89.                   "aligned",
  90.                   "input"
  91.                 }
  92.               }
  93.             },
  94.             {
  95.               name "ktuple",
  96.               desc "ktuple (word size)",
  97.               default TRUE,
  98.               data single string "def",
  99.               constraint {
  100.                 set {
  101.                   "def",
  102.                   "1",
  103.                   "2",
  104.                   "3",
  105.                   "4",
  106.                   "5"
  107.                 }
  108.               }
  109.             },
  110.             {
  111.               name "window",
  112.               desc "Window length",
  113.               default TRUE,
  114.               data single string "def",
  115.               constraint {
  116.                 set {
  117.                   "def",
  118.                   "10",
  119.                   "9",
  120.                   "8",
  121.                   "7",
  122.                   "6",
  123.                   "5",
  124.                   "4",
  125.                   "3",
  126.                   "2",
  127.                   "1",
  128.                   "0"
  129.                 }
  130.               }
  131.             },
  132.             {
  133.               name "topdiags",
  134.               desc "Top diagonals",
  135.               default TRUE,
  136.               data single string "def",
  137.               constraint {
  138.                 set {
  139.                   "def",
  140.                   "10",
  141.                   "9",
  142.                   "8",
  143.                   "7",
  144.                   "6",
  145.                   "5",
  146.                   "4",
  147.                   "3",
  148.                   "2",
  149.                   "1",
  150.                   "0"
  151.                 }
  152.               }
  153.             },
  154.             {
  155.               name "matrix",
  156.               desc "Matrix (protein)",
  157.               default TRUE,
  158.               data single string "def",
  159.               constraint {
  160.                 set {
  161.                   "def",
  162.                   "blosum",
  163.                   "pam",
  164.                   "gonnet",
  165.                   "id"
  166.                 }
  167.               }
  168.             },
  169.             {
  170.               name "gapopen",
  171.               desc "Gap open penalty",
  172.               default TRUE,
  173.               data single string "def",
  174.               constraint {
  175.                 set {
  176.                   "def",
  177.                   "100",
  178.                   "50",
  179.                   "25",
  180.                   "10",
  181.                   "5",
  182.                   "2",
  183.                   "1"
  184.                 }
  185.               }
  186.             },
  187.             {
  188.               name "gapext",
  189.               desc "Gap extension penalty",
  190.               default TRUE,
  191.               data single string "def",
  192.               constraint {
  193.                 set {
  194.                   "def",
  195.                   "0.05",
  196.                   "0.5",
  197.                   "1",
  198.                   "2.5",
  199.                   "5",
  200.                   "7.5",
  201.                   "10"
  202.                 }
  203.               }
  204.             },
  205.             {
  206.               name "gapdist",
  207.               desc "Gap separation pen. range",
  208.               default TRUE,
  209.               data single string "def",
  210.               constraint {
  211.                 set {
  212.                   "def",
  213.                   "10",
  214.                   "9",
  215.                   "8",
  216.                   "7",
  217.                   "6",
  218.                   "5",
  219.                   "4",
  220.                   "3",
  221.                   "2",
  222.                   "1"
  223.                 }
  224.               }
  225.             }
  226.           }
  227.         }
  228.       }
  229.     }
  230.     '''
  231.    sys.exit(0)
  232. # otherwise, run
  233. # write a temporary fasta file containing the sequences
  234. infile = tempfile.mktemp('.fasta')
  235. FastaFromCGI(args['locs'], infile)
  236. # get the arguments
  237. switches = []
  238. params = {}
  239. output = args['output'][0]
  240. if (output != 'clustal'):
  241.    params['output'] = output
  242.    
  243. if (args['alignment'][0] == 'fast'):
  244.    switches.append('quicktree')
  245. params['outorder'] = args['outorder'][0]
  246. # things that could be 'def', otherwise need to be passed
  247. # on command line
  248. for name in ['ktuple', 'window', 'topdiags',
  249.              'matrix', 'gapopen', 'gapext', 'gapdist']:
  250.    value = args[name][0]
  251.    if (value != 'def'):
  252.       params[name] = value
  253. params['infile'] = infile
  254. outfile = tempfile.mktemp()  # temp file for output
  255. params['outfile'] = outfile
  256. # build the command line
  257. arglist = ''
  258. for switch in switches:
  259.    arglist += ' -' + switch
  260. for param in params.keys():
  261.    arglist += ' -' + param + '=' + params[param]
  262. cline = 'clustalw' + arglist
  263. print cline + 'n'
  264. sys.stdout.flush()
  265. # run clustalw, which must be on path
  266. os.system(cline)
  267. os.system('cat %s' % outfile)
  268. # then delete the temporary files
  269. os.system('rm %s %s' % (infile, outfile))
  270. # ===========================================================================
  271. # $Log: clustalw.py,v $
  272. # Revision 1000.0  2003/10/31 21:37:53  gouriano
  273. # PRODUCTION: IMPORTED [ORIGINAL] Dev-tree R1.4
  274. #
  275. # Revision 1.4  2003/10/29 20:12:05  jcherry
  276. # Reflect new spec for plugin args
  277. #
  278. # Revision 1.3  2003/10/07 13:47:02  dicuccio
  279. # Renamed CPluginURL* to CPluginValue*
  280. #
  281. # Revision 1.2  2003/07/30 19:38:20  jcherry
  282. # Added a bunch of parameters for the alignment
  283. #
  284. # Revision 1.1  2003/07/28 22:34:39  jcherry
  285. # Initial version
  286. #
  287. # ===========================================================================
  288. #  ===========================================================================
  289. #  PRODUCTION $Log: clustalw.py,v $
  290. #  PRODUCTION Revision 1000.0  2003/10/31 21:37:53  gouriano
  291. #  PRODUCTION PRODUCTION: IMPORTED [ORIGINAL] Dev-tree R1.4
  292. #  PRODUCTION
  293. #  ===========================================================================