simperf_host_xml_parser.py
上传用户:king477883
上传日期:2021-03-01
资源大小:9553k
文件大小:14k
源码类别:

游戏引擎

开发平台:

C++ Builder

  1. #!/usr/bin/env python
  2. """
  3. @file simperf_host_xml_parser.py
  4. @brief Digest collector's XML dump and convert to simple dict/list structure
  5. $LicenseInfo:firstyear=2008&license=mit$
  6. Copyright (c) 2008-2010, Linden Research, Inc.
  7. Permission is hereby granted, free of charge, to any person obtaining a copy
  8. of this software and associated documentation files (the "Software"), to deal
  9. in the Software without restriction, including without limitation the rights
  10. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. copies of the Software, and to permit persons to whom the Software is
  12. furnished to do so, subject to the following conditions:
  13. The above copyright notice and this permission notice shall be included in
  14. all copies or substantial portions of the Software.
  15. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21. THE SOFTWARE.
  22. $/LicenseInfo$
  23. """
  24. import sys, os, getopt, time
  25. import simplejson
  26. from xml import sax
  27. def usage():
  28.     print "Usage:"
  29.     print sys.argv[0] + " [options]"
  30.     print "  Convert RRD's XML dump to JSON.  Script to convert the simperf_host_collector-"
  31.     print "  generated RRD dump into JSON.  Steps include converting selected named"
  32.     print "  fields from GAUGE type to COUNTER type by computing delta with preceding"
  33.     print "  values.  Top-level named fields are:"
  34.     print 
  35.     print "     lastupdate      Time (javascript timestamp) of last data sample"
  36.     print "     step            Time in seconds between samples"
  37.     print "     ds              Data specification (name/type) for each column"
  38.     print "     database        Table of data samples, one time step per row"
  39.     print 
  40.     print "Options:"
  41.     print "  -i, --in      Input settings filename.  (Default:  stdin)"
  42.     print "  -o, --out     Output settings filename.  (Default:  stdout)"
  43.     print "  -h, --help    Print this message and exit."
  44.     print
  45.     print "Example: %s -i rrddump.xml -o rrddump.json" % sys.argv[0]
  46.     print
  47.     print "Interfaces:"
  48.     print "   class SimPerfHostXMLParser()         # SAX content handler"
  49.     print "   def simperf_host_xml_fixup(parser)   # post-parse value fixup"
  50. class SimPerfHostXMLParser(sax.handler.ContentHandler):
  51.     def __init__(self):
  52.         pass
  53.         
  54.     def startDocument(self):
  55.         self.rrd_last_update = 0         # public
  56.         self.rrd_step = 0                # public
  57.         self.rrd_ds = []                 # public
  58.         self.rrd_records = []            # public
  59.         self._rrd_level = 0
  60.         self._rrd_parse_state = 0
  61.         self._rrd_chars = ""
  62.         self._rrd_capture = False
  63.         self._rrd_ds_val = {}
  64.         self._rrd_data_row = []
  65.         self._rrd_data_row_has_nan = False
  66.         
  67.     def endDocument(self):
  68.         pass
  69.     # Nasty little ad-hoc state machine to extract the elements that are
  70.     # necessary from the 'rrdtool dump' XML output.  The same element
  71.     # name '<ds>' is used for two different data sets so we need to pay
  72.     # some attention to the actual structure to get the ones we want
  73.     # and ignore the ones we don't.
  74.     
  75.     def startElement(self, name, attrs):
  76.         self._rrd_level = self._rrd_level + 1
  77.         self._rrd_capture = False
  78.         if self._rrd_level == 1:
  79.             if name == "rrd" and self._rrd_parse_state == 0:
  80.                 self._rrd_parse_state = 1     # In <rrd>
  81.                 self._rrd_capture = True
  82.                 self._rrd_chars = ""
  83.         elif self._rrd_level == 2:
  84.             if self._rrd_parse_state == 1:
  85.                 if name == "lastupdate":
  86.                     self._rrd_parse_state = 2         # In <rrd><lastupdate>
  87.                     self._rrd_capture = True
  88.                     self._rrd_chars = ""
  89.                 elif name == "step":
  90.                     self._rrd_parse_state = 3         # In <rrd><step>
  91.                     self._rrd_capture = True
  92.                     self._rrd_chars = ""
  93.                 elif name == "ds":
  94.                     self._rrd_parse_state = 4         # In <rrd><ds>
  95.                     self._rrd_ds_val = {}
  96.                     self._rrd_chars = ""
  97.                 elif name == "rra":
  98.                     self._rrd_parse_state = 5         # In <rrd><rra>
  99.         elif self._rrd_level == 3:
  100.             if self._rrd_parse_state == 4:
  101.                 if name == "name":
  102.                     self._rrd_parse_state = 6         # In <rrd><ds><name>
  103.                     self._rrd_capture = True
  104.                     self._rrd_chars = ""
  105.                 elif name == "type":
  106.                     self._rrd_parse_state = 7         # In <rrd><ds><type>
  107.                     self._rrd_capture = True
  108.                     self._rrd_chars = ""
  109.             elif self._rrd_parse_state == 5:
  110.                 if name == "database":
  111.                     self._rrd_parse_state = 8         # In <rrd><rra><database>
  112.         elif self._rrd_level == 4:
  113.             if self._rrd_parse_state == 8:
  114.                 if name == "row":
  115.                     self._rrd_parse_state = 9         # In <rrd><rra><database><row>
  116.                     self._rrd_data_row = []
  117.                     self._rrd_data_row_has_nan = False
  118.         elif self._rrd_level == 5:
  119.             if self._rrd_parse_state == 9:
  120.                 if name == "v":
  121.                     self._rrd_parse_state = 10        # In <rrd><rra><database><row><v>
  122.                     self._rrd_capture = True
  123.                     self._rrd_chars = ""
  124.     def endElement(self, name):
  125.         self._rrd_capture = False
  126.         if self._rrd_parse_state == 10:
  127.             self._rrd_capture = self._rrd_level == 6
  128.             if self._rrd_level == 5:
  129.                 if self._rrd_chars == "NaN":
  130.                     self._rrd_data_row_has_nan = True
  131.                 else:
  132.                     self._rrd_data_row.append(self._rrd_chars)
  133.                 self._rrd_parse_state = 9              # In <rrd><rra><database><row>
  134.         elif self._rrd_parse_state == 9:
  135.             if self._rrd_level == 4:
  136.                 if not self._rrd_data_row_has_nan:
  137.                     self.rrd_records.append(self._rrd_data_row)
  138.                 self._rrd_parse_state = 8              # In <rrd><rra><database>
  139.         elif self._rrd_parse_state == 8:
  140.             if self._rrd_level == 3:
  141.                 self._rrd_parse_state = 5              # In <rrd><rra>
  142.         elif self._rrd_parse_state == 7:
  143.             if self._rrd_level == 3:
  144.                 self._rrd_ds_val["type"] = self._rrd_chars
  145.                 self._rrd_parse_state = 4              # In <rrd><ds>
  146.         elif self._rrd_parse_state == 6:
  147.             if self._rrd_level == 3:
  148.                 self._rrd_ds_val["name"] = self._rrd_chars
  149.                 self._rrd_parse_state = 4              # In <rrd><ds>
  150.         elif self._rrd_parse_state == 5:
  151.             if self._rrd_level == 2:
  152.                 self._rrd_parse_state = 1              # In <rrd>
  153.         elif self._rrd_parse_state == 4:
  154.             if self._rrd_level == 2:
  155.                 self.rrd_ds.append(self._rrd_ds_val)
  156.                 self._rrd_parse_state = 1              # In <rrd>
  157.         elif self._rrd_parse_state == 3:
  158.             if self._rrd_level == 2:
  159.                 self.rrd_step = long(self._rrd_chars)
  160.                 self._rrd_parse_state = 1              # In <rrd>
  161.         elif self._rrd_parse_state == 2:
  162.             if self._rrd_level == 2:
  163.                 self.rrd_last_update = long(self._rrd_chars)
  164.                 self._rrd_parse_state = 1              # In <rrd>
  165.         elif self._rrd_parse_state == 1:
  166.             if self._rrd_level == 1:
  167.                 self._rrd_parse_state = 0              # At top
  168.                 
  169.         if self._rrd_level:
  170.             self._rrd_level = self._rrd_level - 1
  171.     def characters(self, content):
  172.         if self._rrd_capture:
  173.             self._rrd_chars = self._rrd_chars + content.strip()
  174. def _make_numeric(value):
  175.     try:
  176.         value = float(value)
  177.     except:
  178.         value = ""
  179.     return value
  180. def simperf_host_xml_fixup(parser, filter_start_time = None, filter_end_time = None):
  181.     # Fixup for GAUGE fields that are really COUNTS.  They
  182.     # were forced to GAUGE to try to disable rrdtool's
  183.     # data interpolation/extrapolation for non-uniform time
  184.     # samples.
  185.     fixup_tags = [ "cpu_user",
  186.                    "cpu_nice",
  187.                    "cpu_sys",
  188.                    "cpu_idle",
  189.                    "cpu_waitio",
  190.                    "cpu_intr",
  191.                    # "file_active",
  192.                    # "file_free",
  193.                    # "inode_active",
  194.                    # "inode_free",
  195.                    "netif_in_kb",
  196.                    "netif_in_pkts",
  197.                    "netif_in_errs",
  198.                    "netif_in_drop",
  199.                    "netif_out_kb",
  200.                    "netif_out_pkts",
  201.                    "netif_out_errs",
  202.                    "netif_out_drop",
  203.                    "vm_page_in",
  204.                    "vm_page_out",
  205.                    "vm_swap_in",
  206.                    "vm_swap_out",
  207.                    #"vm_mem_total",
  208.                    #"vm_mem_used",
  209.                    #"vm_mem_active",
  210.                    #"vm_mem_inactive",
  211.                    #"vm_mem_free",
  212.                    #"vm_mem_buffer",
  213.                    #"vm_swap_cache",
  214.                    #"vm_swap_total",
  215.                    #"vm_swap_used",
  216.                    #"vm_swap_free",
  217.                    "cpu_interrupts",
  218.                    "cpu_switches",
  219.                    "cpu_forks" ]
  220.     col_count = len(parser.rrd_ds)
  221.     row_count = len(parser.rrd_records)
  222.     # Process the last row separately, just to make all values numeric.
  223.     for j in range(col_count):
  224.         parser.rrd_records[row_count - 1][j] = _make_numeric(parser.rrd_records[row_count - 1][j])
  225.     # Process all other row/columns.
  226.     last_different_row = row_count - 1
  227.     current_row = row_count - 2
  228.     while current_row >= 0:
  229.         # Check for a different value than the previous row.  If everything is the same
  230.         # then this is probably just a filler/bogus entry.
  231.         is_different = False
  232.         for j in range(col_count):
  233.             parser.rrd_records[current_row][j] = _make_numeric(parser.rrd_records[current_row][j])
  234.             if parser.rrd_records[current_row][j] != parser.rrd_records[last_different_row][j]:
  235.                 # We're good.  This is a different row.
  236.                 is_different = True
  237.         if not is_different:
  238.             # This is a filler/bogus entry.  Just ignore it.
  239.             for j in range(col_count):
  240.                 parser.rrd_records[current_row][j] = float('nan')
  241.         else:
  242.             # Some tags need to be converted into deltas.
  243.             for j in range(col_count):
  244.                 if parser.rrd_ds[j]["name"] in fixup_tags:
  245.                     parser.rrd_records[last_different_row][j] = 
  246.                         parser.rrd_records[last_different_row][j] - parser.rrd_records[current_row][j]
  247.             last_different_row = current_row
  248.         current_row -= 1
  249.     # Set fixup_tags in the first row to 'nan' since they aren't useful anymore.
  250.     for j in range(col_count):
  251.         if parser.rrd_ds[j]["name"] in fixup_tags:
  252.             parser.rrd_records[0][j] = float('nan')
  253.     # Add a timestamp to each row and to the catalog.  Format and name
  254.     # chosen to match other simulator logging (hopefully).
  255.     start_time = parser.rrd_last_update - (parser.rrd_step * (row_count - 1))
  256.     # Build a filtered list of rrd_records if we are limited to a time range.
  257.     filter_records = False
  258.     if filter_start_time is not None or filter_end_time is not None:
  259.         filter_records = True
  260.         filtered_rrd_records = []
  261.         if filter_start_time is None:
  262.             filter_start_time = start_time * 1000
  263.         if filter_end_time is None:
  264.             filter_end_time = parser.rrd_last_update * 1000
  265.         
  266.     for i in range(row_count):
  267.         record_timestamp = (start_time + (i * parser.rrd_step)) * 1000
  268.         parser.rrd_records[i].insert(0, record_timestamp)
  269.         if filter_records:
  270.             if filter_start_time <= record_timestamp and record_timestamp <= filter_end_time:
  271.                 filtered_rrd_records.append(parser.rrd_records[i])
  272.     if filter_records:
  273.         parser.rrd_records = filtered_rrd_records
  274.     parser.rrd_ds.insert(0, {"type": "GAUGE", "name": "javascript_timestamp"})
  275. def main(argv=None):
  276.     opts, args = getopt.getopt(sys.argv[1:], "i:o:h", ["in=", "out=", "help"])
  277.     input_file = sys.stdin
  278.     output_file = sys.stdout
  279.     for o, a in opts:
  280.         if o in ("-i", "--in"):
  281.             input_file = open(a, 'r')
  282.         if o in ("-o", "--out"):
  283.             output_file = open(a, 'w')
  284.         if o in ("-h", "--help"):
  285.             usage()
  286.             sys.exit(0)
  287.     # Using the SAX parser as it is at least 4X faster and far, far
  288.     # smaller on this dataset than the DOM-based interface in xml.dom.minidom.
  289.     # With SAX and a 5.4MB xml file, this requires about seven seconds of
  290.     # wall-clock time and 32MB VSZ.  With the DOM interface, about 22 seconds
  291.     # and over 270MB VSZ.
  292.     handler = SimPerfHostXMLParser()
  293.     sax.parse(input_file, handler)
  294.     if input_file != sys.stdin:
  295.         input_file.close()
  296.     # Various format fixups:  string-to-num, gauge-to-counts, add
  297.     # a time stamp, etc.
  298.     simperf_host_xml_fixup(handler)
  299.     
  300.     # Create JSONable dict with interesting data and format/print it
  301.     print >>output_file, simplejson.dumps({ "step" : handler.rrd_step,
  302.                                             "lastupdate": handler.rrd_last_update * 1000,
  303.                                             "ds" : handler.rrd_ds,
  304.                                             "database" : handler.rrd_records })
  305.     return 0
# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())