simperf_host_xml_parser.py
上传用户:king477883
上传日期:2021-03-01
资源大小:9553k
文件大小:14k
源码类别:
游戏引擎
开发平台:
C++ Builder
- #!/usr/bin/env python
- """
- @file simperf_host_xml_parser.py
- @brief Digest collector's XML dump and convert to simple dict/list structure
- $LicenseInfo:firstyear=2008&license=mit$
- Copyright (c) 2008-2010, Linden Research, Inc.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- $/LicenseInfo$
- """
- import sys, os, getopt, time
- import simplejson
- from xml import sax
- def usage():
- print "Usage:"
- print sys.argv[0] + " [options]"
- print " Convert RRD's XML dump to JSON. Script to convert the simperf_host_collector-"
- print " generated RRD dump into JSON. Steps include converting selected named"
- print " fields from GAUGE type to COUNTER type by computing delta with preceding"
- print " values. Top-level named fields are:"
- print " lastupdate Time (javascript timestamp) of last data sample"
- print " step Time in seconds between samples"
- print " ds Data specification (name/type) for each column"
- print " database Table of data samples, one time step per row"
- print "Options:"
- print " -i, --in Input settings filename. (Default: stdin)"
- print " -o, --out Output settings filename. (Default: stdout)"
- print " -h, --help Print this message and exit."
- print "Example: %s -i rrddump.xml -o rrddump.json" % sys.argv[0]
- print "Interfaces:"
- print " class SimPerfHostXMLParser() # SAX content handler"
- print " def simperf_host_xml_fixup(parser) # post-parse value fixup"
- class SimPerfHostXMLParser(sax.handler.ContentHandler):
- def __init__(self):
- pass
- def startDocument(self):
- self.rrd_last_update = 0 # public
- self.rrd_step = 0 # public
- self.rrd_ds = [] # public
- self.rrd_records = [] # public
- self._rrd_level = 0
- self._rrd_parse_state = 0
- self._rrd_chars = ""
- self._rrd_capture = False
- self._rrd_ds_val = {}
- self._rrd_data_row = []
- self._rrd_data_row_has_nan = False
- def endDocument(self):
- pass
- # Nasty little ad-hoc state machine to extract the elements that are
- # necessary from the 'rrdtool dump' XML output. The same element
- # name '<ds>' is used for two different data sets so we need to pay
- # some attention to the actual structure to get the ones we want
- # and ignore the ones we don't.
- def startElement(self, name, attrs):
- self._rrd_level = self._rrd_level + 1
- self._rrd_capture = False
- if self._rrd_level == 1:
- if name == "rrd" and self._rrd_parse_state == 0:
- self._rrd_parse_state = 1 # In <rrd>
- self._rrd_capture = True
- self._rrd_chars = ""
- elif self._rrd_level == 2:
- if self._rrd_parse_state == 1:
- if name == "lastupdate":
- self._rrd_parse_state = 2 # In <rrd><lastupdate>
- self._rrd_capture = True
- self._rrd_chars = ""
- elif name == "step":
- self._rrd_parse_state = 3 # In <rrd><step>
- self._rrd_capture = True
- self._rrd_chars = ""
- elif name == "ds":
- self._rrd_parse_state = 4 # In <rrd><ds>
- self._rrd_ds_val = {}
- self._rrd_chars = ""
- elif name == "rra":
- self._rrd_parse_state = 5 # In <rrd><rra>
- elif self._rrd_level == 3:
- if self._rrd_parse_state == 4:
- if name == "name":
- self._rrd_parse_state = 6 # In <rrd><ds><name>
- self._rrd_capture = True
- self._rrd_chars = ""
- elif name == "type":
- self._rrd_parse_state = 7 # In <rrd><ds><type>
- self._rrd_capture = True
- self._rrd_chars = ""
- elif self._rrd_parse_state == 5:
- if name == "database":
- self._rrd_parse_state = 8 # In <rrd><rra><database>
- elif self._rrd_level == 4:
- if self._rrd_parse_state == 8:
- if name == "row":
- self._rrd_parse_state = 9 # In <rrd><rra><database><row>
- self._rrd_data_row = []
- self._rrd_data_row_has_nan = False
- elif self._rrd_level == 5:
- if self._rrd_parse_state == 9:
- if name == "v":
- self._rrd_parse_state = 10 # In <rrd><rra><database><row><v>
- self._rrd_capture = True
- self._rrd_chars = ""
- def endElement(self, name):
- self._rrd_capture = False
- if self._rrd_parse_state == 10:
- self._rrd_capture = self._rrd_level == 6
- if self._rrd_level == 5:
- if self._rrd_chars == "NaN":
- self._rrd_data_row_has_nan = True
- else:
- self._rrd_data_row.append(self._rrd_chars)
- self._rrd_parse_state = 9 # In <rrd><rra><database><row>
- elif self._rrd_parse_state == 9:
- if self._rrd_level == 4:
- if not self._rrd_data_row_has_nan:
- self.rrd_records.append(self._rrd_data_row)
- self._rrd_parse_state = 8 # In <rrd><rra><database>
- elif self._rrd_parse_state == 8:
- if self._rrd_level == 3:
- self._rrd_parse_state = 5 # In <rrd><rra>
- elif self._rrd_parse_state == 7:
- if self._rrd_level == 3:
- self._rrd_ds_val["type"] = self._rrd_chars
- self._rrd_parse_state = 4 # In <rrd><ds>
- elif self._rrd_parse_state == 6:
- if self._rrd_level == 3:
- self._rrd_ds_val["name"] = self._rrd_chars
- self._rrd_parse_state = 4 # In <rrd><ds>
- elif self._rrd_parse_state == 5:
- if self._rrd_level == 2:
- self._rrd_parse_state = 1 # In <rrd>
- elif self._rrd_parse_state == 4:
- if self._rrd_level == 2:
- self.rrd_ds.append(self._rrd_ds_val)
- self._rrd_parse_state = 1 # In <rrd>
- elif self._rrd_parse_state == 3:
- if self._rrd_level == 2:
- self.rrd_step = long(self._rrd_chars)
- self._rrd_parse_state = 1 # In <rrd>
- elif self._rrd_parse_state == 2:
- if self._rrd_level == 2:
- self.rrd_last_update = long(self._rrd_chars)
- self._rrd_parse_state = 1 # In <rrd>
- elif self._rrd_parse_state == 1:
- if self._rrd_level == 1:
- self._rrd_parse_state = 0 # At top
- if self._rrd_level:
- self._rrd_level = self._rrd_level - 1
- def characters(self, content):
- if self._rrd_capture:
- self._rrd_chars = self._rrd_chars + content.strip()
- def _make_numeric(value):
- try:
- value = float(value)
- except:
- value = ""
- return value
- def simperf_host_xml_fixup(parser, filter_start_time = None, filter_end_time = None):
- # Fixup for GAUGE fields that are really COUNTS. They
- # were forced to GAUGE to try to disable rrdtool's
- # data interpolation/extrapolation for non-uniform time
- # samples.
- fixup_tags = [ "cpu_user",
- "cpu_nice",
- "cpu_sys",
- "cpu_idle",
- "cpu_waitio",
- "cpu_intr",
- # "file_active",
- # "file_free",
- # "inode_active",
- # "inode_free",
- "netif_in_kb",
- "netif_in_pkts",
- "netif_in_errs",
- "netif_in_drop",
- "netif_out_kb",
- "netif_out_pkts",
- "netif_out_errs",
- "netif_out_drop",
- "vm_page_in",
- "vm_page_out",
- "vm_swap_in",
- "vm_swap_out",
- #"vm_mem_total",
- #"vm_mem_used",
- #"vm_mem_active",
- #"vm_mem_inactive",
- #"vm_mem_free",
- #"vm_mem_buffer",
- #"vm_swap_cache",
- #"vm_swap_total",
- #"vm_swap_used",
- #"vm_swap_free",
- "cpu_interrupts",
- "cpu_switches",
- "cpu_forks" ]
- col_count = len(parser.rrd_ds)
- row_count = len(parser.rrd_records)
- # Process the last row separately, just to make all values numeric.
- for j in range(col_count):
- parser.rrd_records[row_count - 1][j] = _make_numeric(parser.rrd_records[row_count - 1][j])
- # Process all other row/columns.
- last_different_row = row_count - 1
- current_row = row_count - 2
- while current_row >= 0:
- # Check for a different value than the previous row. If everything is the same
- # then this is probably just a filler/bogus entry.
- is_different = False
- for j in range(col_count):
- parser.rrd_records[current_row][j] = _make_numeric(parser.rrd_records[current_row][j])
- if parser.rrd_records[current_row][j] != parser.rrd_records[last_different_row][j]:
- # We're good. This is a different row.
- is_different = True
- if not is_different:
- # This is a filler/bogus entry. Just ignore it.
- for j in range(col_count):
- parser.rrd_records[current_row][j] = float('nan')
- else:
- # Some tags need to be converted into deltas.
- for j in range(col_count):
- if parser.rrd_ds[j]["name"] in fixup_tags:
- parser.rrd_records[last_different_row][j] =
- parser.rrd_records[last_different_row][j] - parser.rrd_records[current_row][j]
- last_different_row = current_row
- current_row -= 1
- # Set fixup_tags in the first row to 'nan' since they aren't useful anymore.
- for j in range(col_count):
- if parser.rrd_ds[j]["name"] in fixup_tags:
- parser.rrd_records[0][j] = float('nan')
- # Add a timestamp to each row and to the catalog. Format and name
- # chosen to match other simulator logging (hopefully).
- start_time = parser.rrd_last_update - (parser.rrd_step * (row_count - 1))
- # Build a filtered list of rrd_records if we are limited to a time range.
- filter_records = False
- if filter_start_time is not None or filter_end_time is not None:
- filter_records = True
- filtered_rrd_records = []
- if filter_start_time is None:
- filter_start_time = start_time * 1000
- if filter_end_time is None:
- filter_end_time = parser.rrd_last_update * 1000
- for i in range(row_count):
- record_timestamp = (start_time + (i * parser.rrd_step)) * 1000
- parser.rrd_records[i].insert(0, record_timestamp)
- if filter_records:
- if filter_start_time <= record_timestamp and record_timestamp <= filter_end_time:
- filtered_rrd_records.append(parser.rrd_records[i])
- if filter_records:
- parser.rrd_records = filtered_rrd_records
- parser.rrd_ds.insert(0, {"type": "GAUGE", "name": "javascript_timestamp"})
- def main(argv=None):
- opts, args = getopt.getopt(sys.argv[1:], "i:o:h", ["in=", "out=", "help"])
- input_file = sys.stdin
- output_file = sys.stdout
- for o, a in opts:
- if o in ("-i", "--in"):
- input_file = open(a, 'r')
- if o in ("-o", "--out"):
- output_file = open(a, 'w')
- if o in ("-h", "--help"):
- usage()
- sys.exit(0)
- # Using the SAX parser as it is at least 4X faster and far, far
- # smaller on this dataset than the DOM-based interface in xml.dom.minidom.
- # With SAX and a 5.4MB xml file, this requires about seven seconds of
- # wall-clock time and 32MB VSZ. With the DOM interface, about 22 seconds
- # and over 270MB VSZ.
- handler = SimPerfHostXMLParser()
- sax.parse(input_file, handler)
- if input_file != sys.stdin:
- input_file.close()
- # Various format fixups: string-to-num, gauge-to-counts, add
- # a time stamp, etc.
- simperf_host_xml_fixup(handler)
- # Create JSONable dict with interesting data and format/print it
- print >>output_file, simplejson.dumps({ "step" : handler.rrd_step,
- "lastupdate": handler.rrd_last_update * 1000,
- "ds" : handler.rrd_ds,
- "database" : handler.rrd_records })
- return 0
- if __name__ == "__main__":
- sys.exit(main())